In [13]:
import os
import numpy as np
import random
from environment import Environment
import copy
import configs
import pickle
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
import concurrent.futures
import ollama
from collections import defaultdict
from collections import Counter

In [14]:
# Load the pickled 'warehouse' test cases for 32 agents into `tests`.
# NOTE(review): pickle.load can execute arbitrary code while unpickling;
# only load test-set files that come from a trusted source.
with open('./test_set/{}_{}agents.pth'.format('warehouse', 32), 'rb') as f:
    tests = pickle.load(f)

In [15]:
# Union-find helpers used to group agents into negotiation sets.
def find(parent, i):
    """Return the root representative of node ``i``.

    Args:
        parent: Sequence where ``parent[k]`` is the parent of node ``k``; a
            root is its own parent.
        i: Node whose root is wanted.

    Returns:
        The node reached by following parent links from ``i`` until a
        self-parented node is found. ``parent`` is not modified.
    """
    node = i
    while parent[node] != node:
        node = parent[node]
    return node

def union(parent, rank, x, y):
    """Merge the sets containing ``x`` and ``y`` (union by rank).

    Args:
        parent: Parent-link list, updated in place.
        rank: Per-root rank list, updated in place when two roots of equal
            rank are merged.
        x: A node of the first set.
        y: A node of the second set.
    """
    root_x = find(parent, x)
    root_y = find(parent, y)

    # Nothing to do when both nodes already share a root.
    if root_x == root_y:
        return

    if rank[root_x] < rank[root_y]:
        # Hang the shallower tree (x's) under the deeper one.
        parent[root_x] = root_y
        return

    # x's tree is at least as deep: it becomes the new root.
    parent[root_y] = root_x
    if rank[root_x] == rank[root_y]:
        rank[root_x] += 1

def merge_sets(lists):
    """Merge overlapping subsets into their connected groups.

    Two subsets end up in the same group when they share an element, directly
    or through a chain of other subsets.

    Args:
        lists: Sequence of sliceable sequences of hashable elements. Empty
            subsets are ignored.

    Returns:
        list[list]: One list per connected group, each holding the distinct
        elements of that group.
    """
    # Map every element to the index of the last subset that contains it.
    element_to_index = {}
    for i, subset in enumerate(lists):
        for element in subset:
            element_to_index[element] = i

    parent = list(range(len(lists)))
    rank = [0] * len(lists)

    for subset in lists:
        # An empty subset has no first element and connects nothing.
        if not subset:
            continue
        first_index = element_to_index[subset[0]]
        for element in subset[1:]:
            # union() resolves the roots itself, so raw indices are enough.
            union(parent, rank, first_index, element_to_index[element])

    # Collect the elements under the root of the subset they were mapped to.
    new_sets = {}
    for element, index in element_to_index.items():
        new_sets.setdefault(find(parent, index), set()).add(element)

    return [list(s) for s in new_sets.values()]


def determine_direction(x, y, x_finish, y_finish):
    """Name the compass direction from ``(x, y)`` towards the finish point.

    Positive x is east and positive y is north. The finish coordinates are
    converted with ``int()`` before the comparison.

    Returns:
        str: One of the eight compass names, or ``"same location"`` when
        neither coordinate differs.
    """
    delta_x = int(x_finish) - x
    delta_y = int(y_finish) - y

    # Build the name from a vertical and a horizontal part.
    if delta_y > 0:
        vertical = "north"
    elif delta_y < 0:
        vertical = "south"
    else:
        vertical = ""

    if delta_x > 0:
        horizontal = "east"
    elif delta_x < 0:
        horizontal = "west"
    else:
        horizontal = ""

    return (vertical + horizontal) or "same location"
    
def get_direction(center, max_position):
    """Name the compass direction of ``max_position`` as seen from ``center``.

    Both arguments are (row, column) pairs. A smaller row index is north and
    a smaller column index is west.

    Returns:
        str | None: One of the eight compass names, or ``None`` when the two
        positions coincide.
    """
    row_diff = center[0] - max_position[0]
    col_diff = center[1] - max_position[1]

    if row_diff > 0:
        vertical = "north"
    elif row_diff < 0:
        vertical = "south"
    else:
        vertical = ""

    if col_diff > 0:
        horizontal = "west"
    elif col_diff < 0:
        horizontal = "east"
    else:
        horizontal = ""

    # An empty name means zero offset on both axes.
    return (vertical + horizontal) or None


# Direction names mapped to the integer action codes placed in the per-agent
# step list that is handed to Environment.step().
directiondict = dict(stay=0, north=1, south=2, west=3, east=4)

In [16]:
# Prompts
class gpt4pathfinding:
    """Asks an Ollama-served LLM which direction an agent should move next.

    Both methods build a fixed prompt, send it through ``ollama.generate`` and
    return the model's reply lower-cased with surrounding whitespace and
    punctuation removed, so replies such as ``"South.\\n"`` compare equal to
    ``"south"``. Replies are not validated here; callers decide what to do
    with an unknown word.
    """

    # Name of the Ollama model queried by both prompts.
    model = "llama3"

    # Characters trimmed from both ends of a model reply.
    _REPLY_PADDING = " \t\r\n.,;:!?\"'`*"

    def _clean_reply(self, text):
        """Lower-case ``text`` and trim surrounding whitespace/punctuation."""
        return text.lower().strip(self._REPLY_PADDING)

    def heuristic_navigate(self, agent, heuristic_direction, east, west, north, south, high_priority_agent):
        """Ask the model for a move that follows the heuristic but avoids conflicts.

        Args:
            agent: Identifier returned unchanged alongside the answer.
            heuristic_direction: Direction(s) reported to shorten the path;
                interpolated into the prompt as-is (a list is rendered with
                its brackets).
            east, west, north, south: Sentences describing each neighbouring cell.
            high_priority_agent: Text describing nearby potentially
                conflicting agents.

        Returns:
            tuple: ``(agent, reply)`` where ``reply`` is the cleaned model answer.
        """
        response = ollama.generate(
            model=self.model,
            prompt=f"""
                <s>[INST] <<SYS>>
                You're a directional path finder - you know which path to take to reach your goal.
                <</SYS>>
                You can only make one move at a time.
                Your actions have 4 options: east, west, south, north.
                
                {east}
                {west}
                {south}
                {north}

                The locations of other potentially conflicting agents are as follows.
                {high_priority_agent}

                Your map says going {heuristic_direction} will shorten the distance to the destination.
                Of the directions presented, choose the one that minimizes conflict with other potentially conflicting agents.
                
                You decide which direction to move next.

                Below are examples of answers.
                Answer with only one word.

                EXAMPLE 1

                south

                EXAMPLE 2

                north
                [/INST]
                """
        )
        return agent, self._clean_reply(response['response'])

    def give_way(self, east, west, north, south, prioragentdirection, goaldirection):
        """Ask the model for a move that yields to the prior (higher-priority) agent.

        Args:
            east, west, north, south: Sentences describing each neighbouring cell.
            prioragentdirection: Where the prior agent is relative to this agent.
            goaldirection: Accepted for interface compatibility; it is not
                used in the prompt.

        Returns:
            str: The cleaned model answer.
        """
        response = ollama.generate(
            model=self.model,
            prompt=f"""
                <s>[INST] <<SYS>>
                You're a directional path finder - you know which path to take to reach your goal.
                <</SYS>>

                The position of the prior path finder is {prioragentdirection} relative to you.

                You MUST avoid collisions with prior path finder.

                Your actions have five options: east, west, south, north, and stay.

                {east}
                {west}
                {south}
                {north}

                It is recommended to move away from the prior agent.
                It is recommended to going to the 90-degree turn rather than the other side of the prior pathfinder, but you can ignore this instruction depending on your situation.

                You decide which direction to move next.

                Below are examples of answers.
                Answer with only one word.

                EXAMPLE 1

                south

                EXAMPLE 2

                north
                """
        )
        return self._clean_reply(response['response'])

# Shared instance used by the simulation loop.
pathfinder = gpt4pathfinding()

In [17]:
def run_one_example(example, max_steps):
  """Simulate one multi-agent path-finding test case turn by turn.

  Each turn, an agent with no other agent inside its 5x5 neighbourhood moves
  along a randomly chosen heuristic direction. Agents that see each other are
  grouped with ``merge_sets``. Every group asks the LLM (``pathfinder``) for
  moves and trial-runs them on a deep copy of the environment. If the trial
  reports a collision, the highest-numbered agent of the group keeps its move
  and the others ask the LLM for a give-way move.

  Args:
    example: Test case indexed as ``example[0]`` (map), ``example[1]`` (one
      entry per agent) and ``example[2]`` (per-agent goal positions). The
      three parts are passed, in that order, to ``Environment.load``.
    max_steps: Upper bound on the number of simulated turns.

  Returns:
    list[int]: For each agent, the number of turns that began with the agent
    away from its goal.
  """

  num_agents = len(example[1])
  map_width = len(example[0]) - 1

  # Reward value whose presence in a trial step's rewards is treated as a collision.
  # NOTE(review): presumably the collision penalty configured for Environment -- confirm.
  collision_reward = -0.5

  env = Environment()
  env.load(np.array(example[0]), np.array(example[1]), np.array(example[2]))

  env.initialize_perceived_maps()
  env.get_perceived_heuri_map()

  # NOTE(review): the helpers and the turn loop below read env.observe() once
  # and reuse the result, assuming observe() is a side-effect-free snapshot of
  # the current environment state -- confirm against Environment.

  def describe_walls(a_obs):
    """Return the (east, west, north, south) prompt sentences for the cells next to the agent."""
    def sentence(name, cell):
      # A 0 in this observation channel is reported as a free cell, anything else as a wall.
      if cell == 0:
        return f"To your {name} lies a path."
      return f"Your {name} is blocked by a wall."

    # The agent sits at (4, 4) of its observation window.
    return (
      sentence('east', a_obs[4][5]),
      sentence('west', a_obs[4][3]),
      sentence('north', a_obs[3][4]),
      sentence('south', a_obs[5][4]),
    )

  def parameter_navigate(i):
    """Build the arguments of ``pathfinder.heuristic_navigate`` for agent ``i``."""
    agent_obs = env.observe()[0][i]

    east, west, north, south = describe_walls(agent_obs[1])

    # Channels 6-9 are read at the agent's own cell; a 1 marks north/south/west/east
    # as a direction the heuristic map recommends.
    heuristic_direction = [
      name
      for name, channel in (('north', 6), ('south', 7), ('west', 8), ('east', 9))
      if agent_obs[channel][4, 4] == 1
    ]

    # Count, per compass direction, the agents in the 5x5 neighbourhood whose
    # map value is larger than this agent's own (i + 1).
    agent_map = agent_obs[0][2:7, 2:7]
    positions = np.argwhere(agent_map > i+1)
    center = (2, 2)
    direction_counts = Counter(get_direction(center, pos) for pos in positions)
    direction_messages = [f"There are {count} obstacles to the {direction}." for direction, count in direction_counts.items()]
    high_priority_agent = '\n'.join(direction_messages)

    return heuristic_direction, east, west, north, south, high_priority_agent

  def parameter_give_way(i):
    """Build the arguments of ``pathfinder.give_way`` for agent ``i``."""
    observe = env.observe()
    agent_obs = observe[0][i]

    # Convert to x/y with y growing upwards.
    x = observe[1][i][1]
    y = map_width-observe[1][i][0]
    x_finish = example[2][i][1]
    y_finish = map_width-example[2][i][0]
    goaldirection = determine_direction(x, y, x_finish, y_finish)

    east, west, north, south = describe_walls(agent_obs[1])

    # Direction of the prior agent: the largest value in the 5x5 neighbourhood.
    agent_map = agent_obs[0][2:7, 2:7]
    center = (2, 2)
    max_position = np.unravel_index(np.argmax(agent_map), agent_map.shape)
    # get_direction() yields None for a zero offset; the original inline chain
    # left the variable unbound in that case.
    prioragentdirection = get_direction(center, max_position) or "same location"

    return east, west, north, south, prioragentdirection, goaldirection

  # Per-agent count of turns spent away from the goal.
  step_list = [0 for _ in range(num_agents)]
  steps = 0

  # turn
  while steps < max_steps:

    for i in range(num_agents):
      if not np.array_equal(env.agents_pos[i], env.goals_pos[i]):
        step_list[i] += 1

    # Action chosen for every agent this turn; defaults to 'stay'.
    step = [directiondict['stay'] for _ in range(num_agents)]

    # For each agent that sees someone in its 5x5 neighbourhood: the agents it sees plus itself.
    agent_observations = env.observe()[0]
    FOV_agents = []
    for i in range(num_agents):
      window = agent_observations[i][0][2:7, 2:7]
      if np.any(window):
        # Map values are agent number + 1.
        visible = [element - 1 for element in window[window != 0].tolist()]
        visible.append(i)
        FOV_agents.append(visible)

    # Merge overlapping views into connected groups.
    connected_sets = merge_sets(FOV_agents)

    # Every agent that belongs to some group.
    deadlocked_agents = {item for sublist in connected_sets for item in sublist}

    for i in range(num_agents):

      # Agents outside every group navigate independently.
      if i in deadlocked_agents:
        continue

      if np.array_equal(env.agents_pos[i], env.goals_pos[i]):
        step[i] = directiondict['stay']
        continue

      heuristic_direction = parameter_navigate(i)[0]
      # No recommended direction means the agent waits this turn.
      direction = random.choice(heuristic_direction) if heuristic_direction else 'stay'

      step[i] = directiondict[direction]

    # Agents that belong to a group.
    for connected_set in connected_sets:

      results = []

      for i in connected_set:  # i is an agent number

        if np.array_equal(env.agents_pos[i], env.goals_pos[i]):
          results.append('stay')
          continue

        # Ask the LLM for the move each agent would originally plan.
        heuristic_direction, east, west, north, south, high_priority_agent = parameter_navigate(i)

        _, direction = pathfinder.heuristic_navigate(i, heuristic_direction, east, west, north, south, high_priority_agent)

        if direction not in directiondict:
          direction = 'stay'

        results.append(direction)

      # Trial-run the group's moves on a copy; agents outside the group stay put in the trial.
      env_copy = copy.deepcopy(env)
      planned_step = [directiondict['stay'] for _ in range(num_agents)]
      for agent_idx, planned in zip(connected_set, results):
        planned_step[agent_idx] = directiondict[planned]
      collision = collision_reward in env_copy.step(planned_step)[1]

      if not collision:
        for agent_idx, planned in zip(connected_set, results):
          step[agent_idx] = directiondict[planned]
      else:
        # The prior agent (highest number in the group) keeps its planned move.
        prior_agent_number = max(connected_set)
        prior_agent_idx = connected_set.index(prior_agent_number)
        step[prior_agent_number] = directiondict[results[prior_agent_idx]]

        # Everyone else asks for a give-way move.
        for i in connected_set:
          if i == prior_agent_number:
            continue

          east, west, north, south, prioragentdirection, goaldirection = parameter_give_way(i)

          direction = pathfinder.give_way(east, west, north, south, prioragentdirection, goaldirection)

          if direction not in directiondict:
            direction = 'stay'

          step[i] = directiondict[direction]

    _, _, done, _ = env.step(step)

    env.update_perceived_maps()
    env.get_perceived_heuri_map()

    if done:
      break
    steps += 1

  return step_list

In [18]:
def simulate_all_tests(tests, max_steps):
    """Run ``run_one_example`` on every test case, printing each result.

    Args:
        tests: Iterable of test cases.
        max_steps: Turn limit forwarded to ``run_one_example``.

    Returns:
        list: The per-agent step lists, one entry per test case, in order.
    """
    collected = []
    for case in tests:
        collected.append(run_one_example(case, max_steps))
        # Print as soon as a case finishes so progress is visible during long runs.
        print(collected[-1])
    return collected

In [11]:
# Simulate test cases 0-9 of the currently loaded `tests` with a 512-step cap.
all_step_lists = simulate_all_tests(tests[0:10], 512)

[99, 147, 9, 18, 104, 108, 47, 95, 288, 152, 276, 169, 113, 157, 20, 100, 81, 79, 56, 150, 40, 132, 218, 145, 162, 80, 75, 143, 90, 148, 83, 44]
[231, 30, 74, 121, 25, 43, 139, 29, 77, 93, 67, 101, 142, 150, 90, 231, 22, 100, 16, 29, 66, 71, 22, 18, 8, 147, 93, 137, 128, 120, 63, 30]
[98, 72, 60, 17, 91, 54, 44, 149, 84, 32, 51, 156, 93, 183, 60, 158, 72, 168, 69, 34, 15, 124, 95, 154, 112, 190, 73, 183, 47, 139, 65, 17]
[89, 128, 58, 111, 39, 196, 79, 66, 46, 91, 24, 21, 136, 9, 79, 55, 86, 139, 87, 24, 154, 134, 125, 207, 39, 127, 130, 87, 21, 74, 21, 26]
[28, 192, 199, 46, 137, 29, 137, 73, 18, 65, 60, 52, 27, 178, 184, 132, 44, 189, 79, 54, 43, 22, 34, 128, 162, 33, 32, 10, 61, 162, 143, 100]
[85, 97, 146, 106, 169, 55, 177, 126, 94, 55, 142, 121, 53, 143, 86, 98, 86, 152, 71, 55, 132, 135, 22, 27, 43, 42, 19, 6, 76, 80, 49, 41]
[58, 43, 46, 132, 38, 167, 157, 53, 30, 234, 96, 79, 164, 9, 13, 58, 123, 149, 103, 103, 37, 56, 43, 171, 43, 151, 32, 60, 149, 98, 117, 106]
[265, 141, 33

In [12]:
# Simulate test cases 10-19 of the currently loaded `tests` with a 512-step cap.
# This overwrites `all_step_lists` from the previous cell.
all_step_lists = simulate_all_tests(tests[10:20], 512)

[133, 67, 33, 73, 26, 101, 38, 151, 132, 122, 86, 80, 33, 43, 75, 55, 91, 170, 142, 132, 112, 27, 45, 82, 95, 38, 76, 77, 45, 96, 31, 61]
[76, 117, 154, 178, 59, 88, 74, 54, 146, 35, 21, 35, 96, 98, 148, 65, 81, 72, 43, 49, 62, 133, 107, 159, 45, 110, 76, 44, 83, 143, 112, 114]
[81, 109, 84, 265, 205, 39, 155, 17, 88, 145, 77, 70, 143, 116, 108, 83, 12, 66, 33, 123, 139, 78, 95, 25, 130, 50, 135, 137, 53, 162, 56, 121]
[111, 159, 113, 67, 87, 153, 172, 66, 120, 27, 98, 101, 80, 16, 175, 98, 94, 95, 121, 158, 173, 156, 128, 56, 181, 117, 22, 87, 158, 31, 10, 133]
[146, 144, 155, 63, 88, 28, 39, 135, 88, 45, 41, 110, 197, 27, 163, 51, 142, 101, 86, 164, 95, 136, 72, 96, 143, 116, 55, 136, 94, 23, 59, 52]
[57, 69, 143, 53, 48, 111, 28, 35, 215, 156, 55, 20, 155, 17, 140, 76, 54, 140, 53, 10, 155, 195, 89, 51, 140, 132, 107, 30, 134, 46, 107, 56]
[56, 39, 23, 44, 125, 138, 112, 81, 187, 56, 133, 33, 170, 36, 103, 39, 42, 146, 170, 123, 66, 31, 50, 79, 156, 131, 32, 108, 31, 79, 131, 65]
[9

In [13]:
# 'warehouse' test set, 4 agents: load it and simulate the first 20 cases (512-step cap).
with open('./test_set/{}_{}agents.pth'.format('warehouse', 4), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 512)

[29, 118, 84, 6]
[89, 12, 40, 86]
[58, 55, 101, 94]
[104, 19, 134, 136]
[55, 34, 145, 107]
[80, 73, 314, 186]
[88, 147, 37, 32]
[92, 128, 115, 50]
[99, 91, 105, 59]
[47, 69, 34, 153]
[103, 29, 18, 43]
[68, 44, 78, 95]
[74, 28, 121, 14]
[155, 13, 124, 50]
[210, 121, 85, 34]
[53, 7, 94, 20]
[131, 85, 14, 130]
[33, 107, 76, 175]
[192, 191, 131, 84]
[50, 108, 93, 34]


In [14]:
# 'warehouse' test set, 8 agents: load it and simulate the first 20 cases (512-step cap).
with open('./test_set/{}_{}agents.pth'.format('warehouse', 8), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 512)

[99, 175, 41, 46, 138, 151, 29, 71]
[22, 65, 25, 95, 192, 62, 11, 80]
[46, 63, 126, 75, 56, 123, 47, 162]
[117, 104, 163, 91, 31, 26, 23, 38]
[79, 144, 61, 59, 155, 88, 18, 29]
[62, 99, 85, 40, 176, 137, 36, 19]
[181, 75, 41, 124, 91, 156, 22, 138]
[21, 17, 87, 49, 107, 146, 49, 93]
[80, 78, 49, 84, 74, 7, 90, 42]
[44, 49, 98, 56, 81, 72, 19, 126]
[81, 79, 50, 71, 16, 130, 48, 139]
[181, 24, 115, 46, 59, 36, 90, 53]
[42, 26, 67, 96, 30, 25, 85, 162]
[31, 22, 172, 107, 30, 144, 119, 96]
[201, 239, 189, 149, 68, 17, 42, 134]
[62, 21, 48, 128, 18, 4, 47, 165]
[59, 63, 54, 41, 45, 167, 20, 31]
[142, 30, 37, 118, 62, 22, 70, 139]
[18, 28, 146, 95, 146, 123, 27, 107]
[48, 26, 41, 64, 48, 20, 33, 175]


In [15]:
# 'warehouse' test set, 16 agents: load it and simulate the first 20 cases (512-step cap).
with open('./test_set/{}_{}agents.pth'.format('warehouse', 16), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 512)

[66, 120, 174, 36, 151, 45, 181, 27, 151, 36, 75, 62, 49, 104, 135, 40]
[44, 124, 157, 56, 71, 35, 68, 163, 145, 100, 116, 27, 130, 15, 41, 67]
[111, 80, 86, 145, 58, 41, 22, 79, 136, 111, 102, 72, 93, 108, 123, 106]
[23, 38, 54, 116, 71, 45, 86, 54, 131, 112, 145, 66, 102, 145, 62, 21]
[94, 47, 57, 40, 35, 31, 59, 41, 105, 54, 5, 142, 45, 147, 4, 46]
[4, 62, 75, 167, 71, 37, 204, 53, 58, 79, 34, 69, 134, 124, 136, 56]
[104, 162, 78, 41, 135, 70, 31, 44, 66, 31, 150, 76, 33, 130, 122, 24]
[154, 77, 103, 123, 139, 34, 108, 52, 52, 39, 40, 41, 149, 118, 80, 58]
[163, 150, 152, 123, 81, 148, 132, 73, 187, 16, 62, 49, 61, 37, 55, 135]
[130, 154, 113, 117, 111, 51, 46, 141, 135, 26, 36, 74, 53, 103, 146, 82]
[50, 35, 28, 62, 117, 36, 43, 144, 94, 102, 62, 70, 34, 61, 58, 75]
[79, 91, 36, 13, 72, 69, 53, 111, 74, 12, 23, 73, 112, 156, 73, 56]
[52, 162, 55, 152, 30, 103, 35, 126, 42, 136, 14, 144, 116, 70, 74, 38]
[104, 70, 51, 11, 9, 103, 26, 39, 63, 58, 154, 158, 180, 58, 202, 63]
[74, 117,

In [16]:
# 'warehouse' test set, 64 agents: load it and simulate the first 20 cases (512-step cap).
with open('./test_set/{}_{}agents.pth'.format('warehouse', 64), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 512)

[106, 102, 291, 86, 512, 138, 186, 118, 307, 34, 12, 106, 152, 159, 512, 35, 134, 88, 512, 386, 512, 148, 512, 296, 170, 92, 87, 123, 54, 150, 101, 56, 305, 12, 16, 15, 81, 95, 270, 79, 150, 73, 206, 86, 139, 135, 47, 65, 105, 188, 52, 86, 152, 54, 77, 30, 96, 169, 81, 57, 50, 59, 67, 85]
[160, 104, 99, 80, 193, 164, 162, 94, 96, 20, 112, 89, 48, 102, 214, 22, 128, 118, 81, 183, 37, 123, 172, 83, 138, 87, 47, 153, 107, 88, 29, 150, 160, 271, 53, 199, 100, 42, 16, 74, 154, 156, 63, 37, 14, 20, 58, 27, 125, 75, 173, 112, 71, 94, 85, 25, 127, 87, 76, 19, 77, 66, 82, 29]
[16, 168, 190, 76, 50, 183, 512, 209, 118, 155, 217, 4, 23, 185, 25, 202, 46, 44, 120, 512, 59, 187, 145, 95, 15, 228, 32, 12, 37, 109, 26, 8, 24, 55, 160, 99, 45, 165, 166, 55, 55, 201, 202, 6, 9, 44, 58, 46, 211, 45, 170, 111, 155, 36, 18, 50, 83, 31, 141, 147, 124, 34, 78, 135]
[59, 31, 59, 94, 196, 93, 14, 79, 123, 77, 48, 103, 15, 159, 57, 193, 119, 43, 151, 221, 163, 156, 137, 87, 111, 45, 49, 19, 224, 138, 73, 217, 

In [7]:
# 'den312d' test set, 4 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('den312d', 4), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[23, 63, 77, 57]
[44, 87, 19, 20]
[72, 67, 131, 75]
[92, 41, 59, 34]
[62, 43, 34, 77]
[37, 32, 43, 107]
[69, 32, 95, 5]
[76, 95, 17, 48]
[59, 127, 33, 82]
[26, 18, 112, 51]
[29, 66, 94, 20]
[15, 42, 103, 31]
[100, 111, 51, 70]
[59, 104, 45, 44]
[76, 110, 110, 49]
[34, 94, 64, 79]
[99, 50, 72, 48]
[59, 37, 62, 52]
[59, 38, 42, 11]
[67, 81, 53, 41]


In [8]:
# 'den312d' test set, 8 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('den312d', 8), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[40, 80, 46, 25, 71, 56, 57, 104]
[50, 130, 105, 7, 46, 82, 4, 13]
[77, 81, 80, 17, 65, 28, 61, 18]
[48, 8, 64, 14, 17, 95, 97, 54]
[59, 30, 56, 74, 43, 88, 75, 41]
[60, 75, 21, 23, 13, 64, 64, 30]
[14, 100, 127, 70, 71, 54, 46, 68]
[67, 14, 81, 85, 66, 24, 15, 90]
[90, 53, 44, 22, 92, 20, 54, 81]
[90, 99, 97, 84, 41, 73, 79, 64]
[39, 36, 54, 110, 39, 65, 69, 55]
[24, 18, 9, 75, 3, 30, 75, 53]
[39, 37, 50, 47, 62, 13, 77, 95]
[38, 70, 57, 31, 14, 69, 81, 31]
[81, 74, 71, 13, 36, 85, 28, 56]
[69, 68, 83, 38, 68, 27, 88, 15]
[89, 41, 27, 65, 41, 47, 62, 12]
[90, 65, 126, 34, 78, 42, 108, 107]
[87, 51, 105, 81, 23, 74, 14, 80]
[25, 15, 106, 73, 24, 33, 92, 104]


In [9]:
# 'den312d' test set, 16 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('den312d', 16), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[90, 35, 18, 9, 93, 15, 75, 39, 30, 68, 54, 92, 66, 30, 40, 73]
[131, 65, 56, 141, 61, 78, 87, 50, 120, 111, 102, 78, 50, 72, 54, 103]
[84, 21, 27, 62, 34, 27, 47, 15, 66, 44, 54, 26, 63, 43, 94, 50]
[46, 78, 72, 41, 43, 87, 78, 112, 15, 74, 119, 94, 76, 62, 73, 45]
[53, 58, 95, 4, 43, 3, 114, 69, 60, 44, 38, 37, 40, 62, 34, 92]
[80, 76, 58, 57, 53, 69, 8, 68, 42, 48, 92, 41, 40, 32, 78, 33]
[6, 77, 82, 76, 30, 34, 72, 23, 57, 76, 64, 89, 20, 63, 6, 38]
[49, 104, 49, 8, 100, 67, 90, 6, 33, 42, 18, 41, 90, 105, 82, 78]
[14, 54, 30, 18, 81, 66, 73, 35, 71, 20, 55, 49, 5, 29, 66, 7]
[142, 31, 50, 102, 85, 55, 64, 83, 71, 81, 31, 59, 86, 83, 114, 51]
[60, 81, 44, 15, 20, 43, 40, 113, 8, 51, 17, 20, 56, 22, 13, 92]
[87, 79, 47, 89, 38, 4, 63, 56, 88, 17, 43, 58, 45, 57, 69, 87]
[100, 85, 31, 48, 54, 103, 89, 35, 65, 66, 68, 33, 50, 50, 67, 45]
[83, 101, 107, 100, 49, 88, 71, 84, 53, 94, 72, 92, 69, 65, 5, 68]
[68, 54, 112, 21, 76, 21, 67, 53, 20, 31, 63, 104, 10, 25, 45, 94]
[82, 92, 40, 10

In [10]:
# 'den312d' test set, 32 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('den312d', 32), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[142, 73, 120, 47, 121, 87, 17, 95, 78, 80, 120, 83, 82, 29, 63, 41, 121, 80, 89, 49, 36, 43, 84, 15, 32, 29, 34, 55, 63, 32, 16, 58]
[26, 26, 89, 114, 59, 94, 78, 56, 17, 64, 82, 40, 89, 24, 23, 95, 40, 43, 62, 51, 2, 76, 38, 10, 24, 29, 81, 73, 87, 18, 65, 30]
[129, 70, 14, 129, 44, 29, 15, 11, 72, 68, 26, 59, 169, 107, 36, 40, 49, 81, 99, 72, 107, 16, 76, 93, 100, 39, 9, 67, 61, 9, 91, 75]
[54, 87, 105, 101, 16, 55, 90, 87, 113, 82, 61, 30, 3, 60, 70, 57, 85, 3, 20, 84, 33, 12, 120, 52, 15, 78, 58, 50, 37, 29, 52, 6]
[134, 80, 116, 66, 99, 84, 92, 53, 20, 26, 21, 142, 20, 86, 109, 120, 14, 40, 99, 37, 87, 85, 33, 98, 32, 58, 42, 58, 40, 47, 41, 71]
[8, 76, 144, 114, 162, 36, 32, 168, 113, 124, 142, 51, 40, 123, 39, 31, 50, 48, 137, 75, 74, 94, 58, 22, 45, 115, 13, 58, 78, 82, 2, 38]
[105, 32, 122, 62, 107, 124, 61, 6, 150, 37, 80, 13, 137, 127, 79, 48, 83, 84, 88, 106, 83, 49, 95, 107, 18, 36, 88, 44, 18, 46, 59, 73]
[99, 33, 97, 69, 39, 70, 42, 30, 107, 68, 81, 26, 99, 137, 35, 58,

In [11]:
# 'den312d' test set, 64 agents: load it and simulate the first 20 cases (256-step cap).
# NOTE(review): the recorded output of this cell ends with
# "IndexError: list index out of range", so the run did not complete.
with open('./test_set/{}_{}agents.pth'.format('den312d', 64), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[111, 97, 119, 90, 112, 94, 145, 16, 90, 68, 78, 118, 144, 66, 136, 126, 130, 23, 7, 95, 57, 65, 22, 105, 77, 79, 27, 131, 101, 45, 43, 32, 179, 85, 28, 24, 114, 149, 107, 118, 25, 87, 101, 119, 92, 77, 33, 42, 11, 119, 136, 61, 37, 69, 92, 73, 115, 101, 84, 22, 48, 49, 86, 76]
[64, 172, 133, 127, 163, 12, 85, 9, 184, 147, 20, 131, 149, 140, 31, 146, 151, 26, 55, 80, 151, 110, 51, 72, 195, 153, 26, 139, 44, 66, 181, 116, 111, 30, 19, 162, 115, 111, 47, 31, 40, 24, 77, 41, 28, 99, 157, 133, 19, 115, 33, 104, 62, 76, 34, 20, 107, 81, 91, 12, 98, 87, 21, 115]
[152, 130, 95, 125, 194, 132, 127, 156, 51, 62, 148, 147, 176, 15, 166, 184, 109, 36, 63, 154, 94, 221, 62, 40, 179, 65, 34, 35, 134, 147, 68, 218, 204, 137, 38, 160, 6, 152, 53, 86, 75, 164, 72, 170, 36, 22, 135, 94, 183, 160, 52, 35, 7, 36, 77, 73, 22, 38, 31, 124, 131, 84, 48, 81]
[256, 98, 256, 149, 256, 136, 173, 156, 9, 19, 226, 15, 233, 58, 12, 58, 232, 109, 158, 190, 100, 256, 256, 256, 39, 256, 201, 256, 162, 256, 141, 53, 2

IndexError: list index out of range

In [19]:
# 'den312d' test set, 64 agents again: reload it and simulate cases 4-19 only (256-step cap).
# `all_step_lists` therefore holds no results for cases 0-3 after this cell.
with open('./test_set/{}_{}agents.pth'.format('den312d', 64), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[4:20], 256)

[256, 256, 117, 4, 89, 96, 130, 52, 84, 242, 256, 231, 102, 98, 11, 53, 116, 256, 86, 73, 14, 256, 92, 141, 215, 75, 63, 121, 95, 218, 164, 246, 135, 237, 201, 90, 116, 48, 141, 222, 118, 80, 130, 34, 100, 60, 162, 19, 204, 166, 161, 153, 73, 98, 22, 59, 43, 145, 40, 38, 19, 86, 22, 79]
[154, 21, 53, 69, 166, 112, 110, 124, 107, 19, 92, 177, 106, 29, 205, 74, 51, 5, 103, 112, 20, 68, 213, 213, 55, 26, 24, 209, 73, 13, 133, 179, 28, 150, 94, 169, 126, 138, 63, 58, 60, 109, 125, 67, 46, 97, 36, 100, 130, 164, 79, 13, 55, 56, 114, 15, 105, 71, 33, 25, 11, 57, 112, 60]
[97, 55, 119, 89, 231, 111, 57, 195, 125, 122, 72, 126, 169, 68, 134, 76, 35, 211, 131, 93, 107, 44, 185, 47, 71, 35, 34, 190, 56, 169, 59, 128, 12, 125, 56, 16, 83, 154, 63, 135, 77, 41, 19, 57, 113, 52, 81, 139, 29, 52, 101, 55, 140, 134, 51, 90, 43, 31, 37, 48, 61, 62, 97, 76]
[173, 142, 157, 60, 44, 256, 92, 20, 214, 47, 27, 256, 174, 256, 226, 59, 200, 15, 60, 256, 223, 256, 81, 108, 74, 256, 162, 179, 33, 256, 157, 87,

In [20]:
# 'random64' test set, 4 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random64', 4), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[80, 29, 42, 70]
[68, 28, 29, 74]
[10, 60, 57, 32]
[52, 37, 71, 45]
[63, 43, 47, 59]
[15, 57, 34, 28]
[56, 73, 24, 31]
[60, 25, 80, 32]
[100, 66, 51, 9]
[34, 5, 7, 39]
[18, 41, 57, 16]
[35, 78, 57, 15]
[28, 6, 29, 21]
[63, 45, 67, 44]
[33, 86, 31, 29]
[51, 77, 48, 63]
[89, 40, 39, 25]
[21, 44, 59, 64]
[25, 53, 28, 6]
[114, 39, 42, 33]


In [21]:
# 'random64' test set, 8 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random64', 8), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[48, 28, 27, 8, 10, 10, 29, 37]
[28, 79, 65, 70, 33, 53, 23, 97]
[62, 48, 61, 90, 33, 23, 8, 16]
[33, 32, 25, 64, 81, 64, 65, 51]
[8, 52, 81, 35, 56, 36, 39, 30]
[61, 58, 29, 64, 88, 38, 69, 39]
[33, 22, 62, 54, 30, 49, 48, 60]
[77, 28, 51, 25, 81, 40, 19, 29]
[15, 37, 59, 62, 30, 24, 36, 30]
[64, 40, 17, 44, 24, 49, 47, 57]
[26, 39, 40, 20, 11, 41, 54, 55]
[61, 62, 44, 18, 14, 42, 71, 48]
[69, 33, 4, 8, 15, 52, 53, 81]
[38, 47, 34, 9, 41, 54, 64, 89]
[6, 47, 51, 8, 31, 31, 35, 27]
[33, 11, 97, 16, 12, 46, 13, 49]
[9, 59, 45, 35, 38, 79, 43, 12]
[41, 43, 23, 32, 15, 21, 28, 60]
[79, 36, 67, 54, 73, 28, 10, 36]
[24, 78, 57, 53, 26, 22, 71, 58]


In [22]:
# 'random64' test set, 16 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random64', 16), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[18, 53, 85, 33, 29, 33, 14, 50, 3, 46, 45, 27, 32, 15, 49, 34]
[71, 53, 102, 40, 78, 46, 74, 56, 50, 33, 69, 16, 51, 56, 79, 40]
[15, 50, 22, 62, 58, 22, 77, 91, 67, 46, 38, 77, 63, 54, 40, 44]
[37, 8, 33, 43, 58, 6, 61, 51, 15, 24, 46, 36, 24, 53, 51, 23]
[37, 3, 31, 74, 62, 6, 79, 49, 85, 63, 46, 41, 63, 23, 56, 29]
[68, 42, 93, 52, 23, 65, 59, 24, 59, 113, 42, 53, 51, 59, 48, 12]
[64, 63, 67, 38, 20, 79, 48, 16, 10, 31, 46, 42, 4, 38, 57, 64]
[29, 36, 34, 8, 47, 56, 66, 13, 74, 52, 33, 53, 68, 19, 30, 39]
[20, 63, 66, 78, 14, 23, 89, 14, 58, 8, 13, 53, 25, 19, 48, 19]
[20, 13, 51, 16, 68, 27, 50, 50, 38, 25, 48, 44, 47, 40, 51, 27]
[38, 30, 76, 31, 69, 58, 8, 53, 45, 47, 35, 29, 61, 50, 34, 53]
[43, 55, 48, 21, 35, 48, 11, 86, 41, 9, 23, 7, 22, 31, 29, 60]
[49, 20, 80, 25, 15, 35, 60, 76, 35, 35, 20, 18, 23, 64, 80, 23]
[72, 39, 37, 26, 36, 59, 39, 77, 44, 21, 67, 32, 55, 45, 37, 54]
[110, 36, 71, 36, 51, 20, 20, 23, 105, 71, 10, 55, 99, 9, 60, 59]
[43, 40, 23, 11, 55, 28, 53, 44, 

In [23]:
# 'random64' test set, 32 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random64', 32), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[55, 55, 16, 35, 39, 65, 71, 9, 34, 68, 66, 49, 65, 13, 89, 66, 34, 53, 43, 31, 24, 34, 61, 56, 70, 15, 36, 50, 44, 57, 37, 38]
[64, 39, 44, 49, 64, 17, 32, 37, 66, 58, 47, 58, 12, 19, 9, 36, 54, 56, 38, 35, 34, 68, 47, 16, 28, 52, 63, 21, 12, 23, 84, 65]
[70, 40, 8, 116, 75, 55, 53, 56, 51, 30, 54, 23, 48, 36, 81, 73, 42, 47, 57, 38, 47, 84, 63, 15, 18, 51, 9, 3, 5, 22, 57, 79]
[37, 58, 78, 24, 27, 34, 15, 32, 42, 47, 22, 28, 27, 41, 79, 53, 13, 75, 26, 39, 48, 34, 16, 61, 62, 65, 59, 68, 101, 81, 40, 39]
[60, 8, 39, 75, 22, 25, 35, 51, 5, 31, 54, 28, 73, 16, 61, 58, 73, 54, 46, 37, 55, 17, 19, 77, 69, 47, 50, 39, 70, 35, 78, 53]
[20, 74, 51, 79, 43, 55, 47, 51, 9, 35, 47, 17, 48, 59, 20, 57, 8, 62, 37, 25, 58, 40, 55, 64, 49, 14, 37, 46, 30, 23, 49, 2]
[56, 47, 50, 39, 55, 45, 45, 18, 76, 26, 24, 47, 29, 64, 37, 32, 50, 32, 30, 68, 37, 51, 41, 57, 41, 43, 38, 9, 42, 16, 58, 26]
[101, 54, 35, 27, 68, 27, 31, 40, 16, 42, 12, 50, 41, 57, 50, 56, 65, 50, 28, 24, 81, 45, 39, 32, 59, 58, 5

In [24]:
# 'random64' test set, 64 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random64', 64), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[33, 62, 59, 45, 50, 42, 35, 41, 76, 54, 72, 42, 17, 48, 15, 35, 61, 66, 30, 49, 40, 38, 48, 35, 34, 24, 60, 49, 43, 81, 74, 46, 29, 21, 69, 47, 28, 51, 40, 51, 76, 74, 48, 46, 49, 15, 37, 11, 96, 23, 44, 25, 55, 69, 12, 69, 10, 39, 79, 34, 17, 70, 62, 47]
[9, 24, 22, 72, 40, 42, 35, 7, 4, 48, 67, 36, 26, 68, 47, 37, 94, 26, 33, 51, 32, 35, 66, 54, 62, 14, 61, 37, 14, 71, 29, 39, 22, 59, 44, 56, 78, 60, 48, 83, 38, 50, 49, 84, 28, 62, 30, 11, 56, 2, 18, 55, 11, 41, 73, 20, 34, 32, 54, 82, 18, 20, 61, 46]
[62, 49, 38, 61, 73, 47, 66, 37, 52, 51, 65, 89, 69, 23, 13, 41, 39, 39, 30, 69, 15, 96, 10, 43, 40, 53, 60, 4, 45, 115, 87, 37, 19, 25, 35, 31, 37, 60, 48, 23, 27, 15, 33, 60, 89, 36, 48, 26, 13, 41, 24, 56, 33, 54, 87, 30, 65, 31, 16, 18, 38, 56, 14, 25]
[31, 5, 19, 59, 58, 57, 49, 23, 63, 53, 256, 70, 44, 70, 31, 37, 73, 117, 53, 235, 23, 2, 40, 41, 43, 28, 38, 256, 18, 14, 40, 99, 31, 11, 6, 43, 59, 38, 53, 47, 85, 43, 67, 60, 42, 45, 20, 51, 44, 23, 43, 72, 24, 256, 254, 50, 26, 6

In [25]:
# 'random32' test set, 4 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random32', 4), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[14, 41, 20, 20]
[37, 17, 12, 24]
[31, 27, 7, 11]
[23, 28, 22, 39]
[5, 41, 29, 16]
[40, 26, 16, 25]
[32, 23, 27, 49]
[9, 16, 14, 7]
[24, 20, 12, 15]
[24, 15, 11, 17]
[15, 8, 31, 16]
[38, 21, 36, 24]
[9, 33, 30, 21]
[34, 16, 26, 34]
[5, 13, 7, 28]
[17, 29, 34, 19]
[20, 24, 31, 14]
[18, 33, 30, 19]
[16, 50, 33, 20]
[23, 31, 20, 19]


In [26]:
# 'random32' test set, 8 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random32', 8), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[19, 17, 22, 35, 26, 4, 22, 10]
[33, 17, 11, 6, 33, 31, 29, 19]
[21, 33, 18, 7, 23, 30, 26, 8]
[35, 44, 32, 19, 18, 17, 6, 23]
[10, 7, 21, 17, 14, 20, 7, 45]
[40, 24, 46, 30, 38, 23, 22, 32]
[28, 12, 22, 51, 14, 24, 18, 11]
[16, 21, 25, 27, 43, 8, 19, 25]
[5, 24, 19, 8, 13, 11, 32, 20]
[34, 18, 41, 28, 13, 35, 14, 11]
[23, 3, 17, 27, 18, 37, 35, 23]
[32, 23, 35, 26, 21, 18, 42, 2]
[30, 14, 14, 24, 22, 12, 17, 22]
[18, 35, 36, 39, 26, 30, 25, 42]
[35, 17, 6, 41, 50, 17, 31, 13]
[23, 15, 22, 7, 54, 7, 44, 18]
[16, 7, 5, 10, 35, 24, 30, 5]
[14, 16, 2, 21, 37, 12, 28, 11]
[15, 32, 18, 36, 26, 26, 1, 30]
[21, 37, 6, 28, 26, 23, 17, 9]


In [27]:
# 'random32' test set, 16 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random32', 16), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[8, 26, 20, 10, 37, 27, 38, 24, 11, 23, 25, 43, 25, 32, 15, 11]
[30, 7, 19, 26, 18, 27, 18, 24, 17, 37, 20, 28, 2, 29, 12, 3]
[16, 31, 12, 40, 26, 53, 29, 39, 10, 13, 17, 19, 45, 29, 24, 7]
[19, 12, 42, 25, 18, 25, 7, 22, 32, 32, 5, 21, 29, 29, 50, 18]
[9, 8, 20, 17, 26, 45, 15, 29, 11, 17, 12, 17, 8, 28, 27, 13]
[26, 14, 9, 9, 15, 18, 17, 36, 20, 51, 22, 15, 19, 48, 14, 35]
[28, 12, 51, 33, 13, 43, 2, 14, 33, 41, 14, 21, 23, 31, 15, 17]
[47, 8, 30, 32, 23, 42, 39, 23, 47, 32, 24, 44, 18, 41, 38, 31]
[31, 38, 38, 9, 21, 20, 26, 21, 39, 6, 28, 23, 35, 19, 9, 10]
[12, 24, 35, 35, 14, 30, 14, 14, 18, 33, 56, 26, 5, 20, 24, 29]
[58, 17, 17, 49, 31, 5, 33, 27, 49, 43, 35, 6, 33, 5, 17, 10]
[7, 33, 32, 23, 45, 9, 12, 19, 17, 17, 26, 7, 17, 14, 17, 39]
[37, 14, 15, 30, 19, 19, 26, 17, 8, 43, 13, 37, 34, 26, 12, 40]
[31, 39, 44, 31, 17, 4, 11, 28, 31, 28, 6, 19, 32, 6, 23, 19]
[11, 18, 23, 38, 20, 54, 15, 33, 23, 34, 24, 11, 23, 56, 25, 29]
[12, 18, 15, 11, 15, 33, 9, 19, 23, 41, 28, 8, 19, 33

In [28]:
# 'random32' test set, 32 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random32', 32), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[10, 39, 34, 43, 44, 23, 17, 8, 18, 57, 33, 1, 35, 4, 32, 69, 18, 24, 16, 21, 19, 14, 27, 23, 72, 8, 38, 12, 20, 13, 23, 16]
[25, 15, 26, 21, 20, 28, 11, 29, 11, 21, 40, 39, 8, 81, 32, 40, 26, 25, 59, 20, 11, 51, 18, 38, 12, 19, 11, 19, 52, 8, 34, 12]
[36, 33, 33, 30, 17, 93, 61, 3, 13, 25, 26, 23, 21, 22, 34, 24, 7, 5, 41, 23, 9, 6, 27, 24, 39, 27, 23, 12, 24, 13, 12, 16]
[52, 41, 51, 38, 17, 45, 42, 27, 24, 59, 47, 28, 5, 48, 50, 28, 31, 13, 22, 6, 39, 14, 15, 46, 18, 29, 30, 25, 23, 31, 9, 40]
[8, 54, 25, 20, 4, 19, 26, 27, 14, 27, 30, 26, 18, 34, 26, 20, 16, 29, 14, 17, 23, 10, 13, 13, 11, 38, 26, 6, 12, 25, 10, 21]
[23, 28, 12, 42, 37, 32, 40, 34, 9, 20, 21, 38, 8, 14, 29, 7, 10, 30, 4, 49, 1, 16, 19, 27, 4, 28, 15, 27, 11, 25, 29, 12]
[16, 21, 26, 12, 18, 10, 45, 136, 38, 25, 9, 139, 16, 256, 15, 16, 13, 13, 36, 41, 20, 37, 37, 24, 18, 21, 25, 2, 3, 41, 19, 37]
[33, 37, 40, 20, 37, 46, 40, 30, 41, 11, 28, 21, 25, 35, 36, 33, 31, 45, 34, 39, 20, 14, 32, 6, 18, 14, 22, 34, 19, 23, 

In [29]:
# 'random32' test set, 64 agents: load it and simulate the first 20 cases (256-step cap).
with open('./test_set/{}_{}agents.pth'.format('random32', 64), 'rb') as f:
    tests = pickle.load(f)

all_step_lists = simulate_all_tests(tests[0:20], 256)

[103, 61, 18, 73, 21, 143, 87, 70, 33, 146, 91, 27, 78, 82, 53, 19, 112, 52, 202, 195, 42, 78, 167, 97, 65, 11, 33, 31, 76, 95, 86, 9, 9, 96, 71, 115, 62, 100, 89, 72, 86, 14, 76, 56, 46, 91, 29, 38, 22, 59, 12, 90, 37, 80, 23, 25, 66, 42, 20, 82, 32, 11, 14, 21]
[93, 18, 111, 116, 155, 18, 23, 59, 34, 55, 70, 15, 35, 59, 43, 74, 40, 42, 163, 82, 24, 54, 31, 38, 19, 60, 77, 9, 19, 11, 18, 46, 40, 104, 42, 91, 74, 48, 34, 68, 69, 17, 71, 60, 41, 14, 76, 19, 34, 41, 23, 18, 36, 51, 104, 60, 121, 61, 34, 37, 38, 9, 14, 10]
[256, 27, 152, 46, 62, 256, 256, 15, 256, 49, 256, 20, 40, 70, 24, 256, 251, 256, 256, 19, 256, 256, 92, 256, 256, 89, 256, 256, 15, 99, 72, 19, 256, 15, 256, 43, 256, 77, 28, 256, 256, 12, 12, 256, 20, 52, 255, 116, 255, 241, 256, 96, 95, 92, 13, 46, 22, 9, 48, 12, 256, 44, 11, 6]
[26, 60, 162, 150, 73, 120, 92, 114, 77, 46, 86, 100, 96, 23, 52, 70, 81, 66, 40, 79, 82, 59, 18, 123, 53, 38, 19, 6, 47, 53, 54, 32, 54, 54, 91, 79, 55, 76, 53, 50, 87, 30, 48, 31, 38, 47, 2