In [18]:
import os
import random
import pickle
from typing import Tuple, Union
import warnings
warnings.simplefilter("ignore", UserWarning)
from tqdm import tqdm
import numpy as np
import torch
import torch.multiprocessing as mp
from environment import Environment
from model import Network
import config
import copy
import ollama
from openai import OpenAI

# Do not hardcode a real key here: export OPENAI_API_KEY in the environment
# before starting the kernel. `setdefault` leaves an already-exported key
# untouched and only installs the (empty) placeholder when nothing is set.
os.environ.setdefault("OPENAI_API_KEY", "")
client = OpenAI()

In [None]:
# Direction definitions: direction name -> integer action code.
directiondict = dict(stay=4, north=0, south=1, west=2, east=3)

# Prompts
class gpt4pathfinding:
    """Prompt wrappers that ask GPT-4 for a single movement decision.

    Each public method sends a two-message chat (the fixed system persona plus
    a per-call user prompt) through the module-level ``client`` and returns the
    raw text of the first choice. The reply is not parsed or validated here.
    """

    # System message shared by every request.
    _PERSONA = "You are a veteran adventurer with a great sense of direction. You understand which direction you need to move to get from where you are to where you want to go."

    def _ask(self, user_prompt):
        """Send ``user_prompt`` after the persona message and return the reply text."""
        conversation = [
            {"role": "system", "content": self._PERSONA},
            {"role": "user", "content": user_prompt},
        ]
        reply = client.chat.completions.create(model="gpt-4", messages=conversation)
        return reply.choices[0].message.content

    def navigate(self, east, west, north, south, heuristic_direction):
        """Ask which way to move next, given a heuristic hint.

        ``east``, ``west``, ``north`` and ``south`` are inserted verbatim into
        the prompt (in the order east, west, south, north), followed by a
        sentence recommending ``heuristic_direction``.

        Returns the model's reply text unchanged.
        """
        user_prompt = f"""
                You can only move east, west, south and north or stay.
                So, your answer will be {{east}}, {{west}}, {{south}}, {{north}}, or {{stay}}.
                You can only make one move at a time.
                {east}
                {west}
                {south}
                {north}
                Your heuristic map says going {heuristic_direction} will shorten the distance to the destination without any possibility of dead ends.
                Which direction do you move next?
                Answer in one word only: {{DIRECTION or ACTION}}"""
        return self._ask(user_prompt)

    def give_way(self, east, west, north, south, direction):
        """Ask which way to move so as to avoid the "prior path finder".

        ``east``, ``west``, ``north`` and ``south`` are inserted verbatim into
        the prompt (in the order east, west, south, north); ``direction`` names
        where the prior path finder is relative to the caller.

        Returns the model's reply text unchanged.
        """
        user_prompt = f"""
                You MUST avoid collisions with prior path finder.
                You can only move east, west, south and north or stay.
                So, your answer will be {{east}}, {{west}}, {{south}}, {{north}}, or {{stay}}.
                You can only make one move at a time.
                {east}
                {west}
                {south}
                {north}
                To the {direction} of you is prior path finder.
                Which direction do you move next?
                I highly recommend going to the 90-degree turn rather than the other side of the pathfinder, but you can ignore this instruction depending on your situation.
                
                Think step by step.

                Answer in one word only: {{direction or action}}"""
        return self._ask(user_prompt)
    
# Module-level instance of the GPT-4 prompt wrapper.
pathfinder = gpt4pathfinding()

In [19]:
# Seed every RNG in use (Python, NumPy, PyTorch) from the shared test seed.
random.seed(config.test_seed)
np.random.seed(config.test_seed)
torch.manual_seed(config.test_seed)

# Evaluation runs on the CPU with PyTorch limited to a single thread.
torch.set_num_threads(1)
DEVICE = torch.device("cpu")

In [20]:
def create_test(test_env_settings: Tuple = config.test_env_settings, num_test_cases: int = config.num_test_cases):
    '''
    create test set

    For every (map_length, num_agents, density) entry of test_env_settings,
    collect num_test_cases snapshots of a freshly generated environment and
    pickle them to
    ./test_set/{map_length}length_{num_agents}agents_{density}density.pth.

    Each snapshot is a (map, agents_pos, goals_pos) tuple of array copies.
    '''
    # open(..., 'wb') does not create parent directories, so make sure the
    # output folder exists before the first dump.
    os.makedirs('./test_set', exist_ok=True)

    for map_length, num_agents, density in test_env_settings:

        name = f'./test_set/{map_length}length_{num_agents}agents_{density}density.pth'
        print(f'-----{map_length}length {num_agents}agents {density}density-----')

        tests = []

        env = Environment(fix_density=density, num_agents=num_agents, map_length=map_length)

        for _ in tqdm(range(num_test_cases)):
            # Snapshot the current instance, then reset to obtain the next one.
            # The reset after the last snapshot is kept on purpose: dropping it
            # would change how much randomness is consumed and therefore the
            # test sets generated for the following settings.
            tests.append((np.copy(env.map), np.copy(env.agents_pos), np.copy(env.goals_pos)))
            env.reset(num_agents=num_agents, map_length=map_length)
        print()

        with open(name, 'wb') as f:
            pickle.dump(tests, f)

In [21]:
def code_test():
    '''
    smoke test: run a single network.step on the first observation of a
    default Environment with a freshly constructed Network in eval mode
    '''
    env = Environment()
    network = Network()
    network.eval()

    obs, last_act, pos = env.observe()

    # Observations and last actions go to DEVICE as float32; positions stay
    # where torch.as_tensor puts them, as integers.
    obs_tensor = torch.as_tensor(obs.astype(np.float32)).to(DEVICE)
    last_act_tensor = torch.as_tensor(last_act.astype(np.float32)).to(DEVICE)
    pos_tensor = torch.as_tensor(pos.astype(int))

    network.step(obs_tensor, last_act_tensor, pos_tensor)

In [22]:
def _load_checkpoint(network, model_name):
    '''load <config.save_path>/<model_name>.pth into network and put it in eval mode'''
    state_dict = torch.load(os.path.join(config.save_path, f'{model_name}.pth'), map_location=DEVICE)
    network.load_state_dict(state_dict)
    network.eval()
    # Kept from the original; presumably only needed when the network is
    # handed to worker processes (the parallel evaluation is currently disabled).
    network.share_memory()


def _load_test_cases(path):
    '''unpickle and return the list of test cases stored at path'''
    # NOTE: pickle.load can execute arbitrary code; only open test sets that
    # were generated locally (e.g. by create_test).
    with open(path, 'rb') as f:
        return pickle.load(f)


def test_model(model_range: Union[int, tuple], test_set=config.test_env_settings):
    '''
    test model in 'saved_models' folder

    model_range: either a single checkpoint number (int), or a
        (first, last) tuple; in the tuple case every checkpoint from first to
        last inclusive, in steps of config.save_interval, is tested.
    test_set: iterable of test-set descriptors used to build the pickle file
        name and the progress message.

    Only the first case of every test set is run, through test_one_case; its
    result is not reported. Evaluating the whole set in parallel (pool.map
    over (test, network, instance_id) triples, then printing success rate,
    average step and communication times) was commented out in the original.

    Raises TypeError if model_range is neither an int nor a tuple.

    NOTE(review): the two branches use different test-set file names
    ('{}_{}agents.pth' vs '{}length_{}agents_{}density.pth') and advance
    instance_id differently (len(tests) vs 1). Both are preserved as found;
    confirm which convention is intended.
    '''
    network = Network()
    network.eval()
    network.to(DEVICE)

    # The original created mp.Pool(mp.cpu_count()//2) here without ever using
    # or closing it: that leaked worker processes and raised ValueError on a
    # single-core machine (a pool needs at least one process). Re-create it
    # inside a `with` block if the parallel evaluation is restored.

    if isinstance(model_range, int):
        _load_checkpoint(network, model_range)

        print(f'----------test model {model_range}----------')

        instance_id = 0

        for case in test_set:
            print(f"test set: {case[0]} env {case[1]} agents")
            tests = _load_test_cases('./test_set/{}_{}agents.pth'.format(case[0], case[1]))

            test_one_case((tests[0], network, instance_id))

            # Reserve one id per stored case even though only the first is run.
            instance_id += len(tests)

    elif isinstance(model_range, tuple):

        for model_name in range(model_range[0], model_range[1]+1, config.save_interval):
            _load_checkpoint(network, model_name)

            print(f'----------test model {model_name}----------')

            instance_id = 0

            for case in test_set:
                print(f"test set: {case[0]} length {case[1]} agents {case[2]} density")
                tests = _load_test_cases(f'./test_set/{case[0]}length_{case[1]}agents_{case[2]}density.pth')

                test_one_case((tests[0], network, instance_id))

                instance_id += 1

            print('\n')

    else:
        # Previously any other type silently did nothing.
        raise TypeError(f'model_range must be an int or a tuple, got {type(model_range).__name__}')

In [40]:
def test_one_case(args):
    '''
    run one test case and return (success, step, num_comm)

    args: (env_set, network, instance_id) packed in one tuple.
        env_set is indexed as (map, agents_pos, goals_pos) and loaded into a
        fresh Environment; network provides reset() and step();
        instance_id is passed to env.save_frame together with the step index.

    The episode has two phases:
      1. until half of config.max_episode_length, the network acts directly;
      2. afterwards, the network is rolled out on a deep copy of the
         environment for up to 30 steps at a time, the planned steps of the
         agents that have not arrived are printed, and the plan is replayed
         on the real environment.

    success is True when every agent position equals its goal position at the
    end; step counts executed environment steps; num_comm sums comm_mask over
    those steps.
    '''
    env_set, network, instance_id = args

    # Number of steps planned (and then replayed) per look-ahead window.
    plan_horizon = 30

    env = Environment()
    env.load(np.array(env_set[0]), np.array(env_set[1]), np.array(env_set[2]))
    obs, last_act, pos = env.observe()

    done = False
    network.reset()

    num_agents = len(env_set[1])

    step = 0
    num_comm = 0

    # Phase 1: plain policy execution for the first half of the episode budget.
    while not done and env.steps < config.max_episode_length // 2:
        actions, _, _, _, comm_mask = network.step(torch.as_tensor(obs.astype(np.float32)).to(DEVICE),
                                                    torch.as_tensor(last_act.astype(np.float32)).to(DEVICE),
                                                    torch.as_tensor(pos.astype(int)))
        (obs, last_act, pos), _, done, _ = env.step(actions)
        env.save_frame(step, instance_id)
        step += 1
        num_comm += np.sum(comm_mask)

    # Phase 2: plan a window on a copy of the environment, inspect it, replay it.
    # Every pass either finishes the episode, exhausts the step budget or
    # advances the environment, so the loop cannot spin without progress (the
    # original guarded the body with a modulo-30 test that would have looped
    # forever had it ever been false).
    while not done and env.steps < config.max_episode_length:
        # Agents that have not reached their goal yet.
        not_arrived = [i for i in range(num_agents)
                       if not np.array_equal(env.agents_pos[i], env.goals_pos[i])]

        env_copy = copy.deepcopy(env)
        plan = []
        sim_obs, sim_last_act, sim_pos = env_copy.observe()

        for _ in range(plan_horizon):
            if env_copy.steps >= config.max_episode_length:
                break
            actions, _, _, _, comm_mask = network.step(torch.as_tensor(sim_obs.astype(np.float32)).to(DEVICE),
                                                        torch.as_tensor(sim_last_act.astype(np.float32)).to(DEVICE),
                                                        torch.as_tensor(sim_pos.astype(int)))
            # Record the positions the actions were chosen from.
            plan.append((actions, comm_mask, copy.deepcopy(sim_pos)))
            (sim_obs, sim_last_act, sim_pos), _, sim_done, _ = env_copy.step(actions)
            if sim_done:
                # Do not plan past the end of the episode.
                break

        # Per-agent textual summary of the planned steps.
        planned_steps_dict = {i: [] for i in not_arrived}
        for actions, _, positions in plan:  # comm_mask is not needed here
            for agent_idx in not_arrived:
                position = positions[agent_idx]
                # Compare the position with the goal to tell whether the agent has arrived.
                arrived_status = "arrived" if np.array_equal(position, env.goals_pos[agent_idx]) else "not arrived"
                planned_steps_dict[agent_idx].append(f"Action: {actions[agent_idx]}, Position: {position}, Status: {arrived_status}")

        # Print the result as n: [], m: [], o: [].
        for agent_idx, planned_steps in planned_steps_dict.items():
            print(f"{agent_idx}: {planned_steps}")

        # TODO(deadlock handling, not implemented): send the text above to the
        # OpenAI model and have it report whether a deadlock/livelock exists
        # and which agents are involved. Original sketch for the deadlock case:
        #   1. among the reported agents, pick the one with the largest
        #      Manhattan distance to its goal as the "super" agent;
        #   2. until it arrives, move it along the heuristic guide: collect the
        #      directions whose observation channel is set at the centre cell
        #      (obs[agent][2..5][4, 4] == 1 -> north, south, west, east), pick
        #      one at random and convert it with directiondict;
        #   3. unrelated agents stay put (the sketch filled their actions with
        #      0, which is 'north' in directiondict; 'stay' is 4), and agents
        #      the super agent meets must give way, with the action chosen by
        #      the OpenAI model;
        #   4. once the super agent has arrived, let the agents it met return
        #      using the learned policy and finish.
        # The sketch was not valid Python (placeholder tokens, undefined
        # names), so every non-empty plan is treated as deadlock-free.

        if not plan:
            # Nothing could be planned, so the environment cannot advance.
            break

        # Replay the planned actions on the real environment.
        for actions, comm_mask, _ in plan:
            if done or env.steps >= config.max_episode_length:
                # Stop at the end of the episode so that `step` is not inflated
                # and agents are not moved off their goals.
                break
            (obs, last_act, pos), _, done, _ = env.step(actions)
            env.save_frame(step, instance_id)
            step += 1
            num_comm += np.sum(comm_mask)

    return np.array_equal(env.agents_pos, env.goals_pos), step, num_comm

In [41]:
# Evaluate the single checkpoint saved as 128000.pth.
test_model(model_range=128000)

  state_dict = torch.load(os.path.join(config.save_path, f'{model_range}.pth'), map_location=DEVICE)


----------test model 128000----------
test set: warehouse env 64 agents
24: ['Action: 3, Position: [  4 116], Status: not arrived', 'Action: 3, Position: [  4 117], Status: not arrived', 'Action: 3, Position: [  4 118], Status: not arrived', 'Action: 3, Position: [  4 119], Status: not arrived', 'Action: 3, Position: [  4 120], Status: not arrived', 'Action: 3, Position: [  4 121], Status: not arrived', 'Action: 3, Position: [  4 122], Status: not arrived', 'Action: 3, Position: [  4 123], Status: not arrived', 'Action: 3, Position: [  4 124], Status: not arrived', 'Action: 3, Position: [  4 125], Status: not arrived', 'Action: 3, Position: [  4 126], Status: not arrived', 'Action: 3, Position: [  4 127], Status: not arrived', 'Action: 3, Position: [  4 128], Status: not arrived', 'Action: 3, Position: [  4 129], Status: not arrived', 'Action: 3, Position: [  4 130], Status: not arrived', 'Action: 3, Position: [  4 131], Status: not arrived', 'Action: 3, Position: [  4 132], Status: no