In [1]:
import numpy as np
from torch.utils.tensorboard.writer import SummaryWriter
import concurrent.futures
from torch import optim
import torch
import os
import copy


%load_ext autoreload
%autoreload 2
import env
import network
import player


# Board dimensions, re-exported from the environment module for brevity.
BOARD_XSIZE = env.BOARD_XSIZE
BOARD_YSIZE = env.BOARD_YSIZE

DIMS = (BOARD_XSIZE, BOARD_YSIZE)


# Training-loop configuration.
# NOTE(review): the *_INTERVAL values are presumably counted in training
# epochs -- confirm against the training loop that consumes them.
EPISODES_PER_AGENT = 100
TRAIN_EPOCHS = 500000
MODEL_SAVE_INTERVAL = 100
MAKE_OPPONENT_INTERVAL = 1000
SUMMARY_STATS_INTERVAL = 10
RANDOM_SEED = 42

SUMMARY_DIR = './summary'
MODEL_DIR = './models'

# Create the output directories up front. `exist_ok=True` replaces the racy
# "check, then create" pattern and makes re-running this cell a no-op.
# MODEL_DIR is created here as well so that saving into it cannot fail on a
# missing directory.
os.makedirs(SUMMARY_DIR, exist_ok=True)
os.makedirs(MODEL_DIR, exist_ok=True)

use_cuda = torch.cuda.is_available()

# Seed every RNG this notebook draws from. Seeding torch alone is not enough:
# player placement and impostor selection use `np.random`.
torch.manual_seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)

cuda = torch.device("cuda")
cpu = torch.device("cpu")

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = cuda if use_cuda else cpu

In [2]:
# TODO: restore neural net parameters


def _make_actor_critic():
    """Build a fresh actor/critic pair on `device`, each with its own Adam optimizer.

    Returns:
        (actor, critic, actor_optimizer, critic_optimizer)
    """
    actor = network.Actor().to(device)
    critic = network.Critic().to(device)
    actor_optimizer = optim.Adam(actor.parameters(), lr=network.ACTOR_LR)
    critic_optimizer = optim.Adam(critic.parameters(), lr=network.CRITIC_LR)
    return actor, critic, actor_optimizer, critic_optimizer


# Each role gets its own independent networks and optimizers.
(
    impostor_actor,
    impostor_critic,
    impostor_actor_optimizer,
    impostor_critic_optimizer,
) = _make_actor_critic()

(
    crewmate_actor,
    crewmate_critic,
    crewmate_actor_optimizer,
    crewmate_critic_optimizer,
) = _make_actor_critic()

# TensorBoard writer that logs into SUMMARY_DIR.
writer = SummaryWriter(log_dir=SUMMARY_DIR)

step = 0

2023-03-16 00:56:29.607022: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-16 00:56:29.935301: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-03-16 00:56:30.881474: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2023-03-16 00:56:30.881754: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or 

In [3]:
# Module-level list of floats, initially empty.
# NOTE(review): presumably collects per-step policy entropy for logging --
# confirm against the code that appends to it.
entropy_buf: list[float] = []




AttributeError: module 'env' has no attribute 'PLAYER2'

In [None]:

def random_valid_location() -> tuple[int, int]:
    """Draw a uniformly random board coordinate.

    Returns:
        An ``(x, y)`` pair with ``0 <= x < BOARD_XSIZE`` and
        ``0 <= y < BOARD_YSIZE``, sampled from NumPy's global RNG
        (x is drawn first, then y).
    """
    # With a single argument, randint samples from [0, bound).
    x_coord = np.random.randint(BOARD_XSIZE)
    y_coord = np.random.randint(BOARD_YSIZE)
    return x_coord, y_coord


def play(actor: player.ActorPlayer, actor_is_impostor: bool, others: list[player.Player]) -> tuple[
    list[env.Observation],
    list[env.Action],
    list[np.ndarray],
    list[env.Reward],
    list[env.Advantage],
    list[env.Reward],
]:
    """Play one game with `actor` in seat 0 and return the actor's trajectory.

    A fresh `env.Env` is created and every participant is placed at a random
    board location. Participants then take turns in seat order (actor first)
    until `e.game_over()` reports the game has ended. Only the actor's turns
    are recorded.

    Args:
        actor: The learning player whose turns are recorded.
        actor_is_impostor: If True the actor is the impostor; otherwise one of
            `others` is picked uniformly at random to be the impostor.
        others: The remaining participants, seated after the actor in order.

    Returns:
        A tuple ``(s_t, a_t, p_t, r_t, d_t, v_t)`` where, per actor turn:
        ``s_t`` are the observations, ``a_t`` the chosen actions, ``p_t`` the
        action probabilities and ``r_t`` the rewards returned by
        `actor.play`; ``d_t`` is `network.compute_advantage(actor.critic,
        s_t, r_t)` and ``v_t`` is `network.compute_value(r_t)`.

    Raises:
        ValueError: If the actor is a crewmate and `others` is empty, since
            there is then nobody who could be the impostor.
    """
    if not actor_is_impostor and not others:
        # Without this guard the failure surfaces later as an opaque
        # "low >= high" ValueError from np.random.randint(1, 1).
        raise ValueError(
            "a crewmate actor needs at least one other player to be the impostor")

    e = env.Env()

    # Create the players at random locations on the board. The actor is
    # always seat 0. The third PlayerState argument is False for the actor
    # and True for everyone else; its meaning is defined by env.PlayerState.
    e.state.players = [env.PlayerState(
        random_valid_location(), actor_is_impostor, False)]
    e.state.players += [env.PlayerState(random_valid_location(), False, True)
                        for _ in others]
    # If the actor is not an impostor, the impostor is chosen uniformly at
    # random among the others (seats 1 .. n-1).
    if not actor_is_impostor:
        impostor_seat = np.random.randint(1, len(e.state.players))
        e.state.players[impostor_seat].impostor = True

    players = [actor] + others

    s_t: list[env.Observation] = []
    a_t: list[env.Action] = []
    p_t: list[np.ndarray] = []
    r_t: list[env.Reward] = []
    # Play the game.
    # NOTE(review): game_over() is only checked once per round, so seats after
    # the one that ends the game still get a turn -- confirm this is intended.
    while not e.game_over():
        # The loop variable is deliberately not called `player`, which would
        # shadow the imported `player` module inside this function.
        for seat, participant in enumerate(players):
            # Identity, not equality: only the actor object itself is recorded,
            # even if a participant class defines a custom __eq__.
            if participant is actor:
                obs, action_probs, chosen_action, reward = actor.play(
                    env.Player(seat), e)
                s_t.append(obs)
                p_t.append(action_probs)
                a_t.append(chosen_action)
                r_t.append(reward)
            else:
                participant.play(env.Player(seat), e)

    # Compute advantage and value from the recorded trajectory.
    d_t = network.compute_advantage(actor.critic, s_t, r_t)
    v_t = network.compute_value(r_t)

    return s_t, a_t, p_t, r_t, d_t, v_t
