In [1]:
#%reload_ext dotenv
#%dotenv

# ML libraries
import torch
import torch.nn as nn

# Local imports
from env import Env
from agents.random_agent import RandomAgent
from agents.minimax_agent import MinimaxAgent
from agents.minimax_agent_old import OldMinimaxAgent
# from agents.deep_q_agent import DeepQAgent
from agents.deep_q_agent_modified import DeepQAgent
from agents.cql_agent import CQLAgent
import utils
from trainer import Trainer

In [2]:
##################
# INITIALIZATION #
##################

# Fix the random seed through the project helper
utils.seed_everything(42, deterministic=False)

# Run on the first CUDA device when one is available, otherwise on the CPU
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

# IDs used for the learning player and for its opponent
AGENT = 1
OPPONENT = 2

# Game environment; the same instance is handed to the agents and to the trainer
env = Env()

# Shape of the boolean state as seen by the agent (needed by the CQLAgent variants below)
state_shape = env.get_state(player=AGENT, state_type='boolean').shape

# Alternative agents, kept for reference:
# agent = CQLAgent(env=env, state_size=state_shape, action_size=7, network_type='CNN')
# agent.load_model('./saved_models/CQLAgent_CNN_25783.pt')
# agent = CQLAgent(env=env, state_size=42, action_size=7, network_type='DDQN')
agent = DeepQAgent(
    env=env,
    epsilon_max=1,
    epsilon_min=0.1,
    epsilon_decay=0.9999,
    device=device,
)

# Training opponent and the agent used for evaluation share the same configuration
opponent = MinimaxAgent(env=env, depth=3, epsilon=0.5, player=OPPONENT)
replacement_agent = MinimaxAgent(env=env, depth=3, epsilon=0.5, player=OPPONENT)

# Training options
options = {
    # Whether to enable self-play or not
    'UPDATE_OPPONENT': True,
    # After how many episodes the opponent is replaced by the current agent
    'OPPONENT_UPDATE_FREQUENCY': 100,
    # During this many initial episodes the opponent is not replaced by the agent
    'BOOTSTRAP_EPISODES': 0,
    # Decay randomness of the opponent; use only if the opponent acts with some randomness
    'DECAY_RANDOMNESS_OPPONENT': False,
    # Frequency of randomness decay
    'DECAY_RANDOMNESS_FREQUENCY': 1000,
    # Whether to replace the training model at the end with another evaluation model
    'REPLACE_FOR_EVALUATION': True,
    # Evaluation model that replaces the training model
    'REPLACE_FOR_EVALUATION_BY': replacement_agent,
    # Whether to save the model at certain intervals
    'AUTOSAVE': True,
    # One of ["NUM_OPTIMIZATIONS", "NUM_EPISODES"]
    'AUTOSAVE_TYPE': 'NUM_EPISODES',
    # After how many optimizations/episodes (see AUTOSAVE_TYPE) to save the model
    'AUTOSAVE_PERIOD': 1000,
}

# Initialize trainer
trainer = Trainer(
    env=env,
    agent=agent,
    opponent=opponent,
    agent_id=AGENT,
    opponent_id=OPPONENT,
    num_episodes={'TRAIN': 5000, 'EVAL': 100},
    device=device,
    verbose=True,
    options=options,
)

In [3]:
############
# TRAINING #
############

# Train agent using the `trainer` built in the INITIALIZATION cell.
# NOTE(review): the output recorded for this cell stops at episode 500 of 5000
# and ends with an AssertionError that carries no message, so the saved run did
# not complete. The failing assertion is not visible from this notebook; check
# the traceback in Trainer / the agents before relying on any saved results.
trainer.train()

TRAIN: Running episode 500 of 5000. Current win ratio of AGENT is 8.20%.

AssertionError: 

In [None]:
####################
# QUICK EVALUATION #
####################
# NOTE: this cell still reads `device` and `options` from the INITIALIZATION
# cell, so that cell must have been run first.

# Fix random seed (a single call; the original seeded twice in a row)
utils.seed_everything(42, deterministic=False)

# Define player and opponent IDs
AGENT = 1
OPPONENT = 2

# Separate environment used only for evaluation
eval_env = Env()

# Load agent from save.
# The state shape is taken from eval_env, the environment this agent is bound to,
# rather than from the training `env` of the INITIALIZATION cell.
# NOTE(review): nothing here explicitly switches the model to eval mode; confirm
# that load_model() or trainer.eval() takes care of it.
state_shape = eval_env.get_state(state_type='boolean', player=AGENT).shape
agent = CQLAgent(env=eval_env, state_size=state_shape, action_size=7, network_type='CNN')
agent.load_model('./saved_models/CQLAgent_CNN_60936.pt')

# Define opponent (bound to eval_env)
minmax_opponent = MinimaxAgent(env=eval_env, depth=3, epsilon=0.5, player=OPPONENT)

# Initialize trainer with the opponent that lives on eval_env; the original passed
# the training-cell `opponent`, which is bound to a different environment.
trainer = Trainer(
    env=eval_env,
    agent=agent,
    opponent=minmax_opponent,
    agent_id=AGENT,
    opponent_id=OPPONENT,
    num_episodes={'TRAIN': 10000, 'EVAL': 100},
    device=device,
    verbose=True,
    options=options,
)

# Run evaluation
trainer.eval(agent=agent, opponent=minmax_opponent, episodes=100, agent_start=None, print_last_n_games=0)

EVAL: Ran episode 100 of 100. Ratios are [WINS: 33.00% | LOSSES: 67.00% | TIES: 0]                                                
EVAL: Average turns per episode 10.58
EVAL: Average invalid moves per episode 0.0


