# Notebook for statistical analysis of model training and performance eval
## Imports

In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from tqdm import tqdm
from datetime import datetime, timedelta

from agents import MinimaxAgent, RandomAgent, AvoidNextLossAgent, ModelAgent, NeuroevolutionAgent
from environment import ConnectFourEnv
from evaluation_func import EvaluationFunction

## Training performance evaluation
The goal is to visualize the training performance of each model in a graph regarding loss and average return since those metrics were collected.
### Preparing the dataframe

In [None]:
#creating the initial dictionary by grid and loading the train data
dfs = {}
train_data = pd.read_csv('./training_data.csv',index_col=None)

#splitting the training data by model training data
dfs['6by7'] = train_data[train_data['PATH'] == './weights/d6by7.h5']
dfs['8by9'] = train_data[train_data['PATH'] == './weights/d8by9.h5']
dfs['10by11'] = train_data[train_data['PATH'] == './weights/d10by11.h5']
dfs['12by13'] = train_data[train_data['PATH'] == './weights/d12by13.h5']

#making the epoch the index for each dataframe
dfs['6by7'].set_index('EPOCH')
dfs['8by9'].set_index('EPOCH')
dfs['10by11'].set_index('EPOCH')
dfs['12by13'].set_index('EPOCH')

dfs['6by7'].head()

Unnamed: 0,NAME,MODEL,DATE,EPOCH,EPSILON,AVERAGE LOSS,AVERAGE RETURN,PATH


### Visualization functions
We write a basic function for visualizing training process in matplotlib and save the figure at a desired path.

In [None]:
def visualize_progress(df, name : str, col_val : str, path : str, show=False):
    '''
    Function to show and also save a graph containing the reward over all episodes.
    :param epoch_returns (list): A list of acerage returns for n epochs (defined as 1000 training steps).
    '''

    x = np.arange(0, len())
    y = df[col_val].numpy()

    plt.style.use('dark_background')

    fig, ax = plt.subplots()
    ax.plot(x,y)

    ax.set_title(f'Training progress for model {name}')
    ax.set_ylabel(f' {col_val} per epoch')
    ax.set_xlabel('Epochs')

    fig.savefig(path)
    
    if show:
        plt.show()

We then run the function for each model we trained:

In [None]:
visualize_progress(dfs['6by7'], 'DQN-6x7', 'AVERAGE LOSS', './figures/6by7avgloss.png')
visualize_progress(dfs['6by7'], 'DQN-6x7', 'AVERAGE RETURN', './figures/6by7avgret.png')

visualize_progress(dfs['8by9'], 'DQN-8x9', 'AVERAGE LOSS', './figures/8by9avgloss.png')
visualize_progress(dfs['8by9'], 'DQN-8x9', 'AVERAGE RETURN', './figures/8by9avgret.png')

visualize_progress(dfs['10by11'], 'DQN-10x11', 'AVERAGE LOSS', './figures/10by11avgloss.png')
visualize_progress(dfs['10by11'], 'DQN-10x11', 'AVERAGE RETURN', './figures/10by11avgret.png')

visualize_progress(dfs['12by13'], 'DQN-12x13', 'AVERAGE LOSS', './figures/12by13avgloss.png')
visualize_progress(dfs['12by13'], 'DQN-12x13', 'AVERAGE RETURN', './figures/12by13avgret.png')

TypeError: len() takes exactly one argument (0 given)

## Agent performance evaluation
Our goal is to evaluate the performance of each model against each other per grid size of the Environment.
If there is a significant advantage of one model over another based on statistics it can be called better.

### Agent initialization
We firstly initialize every agent we have.

In [None]:
minimax = MinimaxAgent()
random = RandomAgent()
lossavoid = AvoidNextLossAgent()

dqn = {}
dqn['6by7'] = ModelAgent('d6by7')
dqn['8by9'] = ModelAgent('d8by9')
dqn['10by11'] = ModelAgent('d10by11')
dqn['12by13'] = ModelAgent('d12by13')

neuroevo = NeuroevolutionAgent('Evolved_ANN.pickle')

### Evaluation function initialization
We also initialize the evaluation function with parameters which we want to use for minimax.

In [None]:
params = None #TODO
eval_func = EvaluationFunction(params)

### Writing the match functions and the pandas dataframe for storing the information

In [None]:
def match_agents(agent_a, agent_b, gridsize : tuple, playouts=200):
    '''
    Function for playout between two different agents on a Connect-Four grid of fixed size.
    Agents A and B will play against each other and statistics will be collected and stored in their object instance.
    Note that on uneven (even by index since it starts from 0) playouts A will be first to move and on even playouts B will have the first move.
    
    :param agent_a (): An agent who can play on the specified gridsize.
    :param agent_b (): An agent who can play on the specified gridsize.
    :param gridsize (tuple): A valid gridsize for the environment and agents, accepts (6,7), (8,9), (10,11), (12,13).
    :param playouts (int): The number of games to be played.
    '''
    
    env = ConnectFourEnv(gridsize)
    
    time_a = []
    time_b = []
    for i in tqdm(range(playouts),desc='Matches processed:'):
        env.reset() #reset environment once
        
        while not env.terminal:
            if (env.turn and (i % 2) == 0) or (not env.turn and (i % 2) == 1):
                starttime = datetime.now()
                action = agent_a.select_move(env, eval_func=eval_func)
                time_a += [(datetime.now()-starttime).microseconds*(10**-6)]
                env.step(action)
                
            elif (not env.turn and (i % 2) == 0) or (env.turn and (i % 2) == 1):
                starttime = datetime.now()
                action = agent_b.select_move(env, eval_func=eval_func)
                time_b += [(datetime.now()-starttime).microseconds*(10**-6)]
                env.step(action)
        
        #after each played match statistics of wins and time are gathered
        if (i % 2) == 0:
            if env.winner:
                agent_a.wins += 1
                agent_b.losses += 1
                
            else:
                agent_a.losses += 1
                agent_b.wins += 1
        else:
            if env.winner:
                agent_b.wins += 1
                agent_a.losses += 1
                
            else:
                agent_b.losses += 1
                agent_a.wins += 1
    
    #after every match was played the time statistics are added
    agent_a.average_speed = float(np.mean(time_a))
    agent_b.average_speed = float(np.mean(time_b))

### Now for each agent we can start to collect data and store it in a DataFrame

In [None]:
columns = ['AGENT A','AGENT B', 'GRID', 'A', 'B']
df = pd.DataFrame(columns=columns)
df

Unnamed: 0,AGENT A,AGENT B,GRID,A,B


In [None]:
name_for_idx = {0 : 'RandomAgent', 1 : 'AvoidNextLossAgent', 2 : 'MinimaxAgent', 3 : 'ModelAgent', 4 : 'NeuroevolutionAgent'}
for gridsize in [(6, 7), (8, 9), (10, 11), (12, 13)]:
    for i, agent_a in enumerate([random, lossavoid, minimax, dqn, neuroevo]):
        for j, agent_b in enumerate([random, lossavoid, minimax, dqn, neuroevo]):
            if i != j and j > i and (not (((i or j) == 4) and gridsize != (6, 7))):
                print(f'Putting {name_for_idx[i]} vs {name_for_idx[j]}:')
                match_agents(agent_a, agent_b, gridsize)
                appendix = pd.DataFrame([[name_for_idx[i],name_for_idx[j],str(gridsize),agent_a.wins,agent_b.wins]],columns=columns)
                print(agent_a)
                print(agent_b)
                print('\n')
                agent_a.reset()
                agent_b.reset()
                df = df.append(appendix)

Putting RandomAgent vs AvoidNextLossAgent:


Matches processed:: 100%|███████████████████████████████████████████████████████████| 200/200 [00:01<00:00, 131.10it/s]
  df = df.append(appendix)


Agent performance (16, 184, 200) with avg speed 4.312729498164015e-05 secs/move.
Agent performance (184, 16, 200) with avg speed 0.000777435975609756 secs/move.


Putting RandomAgent vs MinimaxAgent:


Matches processed::   4%|██▏                                                           | 7/200 [00:24<11:09,  3.47s/it]

KeyboardInterrupt



In [12]:
match_agents(random, dqn['8by9'], (8,9))

Matches processed::   0%|                                                                      | 0/200 [00:00<?, ?it/s]


ValueError: Model does not support input shape of passed environment.

In [None]:
df.to_csv('./performance_data.csv',index=None)

In [None]:
# ADD ANALYSIS FUNCTIONS HERE

In [None]:
# ADD VISUALIZATION FUNCTIONS HERE

In [None]:
# CALL VISUALIZATION AND ANALYSIS FUNCTIONS HERE