# <center>ANALYSIS OF PERFORMANCE VARIABLES</center>

<h3>Reading all CSV files related to the experiments</h3>

In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

In [2]:
def get_rewards(vdf, tdf):
    """
    Add per-episode running reward totals to ``vdf`` in place.

    Parameters
    ----------
    vdf : pandas.DataFrame
        Step-by-step data. Must contain an 'Agent' column and a 'Rewards'
        column. Rows whose 'Agent' value is 'F' count towards the female
        total; every other row counts towards the male total.
    tdf : pandas.DataFrame
        Terminal-state data. The cumulative sum of its first column gives the
        1-based row counts of ``vdf`` after which the running totals are
        reset to zero.

    Returns
    -------
    None
        ``vdf`` gains (or has overwritten) the columns 'Male Rewards',
        'Female Rewards' and 'Total Rewards'.
    """
    # Cumulative step counts of the first column only, selected by position.
    # (Positional ``series[0]`` lookups on a label-indexed Series are deprecated.)
    if tdf.shape[1] > 0:
        terminal_points = tdf.iloc[:, 0].cumsum().tolist()
    else:
        terminal_points = []

    agents = vdf['Agent'].tolist()
    rewards = vdf['Rewards'].tolist()

    male_running = []
    female_running = []
    m_total = 0
    f_total = 0
    j = 0  # position of the next terminal point that has not been reached yet
    for i, (agent, reward) in enumerate(zip(agents, rewards)):
        if agent == 'F':
            f_total += reward
        else:
            m_total += reward
        male_running.append(m_total)
        female_running.append(f_total)
        # Terminal points are consumed strictly in order: the totals reset only
        # when the number of rows processed equals the next pending point.
        if j < len(terminal_points) and i + 1 == terminal_points[j]:
            m_total = 0
            f_total = 0
            j += 1

    # Assign whole columns at once instead of writing one cell per row.
    vdf['Male Rewards'] = male_running
    vdf['Female Rewards'] = female_running
    vdf['Total Rewards'] = vdf['Male Rewards'] + vdf['Female Rewards']
# Column labels applied to every visualization CSV (the files are read without a header row).
cols = ['n', 'Rewards', 'Distance', 'Agent']
var_data = []
terminal_state_data = []
# The first run's files carry no numeric suffix; the remaining runs are numbered 1 through 41.
for suffix in ['', *range(1, 42)]:
    var_data.append(pd.read_csv(filepath_or_buffer=f'viz/visualization{suffix}.csv', header=None))
    var_data[-1].columns = cols
    terminal_state_data.append(pd.read_csv(filepath_or_buffer=f'viz/terminal_states{suffix}.csv'))
    # Adds the running reward columns to the frame that was just loaded.
    get_rewards(var_data[-1], terminal_state_data[-1])

<h3> Processing commands to look up different datasets</h3>

In [3]:
def index(rl, exp, seed):
    """
    Translate an (RL space, experiment, seed) triple into a flat list position.

    rl   -- one of 'ss', 'vs', 'ms'
    exp  -- one of '1a', '1b', '1c', '2', '3a', '3b', '4'
    seed -- 1 or 42

    Raises ValueError when an argument is not one of the values listed above.
    """
    rl_spaces = ['ss', 'vs', 'ms']
    experiments = ['1a', '1b', '1c', '2', '3a', '3b', '4']
    seeds = [1, 42]
    rl_pos = rl_spaces.index(rl)
    exp_pos = experiments.index(exp)
    seed_pos = seeds.index(seed)
    # RL space is the slowest-varying key (14 slots each), then experiment
    # (2 slots each), then seed.
    return 14 * rl_pos + 2 * exp_pos + seed_pos

In [4]:
def data(rl, exp, seed):
    """
    Look up the entry of var_data for the given RL space, experiment and seed.
    """
    position = index(rl, exp, seed)
    return var_data[position]
def terminals(rl, exp, seed):
    """
    Look up the entry of terminal_state_data for the given RL space, experiment and seed.
    """
    position = index(rl, exp, seed)
    return terminal_state_data[position]

<h3>Auxiliary plotting functions for visualization</h3>

In [5]:
def reward_plot_compare_rl(exp, seed):
    """
    Compare the per-agent running rewards of the three RL spaces ('ms', 'vs',
    'ss') for the given experiment and seed.

    The three result frames are outer-merged on the step column 'n', the six
    male/female reward series are drawn as a line chart, the chart is shown,
    and it is also written to 'rpcr_{exp}_{seed}.png'.
    """
    df1 = data('ms',exp,seed)
    df2 = data('vs',exp,seed)
    df3 = data('ss',exp,seed)
    # Merge on the 'n' column itself. (An earlier DataFrame.join(on='n') attempt
    # was removed: its result was discarded, and join matches 'n' against the
    # other frame's index rather than its 'n' column.)
    df = df1.merge(df2, on='n', how='outer', suffixes=(' ms', ' vs'))
    df = df.merge(df3, on='n', how='outer', suffixes=(None, ' ss'))
    # Sanity check: the merged frame should be exactly as long as the longest input.
    assert(df.shape[0] == max(df1.shape[0], df2.shape[0], df3.shape[0]))
    # The 'ss' reward columns are still unsuffixed at this point; add labelled copies.
    df['Male Rewards ss'] = df['Male Rewards']
    df['Female Rewards ss'] = df['Female Rewards']
    fig = px.line(df, x='n', y=['Male Rewards ms', 'Female Rewards ms',
                               'Male Rewards vs', 'Female Rewards vs',
                               'Male Rewards ss', 'Female Rewards ss'])
    fig.update_xaxes(title_text='Number of steps')
    fig.update_yaxes(title_text='Agent reward')
    fig.update_layout(title_text=f'Comparison of agent rewards for all RL spaces for experiment {exp} and seed {seed}')
    fig.show()
    pio.write_image(fig, f'rpcr_{exp}_{seed}.png')
    

In [6]:
def terminal_state_compare_rl(exp, seed):
    """
    Compare the terminal-state step counts of the three RL spaces ('ms', 'vs',
    'ss') for the given experiment and seed.

    The three terminal-state frames are aligned by row position, their 'Steps'
    columns are drawn as a grouped bar chart, the chart is shown, and it is
    also written to 'tscr_{exp}_{seed}.png'.
    """
    # Work on copies: terminals() hands back the frames stored in
    # terminal_state_data, and the helper column 'i' must not leak into them.
    df1 = terminals('ms',exp,seed).copy()
    df1['i'] = df1.index
    df2 = terminals('vs',exp,seed).copy()
    df2['i'] = df2.index
    df3 = terminals('ss',exp,seed).copy()
    df3['i'] = df3.index
    df = df1.merge(df2, on='i', how='outer', suffixes=(' ms', ' vs'))
    df = df.merge(df3, on='i', how='outer', suffixes=(None, ' ss'))
    # Sanity check: the merged frame should be exactly as long as the longest input.
    assert(df.shape[0] == max(df1.shape[0], df2.shape[0], df3.shape[0]))
    # The 'ss' step column is still unsuffixed at this point; add a labelled copy.
    df['Steps ss'] = df['Steps']
    fig1 = px.bar(df, y=['Steps ms', 'Steps ss', 'Steps vs'], orientation = "v", barmode = 'group')
    fig1.update_xaxes(title_text='Terminal State')
    fig1.update_yaxes(title_text='Steps to reach state')
    fig1.update_layout(title_text=f'Comparison of terminal state steps for all RL spaces for experiment {exp} and seed {seed}')
    fig1.show()
    pio.write_image(fig1, f'tscr_{exp}_{seed}.png')

In [18]:
def terminal_state_compare_exp3(rl, seed):
    """
    Compare the terminal-state step counts of experiments 1c, 3a and 3b for
    the given RL space and seed.

    The three terminal-state frames are aligned by row position, their 'Steps'
    columns are drawn as a grouped bar chart, the chart is shown, and it is
    also written to 'tsc3_{rl}_{seed}.png'.
    """
    # Work on copies: terminals() hands back the frames stored in
    # terminal_state_data, and the helper column 'i' must not leak into them.
    df1 = terminals(rl,'1c',seed).copy()
    df1['i'] = df1.index
    df2 = terminals(rl,'3a',seed).copy()
    df2['i'] = df2.index
    df3 = terminals(rl,'3b',seed).copy()
    df3['i'] = df3.index
    df = df1.merge(df2, on='i', how='outer', suffixes=(' 1c', ' 3a'))
    df = df.merge(df3, on='i', how='outer', suffixes=(None, ' 3b'))
    # Sanity check: the merged frame should be exactly as long as the longest input.
    assert(df.shape[0] == max(df1.shape[0], df2.shape[0], df3.shape[0]))
    # The 3b step column is still unsuffixed at this point; add a labelled copy.
    df['Steps 3b'] = df['Steps']
    fig1 = px.bar(df, y=['Steps 1c', 'Steps 3a', 'Steps 3b'], orientation = "v", barmode = 'group')
    fig1.update_xaxes(title_text='Terminal State')
    fig1.update_yaxes(title_text='Steps to reach state')
    fig1.update_layout(title_text=f'Comparison of terminal state steps for experiment 3 versions and seed {seed} using {rl} space')
    fig1.show()
    pio.write_image(fig1, f'tsc3_{rl}_{seed}.png')

In [8]:
def terminal_state_compare_exp2(rl, seed):
    """
    Compare the terminal-state step counts of experiments 1c and 2 for the
    given RL space and seed.

    The two terminal-state frames are aligned by row position, their 'Steps'
    columns are drawn as a grouped bar chart, the chart is shown, and it is
    also written to 'tsc2_{rl}_{seed}.png'.
    """
    # Work on copies: terminals() hands back the frames stored in
    # terminal_state_data, and the helper column 'i' must not leak into them.
    df1 = terminals(rl,'1c',seed).copy()
    df1['i'] = df1.index
    df2 = terminals(rl,'2',seed).copy()
    df2['i'] = df2.index
    df = df1.merge(df2, on='i', how='outer', suffixes=(' 1c', ' 2'))
    # Same sanity check as the other comparison helpers: the merged frame
    # should be exactly as long as the longest input.
    assert(df.shape[0] == max(df1.shape[0], df2.shape[0]))
    fig1 = px.bar(df, y=['Steps 1c', 'Steps 2'], orientation = "v", barmode = 'group')
    fig1.update_xaxes(title_text='Terminal State')
    fig1.update_yaxes(title_text='Steps to reach state')
    fig1.update_layout(title_text=f'Comparison of terminal state steps for experiment 2 versions and seed {seed} using {rl} space')
    fig1.show()
    pio.write_image(fig1, f'tsc2_{rl}_{seed}.png')

In [9]:
def reward_compare_total_exp1(rl, seed):
    """
    Line chart of the running total reward for experiments 1a, 1b and 1c,
    using the given RL space and seed.

    Only the 'Total Rewards' series of each run is drawn, so the chart is less
    busy than the per-agent version. The chart is shown and also written to
    'rct1_{rl}_{seed}.png'.
    """
    run_1a = data(rl, '1a', seed)
    run_1b = data(rl, '1b', seed)
    run_1c = data(rl, '1c', seed)
    merged = (
        run_1a
        .merge(run_1b, on='n', how='outer', suffixes=(' 1a', ' 1b'))
        .merge(run_1c, on='n', how='outer', suffixes=(None, ' 1c'))
    )
    # Sanity check: the merged frame should be exactly as long as the longest run.
    assert len(merged) == max(len(run_1a), len(run_1b), len(run_1c))
    # The 1c total is still unsuffixed after the merge; add a labelled copy.
    merged['Total Rewards 1c'] = merged['Total Rewards']
    series = ['Total Rewards 1a', 'Total Rewards 1b', 'Total Rewards 1c']
    fig = px.line(merged, x='n', y=series)
    fig.update_xaxes(title_text='Number of steps')
    fig.update_yaxes(title_text='Agent reward')
    chart_title = f'Comparison of total reward for experiment 1 versions and seed {seed} using {rl} space'
    fig.update_layout(title_text=chart_title)
    fig.show()
    pio.write_image(fig, f'rct1_{rl}_{seed}.png')

In [10]:
def reward_compare_exp1(rl, seed):
    """
    Line chart of the per-agent running rewards for experiments 1a, 1b and 1c,
    using the given RL space and seed.

    Six series are drawn (male and female for each experiment). The chart is
    shown and also written to 'rc1_{rl}_{seed}.png'.
    """
    run_1a = data(rl, '1a', seed)
    run_1b = data(rl, '1b', seed)
    run_1c = data(rl, '1c', seed)
    merged = (
        run_1a
        .merge(run_1b, on='n', how='outer', suffixes=(' 1a', ' 1b'))
        .merge(run_1c, on='n', how='outer', suffixes=(None, ' 1c'))
    )
    # Sanity check: the merged frame should be exactly as long as the longest run.
    assert len(merged) == max(len(run_1a), len(run_1b), len(run_1c))
    # The 1c reward columns are still unsuffixed after the merge; add labelled copies.
    for agent_column in ('Male Rewards', 'Female Rewards'):
        merged[f'{agent_column} 1c'] = merged[agent_column]
    series = [
        'Male Rewards 1a', 'Female Rewards 1a',
        'Male Rewards 1b', 'Female Rewards 1b',
        'Male Rewards 1c', 'Female Rewards 1c',
    ]
    fig = px.line(merged, x='n', y=series)
    fig.update_xaxes(title_text='Number of steps')
    fig.update_yaxes(title_text='Agent reward')
    chart_title = f'Comparison of agent rewards for experiment 1 versions and seed {seed} using {rl} space'
    fig.update_layout(title_text=chart_title)
    fig.show()
    pio.write_image(fig, f'rc1_{rl}_{seed}.png')

In [11]:
def reward_compare_total_exp3(rl, seed):
    """
    Line chart of the running total reward for experiments 1c, 3a and 3b,
    using the given RL space and seed.

    Only the 'Total Rewards' series of each run is drawn, so the chart is less
    busy than a per-agent plot. The chart is shown and also written to
    'rct3_{rl}_{seed}.png'.
    """
    run_1c = data(rl, '1c', seed)
    run_3a = data(rl, '3a', seed)
    run_3b = data(rl, '3b', seed)
    merged = (
        run_1c
        .merge(run_3a, on='n', how='outer', suffixes=(' 1c', ' 3a'))
        .merge(run_3b, on='n', how='outer', suffixes=(None, ' 3b'))
    )
    # Sanity check: the merged frame should be exactly as long as the longest run.
    assert len(merged) == max(len(run_1c), len(run_3a), len(run_3b))
    # The 3b total is still unsuffixed after the merge; add a labelled copy.
    merged['Total Rewards 3b'] = merged['Total Rewards']
    series = ['Total Rewards 1c', 'Total Rewards 3a', 'Total Rewards 3b']
    fig = px.line(merged, x='n', y=series)
    fig.update_xaxes(title_text='Number of steps')
    fig.update_yaxes(title_text='Agent reward')
    chart_title = f'Comparison of total reward for experiment 3 versions and seed {seed} using {rl} space'
    fig.update_layout(title_text=chart_title)
    fig.show()
    pio.write_image(fig, f'rct3_{rl}_{seed}.png')

In [12]:
def reward_compare_total_exp2(rl, seed):
    """
    Line chart of the running total reward for experiment 1c against
    experiment 2, using the given RL space and seed.

    Only the 'Total Rewards' series of each run is drawn. The chart is shown
    and also written to 'rct2_{rl}_{seed}.png'.
    """
    run_1c = data(rl, '1c', seed)
    run_2 = data(rl, '2', seed)
    merged = run_1c.merge(run_2, on='n', how='outer', suffixes=(' 1c', ' 2'))
    # Sanity check: the merged frame should be exactly as long as the longer run.
    assert len(merged) == max(len(run_1c), len(run_2))
    series = ['Total Rewards 1c', 'Total Rewards 2']
    fig = px.line(merged, x='n', y=series)
    fig.update_xaxes(title_text='Number of steps')
    fig.update_yaxes(title_text='Agent reward')
    chart_title = f'Comparison of total reward for experiment 2 versions and seed {seed} using {rl} space'
    fig.update_layout(title_text=chart_title)
    fig.show()
    pio.write_image(fig, f'rct2_{rl}_{seed}.png')

In [13]:
def distance_compare_exp(exp, seed):
    """
    Grouped histogram of the 'Distance' column for the three RL spaces
    ('ms', 'ss', 'vs') in the given experiment and seed.

    The chart is shown and also written to 'dc_{exp}_{seed}.png'.
    """
    run_ms = data('ms', exp, seed)
    run_vs = data('vs', exp, seed)
    run_ss = data('ss', exp, seed)
    merged = (
        run_ms
        .merge(run_vs, on='n', how='outer', suffixes=(' ms', ' vs'))
        .merge(run_ss, on='n', how='outer', suffixes=(None, ' ss'))
    )
    # Sanity check: the merged frame should be exactly as long as the longest run.
    assert len(merged) == max(len(run_ms), len(run_vs), len(run_ss))
    # The 'ss' distance column is still unsuffixed after the merge; add a labelled copy.
    merged['Distance ss'] = merged['Distance']
    series = ['Distance ms', 'Distance ss', 'Distance vs']
    fig = px.histogram(merged, x=series, barmode='group')
    fig.update_xaxes(title_text='Manhattan Distance')
    fig.update_yaxes(title_text='Frequency')
    chart_title = f'Comparison of agent L1 distances for experiment {exp} and seed {seed}'
    fig.update_layout(title_text=chart_title)
    fig.show()
    pio.write_image(fig, f'dc_{exp}_{seed}.png')

We called the auxiliary plotting functions to generate the Plotly graphs for the analysis of the performance variables for the experiments.