# Plotting Rideshare results for MOHITO

imports

In [None]:
import pandas as pd
from scipy import stats
import math
import matplotlib.pyplot as plt
from functools import reduce
import operator

parameters

In [None]:
# Input spreadsheet. It should contain the concatenated rideshare data
# (across checkpoints) together with the baselines and TaO-PG-ELLA.
result_file = "mohitoR/gayathri/TaO-Thesis/spreadsheets/performance_evaluation_mohito-fcfs-ntf.csv"

# Colours handed to the bar plots, one per plotted column.
colors = [
    "#009ADE",
    "#AF58BA",
    "#FFC61E",
    "#F28522",
]

# Column order used when reindexing the pivot tables for plotting.
order = [
    "MOHITO",
    "FCFS",
    "NTF",
]

## Preprocessing

In [None]:
df = pd.read_csv(result_file)

# Sanity check 1: number of distinct seeds for every (ag, ol) combination.
seeds_per_setting = df.groupby(['ag', 'ol'])['seed'].nunique()
print(seeds_per_setting)

# Sanity check 2: the policy labels present in the file.
# Numerical labels are mohito policies (see TaO-PG-ELLA for checkpoint nomenclature).
policy_labels = df['type'].unique()
print(policy_labels)

Determine the best-performing MOHITO policy for each (ag, ol) setting

In [None]:
# Keep only the MOHITO checkpoints: every row whose label is not a baseline.
baseline_types = ['random', 'simple_time', 'simple_distance', 'tao-pgella', 'NTF', 'FCFS', 'Random']
mohito_df = df[~df['type'].isin(baseline_types)]

group = mohito_df.groupby(['ag', 'ol'])

# (ag, ol) -> (best policy label, mean episode reward, std of episode reward)
best_policy = {}

for (ag, ol), group_df in group:
    # sum over all agents and all steps -> one total reward per (policy, seed)
    episode_reward = group_df.groupby(['type', 'seed'])['reward'].sum()

    # mean / std over all episodes (seeds) of each policy.
    # These must be taken from the per-episode totals above, not from the raw
    # per-row rewards, otherwise the summation step has no effect.
    per_policy = episode_reward.groupby(level='type')
    mean_reward = per_policy.mean()
    std_reward = per_policy.std()

    # find the best policy
    best = mean_reward.idxmax()
    best_policy[(ag, ol)] = (best, mean_reward[best], std_reward[best])

print("Best policies (ag, ol) -> (policy, mean reward, std)")
print(best_policy)

Generate plotting df

In [None]:
df = pd.read_csv(result_file)

#is a baseline
#can include random here...
baseline_mask = df['type'].isin(['simple_time', 'simple_distance', 'tao-pgella', 'FCFS', 'NTF', 'Random'])

#is a best mohito policy: rows of the winning checkpoint of each (ag, ol) setting.
# Starting from an all-False mask keeps the reduction valid when best_policy is empty.
mohito_mask = reduce(
    operator.or_,
    (
        (df['type'] == policy) & (df['ag'] == ag) & (df['ol'] == ol)
        for (ag, ol), (policy, *_) in best_policy.items()
    ),
    pd.Series(False, index=df.index),
)

# Single .loc assignment instead of chained indexing (df['type'][mask] = ...),
# which may write to a temporary and is a no-op under pandas Copy-on-Write.
df.loc[mohito_mask, 'type'] = 'MOHITO'

# Explicit copy so the relabelling below acts on an independent frame.
plotting_df = df[baseline_mask | mohito_mask].copy()

# Raw policy label -> display label used in the figures.
rename = {
    'random': 'Random',
    'simple_time': 'FCFS',
    'simple_distance': 'NTF',
    'MOHITO': 'MOHITO',
    'FCFS': 'FCFS',
    'NTF': 'NTF',
    'mohito': 'MOHITO'
}

# Labels without an entry in `rename` (e.g. 'tao-pgella' or 'Random', both of
# which baseline_mask lets through) are kept as they are instead of raising KeyError.
plotting_df['type'] = plotting_df['type'].map(lambda label: rename.get(label, label))

display(plotting_df)

## Plotting

### Rewards

In [None]:
rdf = plotting_df.copy()

openness_levels = [1, 2, 3]
fig, ax = plt.subplots(len(openness_levels), 1, figsize=(10, 6))

# One panel per openness level; each panel shows mean episode reward per
# agent group (rows of the pivot) and policy (columns, in `order`).
for panel, ol in zip(ax, openness_levels):
    rdfol = rdf[rdf['ol'] == ol]
    # total reward per episode: sum over all agents and steps of a seed
    rdfol = rdfol.groupby(['ag', 'type', 'seed'])['reward'].sum().reset_index()

    rdpiv = rdfol.pivot_table(index='ag', columns='type', values='reward', aggfunc='mean').reindex(columns=order)
    # std / count are re-aligned to rdpiv because pivot_table drops all-NaN entries
    rdpiv_std = rdfol.pivot_table(index='ag', columns='type', values='reward', aggfunc='std').reindex(index=rdpiv.index, columns=order)
    rdpiv_n = rdfol.pivot_table(index='ag', columns='type', values='reward', aggfunc='count').reindex(index=rdpiv.index, columns=order)

    # 95% ci half-width: t_{0.975, n-1} * s / sqrt(n), with n taken from the
    # actual number of episodes in each cell rather than a hard-coded constant.
    t_crit = stats.t.ppf(0.975, (rdpiv_n - 1).to_numpy())
    rdpiv_ci = rdpiv_std * t_crit / rdpiv_n ** 0.5

    rdpiv.plot(kind='bar', yerr=rdpiv_ci, ax=panel, color=colors, capsize=5)
    panel.set_title(f'Openness Level {ol}')
    panel.get_legend().remove()
    panel.set_xticks([])
    panel.set_xlabel('')

# Only the bottom panel carries x tick labels: one tick per agent group
# (taken from the bottom panel's pivot index, not from the number of policies).
agent_groups = list(rdpiv.index)
ax[-1].set_xticks(range(len(agent_groups)))
ax[-1].set_xticklabels([f'{ag} Agents' for ag in agent_groups], rotation=0)
ax[-1].set_xlabel('Agent Group')
ax[1].set_ylabel('Mean Reward (with CI)')
plt.tight_layout()


### Pooling efficacy

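(Note: the layout below is designed for the full grid of agent-group sizes (`ag`) and openness levels (`ol`); if some combinations are missing from the data, the bars may not group as intended.)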

In [None]:
pdf = plotting_df.copy()

# MOHITO rows that carry a pooling status other than 'none'
pdf = pdf[(pdf['type'] == 'MOHITO') & (pdf['pooling_status'] != 'none')][['ag', 'agent', 'ol', 'seed', 'pooling_status', 'type']]

# after count(), the 'type' column holds the number of rows (steps) per
# (ag, agent, ol, seed, pooling_status)
pdfg = pdf.groupby(['ag', 'agent', 'ol', 'seed', 'pooling_status']).count().reset_index()

pdfg['pooling_status'] = pdfg['pooling_status'].str.replace('single', 'Single task execution')
pdfg['pooling_status'] = pdfg['pooling_status'].str.replace('pooling', 'Pooled task execution')

# print(pdfg)

table = pdfg.copy().pivot_table(index=['ag', 'ol'], columns=['pooling_status'], values='type', aggfunc='mean', fill_value=0)
stdtable = pdfg.copy().pivot_table(index=['ag', 'ol'], columns=['pooling_status'], values='type', aggfunc='std', fill_value=0)

# 95% ci half-width.
# NOTE(review): 2.04523 is the t critical value for 29 degrees of freedom and
# sqrt(30) assumes 30 samples per cell - confirm against the data.
stdtable = 2.04523 * stdtable / math.sqrt(30)

# Local palette: deliberately not assigned to the notebook-wide `colors`,
# so re-running the rewards cell afterwards still sees its full colour list.
pooling_colors = ['#009ADE', '#AF58BA']

table.plot(kind='bar', stacked=False, yerr=stdtable, capsize=3, color=pooling_colors, figsize=(7, 4), width=0.7)

# One bar group per (ag, ol) row of the pivot; each tick shows the openness level.
# (assumes `ol` is stored as integers so the labels read '1', '2', ... - TODO confirm)
plt.xticks(range(len(table.index)), [str(ol) for _, ol in table.index], rotation=0, size=12)
plt.yticks(size=12)
plt.ylabel('Average steps per agent per episode', size=12)
plt.xlabel('Openness Level', size=14)

ax = plt.gca()
# legend anchor is given in data coordinates (bbox_transform=ax.transData)
legend = plt.legend(prop={'size': 14}, title='', framealpha=0.0, bbox_to_anchor=(2.5, 45.0), bbox_transform=ax.transData)
legend.get_texts()[0].set_text('Pooled task')
legend.get_texts()[1].set_text('Single task')

# Dashed separator before every new agent group (except the first) and a
# text label above the first bar group of each agent group.
ag_values = list(table.index.get_level_values('ag'))
for pos, ag in enumerate(ag_values):
    if pos > 0 and ag == ag_values[pos - 1]:
        continue
    if pos > 0:
        plt.axvline(x=pos - 0.5, color='black', linestyle='--', linewidth=2, label='_nolegend_')
    plt.text(pos + 0.15, 62, f'{ag} Agents', size=14)

plt.tight_layout()

plt.savefig('pooling-efficacy-updated.pdf')

### Task duration

In [None]:
tdf = plotting_df.copy()

# handle string tensor logs: pull the value out of entries shaped like "tensor(<value>)"
tdf['mohito-entry-step'] = tdf['entry-step'].str.extract(
    r'tensor\((.*)\)', expand=False).astype(float)

# Explicit copy so the column assignments below act on an independent frame
# rather than on a filtered slice.
tdf = tdf[tdf['ride_status'] == 'done'].copy()

# MOHITO and Random rows take their entry step from the parsed tensor string.
tensor_logged = tdf['type'].isin(['MOHITO', 'Random'])
tdf.loc[tensor_logged, 'entry-step'] = tdf.loc[tensor_logged, 'mohito-entry-step']
tdf = tdf.drop(columns=['mohito-entry-step'])
tdf['step'] = tdf['step'].astype(float)
tdf['entry-step'] = tdf['entry-step'].astype(float)
#---------

# ride duration in steps; filter out the noop actions
tdf['duration'] = tdf['step'] - tdf['entry-step']
tdf = tdf[tdf['action-label'] != 'noop']

# mean duration per (ol, type, ag, action-label)
gp_filt = tdf[['ol', 'ag', 'type', 'seed', 'duration', 'action-label']].groupby(['ol', 'type', 'ag', 'action-label']).mean().reset_index()

# mean / std of those group means per openness level and policy
gp_piv = gp_filt.pivot_table(index='ol',
                             columns='type',
                             values='duration',
                             aggfunc='mean').reindex(columns=order)
gp_std = gp_filt.pivot_table(index='ol',
                             columns='type',
                             values='duration',
                             aggfunc='std').reindex(columns=order)

# 95% ci half-width.
# NOTE(review): the std above is taken across the (ag, action-label) group
# means, while 2.04523 / sqrt(30) assumes 30 samples - confirm this is intended.
gp_std = 2.04523 * gp_std / math.sqrt(30)

gp_piv.plot(kind='bar', stacked=False, color=['#009ADE','#AF58BA','#FFC61E','red'], yerr=gp_std, capsize=5, figsize=(7, 4), fontsize=14, width=0.8)
plt.ylabel('Mean Ride Duration (in timesteps)', fontsize=14)
plt.xlabel('Openness Level', fontsize=16,)
# plt.legend(title='', fontsize=16, loc='upper left', labels=['MOHITO', 'FCFS', 'NTF'], framealpha=0.0, ncols=3, prop={'size':13})
plt.xticks(rotation=0, fontsize=14)
plt.yticks(fontsize=14)
plt.tight_layout()
plt.savefig('ride_duration.pdf', bbox_inches='tight')