Plotting validation

Plotting testing

In [None]:
import pandas as pd
import os, math

# Evaluation output folder per openness level. Negative keys point at the
# ablation runs of the matching positive level.
policy_eval_outputs = {
    1: 'results/OL1_frank_no_drop/testing_eval',
    2: 'results/OL2_frank_no_drop/testing_eval',
    3: 'results/OL3_frank_no_drop/testing_eval',

    -1: 'results/OL1_frank_no_drop_ablation/testing_eval',
    -2: 'results/OL2_frank_no_drop_ablation/testing_eval',
    -3: 'results/OL3_frank_no_drop_ablation/testing_eval',
}

baseline_output_folder = 'baseline_output'

# Filled in by the loop below with the keys 'mean', 'ci', 'checkpoint' and 'dfs'.
policy_best_checkpoints = {
    1: {},
    2: {},
    3: {},
    -1: {},
    -2: {},
    -3: {},
}

for openness_level, file_path in policy_eval_outputs.items():

    # Pair every CSV with the name of the directory that contains it; the
    # policy (checkpoint) label is parsed from that directory name.
    # os.path.basename is used instead of splitting on '/' so the lookup does
    # not depend on the platform's path separator.
    eval_frames = [
        (pd.read_csv(os.path.join(root, file)), os.path.basename(root))
        for root, _, files in os.walk(file_path)
        for file in files if file.endswith('.csv')
    ]

    dfs = pd.concat(
        [df.assign(policy=folder.split(';')[1].split('_')[0]) for df, folder in eval_frames],
        ignore_index=True,
    )

    reward_cols = [col for col in dfs.columns if 'rewards' in col]

    # The description string is '_'-separated; fields 2 and 3 carry the
    # starting state and the episode as '<name>;<int>'.
    dfs['openness level'] = openness_level
    dfs['starting state'] = dfs['description'].apply(lambda x: int(x.split('_')[2].split(';')[1]))
    dfs['episodes'] = dfs['description'].apply(lambda x: int(x.split('_')[3].split(';')[1]))

    # Keep the full per-step frame (all columns) for the later cells.
    original_dfs = dfs.copy()

    # Select the needed columns directly (without 'description') instead of
    # dropping in place on a slice, which raises SettingWithCopyWarning.
    dfs = dfs[['step', 'policy', 'openness level', 'starting state', 'episodes'] + reward_cols]

    # Sum over steps, then average the reward columns into one value per run.
    dfs = dfs.groupby(['policy', 'openness level', 'episodes', 'starting state']).sum().reset_index()
    dfs['final_rewards'] = dfs[reward_cols].mean(axis=1)

    # Average over starting states / episodes and keep only the best policy.
    pivot = pd.pivot_table(dfs, index=['openness level'], columns=['policy'], values='final_rewards', aggfunc='mean')
    best_policy = pivot.idxmax(axis=1)
    pivot = pivot[best_policy]

    # Confidence-interval half-width: t * std / sqrt(n).
    # NOTE(review): 2.04523 looks like the two-sided 95% Student-t critical
    # value for 29 degrees of freedom, while n is hard-coded to 60 -- confirm
    # both against the evaluation setup.
    pivot_std = pd.pivot_table(dfs, index=['openness level'], columns=['policy'], values='final_rewards', aggfunc='std')
    pivot_std = 2.04523 * pivot_std / math.sqrt(60)
    pivot_std = pivot_std[best_policy]

    policy_best_checkpoints[openness_level]['mean'] = pivot
    policy_best_checkpoints[openness_level]['ci'] = pivot_std
    policy_best_checkpoints[openness_level]['checkpoint'] = best_policy
    policy_best_checkpoints[openness_level]['dfs'] = original_dfs

Loading baselines

In [None]:
import pandas as pd
import math

# Load every baseline CSV found under the baseline output folder.
dfs = pd.concat([
    pd.read_csv(os.path.join(root, file))
    for root, _, files in os.walk(baseline_output_folder) for file in files
    if file.endswith('.csv')
], ignore_index=True)

reward_cols = [col for col in dfs.columns if 'rewards' in col]

# Explicit copy: the columns added below would otherwise be written onto a
# slice of the loaded frame and raise SettingWithCopyWarning.
dfs = dfs[['description', 'step'] + reward_cols].copy()

# The description string is '_'-separated; each field is '<name>;<value>'.
dfs['policy'] = dfs['description'].apply(
    lambda x: x.split('_')[0].split(';')[1])
dfs['openness level'] = dfs['description'].apply(
    lambda x: int(x.split('_')[1].split(';')[1]))
dfs['starting state'] = dfs['description'].apply(
    lambda x: int(x.split('_')[2].split(';')[1]))
dfs['episodes'] = dfs['description'].apply(
    lambda x: int(x.split('_')[3].split(';')[1]))
dfs = dfs.drop(columns=['description'])

# Sum over steps, then average the reward columns into one value per run.
dfs = dfs.groupby(['policy', 'openness level', 'episodes',
                   'starting state']).sum().reset_index()
dfs['final_rewards'] = dfs[reward_cols].mean(axis=1)

# Average over starting states / episodes.
pivot = pd.pivot_table(dfs,
                       index=['openness level'],
                       columns=['policy'],
                       values='final_rewards',
                       aggfunc='mean')

# Confidence-interval half-width: t * std / sqrt(n).
# NOTE(review): the critical value 2.04523 and n = 60 are hard-coded --
# confirm they match the number of evaluation runs per cell.
pivot_std = pd.pivot_table(dfs,
                           index=['openness level'],
                           columns=['policy'],
                           values='final_rewards',
                           aggfunc='std')
pivot_std = 2.04523 * pivot_std / math.sqrt(60)

In [None]:
# Stack the per-level "best checkpoint" mean tables. Each table has a single
# column named after its best policy, so the result is NaN outside each
# level's own column.
mohito_best_policy_df = pd.concat([
    policy_best_checkpoints[openness_level]['mean']
    for openness_level in policy_best_checkpoints
])

print(mohito_best_policy_df)

# Collapse the per-checkpoint columns into one 'final_rewards' column by
# melting and discarding the NaN cells.
mohito_best_policy_df = mohito_best_policy_df.reset_index()
mohito_best_policy_df = mohito_best_policy_df.melt(
    id_vars=['openness level'],
    var_name='policy',
    value_name='final_rewards'
).dropna()

mohito_best_policy_df['policy'] = 'MOHITO'
mohito_best_policy_df = mohito_best_policy_df.set_index('openness level')

# Split original (positive levels) from ablation (negative levels).
# The ablation slice is copied explicitly because it is modified below;
# mutating the filtered view raises SettingWithCopyWarning.
mohito_original_policy_df = mohito_best_policy_df[mohito_best_policy_df.index > 0]
mohito_ablation_df = mohito_best_policy_df[mohito_best_policy_df.index < 0].copy()

# Flip the sign so ablation rows line up with the positive openness levels.
mohito_ablation_df.index = -mohito_ablation_df.index
mohito_ablation_df['policy'] = mohito_ablation_df['policy'] + ' (Ablation)'
mohito_best_policy_df = pd.concat([mohito_original_policy_df, mohito_ablation_df])


print(mohito_best_policy_df)

# Merge MOHITO into the baseline table; assignment aligns on the
# 'openness level' index.
pivot['mohito'] = mohito_best_policy_df['final_rewards'][mohito_best_policy_df['policy'] == 'MOHITO']
pivot['mohito (Ablation)'] = mohito_best_policy_df['final_rewards'][mohito_best_policy_df['policy'] == 'MOHITO (Ablation)']
print(pivot)

Merge the MOHITO confidence intervals (CI) into the baseline CI table

In [None]:
# Stack the per-level confidence-interval tables of the best checkpoints.
# Each table has a single column named after its best policy, so the result
# is NaN outside each level's own column.
mohito_best_std_df = pd.concat([
    policy_best_checkpoints[openness_level]['ci']
    for openness_level in policy_best_checkpoints
])

print(mohito_best_std_df)

# Collapse the per-checkpoint columns into one column by melting and
# discarding the NaN cells. The value column keeps the name 'final_rewards'
# although it holds CI half-widths here.
mohito_best_std_df = mohito_best_std_df.reset_index()
mohito_best_std_df = mohito_best_std_df.melt(
    id_vars=['openness level'],
    var_name='policy',
    value_name='final_rewards'
).dropna()

mohito_best_std_df['policy'] = 'MOHITO'
mohito_best_std_df = mohito_best_std_df.set_index('openness level')


# Split original (positive levels) from ablation (negative levels).
# The ablation slice is copied explicitly because it is modified below;
# mutating the filtered view raises SettingWithCopyWarning.
mohito_original_std_df = mohito_best_std_df[mohito_best_std_df.index > 0]
mohito_ablation_df = mohito_best_std_df[mohito_best_std_df.index < 0].copy()

# Flip the sign so ablation rows line up with the positive openness levels.
mohito_ablation_df.index = -mohito_ablation_df.index
mohito_ablation_df['policy'] = mohito_ablation_df['policy'] + ' (Ablation)'
mohito_best_std_df = pd.concat([mohito_original_std_df, mohito_ablation_df])



print(mohito_best_std_df)

# Merge MOHITO into the baseline CI table; assignment aligns on the
# 'openness level' index.
pivot_std['mohito'] = mohito_best_std_df['final_rewards'][mohito_best_std_df['policy'] == 'MOHITO']
pivot_std['mohito (Ablation)'] = mohito_best_std_df['final_rewards'][mohito_best_std_df['policy'] == 'MOHITO (Ablation)']
print(pivot_std)

In [None]:
# Bar colours, one per plotted policy column (in plot order).
colors = ['#009ADE','red','#AF58BA','#FFC61E', '#F28522']

print(pivot.columns)

# Short display names for the policy columns, and the order in which they
# are plotted. Columns that are not listed are dropped by the selection.
short_names = {
    'mohito': 'MOH',
    'mohito (Ablation)': 'MOH2d',
    'FifoBaseline': 'FCFS',
    'WeakestBaseline': 'NTF',
    'RandomBaseline': 'Random',
}
plot_order = ['MOH', 'MOH2d', 'FCFS', 'NTF', 'Random']

pivot = pivot.rename(columns=short_names)[plot_order]
pivot_std = pivot_std.rename(columns=short_names)[plot_order]

# Snapshot the reward tables; the names pivot / pivot_std are reused later.
p1 = pivot.copy()
p1_std = pivot_std.copy()


In [None]:
from copy import deepcopy


import matplotlib.pyplot as plt

# Grouped bar chart of mean reward per openness level, with the CI
# half-widths from p1_std drawn as error bars.
ax = p1.plot(
    kind='bar',
    yerr=p1_std,
    capsize=5,
    figsize=(7, 3),
    title='Average Final Rewards by Openness Level and Policy',
    color=colors,
    width=0.8,
)

# The title passed to plot() is cleared again so the saved figure has none.
ax.set_title('')
plt.yticks(size=12)
plt.xticks(size=12, rotation=0)
ax.set_ylim(-380, 350)
ax.set_xlabel('Openness Level', size=14)
ax.set_ylabel('Mean Reward (with CI)', size=14)

# Legend entries are given positionally, in the column order of p1.
legend_labels = ["MOHITO","MOHITO-NoTaskNodes","FCFS","NTF", "Random"]
ax.legend(
    title=None,
    framealpha=0.0,
    fontsize=12,
    labels=legend_labels,
    ncol=3,
    prop={'size':10},
    loc='lower left',
)

plt.tight_layout()
plt.savefig('wildfire_rewards.pdf')

Wilcoxon signed-rank tests: each policy vs. MOHITO

In [None]:
from scipy import stats

# Add MOHITO.
# Only the keys of this mapping are iterated below; the (path, checkpoint)
# values are not read in this cell.
policy_eval_outputs = {
    1: ('results/OL1_frank_no_drop/testing_eval', policy_best_checkpoints[1]['checkpoint']),
    2: ('results/OL2_frank_no_drop/testing_eval', policy_best_checkpoints[2]['checkpoint']),
    3: ('results/OL3_frank_no_drop/testing_eval', policy_best_checkpoints[3]['checkpoint']),

    -1: ('results/OL1_frank_no_drop_ablation/testing_eval', policy_best_checkpoints[-1]['checkpoint']),
    -2: ('results/OL2_frank_no_drop_ablation/testing_eval', policy_best_checkpoints[-2]['checkpoint']),
    -3: ('results/OL3_frank_no_drop_ablation/testing_eval', policy_best_checkpoints[-3]['checkpoint']),
}

# Per-step rows of the best checkpoint for every openness level.
mohito_dfs = pd.concat([
    policy_best_checkpoints[openness_level]['dfs'][policy_best_checkpoints[openness_level]['dfs']['policy'] == policy_best_checkpoints[openness_level]['checkpoint'].item()].copy()
    for openness_level in policy_eval_outputs
])


# Handle the ablation: negative openness levels are the ablation runs. They
# get their own policy label and are folded onto the positive level.
# The label is assigned as a scalar so the write does not depend on aligning
# a Series against mohito_dfs' index (concat above keeps the per-level row
# labels, which may repeat across levels).
mohito_ablation_dfs = mohito_dfs['openness level'] < 0
mohito_dfs['policy'] = 'mohito'
mohito_dfs.loc[mohito_ablation_dfs, 'policy'] = 'mohito (Ablation)'
mohito_dfs['openness level'] = mohito_dfs['openness level'].abs()

# Add MOHITO to the dfs. Rows left over from a previous run of this cell are
# removed first; otherwise re-running the cell would append the MOHITO rows
# again and inflate their summed rewards.
dfs = dfs[(dfs['policy'] != 'mohito') & (dfs['policy'] != 'mohito (Ablation)')]
dfs = pd.concat([dfs, mohito_dfs], ignore_index=True).copy()


# Sum over steps, then average the reward columns into one value per run.
dfs = dfs.groupby(['policy', 'openness level', 'episodes',
                   'starting state'])[reward_cols].sum().reset_index()

dfs['final_rewards'] = dfs[reward_cols].mean(axis=1)

# Sort so that rows of different policies pair up positionally by
# (openness level, episodes, starting state) in the paired test below.
dfs = dfs.sort_values(by=['policy','openness level','episodes', 'starting state'])

# Group by a scalar key. Grouping by the one-element list ['policy'] yields
# 1-tuples as group keys on pandas >= 2, so `policy == 'mohito'` would never
# match and MOHITO would be tested against itself.
dfg = dfs.groupby('policy')
mohito_rewards = dfg.get_group('mohito')['final_rewards']

for (policy, group) in dfg:

    if policy == 'mohito':
        continue

    print(f'Wilcoxon test: {policy} - MOHITO')
    stat, p_value = stats.wilcoxon(
        group['final_rewards'],
        mohito_rewards
    )
    print(f'Statistic: {stat}, p-value: {p_value}')



In [None]:
import numpy as np, os
import pandas as pd
from ast import literal_eval
import matplotlib.pyplot as plt

# Reload the raw (per-step) baseline CSVs.
dfs = pd.concat([
    pd.read_csv(os.path.join(root, file))
    for root, _, files in os.walk(baseline_output_folder) for file in files
    if file.endswith('.csv')
], ignore_index=True)

# Get groups: the description string is '_'-separated, each field being
# '<name>;<value>'.
dfs['policy'] = dfs['description'].apply(
    lambda x: x.split('_')[0].split(';')[1])
dfs['openness level'] = dfs['description'].apply(
    lambda x: int(x.split('_')[1].split(';')[1]))
dfs['starting state'] = dfs['description'].apply(
    lambda x: int(x.split('_')[2].split(';')[1]))
dfs['episodes'] = dfs['description'].apply(
    lambda x: int(x.split('_')[3].split(';')[1]))
dfs = dfs.drop(columns=['description'])

# Add MOHITO.
# NOTE(review): only the keys of this mapping are iterated below. The MOHITO
# rows come from policy_best_checkpoints[...]['dfs'], so the
# 'testing_eval_perm' paths listed for the ablation are never read here --
# confirm whether the ablation data was meant to be loaded from them.
policy_eval_outputs = {
    1: ('results/OL1_frank_no_drop/testing_eval', policy_best_checkpoints[1]['checkpoint']),
    2: ('results/OL2_frank_no_drop/testing_eval', policy_best_checkpoints[2]['checkpoint']),
    3: ('results/OL3_frank_no_drop/testing_eval', policy_best_checkpoints[3]['checkpoint']),

    -1: ('results/OL1_frank_no_drop_ablation/testing_eval_perm', policy_best_checkpoints[-1]['checkpoint']),
    -2: ('results/OL2_frank_no_drop_ablation/testing_eval_perm', policy_best_checkpoints[-2]['checkpoint']),
    -3: ('results/OL3_frank_no_drop_ablation/testing_eval_perm', policy_best_checkpoints[-3]['checkpoint']),
}

# Per-step rows of the best checkpoint for every openness level.
mohito_dfs = pd.concat([
    policy_best_checkpoints[openness_level]['dfs'][policy_best_checkpoints[openness_level]['dfs']['policy'] == policy_best_checkpoints[openness_level]['checkpoint'].item()].copy()
    for openness_level in policy_eval_outputs
])


# Handle the ablation: negative openness levels are the ablation runs. They
# get their own policy label and are folded onto the positive level.
mohito_ablation_dfs = mohito_dfs['openness level'] < 0
mohito_dfs['policy'] = 'mohito'
mohito_ablation_policy_labels = mohito_dfs[mohito_ablation_dfs]['policy'].apply(lambda x: f'{x} (Ablation)')
mohito_dfs.loc[mohito_ablation_dfs, 'policy'] = mohito_ablation_policy_labels
mohito_dfs['openness level'] = mohito_dfs['openness level'].abs()

# Add MOHITO to the dfs.
dfs = pd.concat([dfs, mohito_dfs], ignore_index=True)


# Keep only the grouping keys and the fire-event columns.
dfs = dfs[['policy', 'step', 'openness level', 'episodes', 'starting state', 'infos/just_put_out_time',
    'infos/just_put_out_ftype', 'infos/just_burned_out_time', 'infos/just_burned_out_ftype']]

grouping = dfs.groupby(['policy', 'openness level', 'starting state', 'episodes'])


def arrayify(x):
    """Parse a Python-literal string with ast.literal_eval and wrap it in a NumPy array."""
    return np.array(literal_eval(x))


grouped_times = {p:[] for p in dfs['policy'].unique()}


# Iterate through (policy, openness level, starting state, episode) groups to
# get the times and types of put-out and burned-out fires.
for (name, group) in grouping:

    # Parse into local Series instead of adding columns to `group`: `group`
    # is a slice of dfs, and writing to it raises SettingWithCopyWarning.
    # Parse failures are deliberately not caught. The previous bare `except`
    # only printed a placeholder, after which the loop failed anyway with a
    # KeyError on the missing column, hiding the real cause.
    just_put_out_time = group['infos/just_put_out_time'].apply(arrayify)
    just_burned_out_time = group['infos/just_burned_out_time'].apply(arrayify)
    just_put_out_ftype = group['infos/just_put_out_ftype'].apply(arrayify)
    just_burned_out_ftype = group['infos/just_burned_out_ftype'].apply(arrayify)

    # Flatten the per-step arrays into one row per fire event.
    put_outs = pd.DataFrame({
        'time': np.concatenate(just_put_out_time.values),
        'fire_type': np.concatenate(just_put_out_ftype.values),
    })
    put_outs['burned_out'] = False

    burn_outs = pd.DataFrame({
        'time': np.concatenate(just_burned_out_time.values),
        'fire_type': np.concatenate(just_burned_out_ftype.values),
    })
    burn_outs['burned_out'] = True

    stacked_cols = pd.concat([put_outs, burn_outs], ignore_index=True)

    # `name` follows the groupby key order:
    # (policy, openness level, starting state, episodes).
    stacked_cols['policy'] = name[0]
    stacked_cols['openness_level'] = name[1]
    stacked_cols['starting_state'] = name[2]
    stacked_cols['episodes'] = name[3]
    grouped_times[name[0]].append(stacked_cols)


# One event table per policy.
grouped_times = {
    policy: pd.concat(times, ignore_index=True)
    for policy, times in grouped_times.items()
}



Plot the mean fire duration by openness level and policy

In [None]:
import math

import matplotlib.pyplot as plt

# One row per fire event across all policies, without the no-op baseline.
# Put-out and burned-out events are both included (no filter on 'burned_out').
gdf = pd.concat(grouped_times.values(), ignore_index=True)
gdf = gdf.loc[gdf['policy'] != 'NoopBaseline', ['time', 'openness_level', 'policy']]

print(gdf['policy'].unique())

# Mean and spread of the event time per (openness level, policy).
pivot_spec = dict(index='openness_level', columns='policy', values='time')
pivot = pd.pivot_table(gdf, aggfunc='mean', **pivot_spec)
pivot_std = pd.pivot_table(gdf, aggfunc='std', **pivot_spec)

# NOTE(review): the std above is taken over individual fire events, yet the
# interval is scaled with a hard-coded n = 60 -- confirm this is intended.
pivot_std = 2.04523 * pivot_std / math.sqrt(60)

# Display names and plot order; policies absent from the data become
# all-zero columns through fill_value.
display_names = {'FifoBaseline': 'FCFS', 'WeakestBaseline': 'NTF', 'RandomBaseline': 'Random', 'mohito': 'MOH', 'mohito (Ablation)': 'MOH-NoTaskNodes'}
column_order = ['MOH', 'MOH-NoTaskNodes', 'FCFS', 'NTF', 'Random']

pivot = pivot.rename(columns=display_names).reindex(columns=column_order, fill_value=0)
pivot_std = pivot_std.rename(columns=display_names).reindex(columns=column_order, fill_value=0)

pivot.plot(
    kind='bar',
    yerr=pivot_std,
    capsize=5,
    figsize=(7, 4),
    width=.8,
    title='Average Time to Put Out Fires by Openness Level and Policy',
    color=colors,
)

# The title passed to plot() is cleared again so the saved figure has none.
plt.title('')
# Legend entries are given positionally, in the column order set above.
plt.legend(
    title=None,
    framealpha=0.0,
    fontsize=12,
    labels=["MOHITO", "MOHITO-NoTaskNodes", "FCFS","NTF", "Random"],
    ncol=3,
    prop={'size':12},
    loc='upper left',
)
plt.yticks(size=14)
plt.xticks(size=14, rotation=0)
plt.ylim(0, 10)
plt.ylabel("Fire Duration (in timesteps)", size=14)
plt.xlabel("Openness Level",size=16)
plt.tight_layout()
plt.savefig('wildfire_duration.pdf')

Plot the number of fires burned out / put out

In [None]:
# Number of fire events per (policy, openness level, outcome, episode,
# starting state, fire type). Combinations with no events produce no row.
count_keys = ['policy','openness_level', 'burned_out', 'episodes', 'starting_state', 'fire_type']
counted_grouped_times = pd.concat([
    events.groupby(count_keys)['time'].count().reset_index()
    for events in grouped_times.values()
])

# Display names and plot order shared by both figures.
display_names = {'FifoBaseline': 'FCFS', 'WeakestBaseline': 'NTF', 'RandomBaseline': 'Random', 'mohito': 'MOHITO', 'mohito (Ablation)': 'MOHITO-NoTaskNodes'}
column_order = ['MOHITO', 'MOHITO-NoTaskNodes', 'FCFS', 'NTF', 'Random']

# One figure for burned-out fires, then one for put-out fires.
for is_burned_out in [True, False]:

    outcome_mask = counted_grouped_times['burned_out'] == is_burned_out
    these_grouped_times = counted_grouped_times[outcome_mask]

    pivot_spec = dict(
        index=['fire_type', 'openness_level'],
        columns=['policy'],
        values='time',
    )
    pivot = pd.pivot_table(these_grouped_times, aggfunc='mean', **pivot_spec)
    pivot_std = pd.pivot_table(these_grouped_times, aggfunc='std', **pivot_spec)
    pivot_std = 2.04523 * pivot_std / math.sqrt(60)

    pivot = pivot.rename(columns=display_names)
    pivot_std = pivot_std.rename(columns=display_names)

    # Only the mean table is reordered / padded; the error table keeps its
    # own column set.
    pivot = pivot.reindex(columns=column_order, fill_value=0)

    pivot.plot(
        kind='bar',
        yerr=pivot_std,
        capsize=5,
        figsize=(7, 4),
        width=.8,
        title='',
        color=colors,
    )

    outcome = 'Burned Out' if is_burned_out else 'Put Out'
    plt.ylabel(f"Number of Fires {outcome}", size=14)
    plt.xlabel("Openness Level", size=14)

    # The tick labels, the divider at x=2.5 and the two captions are
    # hard-coded for six bar groups.
    # NOTE(review): presumably 2 fire types x 3 openness levels -- confirm.
    locs = plt.xticks()[0]
    plt.xticks(size=14, ticks=locs, labels=['1', '2', '3', '1', '2', '3'], rotation=0)
    plt.legend(title=None, framealpha=0.0, fontsize=12, labels=["MOHITO","MOHITO-NoTaskNodes","FCFS","NTF", "Random"], ncol=1, prop={'size':10})
    plt.axvline(x=2.5, color='black', linestyle='--', linewidth=2, label='_nolegend_')

    caption_box = dict(facecolor='white', edgecolor='black', boxstyle='round,pad=0.1')
    plt.text(1, 0.03, 'Small Fires', size=14, va='bottom', ha='center',bbox=caption_box)
    plt.text(4, 0.03, 'Medium Fires', size=14, va='bottom', ha='center',bbox=caption_box)

    plt.tight_layout()

    plt.savefig(f'wildfire_fires_{is_burned_out}.pdf')