In [22]:
import pandas as pd
import math
from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

In [23]:
# Load the per-run results, round every numeric column to three decimals and
# index the rows by the 'run' column. The bare last line displays the frame.
# NOTE(review): the rounding is applied to the stored frame, so anything
# computed from results_df afterwards works on the rounded values.
results_df = (
    pd.read_csv('output/results.csv')
    .round(3)
    .set_index('run')
)
results_df

Unnamed: 0_level_0,real_verbatim,predicted_verbatim,test_case,category,algorithm_name
run,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,0.000,0.000,Full random,full_random,Mean Heat: d=1 | r=1
0,0.000,0.000,Full random,full_random,Mean Heat: d=2 | r=2
0,0.000,0.000,Full random,full_random,Mean Heat: d=1 | r=2
0,0.000,0.000,Full random,full_random,Mean Heat: d=2 | r=1
0,0.000,0.000,Full random,full_random,Prop Neigh r=1 | T=0.001
...,...,...,...,...,...
9,0.149,0.149,Long range 0.15,long_range,Prop Neigh r=max | T=0.01
9,0.149,0.149,Long range 0.15,long_range,Prop Neigh r=max | T=0.001
9,0.149,0.149,Long range 0.15,long_range,Prop Neigh r=max | T=0.001
9,0.149,0.149,Long range 0.15,long_range,Prop Neigh r=max | T=10mnh


In [24]:
def mean_relative_error(true, expected):
    """Return the mean relative error of ``expected`` with respect to ``true``.

    Each element contributes ``|expected - true| / |true|``. Where the
    reference value is numerically zero (``np.isclose(true, 0)``) the relative
    error is undefined, so that element contributes its absolute error
    ``|expected - true|`` instead. When every reference value is zero this
    reduces to the mean absolute value of ``expected``.

    Parameters
    ----------
    true : array-like of numbers
        Reference (ground-truth) values.
    expected : array-like of numbers
        Values to compare against ``true``; must broadcast with ``true``.

    Returns
    -------
    float
        The mean of the per-element errors. ``nan`` for empty input, and
        ``nan`` if any input element is ``nan`` (missing values are not
        silently skipped).
    """
    # Work on plain float arrays so lists, tuples, ndarrays and pandas Series
    # are all handled the same way (element by element, by position).
    true_values = np.asarray(true, dtype=float)
    expected_values = np.asarray(expected, dtype=float)

    absolute_error = np.abs(expected_values - true_values)
    if absolute_error.size == 0:
        # Nothing to average; avoid numpy's "mean of empty slice" warning.
        return np.nan

    # Divide by |true| so negative references cannot yield negative errors.
    # For zero references use a denominator of 1, i.e. fall back to the
    # absolute error instead of producing inf/nan from a division by zero.
    zero_reference = np.isclose(true_values, 0)
    denominator = np.where(zero_reference, 1.0, np.abs(true_values))
    return np.mean(absolute_error / denominator)

In [25]:
# Build one table row per algorithm: its name plus, for every category that
# algorithm has results in, the mean relative error rounded to three decimals.
results_table_data = []
algorithm_names = results_df['algorithm_name'].unique()

for algorithm_name in algorithm_names:
    # All result rows produced by this algorithm.
    algorithm_df = results_df[results_df['algorithm_name'] == algorithm_name]
    algorithm_table_row = {'name': algorithm_name}

    for category in algorithm_df['category'].unique():
        in_category = algorithm_df['category'] == category
        category_results = algorithm_df[in_category]
        mre = mean_relative_error(
            category_results['real_verbatim'],
            category_results['predicted_verbatim'],
        )
        algorithm_table_row[category] = round(mre, 3)

    results_table_data.append(algorithm_table_row)

In [26]:
# Turn the collected row dictionaries into a table keyed by algorithm name
# and display it.
results_table = pd.DataFrame(results_table_data)
results_table = results_table.set_index('name')
results_table

Unnamed: 0_level_0,full_random,full_verbatim,checkerboard,patches,long_range
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Mean Heat: d=1 | r=1,0.0,0.0,0.555,0.096,0.9
Mean Heat: d=2 | r=2,0.0,0.0,0.464,0.104,0.899
Mean Heat: d=1 | r=2,0.0,0.0,0.446,0.107,0.9
Mean Heat: d=2 | r=1,0.0,0.0,0.555,0.096,0.9
Prop Neigh r=1 | T=0.001,0.0,0.0,0.333,0.003,0.666
Prop Neigh r=2 | T=0.001,0.0,0.0,0.0,0.003,0.323
Prop Neigh r=3 | T=0.001,0.001,0.0,0.0,0.004,0.096
Prop Neigh r=max | T=0.1,0.0,0.0,0.0,1.0,0.596
Prop Neigh r=max | T=0.01,0.0,0.0,0.0,0.152,0.0
Prop Neigh r=max | T=0.001,0.0,0.0,0.0,0.001,0.0


In [27]:
# Write the per-category table (one row per algorithm) to a CSV file.
# NOTE(review): the file name says "mae", but the values in results_table are
# produced by mean_relative_error — confirm the name is intended.
results_table.to_csv('output/results_mae_category_table.csv')

In [28]:
# Build one table row per algorithm: its name plus, for every test case that
# algorithm has results in, the mean relative error rounded to three decimals.
results_table_data = []
algorithm_names = results_df['algorithm_name'].unique()

for algorithm_name in algorithm_names:
    # All result rows produced by this algorithm.
    algorithm_df = results_df[results_df['algorithm_name'] == algorithm_name]
    algorithm_table_row = {'name': algorithm_name}

    for test_case in algorithm_df['test_case'].unique():
        in_test_case = algorithm_df['test_case'] == test_case
        test_case_results = algorithm_df[in_test_case]
        mre = mean_relative_error(
            test_case_results['real_verbatim'],
            test_case_results['predicted_verbatim'],
        )
        algorithm_table_row[test_case] = round(mre, 3)

    results_table_data.append(algorithm_table_row)

In [29]:
# Turn the collected row dictionaries into a table keyed by algorithm name
# and display it.
results_table = pd.DataFrame(results_table_data)
results_table = results_table.set_index('name')
results_table

Unnamed: 0_level_0,Full random,Full verbatim,Checkerboard 3x3,Checkerboard 1x1,Checkerboard 5x5,Patches 5,Patches 10,Patches 20,Long range 0.05,Long range 0.10,Long range 0.15
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
Mean Heat: d=1 | r=1,0.0,0.0,0.332,1.0,0.332,0.102,0.087,0.099,0.948,0.9,0.852
Mean Heat: d=2 | r=2,0.0,0.0,0.41,0.573,0.41,0.11,0.094,0.109,0.946,0.901,0.851
Mean Heat: d=1 | r=2,0.0,0.0,0.442,0.454,0.442,0.111,0.098,0.112,0.948,0.901,0.851
Mean Heat: d=2 | r=1,0.0,0.0,0.332,1.0,0.332,0.102,0.087,0.099,0.948,0.9,0.852
Prop Neigh r=1 | T=0.001,0.0,0.0,0.0,1.0,0.0,0.0,0.002,0.008,0.813,0.659,0.526
Prop Neigh r=2 | T=0.001,0.0,0.0,0.0,0.0,0.0,0.005,0.001,0.005,0.538,0.287,0.145
Prop Neigh r=3 | T=0.001,0.001,0.0,0.0,0.0,0.0,0.01,0.003,0.001,0.227,0.053,0.009
Prop Neigh r=max | T=0.1,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,0.787,0.0
Prop Neigh r=max | T=0.01,0.0,0.0,0.0,0.0,0.0,0.229,0.093,0.133,0.0,0.0,0.0
Prop Neigh r=max | T=0.001,0.0,0.0,0.0,0.0,0.0,0.002,0.001,0.0,0.0,0.0,0.0


In [30]:
# Split the wide per-test-case table into two narrower CSV files: each file
# is the full table with one group of columns removed.
first_group_columns = [
    'Full random',
    'Full verbatim',
    'Checkerboard 3x3',
    'Checkerboard 1x1',
    'Checkerboard 5x5',
]
second_group_columns = [
    'Patches 5',
    'Patches 10',
    'Patches 20',
    'Long range 0.05',
    'Long range 0.10',
    'Long range 0.15',
]

# Table 1 keeps everything except the patches / long-range columns.
table_without_second_group = results_table.drop(columns=second_group_columns)
table_without_second_group.to_csv('output/results_mae_test_case_table_1.csv')

# Table 2 keeps everything except the full / checkerboard columns.
table_without_first_group = results_table.drop(columns=first_group_columns)
table_without_first_group.to_csv('output/results_mae_test_case_table_2.csv')

In [31]:
# Show the distinct test-case labels present in the loaded results.
results_df['test_case'].unique()

array(['Full random', 'Full verbatim', 'Checkerboard 3x3',
       'Checkerboard 1x1', 'Checkerboard 5x5', 'Patches 5', 'Patches 10',
       'Patches 20', 'Long range 0.05', 'Long range 0.10',
       'Long range 0.15'], dtype=object)