In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
% matplotlib inline

In [None]:
% mv /Users/codetesting/Downloads/benchmark.zip benchmark.zip

In [None]:
# Load the benchmark result table.
# NOTE(review): with read_csv's default header handling, the first line of
# index.csv becomes the column labels, and those labels are replaced further
# down by the ones from header.csv. If index.csv has no header line, its first
# record is lost -- confirm, and pass header=None if so.
summary = pd.read_csv("benchmark_data/index.csv")

In [None]:
# Load header.csv; in the visible cells only its column labels are used
# (they are assigned to `summary` below).
hedder = pd.read_csv("benchmark_data/header.csv")

In [None]:
# Relabel the result table with the column names read from header.csv. pandas
# raises a ValueError here if the two frames do not have the same number of columns.
summary.columns = hedder.columns

In [None]:
# Show the last five rows as a quick sanity check of the relabelled table.
summary.tail(n=5)

In [None]:
# Drop the rows whose ' performance' entry is the string ' None' (presumably runs
# that did not finish -- confirm). The explicit copy gives an independent frame,
# so adding columns later does not trigger pandas' chained-assignment warning.
completed_frame = summary.loc[summary[' performance'] != ' None'].copy()

## Number of successful calls

Algorithms:

In [None]:
# Number of retained rows per algorithm.
completed_frame.loc[:, ' algorithm'].value_counts()

Models:

In [None]:
# Number of retained rows per model.
completed_frame.loc[:, ' model'].value_counts()

In [None]:
# Column labels of interest (note the leading space in each label).
# This list is not referenced by any other visible cell.
useful = [' algorithm', ' model', ' final_accuracy', ' performance']

In [None]:
# Add a 'perf' column: the first whitespace-separated token of each
# ' performance' entry, converted to float.
compare_success = completed_frame[' performance'].apply(lambda text: float(text.split()[0]))
completed_frame['perf'] = compare_success

## Analysis: Useful functions

In [None]:
def get_rows_by_col(pdframe, colname, val):
    """Return the rows of pdframe whose colname entry equals val

    pdframe –– the DataFrame to filter
    colname –– label of the column to test
    val     –– value compared with == against every entry of that column
    """
    matches = pdframe[colname] == val
    return pdframe.loc[matches]

def get_nontrivial_columns(pdframe):
    """Return the labels of the columns that hold more than one distinct value

    The labels come back as a list, in the column order of pdframe.
    """
    return [label for label in pdframe.columns if pdframe[label].unique().size > 1]

def get_trans_col(frame, grouped, func_name, colname):
    """Select the rows of frame whose colname value equals grouped.transform(func_name)

    frame     –– the DataFrame the rows are taken from
    grouped   –– grouped column to transform (e.g. frame.groupby(' model')['perf'])
    func_name –– function name handed to grouped.transform (e.g. 'max')
    colname   –– column of frame compared against the transformed values
    """
    transformed = grouped.transform(func_name)
    is_selected = transformed == frame[colname]
    return frame.loc[is_selected]

def print_basic_stats(pdframe, group_col, colname):
    """Print per-group summary statistics of one column

    Prints, in this order: the group means, the group standard deviations,
    the rows under which each group's maximum is achieved and the rows under
    which each group's minimum is achieved.

    group_col –– the column we want to groupby (e.g. ' model')
    colname   –– the values we want to compare (e.g. 'perf')
    """
    grouped_values = pdframe.groupby(group_col)[colname]
    best_rows = get_trans_col(pdframe, grouped_values, 'max', colname)
    worst_rows = get_trans_col(pdframe, grouped_values, 'min', colname)

    # Each entry pairs a heading with a callable producing the value to print;
    # the mean and std are therefore only computed when their turn comes.
    sections = (
        ("\nMean performance of models is: ", grouped_values.mean),
        ("\nStd performance of models is: ", grouped_values.std),
        ("\nMax performance of models is: ", lambda: best_rows),
        ("\nMin performance of models is: ", lambda: worst_rows),
    )
    for heading, produce in sections:
        print(heading)
        print(produce())

## Analyzing: given_rewards

In [None]:
# Restrict the retained rows to the ' given_rewards' algorithm.
given_rewards = get_rows_by_col(completed_frame, colname=' algorithm', val=' given_rewards')

In [None]:
# Get nontrivial columns of given_rewards, and compare by performance.
# ' model' and 'perf' are always kept: if either one is constant within this
# subset it is not reported as nontrivial, and print_basic_stats would then
# fail with a KeyError because the column is missing from the frame it receives.
varying = get_nontrivial_columns(given_rewards)
cols = [col for col in given_rewards.columns if col in varying or col in (' model', 'perf')]
print_basic_stats(given_rewards[cols], ' model', 'perf')

## Analyzing: no_rewards

In [None]:
# Restrict the retained rows to the ' no_rewards' algorithm.
no_rewards = get_rows_by_col(completed_frame, colname=' algorithm', val=' no_rewards')

In [None]:
# Compare the no_rewards runs by performance, showing only the columns that vary.
# ' model' and 'perf' are always kept: if either one is constant within this
# subset it is not reported as nontrivial, and print_basic_stats would then
# fail with a KeyError because the column is missing from the frame it receives.
varying = get_nontrivial_columns(no_rewards)
cols = [col for col in no_rewards.columns if col in varying or col in (' model', 'perf')]
print_basic_stats(no_rewards[cols], ' model', 'perf')

## Analyzing: boltzmann_planner

In [None]:
# Restrict the retained rows to the ' boltzmann_planner' algorithm.
b_planner = get_rows_by_col(completed_frame, colname=' algorithm', val=' boltzmann_planner')

In [None]:
# Compare the boltzmann_planner runs by performance, showing only the columns that vary.
# ' model' and 'perf' are always kept: if either one is constant within this
# subset it is not reported as nontrivial, and print_basic_stats would then
# fail with a KeyError because the column is missing from the frame it receives.
varying = get_nontrivial_columns(b_planner)
cols = [col for col in b_planner.columns if col in varying or col in (' model', 'perf')]
print_basic_stats(b_planner[cols], ' model', 'perf')

## Analyzing: vi_inference

In [None]:
# Restrict the retained rows to the ' vi_inference' algorithm and compare them
# by performance, showing only the columns that vary.
vi_alg = get_rows_by_col(completed_frame, ' algorithm', ' vi_inference')
# ' model' and 'perf' are always kept: if either one is constant within this
# subset it is not reported as nontrivial, and print_basic_stats would then
# fail with a KeyError because the column is missing from the frame it receives.
varying = get_nontrivial_columns(vi_alg)
cols = [col for col in vi_alg.columns if col in varying or col in (' model', 'perf')]
print_basic_stats(vi_alg[cols], ' model', 'perf')