In [1]:
import pickle
import os
import pandas as pd
import numpy as np
from collections import defaultdict
from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

  from ._conv import register_converters as _register_converters


In [2]:
# Architecture: we want a data loader class
# and a function that takes a certain object,
# ideally a relatively standard TensorFlow file pointer,
# and then constructs this analysis.

# Separate note: for the graph/dataset code, we also want
# a data loader and a graph class.

In [3]:
def load_experiments(root_dir):
    """Walk ``root_dir`` and collect the options and event files of every trial.

    Each directory visited by ``os.walk`` is treated as one trial: its last
    path component is the trial name and the components between the first and
    the last one (joined with '/') form the experiment key.

    Args:
        root_dir: directory to walk.

    Returns:
        dict mapping experiment key -> defaultdict(dict) mapping trial name ->
        record. A record gets an 'opts' entry (``vars()`` of the unpickled
        object) when the directory contains ``options.pkl``, and
        'event_accumulator' / 'experiment_hash' entries when it contains a
        file whose name starts with "event". Directories holding neither kind
        of file produce no entry.
    """
    found = {}
    for root, dirs, files in os.walk(root_dir):
        # These depend only on the directory, so compute them once per directory.
        parts = root.split('/')
        key = '/'.join(parts[1:-1])
        trial = parts[-1]
        for f in files:
            path = os.path.join(root, f)
            if f == "options.pkl":
                # NOTE(review): pickle.load can execute arbitrary code; only
                # point this at experiment directories you trust.
                with open(path, 'rb') as handle:
                    opts = vars(pickle.load(handle))
                found.setdefault(key, defaultdict(dict))[trial]['opts'] = opts
            elif f.startswith("event"):
                record = found.setdefault(key, defaultdict(dict))[trial]
                record['event_accumulator'] = EventAccumulator(path)
                # The parent directory name is recorded as the experiment hash.
                record['experiment_hash'] = parts[-2]
    return found


# walk the experiment directory
all_experiments = load_experiments('../experiments/experiments')


In [4]:
def filter_experiments(experiments):
    """Return the experiments whose first trial carries an event accumulator.

    Args:
        experiments: dict mapping experiment key -> dict of trial name ->
            trial record (a dict).

    Returns:
        list of the experiment dicts (the values of ``experiments``) whose
        first trial record has a non-None 'event_accumulator' entry.
        Experiments without any trial are skipped.
    """
    accumulators = []
    for exp in experiments.values():
        # next(iter(...)) works on Python 2 and 3 alike; indexing
        # dict.values() only works on Python 2.
        first_trial = next(iter(exp.values()), None)
        if first_trial is None:
            continue
        if first_trial.get('event_accumulator') is not None:
            accumulators.append(exp)
    return accumulators

In [5]:
# Keep only the experiments whose first trial has an event accumulator;
# the result is a list of per-experiment trial dicts.
experiments = filter_experiments(all_experiments)

In [6]:
def find_best_epoch(events):
    """Locate the event with the largest ``value``.

    Args:
        events: sequence of objects exposing a ``value`` attribute.

    Returns:
        (index, value) of the maximum; on ties the first maximum wins.
    """
    values = np.array([event.value for event in events])
    top = np.argmax(values, axis=0)
    return top, values[top]

In [7]:
# Scalar tags read from the event files for every trial.
my_tags = [
    'auc_train',
    'auc_valid',
    'auc_test',
    'acc_train',
    'acc_valid',
    'acc_test',
]

# Option names copied from each trial's options into the summary frame.
options_tags = [
    'weight_decay',
    'epoch',
    'lr',
    'train_ratio',
    'dataset',
    'dropout',
    'batch_size',
    'pool_graph',
    'use_gate',
    'num_channel',
    'add_self',
    'l1_loss_lambda',
    'num_layer',
    'graph',
    'model',
    'nb_nodes',
]


""" This function parses the dataframe for a given trial and generates a little dataframe"""
def append_trial_df(trial, df=None):
    """Append one summary row for ``trial`` to ``df`` and return the frame.

    The row holds the scalars listed in ``my_tags`` taken at the epoch with the
    highest "auc_valid", that epoch's index ("best_epoch"), the trial's
    "experiment_hash", and the options listed in ``options_tags``.

    Args:
        trial: dict with 'event_accumulator', 'experiment_hash' and 'opts'
            entries.
        df: frame to extend. When it is None or empty a new frame with the
            ``options_tags + my_tags`` columns is created. A non-empty frame
            is modified in place.

    Returns:
        The frame with the new row added.

    Raises:
        KeyError: if ``trial['opts']`` lacks one of ``options_tags``.
    """
    # Reload once; the rest of the function uses the value Reload() returns.
    event_accumulator = trial['event_accumulator'].Reload()

    # The default df=None used to crash on `.empty`; treat it like an empty frame.
    if df is None or df.empty:
        df = pd.DataFrame(columns=options_tags + my_tags)

    # Get the epoch with the best AUC for the valid set
    best_epoch, value = find_best_epoch(event_accumulator.Scalars("auc_valid"))
    # New row goes at the next positional label.
    row_number = len(df.index)
    df.loc[row_number, "auc_valid"] = value
    df.loc[row_number, "best_epoch"] = best_epoch
    df.loc[row_number, "experiment_hash"] = trial['experiment_hash']

    # set the other values from that epoch
    for tag in my_tags:
        df.loc[row_number, tag] = event_accumulator.Scalars(tag)[best_epoch].value
        # Keep the metric columns as floats.
        df[tag] = df[tag].astype(float)
    for tag in options_tags:
        df.loc[row_number, tag] = trial['opts'][tag]

    return df

In [8]:
# Build the summary frame: one row per trial of every retained experiment.
df = pd.DataFrame()
for exp in experiments:
    # Only the trial records are needed; .values() works on Python 2 and 3,
    # whereas .iteritems() exists on Python 2 only.
    for trial in exp.values():
        df = append_trial_df(trial, df)

In [31]:
# Per-experiment means, keyed by experiment hash: metrics (g1) and options (g2).
g1 = df[my_tags].groupby(df["experiment_hash"]).mean()
g2 = df[options_tags].groupby(df['experiment_hash']).mean()
# Join the two summaries column-wise on the experiment hashes they share.
# Uses g1/g2 computed above rather than names this notebook never defines.
group_stats = pd.concat([g1, g2], axis=1, join='inner')
group_stats

Unnamed: 0_level_0,auc_train,auc_valid,auc_test,acc_train,acc_valid,acc_test,weight_decay,epoch,dropout,batch_size,use_gate,num_channel,add_self,l1_loss_lambda,num_layer,nb_nodes
experiment_hash,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
19759111840262d2b340fe140a26cc5b,0.965735,0.904863,0.892541,0.941111,0.846667,0.846667,0.0,10.0,False,100.0,0.0,32.0,True,0.0,1.0,100.0
878758a6e47d6f4c3f50b491f8a837ee,1.0,0.705086,0.729268,1.0,0.663333,0.668333,0.0,10.0,False,100.0,0.0,32.0,True,0.0,1.0,1000.0
