# General Imports

In [41]:
import pandas as pd
import pickle
import numpy as np
import seaborn as sns

from scipy.stats import ttest_ind
from itertools import combinations

# Loading Data

In [None]:
def load_dict(filename, verbose=False):
    '''
    Loads dictionary of metrics from given filename

    Args:
    - filename (str): file to load
    - verbose=False (bool): specifies if exact filename should be used. if False, .pickle extension
      is appended to filename unless filename already ends with it
    Return
    - dictionary (dict): data found in file
    - None (None): return None val in case the file does not exist (the FileNotFoundError is
      printed instead of being raised)
    '''
    # Check the suffix, not substring membership: a path that merely contains
    # '.pickle' somewhere (e.g. in a directory name) still needs the extension.
    if not verbose and not filename.endswith('.pickle'):
        filename += '.pickle'

    try:
        with open(filename, 'rb') as pickle_file:
            # NOTE: unpickling can execute arbitrary code; only load trusted files.
            dictionary = pickle.load(pickle_file)
    except FileNotFoundError as e:
        # Best effort: report the missing file and let the caller deal with None
        print(e)
        return None

    return dictionary

In [None]:
# Settings that identify which checkpoint files are read
datasets = ['yelp', 'sub_ob', 'clickbait']
models = ['svm', 'logreg', 'randomforest']
index = 2 # Loads final (of 3) data checkpoint for data

# Loaded checkpoints, keyed by (model, dataset)
data_dict = {}

# Read one checkpoint per (model, dataset) pair
for dataset in datasets:
    for model in models:
        checkpoint_path = '../checkpoints/{model}/{model}_{dataset}_{index}'.format(
            model=model, dataset=dataset, index=index)
        data_dict[(model, dataset)] = load_dict(checkpoint_path)

# Analysis

In [None]:
# Metric keys read from each trial's results when summarising test performance
TEST_METRICS = [
    'acc_test',
    'precision_test',
    'recall_test',
    'f1_test',
]

# Metric keys read from each trial's results when summarising train performance
TRAIN_METRICS = [
    'acc_train',
    'precision_train',
    'recall_train',
    'f1_train',
]

# Data set names used by the analysis cells
DATASETS = [
    'yelp',
    'sub_ob',
    'clickbait',
]

## Test/train set performance (across 3 trials) for each algorithm/dataset combo (Raw + Mean)

In [None]:
# Summaries keyed by (algorithm, dataset, metric); each entry holds the raw
# values pooled over the 3 trials together with their mean.
algo_data_test_dict = {}
algo_data_train_dict = {}

for (algorithm, dataset), metric_dict in data_dict.items():
    # Test metrics are summarised first, then train metrics, each into its own dictionary
    for metric_names, summary in ((TEST_METRICS, algo_data_test_dict),
                                  (TRAIN_METRICS, algo_data_train_dict)):
        for metric in metric_names:
            # Flatten the per-trial sequences of this metric into one list
            values = []
            for trial in range(3):
                values.extend(metric_dict[(dataset, trial)][metric])
            summary[(algorithm, dataset, metric)] = {'mean': np.mean(values), 'values': values}

## Mean test set performance (across 3 trials x 3 data sets) for each algorithm

In [None]:
# Mean test-set value per (algorithm, metric), pooled over every data set and trial
mean_algo_dict = {}

# Gather the raw test values of each (algorithm, metric) across data sets.
# Values are copied into a fresh list with extend so that the 'values' lists
# stored in algo_data_test_dict are not mutated, and so that the pooled data
# stays a flat list that np.mean can average.
for (algorithm, dataset, metric), value_dict in algo_data_test_dict.items():
    mean_algo_dict.setdefault((algorithm, metric), []).extend(value_dict['values'])

# Collapse each pooled list to its mean (keys are unchanged, so in-place update is safe)
for key in mean_algo_dict:
    mean_algo_dict[key] = np.mean(mean_algo_dict[key])

## t-test

In [None]:
# Independent two-sample t-test between the test-set values of every pair of
# (algorithm, dataset, metric) entries. Results are kept in ttest_results,
# keyed by (key_1, key_2), and printed as they are computed.
#
# combinations() is applied to the dictionary *items* so both the key and the
# value dictionary of each entry are available, and each unordered pair is
# tested once: swapping the two samples only flips the sign of the statistic.
# NOTE(review): pairs also mix different metrics and data sets -- confirm that
# comparing e.g. accuracy against recall is intended.
ttest_results = {}

for (key_1, value_dict_1), (key_2, value_dict_2) in combinations(algo_data_test_dict.items(), r=2):
    stat, p = ttest_ind(value_dict_1['values'], value_dict_2['values'])
    ttest_results[(key_1, key_2)] = (stat, p)
    print('test: {}\nstat: {}\np: {}'.format((key_1, key_2), stat, p))

## Heat Map

In [9]:
def draw_heatmap(errors, D_list, title):
    '''
    Draws an annotated seaborn heat map of the given values and shows it

    Args:
    - errors: data handed to sns.heatmap (presumably a 2D array-like of error values -- confirm against callers)
    - D_list: labels used for the y ticks; the y axis is labelled 'max depth D'
    - title: title placed on the plot
    '''
    # NOTE(review): plt is presumably matplotlib.pyplot; no import for it is
    # visible in this section -- confirm it is imported before this is called.
    plt.figure(figsize=(2, 4))

    heatmap_axes = sns.heatmap(
        errors,
        annot=True,
        fmt='.3f',
        yticklabels=D_list,
        xticklabels=[],
    )

    # Label the colorbar attached to the heat map and the y axis
    colorbar = heatmap_axes.collections[0].colorbar
    colorbar.set_label('error')
    heatmap_axes.set(ylabel='max depth D')

    # Move each y-limit by 0.5; presumably avoids clipped first/last rows in
    # some matplotlib versions -- confirm.
    y_bottom, y_top = heatmap_axes.get_ylim()
    heatmap_axes.set_ylim(y_bottom + 0.5, y_top - 0.5)

    plt.title(title)
    plt.show()

In [29]:
def get_mean(*lists):
    '''
    Averages the passed in lists position by position.
    Positions beyond the end of the shortest list are ignored.
    Args:
    - *lists (list-like): sequences to average element-wise
    Returns:
    - list holding the mean of each position
    '''
    means = []
    # zip stops at the shortest input, which is what truncates uneven lists
    for column in zip(*lists):
        means.append(np.mean(column))
    return means

# Sanity checks: even lists, an empty list (truncates everything), integer results
assert get_mean([1,2,3], [4,5,6]) == [2.5,3.5,4.5]
assert get_mean([1,1,1], []) == []
assert get_mean([0,0,0], [2,4,6]) == [1,2,3]

### SVM

In [None]:
# Validation performance of each SVM parameter combo, averaged element-wise
# across all data sets and trials.
# NOTE(review): trials are looked up here with the key ('svm', i), while the
# train/test summary cell uses (dataset, index) -- confirm which key layout the
# checkpoints actually use.
svm_cv_scores = [
    data_dict[('svm', dataset)][('svm', i)]['cv_results']['mean_test_score']
    for dataset in DATASETS
    for i in range(3)
]
svm_validation_means = get_mean(*svm_cv_scores)

### Logistic Regression

### Random Forest