In [1]:
import pickle
from collections import defaultdict
from framework.losses import interpoint_distance
import os
import pandas as pd

In [24]:
RESULTS_DIR = "./results/exp1"
N_RESULT_FILES = 10

def _load(name):
    with open(name, "rb") as f:
        return pickle.load(f)
K_VALUES = [5, 10, 20, 30, 40, 50, 60]
EPSILON_VALUES = [0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1]

In [70]:
def create_avg_data(RESULTS_DIR, N_RESULT_FILES):
    print("Listing all result files...")
    for filename in os.listdir(RESULTS_DIR):
        print(filename)
    
    if len(os.listdir(RESULTS_DIR)) != N_RESULT_FILES:
        print("Some runs are missing please check the results folder")
    
    print("---------------------------------------------------------")
    
    print("Combining files now....")
    exp2_runs = {}
    i=0
    print("Loading Data...")
    for run in os.listdir(RESULTS_DIR):
        result = _load(os.path.join(RESULTS_DIR, run))
        result = pd.DataFrame(result)
        result = result.drop(['emb_x', 'labels'], axis=1)
        
        exp2_runs[i] = result
        i +=1
        
    # ---------------------------------------------------------------------------------------- 
    
    # Scatter Table
    avg_scatter = exp2_runs[0].copy()
    avg_scatter = avg_scatter[avg_scatter['convergence'] == 'scatter']
    avg_scatter['loss'] = 0
    
    #Epsilon Precision-Recall Table:
    avg_epsilon_pr = exp2_runs[0].copy()
    avg_epsilon_pr = avg_epsilon_pr[avg_epsilon_pr['convergence'] == 'epsilon_precision_recall']
    for entry in range(len(avg_epsilon_pr)):
        for j in range(len(EPSILON_VALUES)):
            avg_epsilon_pr['loss'].iloc[entry][0][j] = 0
            avg_epsilon_pr['loss'].iloc[entry][1][j] = 0
            
    #NN and FN Precision Table
    avg_nn_pr = exp2_runs[0].copy()
    avg_nn_pr = avg_nn_pr[avg_nn_pr['convergence'] == 'nn_precision']
    avg_fn_pr = exp2_runs[0].copy()
    avg_fn_pr = avg_fn_pr[avg_fn_pr['convergence'] == 'fn_precision']
    
    for entry in range(len(avg_nn_pr)):
        for j in range(len(K_VALUES)):
            avg_nn_pr['loss'].iloc[entry][j] = 0
            avg_fn_pr['loss'].iloc[entry][j] = 0
    
    
    # Summing up
    for ind in range(N_RESULT_FILES):
        df = exp2_runs[ind]
        
        # Combine Scatter:
        avg_scatter['loss'] += df[df['convergence'] == 'scatter']['loss']
        
        # Combine Epsilon Precision Recall:
        df_ep = df[df['convergence'] == 'epsilon_precision_recall']
        
        for entry in range(len(avg_epsilon_pr)):
            for j in range(len(EPSILON_VALUES)):
                avg_epsilon_pr['loss'].iloc[entry][0][j] += df_ep['loss'].iloc[entry][0][j]
                avg_epsilon_pr['loss'].iloc[entry][1][j] += df_ep['loss'].iloc[entry][1][j]
                
        # Combine NN and FN Precision:
        df_nn = df[df['convergence'] == 'nn_precision']
        df_fn = df[df['convergence'] == 'fn_precision']
        
        for entry in range(len(avg_nn_pr)):
            for j in range(len(K_VALUES)):
                avg_nn_pr['loss'].iloc[entry][j] += df_nn['loss'].iloc[entry][j]
                avg_fn_pr['loss'].iloc[entry][j] += df_fn['loss'].iloc[entry][j]
    
    #Scaling down to get average:
    avg_scatter['loss'] = avg_scatter['loss'] / N_RESULT_FILES
    
    for entry in range(len(avg_epsilon_pr)):
        for j in range(len(EPSILON_VALUES)):
            avg_epsilon_pr['loss'].iloc[entry][0][j] = avg_epsilon_pr['loss'].iloc[entry][0][j]/N_RESULT_FILES
            avg_epsilon_pr['loss'].iloc[entry][1][j] = avg_epsilon_pr['loss'].iloc[entry][1][j]/N_RESULT_FILES
            
    for entry in range(len(avg_nn_pr)):
            for j in range(len(K_VALUES)):
                avg_nn_pr['loss'].iloc[entry][j] = avg_nn_pr['loss'].iloc[entry][j]/N_RESULT_FILES
                avg_fn_pr['loss'].iloc[entry][j] = avg_fn_pr['loss'].iloc[entry][j]/N_RESULT_FILES
    

    
    
        
    return avg_scatter, avg_epsilon_pr, avg_nn_pr, avg_fn_pr
        
        

In [71]:
t,s,u,v = create_avg_data(RESULTS_DIR, N_RESULT_FILES)

Listing all result files...
experimentone_9.pkl
experimentone_8.pkl
experimentone_10.pkl
experimentone_5.pkl
experimentone_4.pkl
experimentone_6.pkl
experimentone_7.pkl
experimentone_3.pkl
experimentone_2.pkl
experimentone_1.pkl
---------------------------------------------------------
Combining files now....
Loading Data...


In [74]:
t.to_csv('exp1_scatter.csv')
s.to_csv('exp1_epsilon_pr.csv')
u.to_csv('exp1_nn_pr.csv')
v.to_csv('exp1_fn_pr.csv')

In [90]:
s

Unnamed: 0,size,sampling,dataset,algorithm,loss,convergence
3,0.3,random,mnist,umap,"([0.8907001559267632, 0.8395912859999799, 0.67...",epsilon_precision_recall
7,0.3,random,mnist,tsne,"([0.89329308380461, 0.852466086788907, 0.69762...",epsilon_precision_recall
11,0.3,random,olivetti,umap,"([0.8924999999999974, 0.8924999999999974, 0.89...",epsilon_precision_recall
15,0.3,random,olivetti,tsne,"([0.8924999999999974, 0.8924999999999974, 0.89...",epsilon_precision_recall
19,0.3,stratified,mnist,umap,"([0.8894795901156856, 0.8344085281270537, 0.66...",epsilon_precision_recall
...,...,...,...,...,...,...
239,1.0,random,olivetti,tsne,"([0.8977500000000018, 0.8977500000000018, 0.89...",epsilon_precision_recall
243,1.0,stratified,mnist,umap,"([0.8751954425990973, 0.7710669702704275, 0.53...",epsilon_precision_recall
247,1.0,stratified,mnist,tsne,"([0.8751954425990973, 0.7710669702704275, 0.53...",epsilon_precision_recall
251,1.0,stratified,olivetti,umap,"([0.8977500000000018, 0.8977500000000018, 0.89...",epsilon_precision_recall
