# Compare results

## Setup

In [1]:
import pandas as pd
import numpy as np
import pickle 
from functools import reduce
import utils
import os

RESULTS_DIR = "results"

## Load and merge dataframes with predictions

In [3]:
results_lst = []
for filename in os.listdir(RESULTS_DIR):
    path = f"{RESULTS_DIR}/{filename}"
    with open(path, "rb") as f:
        print(f"Reading data from {path}")
        results_lst.append(pickle.load(f))

Reading data from results/dtw_results_for_HMM_1658502416_00.pkl
Reading data from results/hmm_results_for_ARIMA_1658502456_00.pkl
Reading data from results/hmm_results_for_HMM_1658502416_00.pkl
Reading data from results/dtw_results_for_ARIMA_1658502456_00.pkl
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])


In [6]:
# Make sure all results are in the same format
for r in results_lst:
    print(r.keys())

dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])


In [4]:
df_lst = [r["results_df"] for r in results_lst]
merged_results_df = reduce(
    lambda df1, df2: pd.merge(
                              df1, df2, 
                              on = ["sample_id", "true_label", "sample_len"], 
                              how = "outer"), 
                    df_lst)

In [5]:
merged_results_df

Unnamed: 0,sample_id,true_label,sample_len,pred_label_1NN_x,pred_label_5NN_x,pred_AIC_x,pred_BIC_x,pred_AIC_y,pred_BIC_y,pred_label_1NN_y,pred_label_5NN_y
0,0,0,32,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2,0,20,1.0,0.0,,,0.0,0.0,,
2,4,0,23,1.0,0.0,,,0.0,0.0,,
3,5,0,41,0.0,0.0,,,0.0,0.0,,
4,6,0,20,0.0,0.0,,,1.0,1.0,,
...,...,...,...,...,...,...,...,...,...,...,...
506,309,8,50,,,8.0,8.0,,,7.0,7.0
507,310,8,44,,,7.0,7.0,,,6.0,7.0
508,312,8,41,,,6.0,6.0,,,6.0,6.0
509,313,8,35,,,8.0,8.0,,,8.0,8.0


## Show accuracy and train/test subsets size

In [None]:
for r in results_lst:
    for lab, acc in r["accuracies"].items():
        print(f"{lab} accuracy: {acc:.3f}")

In [None]:
data_filename = results_lst[0]["data_filename"]
with open(data_filename, "rb") as f:
    data = pickle.load(f)
print(data["metadata"])

## Find wrong labels

In [None]:
wrong_samples = {}
for col in utils.find_colnames_with(merged_results_df):
    is_wrong = merged_results_df[col] != merged_results_df["true_label"]
    ids = np.where(is_wrong)[0]
    wrong_samples[col] = sorted(merged_results_df.iloc[ids]["sample_id"].values)

for col, ids in wrong_samples.items():
    print(f"Ids of {len(ids)} misclassified samples in column {col}:")
    print(ids)

### Plots