# Compare results

## Setup

In [8]:
import pandas as pd
import numpy as np
import pickle 
from functools import reduce
import utils
import os

# Directory whose files are opened and unpickled as result dictionaries below.
RESULTS_DIR = "results"

## Load and merge dataframes with predictions

In [9]:
# Load every pickled result dictionary found in RESULTS_DIR.
#
# - sorted(): os.listdir() yields entries in arbitrary order, but this notebook
#   reads results_lst[0] and the order of the list also determines the column
#   order of the merged frame, so a fixed order keeps reruns reproducible.
# - ".pkl" filter: anything else living in the directory (checkpoint folders,
#   hidden files, ...) would otherwise make open()/pickle.load() fail.
#
# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# RESULTS_DIR must only contain files produced by trusted runs.
results_lst = []
for filename in sorted(os.listdir(RESULTS_DIR)):
    if not filename.endswith(".pkl"):
        continue
    path = f"{RESULTS_DIR}/{filename}"
    with open(path, "rb") as f:
        print(f"Reading data from {path}")
        results_lst.append(pickle.load(f))

# Peek at the fields stored in a result dictionary.
results_lst[0].keys()

Reading data from results/dtw_results_1658228687_00.pkl
Reading data from results/hmm_results_1658228687_00.pkl


dict_keys(['data_filename', 'time_index', 'split_id', 'results_df', 'accuracies', 'total_time'])

In [10]:
# Pull the prediction table out of each loaded result dictionary.
df_lst = [result["results_df"] for result in results_lst]

# Fold the tables into one frame, pairwise from left to right. Rows are matched
# on the sample identity columns; the outer join keeps samples that appear in
# only some of the tables.
merged_results_df = reduce(
    lambda left, right: left.merge(
        right,
        how="outer",
        on=["sample_id", "true_label", "sample_len"],
    ),
    df_lst,
)

In [11]:
# Last expression of the cell: the notebook renders the merged prediction table.
merged_results_df

Unnamed: 0,sample_id,true_label,sample_len,pred_label_1NN,pred_label_5NN,pred_AIC,pred_BIC
0,0,0,32,0,0,0,0
1,1,0,20,0,0,0,0
2,2,0,20,0,0,0,0
3,3,0,23,0,0,1,1
4,4,0,26,0,0,0,0
...,...,...,...,...,...,...,...
265,310,8,23,8,8,8,8
266,311,8,29,8,8,8,8
267,312,8,38,8,8,8,8
268,313,8,44,8,8,8,8


## Show accuracy and train/test subset sizes

In [12]:
# Lazily build one report line per (column, accuracy) pair stored under the
# "accuracies" key of each loaded result, then print them in order.
accuracy_lines = (
    f"{lab} accuracy: {acc:.3f}"
    for result in results_lst
    for lab, acc in result["accuracies"].items()
)
for line in accuracy_lines:
    print(line)

pred_label_1NN accuracy: 0.819
pred_label_5NN accuracy: 0.800
pred_AIC accuracy: 0.970
pred_BIC accuracy: 0.967


In [13]:
# The first loaded result records the path of a pickled data file under
# "data_filename"; load that file and show its metadata entry.
first_result = results_lst[0]
data_filename = first_result["data_filename"]

with open(data_filename, "rb") as data_file:
    data = pickle.load(data_file)

dataset_metadata = data["metadata"]
print(dataset_metadata)

{'N_TRAIN_SAMPLES_PER_MODEL': 5, 'N_TEST_SAMPLES_PER_MODEL': 30, 'MAX_SAMPLE_LEN': 50, 'MIN_SAMPLE_LEN': 20}


## Find misclassified samples

In [14]:
# Map each column returned by utils.find_colnames_with to the sorted sample ids
# of the rows where that column disagrees with "true_label".
wrong_samples = {}
for pred_col in utils.find_colnames_with(merged_results_df):
    mismatch = merged_results_df[pred_col] != merged_results_df["true_label"]
    # Row positions (not index labels) of the mismatching rows.
    row_positions = np.flatnonzero(mismatch)
    misclassified_rows = merged_results_df.iloc[row_positions]
    wrong_samples[pred_col] = sorted(misclassified_rows["sample_id"].values)

# Report the count and the ids for every column.
for col, ids in wrong_samples.items():
    print(f"Ids of {len(ids)} misclassified samples in column {col}:")
    print(ids)

Ids of 49 misclassified samples in column pred_label_1NN:
[6, 11, 13, 21, 28, 34, 37, 40, 41, 46, 47, 105, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 124, 125, 127, 128, 129, 130, 131, 132, 133, 136, 137, 138, 139, 211, 241, 243, 249, 250, 251, 252, 256, 267, 278]
Ids of 54 misclassified samples in column pred_label_5NN:
[30, 34, 37, 38, 40, 41, 45, 46, 47, 49, 55, 61, 100, 102, 105, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 124, 125, 127, 128, 129, 130, 131, 132, 133, 136, 137, 138, 139, 211, 222, 241, 249, 250, 252, 260, 267, 276, 277, 278]
Ids of 8 misclassified samples in column pred_AIC:
[3, 13, 15, 28, 30, 252, 263, 265]
Ids of 9 misclassified samples in column pred_BIC:
[3, 13, 15, 16, 28, 30, 252, 263, 265]


### Plots