# Compare results

## Setup

In [1]:
import pandas as pd
import numpy as np
import pickle 
from functools import reduce
import utils
import os

RESULTS_DIR = "results"

## Load results of classification

In [2]:
results_lst = []
for filename in os.listdir(RESULTS_DIR):
    path = f"{RESULTS_DIR}/{filename}"
    with open(path, "rb") as f:
        print(f"Reading data from {path}")
        results_lst.append(pickle.load(f))

Reading data from results/dtw_results_for_HMM_1658502416_00.pkl
Reading data from results/hmm_results_for_ARIMA_1658502456_00.pkl
Reading data from results/hmm_results_for_HMM_1658502416_00.pkl
Reading data from results/dtw_results_for_ARIMA_1658502456_00.pkl


In [3]:
# Make sure all results are in the same format
for r in results_lst:
    print(r.keys())

dict_keys(['data_filename', 'classificator', 'time_index', 'split_id', 'predictions_dfs', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'classificator', 'time_index', 'split_id', 'predictions_dfs', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'classificator', 'time_index', 'split_id', 'predictions_dfs', 'accuracies', 'total_time'])
dict_keys(['data_filename', 'classificator', 'time_index', 'split_id', 'predictions_dfs', 'accuracies', 'total_time'])


In [4]:
for i, r in enumerate(results_lst):
    with open(r["data_filename"], "rb") as f:
        data = pickle.load(f)
        print(f"Info about generated data in {i}:")
        print(data.keys())

Info about generated data in 0:
dict_keys(['generating_model', 'data_filename', 'time_index', 'models_lst', 'labels_df', 'all_X_samples', 'all_Z_samples', 'indices_splits_lst', 'metadata'])
Info about generated data in 1:
dict_keys(['generating_model', 'data_filename', 'time_index', 'models_lst', 'labels_df', 'all_X_samples', 'indices_splits_lst', 'metadata'])
Info about generated data in 2:
dict_keys(['generating_model', 'data_filename', 'time_index', 'models_lst', 'labels_df', 'all_X_samples', 'all_Z_samples', 'indices_splits_lst', 'metadata'])
Info about generated data in 3:
dict_keys(['generating_model', 'data_filename', 'time_index', 'models_lst', 'labels_df', 'all_X_samples', 'indices_splits_lst', 'metadata'])


## Compare accuracies

- `gen_with`: type of model the data was generated with ("HMM" / "ARIMA" / "ARIMA_all_statio" etc)
- `n_train`: number of train samples per model
- `n_test`: number of test samples per model
- `min_len`: min sample size parameter used in generation
- `max_len`: max sample size parameter used in generation
- `cls_with`: type of model used to classify samples ("HMM" / "DTW")
- `variant`: variant of classification, e.g. for HMM: "AIC" or "BIC", for DTW: "1NN" or "5NN" derived as a key from `predictions_dfs` list
- `acc`: accuracy of predictions


In [12]:
results_time_id = []
gen_with = []
n_train = []
n_test = []
min_len = []
max_len = []
cls_with = []
variants = []
accs = []

for r in results_lst:
    for variant, acc in r["accuracies"].items():
        with open(r["data_filename"], "rb") as f:
            data = pickle.load(f)
            results_time_id.append(r["time_index"])
            gen_with.append(data["generating_model"])
            n_train.append(data["metadata"]["N_TRAIN_SAMPLES_PER_MODEL"])
            n_test.append(data["metadata"]["N_TEST_SAMPLES_PER_MODEL"])
            min_len.append(data["metadata"]["MIN_SAMPLE_LEN"])
            max_len.append(data["metadata"]["MAX_SAMPLE_LEN"])
            cls_with.append(r["classificator"])
            variants.append(variant)
            accs.append(acc)

pd.DataFrame({
    "results_time_id": results_time_id,
    "gen_id": gen_with,
    "n_train": n_train,
    "n_test": n_test,
    "min_len": min_len,
    "max_len": max_len,
    "cls_with": cls_with,
    "variant": variants,
    "acc": accs
})
            

Unnamed: 0,results_time_id,gen_id,n_train,n_test,min_len,max_len,cls_with,variant,acc
0,1658502416,HMM,5,30,20,50,DTW,1NN,0.82963
1,1658502416,HMM,5,30,20,50,DTW,5NN,0.733333
2,1658502456,ARIMA,5,30,20,50,HMM,AIC,0.67037
3,1658502456,ARIMA,5,30,20,50,HMM,BIC,0.67037
4,1658502416,HMM,5,30,20,50,HMM,AIC,0.97037
5,1658502416,HMM,5,30,20,50,HMM,BIC,0.97037
6,1658502456,ARIMA,5,30,20,50,DTW,1NN,0.555556
7,1658502456,ARIMA,5,30,20,50,DTW,5NN,0.522222
