In [43]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np


import sys
sys.path.insert(0, "../scripts")
import ml_window as m 
import helpers as h

import json 


In [44]:
# Load the pre-computed metric dumps for both approaches (macro f1 score, ...).
# The encoding is pinned to UTF-8 so the files decode the same way on every
# platform instead of depending on the locale's default encoding.
with open("../constants/svm_metrics_230704.json", encoding="utf-8") as f:
    svm_data = json.load(f)

with open("../constants/ewm_metrics_230704.json", encoding="utf-8") as f:
    ewm_data = json.load(f)

In [45]:
def safeget(dct, keys_arr):
    """Follow a path of keys through a nested container.

    Parameters
    ----------
    dct : dict
        Nested container (for example parsed JSON) to look into.
    keys_arr : list or tuple
        Keys to follow, outermost first. A bare string is rejected because
        iterating it would walk the path one character at a time.

    Returns
    -------
    object
        The value at the end of the path, or ``False`` when any step of the
        path cannot be followed. Note that a stored value of ``False`` is
        therefore indistinguishable from a missing path.

    Raises
    ------
    AssertionError
        If ``keys_arr`` is not a list or tuple.
    """
    assert isinstance(keys_arr, (list, tuple)), "keys_arr must be a list of keys"

    current = dct
    for key in keys_arr:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key absent from a mapping.
            # IndexError: position past the end of a sequence.
            # TypeError: the path continues through a non-subscriptable value
            # (e.g. a number), which is just another way of being "missing".
            return False
    return current

In [46]:
# Peek at the "metrics" entry stored under key "0" of exp_A in the SVM results
# to see which metric groups and fields are available.
safeget(svm_data["exp_A"]["0"], ["metrics"])

{'standard': {'0.0': {'precision': 0.4896907216494845,
   'recall': 0.5974842767295597,
   'f1-score': 0.5382436260623229,
   'support': 159},
  '1.0': {'precision': 0.6649214659685864,
   'recall': 0.5619469026548672,
   'f1-score': 0.6091127098321343,
   'support': 226},
  'accuracy': 0.5766233766233766,
  'macro avg': {'precision': 0.5773060938090354,
   'recall': 0.5797155896922135,
   'f1-score': 0.5736781679472286,
   'support': 385},
  'weighted avg': {'precision': 0.5925534442887495,
   'recall': 0.5766233766233766,
   'f1-score': 0.5798446986129135,
   'support': 385}},
 'drdr': {'# true open instance': 9,
  '# false open instance': 9,
  'true open time': '2 days 07:30:00',
  'false open time': '1 days 16:45:00',
  'bounded acc': 0.1,
  'unbounded acc': -10.88},
 'custom': {'hits/guesses': 0.108,
  'hits/actions': 0.308,
  '(hits + near hits)/guesses': 0.595,
  '(hits + near hits)/actions': 1.692,
  'misses/guesses': 0.405,
  'guesses/actions': 2.846,
  'actions': 13,
  'guess

In [54]:
# Short metric alias -> key path handed to safeget() to reach that metric
# inside a metrics dictionary. Insertion order is the order in which the
# metrics are processed by later cells.
sort_key_dict = {
    "f1": ["standard", "macro avg", "f1-score"],
    # Note: this alias points at the variant that also counts near hits.
    "hits/guesses": ["custom", "(hits + near hits)/guesses"],
    "guesses/actions": ["custom", "guesses/actions"],
    # Opening accuracy, bounded and unbounded variants.
    "opening_acc": ["drdr", "bounded acc"],
    "unbounded_opening_acc": ["drdr", "unbounded acc"],
    # Durations, stored as strings such as "2 days 07:30:00".
    "true_time": ["drdr", "true open time"],
    "false_time": ["drdr", "false open time"],
}

def _metric_sort_key(value):
    """Turn a raw metric value into something that orders correctly.

    Returns a ``(is_present, comparable)`` tuple:

    * ``False`` (the "not found" sentinel returned by ``safeget``) maps to
      ``(0, 0)`` so missing metrics always rank below present ones and are
      never compared against a real value.
    * Strings are parsed as durations where possible, because sorting
      "9 days ..." and "10 days ..." as plain text puts them in the wrong
      order. Strings that are not durations are kept as they are.
    * Everything else (numbers) is used unchanged.
    """
    if value is False:
        return (0, 0)
    if isinstance(value, str):
        try:
            return (1, pd.Timedelta(value))
        except ValueError:
            return (1, value)
    return (1, value)


def sort_and_return_quick_result(all_results, data_type="svm", sort_key="f1", key_dict=sort_key_dict):
    """Rank result entries by one metric, best (largest) first.

    Parameters
    ----------
    all_results : dict
        Result entries keyed by an identifier. For ``data_type == "svm"``
        each entry is read as ``entry["metrics"]`` / ``entry["name"]``; for
        any other ``data_type`` the entry itself is the metrics dictionary
        and its key is used as the name.
    data_type : str
        ``"svm"`` selects the SVM layout, anything else the flat layout.
    sort_key : str
        Alias looked up in ``key_dict`` to obtain the metric's key path.
    key_dict : dict
        Mapping of alias -> key path (defaults to ``sort_key_dict``).

    Returns
    -------
    (quick_res, sorted_results)
        ``quick_res`` is a list of ``{"name": ..., "val": ...}`` dictionaries
        in ranked order, where ``val`` is the raw stored value (``False`` if
        the metric is missing). ``sorted_results`` is the list of
        ``(key, entry)`` pairs in the same order.

    Notes
    -----
    Duration strings are ranked by their actual length rather than
    alphabetically, and entries lacking the metric are ranked last.

    Raises
    ------
    KeyError
        If ``sort_key`` is not in ``key_dict``, or an SVM entry lacks
        ``"metrics"`` / ``"name"``.
    """
    key_arr = key_dict[sort_key]
    is_svm = data_type == "svm"

    def metrics_of(entry):
        # SVM entries wrap their metrics; other entries are the metrics.
        return entry["metrics"] if is_svm else entry

    sorted_results = sorted(
        all_results.items(),
        key=lambda item: _metric_sort_key(safeget(metrics_of(item[1]), key_arr)),
        reverse=True,
    )

    quick_res = [
        {
            "name": entry["name"] if is_svm else label,
            "val": safeget(metrics_of(entry), key_arr),
        }
        for label, entry in sorted_results
    ]
    return quick_res, sorted_results

In [55]:
# Rank the exp_A SVM entries by the "f1" sort key and show the three
# highest-ranked ones.
quick, slow = sort_and_return_quick_result(svm_data["exp_A"], "svm", "f1")
quick[0:3]

[{'name': ['dt_meas_temp', 'dt_meas_rh'], 'val': 0.7094221079901271},
 {'name': ['meas_temp', 'meas_rh', 'dt_meas_temp', 'dt_amb_rh'],
  'val': 0.6989508326023839},
 {'name': ['meas_temp', 'meas_rh', 'dt_meas_temp', 'dt_amb_temp'],
  'val': 0.6937151949085122}]

In [56]:
# Same ranking for the exp_A EWM entries; sort_key is left at its default
# ("f1") and every entry is displayed.
quick, slow = sort_and_return_quick_result(ewm_data["exp_A"], "ewm")
quick

[{'name': 'z=1', 'val': 0.7242424242424242},
 {'name': 'z=2', 'val': 0.5824526404952461}]

In [57]:
# For every metric alias in sort_key_dict, build one DataFrame that holds,
# per experiment, the EWM baseline row followed by the best-ranked SVM rows.
# Columns: name, val, chart_name, exp.
EWM_BASELINE_NAME = "z=1"  # EWM entry used as the comparison baseline
N_TOP_SVM = 3              # number of top-ranked SVM entries kept per experiment

dif_sort_dfs = {}
exps = list(ewm_data.keys())  # identical for every metric, so computed once

for sort_key in sort_key_dict.keys():
    data_list = []
    for exp in exps:
        # EWM: keep only the baseline entry.
        quick, slow = sort_and_return_quick_result(ewm_data[exp], "ewm", sort_key)
        baseline = next((row for row in quick if row["name"] == EWM_BASELINE_NAME), None)
        if baseline is None:
            raise ValueError(
                f"no EWM result named {EWM_BASELINE_NAME!r} in experiment {exp!r}"
            )
        data_list.append({**baseline, "chart_name": "EWM", "exp": exp})

        # SVM: keep the top-ranked entries. Slicing + enumerate copes with
        # experiments that have fewer than N_TOP_SVM results.
        quick, slow = sort_and_return_quick_result(svm_data[exp], "svm", sort_key)
        for rank, row in enumerate(quick[:N_TOP_SVM], start=1):
            data_list.append({**row, "chart_name": f"SVM_{rank}", "exp": exp})

    df = pd.DataFrame(data_list)

    dif_sort_dfs[sort_key] = df


In [74]:
# The two duration metrics are stored as strings; convert their "val" column
# to timedeltas so they can be treated as quantities.
for time_metric in ("true_time", "false_time"):
    time_df = dif_sort_dfs[time_metric]
    time_df["val"] = pd.to_timedelta(time_df["val"])

In [73]:
# Display the "val" column of the true_time ("true open time") table.
dif_sort_dfs["true_time"]["val"]

0    2 days 22:15:00
1    2 days 20:30:00
2    2 days 19:30:00
3    2 days 19:00:00
4    9 days 02:45:00
5    8 days 07:30:00
6    8 days 01:30:00
7    7 days 20:00:00
8    1 days 09:30:00
9    1 days 23:30:00
10   1 days 22:45:00
11   1 days 22:15:00
Name: val, dtype: timedelta64[ns]

In [75]:
# Display the "val" column of the false_time ("false open time") table.
dif_sort_dfs["false_time"]["val"]

0    1 days 02:00:00
1    2 days 09:00:00
2    2 days 04:30:00
3    2 days 03:45:00
4    5 days 08:15:00
5    7 days 22:00:00
6    7 days 19:45:00
7    7 days 19:00:00
8    1 days 14:45:00
9    2 days 07:45:00
10   1 days 15:00:00
11   1 days 14:45:00
Name: val, dtype: timedelta64[ns]

In [77]:
# Show the comparison table built for the "f1" metric (macro avg f1-score):
# one EWM row followed by the top SVM rows for each experiment.
dif_sort_dfs["f1"]

Unnamed: 0,name,val,chart_name,exp
0,z=1,0.724242,EWM,exp_A
1,"[dt_meas_temp, dt_meas_rh]",0.709422,SVM_1,exp_A
2,"[meas_temp, meas_rh, dt_meas_temp, dt_amb_rh]",0.698951,SVM_2,exp_A
3,"[meas_temp, meas_rh, dt_meas_temp, dt_amb_temp]",0.693715,SVM_3,exp_A
4,z=1,0.461721,EWM,exp_B
5,[amb_rh],0.444785,SVM_1,exp_B
6,[amb_temp],0.423841,SVM_2,exp_B
7,"[amb_temp, dt_meas_temp, dt_amb_temp, dt_amb_rh]",0.419435,SVM_3,exp_B
8,z=1,0.437793,EWM,exp_C
9,"[amb_temp, amb_rh]",0.625829,SVM_1,exp_C


In [78]:
# Show the comparison table built for the "true_time" metric
# ("true open time"): one EWM row followed by the top SVM rows per experiment.
dif_sort_dfs["true_time"]

Unnamed: 0,name,val,chart_name,exp
0,z=1,2 days 22:15:00,EWM,exp_A
1,"[dt_meas_temp, dt_meas_rh]",2 days 20:30:00,SVM_1,exp_A
2,"[meas_temp, meas_rh, dt_meas_temp, dt_amb_rh]",2 days 19:30:00,SVM_2,exp_A
3,"[meas_temp, meas_rh, dt_meas_temp, dt_amb_temp]",2 days 19:00:00,SVM_3,exp_A
4,z=1,9 days 02:45:00,EWM,exp_B
5,[amb_rh],8 days 07:30:00,SVM_1,exp_B
6,[dt_amb_rh],8 days 01:30:00,SVM_2,exp_B
7,[amb_temp],7 days 20:00:00,SVM_3,exp_B
8,z=1,1 days 09:30:00,EWM,exp_C
9,"[amb_temp, amb_rh]",1 days 23:30:00,SVM_1,exp_C


In [79]:
# One strip chart per metric: x = method label (EWM / SVM_n), y = metric
# value, coloured by experiment, with the metric alias as the title.
for metric_name, metric_df in dif_sort_dfs.items():
    fig = px.strip(
        metric_df,
        x="chart_name",
        y="val",
        color="exp",
        stripmode="overlay",
    )
    fig.update_layout(title=metric_name).show()
