In [1]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np

from IPython.display import display, Latex


import sys
sys.path.insert(0, "../scripts")
import ml_window as m 
import helpers as h

import json 
# import streamlit as st


In [2]:
# Load the pre-computed metric dumps (SVM and EWM) from the constants folder.
with open("../constants/svm_metrics_230704.json") as svm_file:
    svm_data = json.load(svm_file)

with open("../constants/ewm_metrics_230704.json") as ewm_file:
    ewm_data = json.load(ewm_file)

In [3]:
def safeget(dct, keys_arr):
    """Follow ``keys_arr`` through nested containers and return the value found.

    Parameters
    ----------
    dct : the outermost container (normally a dict).
    keys_arr : list of keys/indices to apply one after another.

    Returns
    -------
    The value at the end of the path, or ``False`` when the path cannot be
    followed. Note that ``False`` is also returned for a stored ``False``, so
    callers cannot tell the two apart.

    Raises
    ------
    AssertionError if ``keys_arr`` is not a list.
    """
    assert isinstance(keys_arr, list)
    current = dct
    for key in keys_arr:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key absent from a dict.
            # IndexError / TypeError: the path runs through a short sequence or a
            # scalar (e.g. a float metric) that cannot be indexed any further.
            return False
    return current

In [4]:
# Peek at the "metrics" section stored under entry "0" of exp_A in the SVM results
safeget(svm_data["exp_A"]["0"], ["metrics"])

{'standard': {'0.0': {'precision': 0.4896907216494845,
   'recall': 0.5974842767295597,
   'f1-score': 0.5382436260623229,
   'support': 159},
  '1.0': {'precision': 0.6649214659685864,
   'recall': 0.5619469026548672,
   'f1-score': 0.6091127098321343,
   'support': 226},
  'accuracy': 0.5766233766233766,
  'macro avg': {'precision': 0.5773060938090354,
   'recall': 0.5797155896922135,
   'f1-score': 0.5736781679472286,
   'support': 385},
  'weighted avg': {'precision': 0.5925534442887495,
   'recall': 0.5766233766233766,
   'f1-score': 0.5798446986129135,
   'support': 385}},
 'drdr': {'# true open instance': 9,
  '# false open instance': 9,
  'true open time': '2 days 07:30:00',
  'false open time': '1 days 16:45:00',
  'bounded acc': 0.1,
  'unbounded acc': -10.88},
 'custom': {'hits/guesses': 0.108,
  'hits/actions': 0.308,
  '(hits + near hits)/guesses': 0.595,
  '(hits + near hits)/actions': 1.692,
  'misses/guesses': 0.405,
  'guesses/actions': 2.846,
  'actions': 13,
  'guess

In [5]:
# Short metric key -> path of nested keys (as consumed by safeget) inside a metrics dict.
sort_key_dict = dict(
    f1=["standard", "macro avg", "f1-score"],
    hits_guesses_ratio=["custom", "(hits + near hits)/guesses"],  # (hits + near hits) per guess
    guesses_actions_ratio=["custom", "guesses/actions"],  # guesses per action
    opening_acc=["drdr", "bounded acc"],  # opening accuracy
    unbounded_opening_acc=["drdr", "unbounded acc"],
    true_time=["drdr", "true open time"],
    false_time=["drdr", "false open time"],
)

def sort_and_return_quick_result(all_results, data_type="svm", sort_key="f1", key_dict=sort_key_dict):
    """Rank the entries of ``all_results`` by one metric, highest value first.

    Parameters
    ----------
    all_results : dict mapping an entry key to its result dict.
    data_type : "svm" reads the metric from ``entry["metrics"]`` and the label from
        ``entry["name"]``; any other value reads the metric from the entry itself
        and uses the entry key as the label.
    sort_key : key into ``key_dict`` selecting the metric.
    key_dict : maps ``sort_key`` to the list of nested keys handed to ``safeget``.

    Returns
    -------
    (quick_res, sorted_results)
        ``quick_res`` is a list of ``{"name": ..., "val": ...}`` dicts in ranked
        order; ``sorted_results`` is the ranked list of ``(key, entry)`` pairs.

    NOTE(review): values are compared exactly as ``safeget`` returns them, so
    string-valued metrics are ordered lexicographically - confirm this is what is
    wanted for the time metrics.
    """
    metric_path = key_dict[sort_key]
    is_svm = data_type == "svm"

    def metric_of(entry):
        # SVM entries keep their scores under "metrics"; other entries are the scores.
        scores = entry["metrics"] if is_svm else entry
        return safeget(scores, metric_path)

    sorted_results = sorted(
        all_results.items(),
        key=lambda item: metric_of(item[1]),
        reverse=True,
    )

    quick_res = []
    for entry_key, entry in sorted_results:
        quick_res.append(
            {
                "name": entry["name"] if is_svm else entry_key,
                "val": metric_of(entry),
            }
        )
    return quick_res, sorted_results

In [6]:
# Three best SVM configurations of experiment A, ranked by macro-average F1
quick, slow = sort_and_return_quick_result(svm_data["exp_A"], data_type="svm", sort_key="f1")
quick[:3]

[{'name': ['dt_meas_temp', 'dt_meas_rh'], 'val': 0.7094221079901271},
 {'name': ['meas_temp', 'meas_rh', 'dt_meas_temp', 'dt_amb_rh'],
  'val': 0.6989508326023839},
 {'name': ['meas_temp', 'meas_rh', 'dt_meas_temp', 'dt_amb_temp'],
  'val': 0.6937151949085122}]

In [7]:
# Same ranking for the EWM results of experiment A (sort_key defaults to "f1")
quick, slow = sort_and_return_quick_result(ewm_data["exp_A"], data_type="ewm")
quick

[{'name': 'z=1', 'val': 0.7242424242424242},
 {'name': 'z=2', 'val': 0.5824526404952461}]

In [8]:
# Build one comparison table per metric. For every experiment the table holds the
# EWM result named "z=1" followed by the best-ranked SVM results (three at most).
# Columns, in order: name, val, chart_name, exp.
dif_sort_dfs = {}

# The experiment list does not depend on the metric, so compute it once.
exps = list(ewm_data.keys())

for sort_key in sort_key_dict.keys():
    records = []
    for exp in exps:
        # --- EWM: keep only the "z=1" configuration ---
        ewm_quick, _ = sort_and_return_quick_result(ewm_data[exp], "ewm", sort_key)
        ewm_matches = [rec for rec in ewm_quick if rec["name"] == "z=1"]
        if not ewm_matches:
            # Fail with a readable message instead of a bare IndexError.
            raise ValueError(f"no 'z=1' entry in the EWM results of {exp}")
        records.append({**ewm_matches[0], "chart_name": "EWM", "exp": exp})

        # --- SVM: top-ranked configurations; slicing copes with fewer than three ---
        svm_quick, _ = sort_and_return_quick_result(svm_data[exp], "svm", sort_key)
        for rank, rec in enumerate(svm_quick[:3], start=1):
            records.append({**rec, "chart_name": f"SVM_{rank}", "exp": exp})

    # One DataFrame construction from the flat record list (no repeated list concatenation).
    dif_sort_dfs[sort_key] = pd.DataFrame(records)



In [9]:
# Convert the "val" column of the two opening-time tables to timedeltas.
for time_key in ("true_time", "false_time"):
    time_df = dif_sort_dfs[time_key]
    time_df["val"] = pd.to_timedelta(time_df["val"])

In [10]:
# Raw feature name -> LaTeX label.
# Raw strings are used so that LaTeX backslashes (\frac, \mathrm) are never read as
# Python escape sequences; "\m" in a normal string is an invalid escape that newer
# Python versions warn about.
latex_names = {
    "meas_temp": r"$T_{meas}$",
    "meas_rh": r"$RH_{meas}$",

    "amb_temp": r"$T_{amb}$",
    "amb_rh": r"$RH_{amb}$",

    "dt_meas_temp": r"$\frac{\mathrm{d}}{\mathrm{d}t}T_{meas}$",
    "dt_meas_rh": r"$\frac{\mathrm{d}}{\mathrm{d}t}RH_{meas}$",
    "dt_amb_temp": r"$\frac{\mathrm{d}}{\mathrm{d}t}T_{amb}$",
    "dt_amb_rh": r"$\frac{\mathrm{d}}{\mathrm{d}t}RH_{amb}$",

    "amb_minus_meas_temp": r"$T_{amb} - T_{meas}$",
    "amb_minus_meas_rh": r"$RH_{amb} - RH_{meas}$",
    "meas_minus_deriv_temp": r"$T_{meas} - \frac{\mathrm{d}}{\mathrm{d}t}T_{meas}$",
    "meas_minus_deriv_rh": r"$RH_{meas} - \frac{\mathrm{d}}{\mathrm{d}t}RH_{meas}$",
}

# Experiment key as used in the metric files -> label shown in tables and legends.
experiment_names = dict(
    exp_A="Exp A",
    exp_B="Exp B",
    exp_C="Exp C",
)

In [11]:
def change_name(x, names=None):
    """Translate raw feature names into LaTeX labels.

    Parameters
    ----------
    x : a list of raw feature names, or any non-list value.
    names : optional mapping raw name -> label; defaults to the module-level
        ``latex_names`` (looked up at call time).

    Returns
    -------
    For a list: a NEW list in which every name found in ``names`` is replaced by
    its label; unknown names are kept as they are and reported with a print.
    The input list is left untouched, so lists shared with other structures are
    not altered and repeated calls give the same result.
    For anything else: the fixed smoothing-technique label ``"$\\text{ST}$"``.
    """
    if names is None:
        names = latex_names

    if not isinstance(x, list):
        # smoothing technique
        return "$\\text{ST}$"

    converted = []
    for item in x:
        if item in names:
            converted.append(names[item])
        else:
            print(f"passed on {x}")
            converted.append(item)
    return converted

def join_names(x):
    """Collapse a list of labels into one string prefixed with the SVM tag.

    A non-list value (the smoothing-technique label) is returned unchanged.
    For a list with two or more items the items are joined with "/", and every
    "$/$" seam between two adjacent "$...$" labels is replaced by ", " so they
    become one expression. A shorter list contributes its first item as is.
    Finally the first "$" of the result is replaced by "$\\text{SVM: }".
    """
    if type(x) is list:
        if len(x) >= 2:
            label = "/".join(x).replace("$/$", ", ")
        else:
            label = x[0]
        return label.replace("$", "$\\text{SVM: }", 1)

    # smoothing technique
    return x

In [25]:
for k, v in dif_sort_dfs.items():
    print(f"\n {k}")
    # Change names to be LaTeX friendly, but only while the column still holds raw
    # feature lists. Once converted every entry is a string, and change_name maps
    # any non-list value to the smoothing-technique label, so a second pass would
    # overwrite every SVM label. The guard makes the cell safe to re-run.
    if v["name"].map(lambda name: isinstance(name, list)).any():
        v["name"] = v["name"].apply(change_name).apply(join_names)
    # .get leaves already-renamed values alone instead of raising KeyError on re-run.
    v["exp"] = v["exp"].map(lambda x: experiment_names.get(x, x))


 f1

 hits_guesses_ratio

 guesses_actions_ratio

 opening_acc

 unbounded_opening_acc

 true_time

 false_time


In [26]:
# Show the comparison table built for the "f1" sort key (macro-average F1)
dif_sort_dfs["f1"]

Unnamed: 0,name,val,chart_name,exp
0,$\text{ST}$,0.724242,EWM,Exp A
1,$\text{SVM: }\frac{\mathrm{d}}{\mathrm{d}t}T_{...,0.709422,SVM_1,Exp A
2,"$\text{SVM: }T_{meas}, RH_{meas}, \frac{\mathr...",0.698951,SVM_2,Exp A
3,"$\text{SVM: }T_{meas}, RH_{meas}, \frac{\mathr...",0.693715,SVM_3,Exp A
4,$\text{ST}$,0.461721,EWM,Exp B
5,$\text{SVM: }RH_{amb}$,0.444785,SVM_1,Exp B
6,$\text{SVM: }T_{amb}$,0.423841,SVM_2,Exp B
7,"$\text{SVM: }T_{amb}, \frac{\mathrm{d}}{\mathr...",0.419435,SVM_3,Exp B
8,$\text{ST}$,0.437793,EWM,Exp C
9,"$\text{SVM: }T_{amb}, RH_{amb}$",0.625829,SVM_1,Exp C


In [None]:
# Chart title for every metric key of sort_key_dict.
# Written as an explicit key -> title mapping: pairing a separate title list with
# sort_key_dict.keys() by position would silently misalign (or truncate) as soon
# as one of the two is edited. A missing key now fails loudly at lookup time.
proper_chart_titles = {
    "f1": "Macro Average F1 Score",
    "hits_guesses_ratio": "Hits/Guesses",
    "guesses_actions_ratio": "Guesses/Actions",
    "opening_acc": "Opening Accuracy",
    "unbounded_opening_acc": "Unbounded Opening Accuracy",
    "true_time": "True Opening Time",
    "false_time": "False Opening Time",
}
# Kept for any code that still reads the plain list of titles.
proper_chart_titles_list = list(proper_chart_titles.values())
proper_chart_titles

In [30]:
# One strip chart per metric: models on the x axis, metric value on the y axis,
# one colour per experiment. Each figure is written to the summary figs folder.
for metric_key, metric_df in dif_sort_dfs.items():

    fig = px.strip(metric_df, x="chart_name", y="val", color="exp", stripmode='overlay')

    # Tick labels come from the names of the first four rows of the table.
    txt = "r'{info}'"
    tick_labels = [txt.format(info=label) for label in metric_df["name"].iloc[0:4]]

    x_axis = {
        "tickmode": "array",
        "tickvals": list(range(4)),
        "ticktext": tick_labels,
    }
    fig.update_layout(
        title=proper_chart_titles[metric_key],
        xaxis=x_axis,
        xaxis_title='Models',
        yaxis_title='Metric Values',
        legend_title_text='Experiments',
    )
    fig.write_image(f"../../summary/230613_buildsys/figs/{metric_key}.png")
