In [1]:
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np

from IPython.display import display, Latex


import sys
sys.path.insert(0, "../scripts")
import ml_window as m 
import helpers as h

import json 
# import streamlit as st


### import data

In [2]:
# Load the stored metric dumps for the SVM and EWM models.
# The encoding is pinned so the files are decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("../constants/svm_metrics_230704.json", encoding="utf-8") as f:
    svm_data = json.load(f)

with open("../constants/ewm_metrics_230704.json", encoding="utf-8") as f:
    ewm_data = json.load(f)

### transform times to hours in data sets

In [4]:
# Walk every model in both datasets and convert its true/false open time to hours.

def get_nice_time(td_string):
    """Convert a timedelta value to a number of hours.

    Parameters
    ----------
    td_string : str or timedelta-like or number
        Anything ``pd.to_timedelta`` accepts. A plain ``int``/``float`` is
        treated as a value that has already been converted to hours and is
        returned unchanged.

    Returns
    -------
    float
        The duration expressed in hours.
    """
    # The calling cell overwrites the stored value with the result, so a
    # second run of that cell passes floats back in. pd.to_timedelta would
    # read a bare number as nanoseconds and silently shrink it to ~0 hours.
    if isinstance(td_string, (int, float)) and not isinstance(td_string, bool):
        return td_string
    return pd.to_timedelta(td_string) / np.timedelta64(1, 'h')

# Replace the "true open time" / "false open time" entries of every model's
# "drdr" block, in place, with get_nice_time(<stored value>).
# SVM models keep "drdr" under "metrics"; EWM models keep it at the top level.
for exp in svm_data.values():
    for model in exp.values():
        drdr = model["metrics"]["drdr"]
        for field in ("true open time", "false open time"):
            drdr[field] = get_nice_time(drdr[field])

for exp in ewm_data.values():
    for model in exp.values():
        drdr = model["drdr"]
        for field in ("true open time", "false open time"):
            drdr[field] = get_nice_time(drdr[field])


### assemble data from svm and ewm datasets and sort 

In [3]:
def safeget(dct, keys_arr):
    """Follow a path of keys through nested containers.

    Parameters
    ----------
    dct : dict
        Outermost container to look into.
    keys_arr : list
        Keys to apply one after another (``dct[k0][k1]...``).

    Returns
    -------
    object
        The value found at the end of the path, or ``False`` when the path
        cannot be followed. An empty ``keys_arr`` returns ``dct`` itself.

    Raises
    ------
    AssertionError
        If ``keys_arr`` is not a list.
    """
    assert isinstance(keys_arr, list)
    current = dct
    for key in keys_arr:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key absent. IndexError/TypeError: the path runs
            # through a short list or a non-subscriptable leaf (e.g. a
            # float or None). All of these mean "not found".
            return False
    return current

In [9]:
# Maps each short sort key to the list of nested keys that leads to that
# metric inside one model's metrics dictionary.
sort_key_dict = dict(
    f1=["standard", "macro avg", "f1-score"],
    # (hits + near hits) divided by guesses
    hits_guesses_ratio=["custom", "(hits + near hits)/guesses"],
    # guesses divided by actions
    guesses_actions_ratio=["custom", "guesses/actions"],
    # opening accuracy, bounded and unbounded variants
    opening_acc=["drdr", "bounded acc"],
    unbounded_opening_acc=["drdr", "unbounded acc"],
    # open-time metrics
    true_time=["drdr", "true open time"],
    false_time=["drdr", "false open time"],
)

def sort_and_return_quick_result(all_results, data_type="svm", sort_key="f1", key_dict=sort_key_dict):
    """Sort one experiment's models by a metric and summarise them.

    Parameters
    ----------
    all_results : dict
        Models of a single experiment, keyed by model id.
    data_type : str
        ``"svm"`` reads the metric from ``entry["metrics"]`` and the name
        from ``entry["name"]``. Any other value reads the metric from the
        entry itself and uses the dictionary key as the name.
    sort_key : str
        Key into ``key_dict`` selecting the metric to sort by.
    key_dict : dict
        Maps sort keys to the nested key path handed to ``safeget``.

    Returns
    -------
    tuple
        ``(quick_res, sorted_results)``: a list of ``{"name", "val"}`` dicts
        in sorted order, and the sorted list of ``(key, entry)`` pairs.
    """
    key_arr = key_dict[sort_key]
    # Only "false_time" is sorted ascending; every other key sorts descending.
    descending = sort_key != "false_time"
    is_svm = data_type == "svm"

    def metric_of(entry):
        # SVM entries nest their metrics one level deeper than EWM entries.
        source = entry["metrics"] if is_svm else entry
        return safeget(source, key_arr)

    sorted_results = sorted(
        all_results.items(),
        key=lambda item: metric_of(item[1]),
        reverse=descending,
    )

    quick_res = []
    for label, entry in sorted_results:
        display_name = entry["name"] if is_svm else label
        quick_res.append({"name": display_name, "val": metric_of(entry)})

    return quick_res, sorted_results

In [10]:
# quick, slow = sort_and_return_quick_result(svm_data["exp_A"], "svm", "f1")
# quick[0:3]

# quick, slow = sort_and_return_quick_result(ewm_data["exp_A"], "ewm")
# quick

[{'name': ['dt_meas_temp', 'dt_meas_rh'], 'val': 0.7094221079901271},
 {'name': ['meas_temp', 'meas_rh', 'dt_meas_temp', 'dt_amb_rh'],
  'val': 0.6989508326023839},
 {'name': ['meas_temp', 'meas_rh', 'dt_meas_temp', 'dt_amb_temp'],
  'val': 0.6937151949085122}]

In [12]:
# Build one DataFrame per sort key. For every experiment each frame holds the
# EWM model named "z=1" followed by the first (up to) three SVM models in the
# order returned by sort_and_return_quick_result.
# Columns: name, val, chart_name, exp.
dif_sort_dfs = {}

# Experiment ids are read from the EWM data and used to index both datasets.
exps = list(ewm_data.keys())

for sort_key in sort_key_dict.keys():
    data_list = []
    for exp in exps:
        # EWM: keep only the model called "z=1".
        ewm_quick, _ = sort_and_return_quick_result(ewm_data[exp], "ewm", sort_key)
        ewm_pick = [entry for entry in ewm_quick if entry["name"] == "z=1"][0]
        data_list.append({**ewm_pick, "chart_name": "EWM", "exp": exp})

        # SVM: label the leading models SVM_1, SVM_2, ... Enumerating the
        # slice avoids an IndexError when fewer than three models exist.
        svm_quick, _ = sort_and_return_quick_result(svm_data[exp], "svm", sort_key)
        for rank, entry in enumerate(svm_quick[0:3], start=1):
            data_list.append({**entry, "chart_name": f"SVM_{rank}", "exp": exp})

    df = pd.DataFrame(data_list)

    dif_sort_dfs[sort_key] = df


### make nicer names for plotting 

In [13]:
# LaTeX label for each raw feature name.
# Raw strings keep every backslash literal; the previous non-raw form relied
# on "\m" being an invalid escape sequence, which Python warns about.
latex_names = {
    "meas_temp": r"$T_{meas}$",
    "meas_rh": r"$RH_{meas}$",

    "amb_temp": r"$T_{amb}$",
    "amb_rh": r"$RH_{amb}$",

    "dt_meas_temp": r"$\frac{\mathrm{d}}{\mathrm{d}t}T_{meas}$",
    "dt_meas_rh": r"$\frac{\mathrm{d}}{\mathrm{d}t}RH_{meas}$",
    "dt_amb_temp": r"$\frac{\mathrm{d}}{\mathrm{d}t}T_{amb}$",
    "dt_amb_rh": r"$\frac{\mathrm{d}}{\mathrm{d}t}RH_{amb}$",

    "amb_minus_meas_temp": r"$T_{amb} - T_{meas}$",
    "amb_minus_meas_rh": r"$RH_{amb} - RH_{meas}$",
    "meas_minus_deriv_temp": r"$T_{meas} - \frac{\mathrm{d}}{\mathrm{d}t}T_{meas}$",
    "meas_minus_deriv_rh": r"$RH_{meas} - \frac{\mathrm{d}}{\mathrm{d}t}RH_{meas}$",
}

# Display label for each experiment id.
experiment_names = dict(
    exp_A="Exp A",
    exp_B="Exp B",
    exp_C="Exp C",
)

In [14]:
def change_name(x):
    """Translate a model name into its LaTeX form.

    Parameters
    ----------
    x : list or object
        A list of raw feature names, or any non-list value.

    Returns
    -------
    list or str
        For a list: a new list with every item replaced by its
        ``latex_names`` entry (items without an entry are kept as they are
        and reported with a printed message). For anything else: the fixed
        label ``"$\\text{ST}$"`` (smoothing technique).
    """
    if type(x) is list:
        # Build a fresh list rather than overwriting x: the caller's list may
        # be the same object stored in the source data and in other frames,
        # and mutating it would make later lookups fail on translated names.
        renamed = []
        for item in x:
            try:
                renamed.append(latex_names[item])
            except KeyError:
                print(f"passed on {x}")
                renamed.append(item)
        return renamed
    else:
        # smoothing technique
        return "$\\text{ST}$"

def join_names(x):
    """Collapse a list of LaTeX names into one label prefixed with "SVM: ".

    Non-list input is returned untouched. A list with several items is
    joined with "/" and every "$/$" seam is turned into ", " so adjacent
    math segments merge; a single-item list contributes its only item. The
    first "$" of the result is then extended with the "\\text{SVM: }" prefix.
    """
    if type(x) is not list:
        # smoothing technique label, already final
        return x

    merged = x[0] if len(x) < 2 else "/".join(x).replace("$/$", ", ")
    return merged.replace("$", "$\\text{SVM: }", 1)

In [None]:
# Swap raw model names for LaTeX tick labels and experiment ids for legend
# labels in every per-metric frame. The frames are modified in place.
for metric_key, frame in dif_sort_dfs.items():
    print(f"\n {metric_key}")
    latex_lists = frame["name"].apply(change_name)
    frame["name"] = latex_lists.apply(join_names)
    frame["exp"] = frame["exp"].apply(lambda exp_id: experiment_names[exp_id])

In [16]:
# Chart title for each sort key. The mapping is spelled out explicitly so a
# reordered or extended sort_key_dict cannot silently shift titles onto the
# wrong metric; a key without a title fails loudly at lookup instead.
proper_chart_titles = {
    "f1": "Macro Average F1 Score",
    "hits_guesses_ratio": "Hits/Guesses",
    "guesses_actions_ratio": "Guesses/Actions",
    "opening_acc": "Opening Accuracy",
    "unbounded_opening_acc": "Unbounded Opening Accuracy",
    "true_time": "True Opening Time",
    "false_time": "False Opening Time",
}
# Kept for backward compatibility with code that reads the plain list.
proper_chart_titles_list = list(proper_chart_titles.values())
proper_chart_titles

{'f1': 'Macro Average F1 Score',
 'hits_guesses_ratio': 'Hits/Guesses',
 'guesses_actions_ratio': 'Guesses/Actions',
 'opening_acc': 'Opening Accuracy',
 'unbounded_opening_acc': 'Unbounded Opening Accuracy',
 'true_time': 'True Opening Time',
 'false_time': 'False Opening Time'}

In [18]:
# Draw one strip plot per metric and save it as a PNG named after the sort key.
for sort_key, metric_df in dif_sort_dfs.items():

    # Tick labels come from the names in the first four rows of the frame.
    # NOTE(review): points from every experiment share these x positions, so
    # the labels describe the first four rows only - confirm this is intended.
    tick_template = "r'{info}'"
    first_names = list(metric_df["name"].iloc[0:4])
    tick_labels = [tick_template.format(info=label) for label in first_names]

    fig = px.strip(metric_df, x="chart_name", y="val", color="exp", stripmode='overlay')
    fig.update_layout(
        title=proper_chart_titles[sort_key],
        xaxis=dict(
            tickmode="array",
            tickvals=list(range(4)),
            ticktext=tick_labels,
        ),
        xaxis_title='Models',
        yaxis_title='Metric Values',
        legend_title_text='Experiments',
    )
    fig.write_image(f"../../summary/230613_buildsys/figs/{sort_key}.png", scale=4)
