In [52]:
import pandas as pd
import numpy as np

import sys
sys.path.insert(0, "../scripts")
import helpers as h
import window_detect2 as w
import ml_window as m
from icecream import ic

import itertools
import pickle


## get data and create variations of it 

In [2]:
# --- load raw inputs -------------------------------------------------------
# Measured data: only the second frame returned by the helper is used here.
a00, a01 = h.import_desired_data("A", "15T")
window_sched = np.array(a01["Window Open"])

# Simulation data (energy model output stored as a pickle).
sim_data_path = "../../data/energy_model/230307_and_smoothed.pkl"
sim_data = pd.read_pickle(sim_data_path)

# --- normalized temperature traces, keyed by source ------------------------
arrs = {
    "meas": np.array(h.normalize(a01["Temp C"])),       # measured data
    "amb": np.array(h.normalize(a01["Ambient Temp"])),  # ambient data
    "sim": np.array(h.normalize(sim_data["Temp C"])),   # simulation data
}

In [3]:
# measured/ambient temperature differences (both signs and the absolute
# value), each passed through h.normalize
diffs = {
    "amb_minus_meas": h.normalize(arrs["amb"] - arrs["meas"]),
    "meas_minus_amb": h.normalize(arrs["meas"] - arrs["amb"]),
    "abs_amb_meas_diff": h.normalize(np.abs(arrs["amb"] - arrs["meas"])),
}

diff_keys = [*diffs]

In [4]:
# derivatives: np.gradient of each temperature trace, then h.normalize
derivs = {
    f"dt_{trace}": h.normalize(np.gradient(arrs[trace]))
    for trace in ("meas", "sim", "amb")
}

# # derivatives of differences
# dt_amm = np.gradient(amb_minus_meas)
# dt_mma = np.gradient(meas_minus_amb)
# dt_aamd = np.gradient(abs_amb_meas_diff)

## ewm smoothing technique

In [47]:
# Run the window-change analysis on the measured frame with the EWM smoother.
w0 = w.Window_Detect2(a01)
w0.analyze_window_change(w.make_ewm_smooth)

# Pull the analysis results off the detector as NumPy arrays.
# Each pair is (key in smooth_data, attribute name on w0).
smooth_data = {
    label: getattr(w0, attr).to_numpy()
    for label, attr in (
        ("smooth_series", "smooth_series"),
        ("dif", "dif"),
        ("dt_dif", "deriv"),
        ("dt2_dif", "deriv2"),
    )
}

In [49]:
# Run the ML window detector on each smoothed signal on its own and record
# the accuracy it reports.
smooth_test_dict = {}
for signal_name, signal in smooth_data.items():
    print(signal_name)  # progress marker
    # The detector takes a list of series, hence the single-element list.
    obj = m.ML_Window_Detect([signal], window_sched)
    obj.run_all()
    smooth_test_dict[signal_name] = obj.accuracy

smooth_series
dif
dt_dif
dt2_dif


In [50]:
# Display the accuracy stored under each smoothed-signal key.
smooth_test_dict

{'smooth_series': 0.4935064935064935,
 'dif': 0.5792207792207792,
 'dt_dif': 0.5714285714285714,
 'dt2_dif': 0.5558441558441558}

In [59]:
# Run the detector's guessing step.
# NOTE(review): presumably this populates w0.guess_times — confirm in window_detect2.
w0.make_guesses()

In [60]:
# Display the timestamps held in w0.guess_times.
w0.guess_times

3     2022-07-20 08:00:00
4     2022-07-20 08:15:00
5     2022-07-20 08:30:00
6     2022-07-20 08:45:00
7     2022-07-20 09:00:00
15    2022-07-20 11:00:00
16    2022-07-20 11:15:00
21    2022-07-20 12:30:00
61    2022-07-20 22:30:00
62    2022-07-20 22:45:00
102   2022-07-21 08:45:00
103   2022-07-21 09:00:00
104   2022-07-21 09:15:00
138   2022-07-21 17:45:00
139   2022-07-21 18:00:00
158   2022-07-21 22:45:00
159   2022-07-21 23:00:00
190   2022-07-22 06:45:00
191   2022-07-22 07:00:00
192   2022-07-22 07:15:00
193   2022-07-22 07:30:00
197   2022-07-22 08:30:00
204   2022-07-22 10:15:00
205   2022-07-22 10:30:00
224   2022-07-22 15:15:00
256   2022-07-22 23:15:00
291   2022-07-23 08:00:00
292   2022-07-23 08:15:00
293   2022-07-23 08:30:00
294   2022-07-23 08:45:00
303   2022-07-23 11:00:00
339   2022-07-23 20:00:00
Name: DateTime, dtype: datetime64[ns]

## test other combinations of data..

### creating combos -> don't need to run again

In [5]:
meas_keys = list(arrs.keys())

# Every individual key, followed by each 3-key combination as a tuple.
# NOTE(review): the combination size equals the number of keys here, so
# exactly one tuple is appended — revisit if more traces are added.
meas_set = [*meas_keys, *itertools.combinations(meas_keys, 3)]

meas_set

['meas', 'amb', 'sim', ('meas', 'amb', 'sim')]

In [6]:
deriv_keys = list(derivs.keys())

# Every individual key, followed by each 3-key combination as a tuple.
# NOTE(review): the combination size equals the number of keys here, so
# exactly one tuple is appended — revisit if more derivatives are added.
deriv_set = [*deriv_keys, *itertools.combinations(deriv_keys, 3)]

deriv_set

['dt_meas', 'dt_sim', 'dt_amb', ('dt_meas', 'dt_sim', 'dt_amb')]

In [7]:
# All unordered pairs drawn from the derivative keys and the raw trace keys.
meas_and_deriv_keys = deriv_keys + meas_keys
meas_and_deriv_set = list(itertools.combinations(meas_and_deriv_keys, 2))
meas_and_deriv_set

[('dt_meas', 'dt_sim'),
 ('dt_meas', 'dt_amb'),
 ('dt_meas', 'meas'),
 ('dt_meas', 'amb'),
 ('dt_meas', 'sim'),
 ('dt_sim', 'dt_amb'),
 ('dt_sim', 'meas'),
 ('dt_sim', 'amb'),
 ('dt_sim', 'sim'),
 ('dt_amb', 'meas'),
 ('dt_amb', 'amb'),
 ('dt_amb', 'sim'),
 ('meas', 'amb'),
 ('meas', 'sim'),
 ('amb', 'sim')]

In [8]:
# Single lookup table of every series; later dicts win on duplicate keys.
all_data = {**arrs, **diffs, **derivs}
all_data.keys()

dict_keys(['meas', 'amb', 'sim', 'amb_minus_meas', 'meas_minus_amb', 'abs_amb_meas_diff', 'dt_meas', 'dt_sim', 'dt_amb'])

In [9]:
# Full list of feature selections to evaluate: single keys (str) and
# multi-key combinations (tuple of str).
test_set = [*deriv_set, *meas_set, *meas_and_deriv_set, *diff_keys]
test_set

['dt_meas',
 'dt_sim',
 'dt_amb',
 ('dt_meas', 'dt_sim', 'dt_amb'),
 'meas',
 'amb',
 'sim',
 ('meas', 'amb', 'sim'),
 ('dt_meas', 'dt_sim'),
 ('dt_meas', 'dt_amb'),
 ('dt_meas', 'meas'),
 ('dt_meas', 'amb'),
 ('dt_meas', 'sim'),
 ('dt_sim', 'dt_amb'),
 ('dt_sim', 'meas'),
 ('dt_sim', 'amb'),
 ('dt_sim', 'sim'),
 ('dt_amb', 'meas'),
 ('dt_amb', 'amb'),
 ('dt_amb', 'sim'),
 ('meas', 'amb'),
 ('meas', 'sim'),
 ('amb', 'sim'),
 'amb_minus_meas',
 'meas_minus_amb',
 'abs_amb_meas_diff']

In [15]:
# Build one entry per feature selection in test_set.
#   key   -> the selection's names joined with "+" (a single name is unchanged)
#   value -> {"data": [series, ...]} with the series looked up in all_data
test_dict = {}
for item in test_set:
    # A tuple names several series to use together; anything else is a single
    # key. isinstance (rather than an exact type comparison) also accepts
    # tuple subclasses, and normalising to a tuple removes the duplicated
    # branch.
    names = item if isinstance(item, tuple) else (item,)
    test_dict["+".join(names)] = {"data": [all_data[name] for name in names]}
        

In [56]:
# with open("../constants/data_combos_230638.pkl", "wb") as fp:
#     pickle.dump(test_dict, fp )

### testing combos 

In [17]:
# Run the ML window detector on every feature selection and attach the
# reported accuracy to that selection's entry in test_dict.
for combo_name, combo in test_dict.items():
    print(combo_name)  # progress marker
    obj = m.ML_Window_Detect(combo["data"], window_sched)
    obj.run_all()
    # `combo` is the same dict object as test_dict[combo_name]; only an
    # existing value is mutated, so iterating over items() stays safe.
    combo["accuracy"] = obj.accuracy

dt_meas
dt_sim
dt_amb
dt_meas+dt_sim+dt_amb
meas
amb
sim
meas+amb+sim
dt_meas+dt_sim
dt_meas+dt_amb
dt_meas+meas
dt_meas+amb
dt_meas+sim
dt_sim+dt_amb
dt_sim+meas
dt_sim+amb
dt_sim+sim
dt_amb+meas
dt_amb+amb
dt_amb+sim
meas+amb
meas+sim
amb+sim
amb_minus_meas
meas_minus_amb
abs_amb_meas_diff


In [18]:
# One line per feature selection: "<name> <accuracy>".
for combo_name, combo in test_dict.items():
    print(combo_name, combo["accuracy"])

dt_meas 0.6363636363636364
dt_sim 0.2805194805194805
dt_amb 0.4077922077922078
dt_meas+dt_sim+dt_amb 0.612987012987013
meas 0.5428571428571428
amb 0.5766233766233766
sim 0.44675324675324674
meas+amb+sim 0.5376623376623376
dt_meas+dt_sim 0.587012987012987
dt_meas+dt_amb 0.6441558441558441
dt_meas+meas 0.6753246753246753
dt_meas+amb 0.6103896103896104
dt_meas+sim 0.522077922077922
dt_sim+dt_amb 0.45714285714285713
dt_sim+meas 0.5168831168831168
dt_sim+amb 0.44415584415584414
dt_sim+sim 0.4103896103896104
dt_amb+meas 0.6493506493506493
dt_amb+amb 0.5636363636363636
dt_amb+sim 0.4805194805194805
meas+amb 0.6077922077922078
meas+sim 0.5376623376623376
amb+sim 0.574025974025974
amb_minus_meas 0.4909090909090909
meas_minus_amb 0.4909090909090909
abs_amb_meas_diff 0.4623376623376623


In [39]:
# Tabulate accuracy per feature selection.
# Built column-wise so that "accuracy" is a float64 column. Building from two
# rows and transposing would mix strings and floats in every pre-transpose
# column and leave both final columns as object dtype.
res = pd.DataFrame(
    {
        "values": list(test_dict.keys()),
        "accuracy": [entry["accuracy"] for entry in test_dict.values()],
    }
)

# Worst-to-best ordering; the sorted copy is displayed, `res` itself is unchanged.
res.sort_values(by="accuracy")

Unnamed: 0,values,accuracy
1,dt_sim,0.280519
2,dt_amb,0.407792
16,dt_sim+sim,0.41039
15,dt_sim+amb,0.444156
6,sim,0.446753
13,dt_sim+dt_amb,0.457143
25,abs_amb_meas_diff,0.462338
19,dt_amb+sim,0.480519
23,amb_minus_meas,0.490909
24,meas_minus_amb,0.490909
