# Make necessary helper functions for comparing algorithms
* 02/21/23

In [56]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd
import numpy as np
import json

# local modules 
import sys
sys.path.insert(0, "../scripts")
from helpers import *

sys.path.insert(0, "../../JUST")
from JUSTjumps import *


In [None]:
# winow open / close detection metric + plotting 
# cross validation algorithm / way to try different parameter values quickly and see how the results change... => CV only really needed for ML 

In [None]:
# for a given windows time series, have data about when window opened, and when window closed 
# because sometimes change freq of data, when win open/close should be decided after data imported 
# then can see (1) if window change detected w/in a given threshold 
# how far away detections are from reality 
# plot should highlight true window anomaly time + lines at the incidence
# for the algo, should show where made the guess...

In [6]:
r00, r01 = import_desired_data("A", "15T")

In [7]:
r00["Window Open"].unique()

array([1.])

In [8]:
r01["Window Open"].unique() 
# how to determine where the value is switching 

array([0., 1.])

In [19]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=r01["DateTime"],
    y=r01["Window Open"], 
    mode='lines+markers',
))

fig.add_trace(go.Scatter(
    x=r01["DateTime"],
    y=check*1 , 
    mode='markers',
))


In [12]:
# see where value is changing 
check = r01["Window Open"].shift() != r01["Window Open"]

In [17]:
check # True => 1 

0       True
1      False
2       True
3      False
4      False
       ...  
380    False
381    False
382    False
383    False
384    False
Name: Window Open, Length: 385, dtype: bool

In [16]:
check*1

0      1
1      0
2      1
3      0
4      0
      ..
380    0
381    0
382    0
383    0
384    0
Name: Window Open, Length: 385, dtype: int64

In [None]:
# can see if the index that is passed in is 

In [26]:
c00, c01 = import_desired_data("C", "15T")
checkc = c00["Window Open"].shift() != c00["Window Open"]

In [27]:
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=c00["DateTime"],
    y=c00["Window Open"], 
    mode='lines+markers',
))

fig.add_trace(go.Scatter(
    x=c00["DateTime"],
    y=checkc*1 , 
    mode='markers',
))

In [35]:
# find nearest index to ix 
def find_nearest(array, value):
    "find index of closest value in an array to a value that has passed in "
    array = np.asarray(array)
    idx = (np.abs(array - value)).argmin()
    return array[idx]

In [54]:
def calc_win_change_dist(df, ix):
    """
    Report if the index (ix) returned by an algorithm is what the data (df) recognizes as a window flip 

    Returns
    exact: if ix reported by the algo is spot on 
    nearest: the nearest ix to what was reported 
    distance: the distance (# of indices) between the ix and the nearest found value 
    """
    # note where the value in Window Open series changes 
    shift  = df["Window Open"].shift() != df["Window Open"]

    # return if ix > lenght of data 
    if ix > len(shift):
       return "Passed index larger than it should be"
    
    # some of the experiments did not have any shifts 
    flips = np.where(shift*1==1)[0] #[1:-1]
    if len(flips) < 2:
        return "No Flip Happened!"
    
    # check if got the exact flip 
    exact = shift[ix]
    
    # drop the first entry which will always be true 
    flips = flips[1:-1]

    # find the distance between the nearest flip, and the ix 
    nearest = find_nearest(flips, ix)
    distance = find_nearest(flips, ix) - ix

    # report performance 
    return exact, nearest, distance 


In [45]:
calc_win_change_dist(c01, 34)

'No Flip Happened!'

In [49]:
calc_win_change_dist(c00, 800)

'Pass index larger than it should be'

In [55]:
calc_win_change_dist(c00, 36)

(True, 36, 0)

## try JUST

In [61]:
c01

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-09-08 08:00:00,30.004667,46.522833,1.0,17.777778,19.444444,0.0
1,2022-09-08 08:15:00,30.084667,46.565333,1.0,17.777778,19.444444,0.0
2,2022-09-08 08:30:00,30.165500,46.702400,1.0,17.777778,19.444444,0.0
3,2022-09-08 08:45:00,30.246000,46.985267,1.0,17.777778,19.444444,0.0
4,2022-09-08 09:00:00,30.317500,47.561633,1.0,22.444444,18.333333,0.0
...,...,...,...,...,...,...,...
284,2022-09-11 07:00:00,29.246000,50.797600,1.0,19.611111,30.000000,0.0
285,2022-09-11 07:15:00,29.237667,50.825433,1.0,19.611111,30.000000,0.0
286,2022-09-11 07:30:00,29.221833,50.800933,1.0,19.611111,30.000000,0.0
287,2022-09-11 07:45:00,29.216833,50.815267,1.0,19.611111,30.000000,0.0


In [72]:
c01["DateTime"].dt.day

0       8
1       8
2       8
3       8
4       8
       ..
284    11
285    11
286    11
287    11
288    11
Name: DateTime, Length: 289, dtype: int64

In [73]:
ts03 = c01["DateTime"].dt.day  + normalize_scale(c01["DateTime"].dt.hour, 0.0000001, 0.9999999)

In [82]:
def make_jump_ts(df):
    return df["DateTime"].dt.day  + normalize_scale(df["DateTime"].dt.hour, 0.0000001, 0.9999999)

In [74]:
# just find one jump 
ix, mag, direction = JumpDetect(ts03, c01["Temp C"])
# df03.iloc[ix]

In [75]:
ix

136

In [76]:
calc_win_change_dist(c00, ix)

(True, 136, 0)

In [77]:
res = AllJumps(ts03, c01["Temp C"])

In [78]:
res

[[135, -0.29709233881558994, -0.8570232551561213, 8.5],
 [136, -0.29281890572634595, -0.8333424849178237, 8.5]]

In [79]:
res[1][0]

136

In [81]:
calc_win_change_dist(c00, 135)

(False, 136, 1)

In [83]:
b00, b01 = import_desired_data("B", "15T")
b00ts = make_jump_ts(b00)

In [86]:
b00ts

0       27.391304
1       27.391304
2       27.391304
3       27.391304
4       27.434783
          ...    
1383    10.782609
1384    10.826087
1385    10.826087
1386    10.826087
1387    10.826087
Name: DateTime, Length: 1388, dtype: float64

In [92]:
from datetime import datetime

In [102]:
b00ts = np.array( [b.timestamp() for b in b00["DateTime"]])

In [99]:
res = AllJumps(b00ts, b00["Temp C"])

IndexError: list index out of range

In [100]:
res = AllJumps(b00ts, b01["Temp C"])

IndexError: list index out of range

In [88]:
a00, a01 = import_desired_data("A", "15T")
a00ts = make_jump_ts(a00)

In [91]:
a00ts

0      20.304348
1      20.304348
2      20.304348
3      20.347826
4      20.347826
         ...    
380    24.260870
381    24.260870
382    24.260870
383    24.304348
384    24.304348
Name: DateTime, Length: 385, dtype: float64

In [89]:
res = AllJumps(a00ts, a00["Temp C"])

KeyError: 3

In [90]:
res = AllJumps(a00ts, a01["Temp C"])

KeyError: 3