In [2]:
import plotly.graph_objects as go
# import kaleido
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np
import json

from statsmodels.tsa.seasonal import seasonal_decompose

# local modules 
import sys
sys.path.insert(0, "../scripts")
import helpers as h
import seaborn as sns

In [3]:
# Load the two room "A" datasets from the project helper ("15T" is passed
# through unchanged; presumably a 15-minute sampling alias -- confirm in
# h.import_desired_data).
a00, a01 = h.import_desired_data("A", "15T")

# TODO: move this indexing step into the helper function.
# Index each frame by its own timestamps. Passing the raw values (instead of
# the column name) keeps "DateTime" available as a regular column as well.
for frame in (a00, a01):
    frame.set_index(frame["DateTime"].values, inplace=True)

# Quick sanity check: which window states occur in each dataset, and which
# columns are available.
window_states = [frame["Window Open"].unique() for frame in (a00, a01)]
print(*window_states, a00.columns)

[1.] [0. 1.] Index(['DateTime', 'Temp C', 'RH %', 'Room', 'Ambient Temp', 'Ambient RH',
       'Window Open'],
      dtype='object')


In [4]:
# The decomposition period is the number of samples in one seasonal cycle:
#   (samples per hour) * (hours per cycle) = 4 * 24
n_samples = 4
seasonality_period = 24
period = n_samples * seasonality_period
print(period)

# First derivative of the observed temperature only, re-indexed by time and
# passed through the project's normalize helper.
temp_gradient = np.gradient(a01["Temp C"])
obs_deriv = h.normalize(
    pd.Series(temp_gradient, index=a01.index, name="obs_deriv")
)

# Additive seasonal decomposition of the normalized derivative.
stl_deriv = seasonal_decompose(obs_deriv, model="additive", period=period)

# Optional quick look at the decomposition components:
# stl_deriv.plot();

96


In [117]:
# Overlay the window schedule, the observed temperature, and the derived
# signals on one time axis so they can be compared visually.
# NOTE: relies on `a01` and `stl_deriv` computed in earlier cells.

# Plotly date axes take `dtick` in milliseconds; this spaces ticks 4 hours apart.
TICK_SPACING_MS = 4 * 60 * 60 * 1000

# calculate derivative of observation only
obs_deriv = h.normalize(pd.Series(np.gradient(a01["Temp C"]), a01.index, name='obs_deriv'))

# difference between the seasonal component of the derivative's decomposition
# and the window schedule (each normalized first, then the difference itself)
stl_deriv_dif = h.normalize(h.normalize(stl_deriv.seasonal) - h.normalize(a01["Window Open"]))

# derivative of that difference; the series name 'deriv' is the column name
# used once this is turned into a DataFrame downstream
stl_deriv_dif_deriv = h.normalize(pd.Series(np.gradient(stl_deriv_dif), stl_deriv_dif.index, name='deriv'))

# (legend name, x values, y values, extra Scatter kwargs) -- in drawing order
line_specs = [
    ("Window Sched", a01.index, h.normalize(a01["Window Open"]), dict(line=dict(width=1))),
    ("Variable Open Obs", a01.index, h.normalize(a01["Temp C"]), {}),
    ("Obs 1st Deriv ", obs_deriv.index, obs_deriv, {}),
    ("Obs STL Dif  Deriv ", stl_deriv_dif.index, stl_deriv_dif, {}),
    ("Deriv of Obs STL Dif  Deriv ", stl_deriv_dif_deriv.index, stl_deriv_dif_deriv, {}),
]

fig = go.Figure()
for trace_name, x_values, y_values, extra_kwargs in line_specs:
    fig.add_trace(go.Scatter(
        x=x_values,
        y=y_values,
        mode='lines',
        name=trace_name,
        **extra_kwargs,
    ))

# Tick labels show abbreviated weekday and hour (e.g. "Wed-12").
# update_xaxes returns the figure, so this last expression also renders it.
fig.update_xaxes(
    dtick=TICK_SPACING_MS,
    tickformat='%a-%H')

In [118]:
# Flag the samples whose normalized derivative is far from the middle of its
# range: strictly above the upper cut-off or at/below the lower cut-off.
# (Described originally as +/- 0.3 from the mean -- presumably a mean of 0.5
# for the normalized series; confirm against h.normalize.)
UPPER_CUTOFF = 0.8
LOWER_CUTOFF = 0.2

s3d = pd.DataFrame(stl_deriv_dif_deriv)
deriv_values = s3d["deriv"]
mask = deriv_values.gt(UPPER_CUTOFF) | deriv_values.le(LOWER_CUTOFF)
m = s3d.loc[mask]

In [119]:
# Show the flagged derivative samples as markers on top of the window schedule.
window_trace = go.Scatter(
    x=a01.index,
    y=h.normalize(a01["Window Open"]),
    mode='lines',
    name="Window Sched",
    line=dict(width=1),
)
flagged_trace = go.Scatter(
    x=m.index,
    y=m["deriv"],
    mode='markers',
)

fig = go.Figure()
fig.add_trace(window_trace)
# add_trace returns the figure, so this last expression also renders it.
fig.add_trace(flagged_trace)

In [120]:
# Collapse runs of consecutive flagged samples down to their first sample.
# Two flagged rows belong to the same run when they are exactly 15 minutes
# apart (the sampling period of this modified dataset => see
# h.import_desired_data()), so keep only rows whose gap to the previous
# flagged row is NOT 15 minutes. The first row has no predecessor (NaT gap)
# and is therefore kept as well.
sampling_step = pd.Timedelta(minutes=15)
diff_series = pd.Series(m.index).diff()
duplicate_mask = diff_series.ne(sampling_step)

# Move the datetime index into an ordinary column (named "index") so the rows
# are addressed by position 0..n-1, matching the mask built above.
mdt = m.reset_index()
a = mdt.loc[duplicate_mask, "index"].index
clean_deriv = mdt.iloc[a]
clean_deriv


Unnamed: 0,index,deriv
0,2022-07-20 07:30:00,0.014985
2,2022-07-20 12:15:00,0.990955
4,2022-07-20 22:30:00,0.176164
5,2022-07-21 13:00:00,0.956238
7,2022-07-21 14:45:00,0.071118
9,2022-07-21 17:15:00,0.95706
11,2022-07-21 22:30:00,0.176164
13,2022-07-22 07:30:00,0.946933
15,2022-07-22 11:00:00,0.0
17,2022-07-22 15:45:00,0.942301
