In [1]:
import plotly.graph_objects as go
# import kaleido
from plotly.subplots import make_subplots
import plotly.io as pio
import pandas as pd
import numpy as np
import json

from statsmodels.tsa.seasonal import seasonal_decompose
import statsmodels.api as sm

import scipy.optimize

import sys
sys.path.insert(0, "../scripts")
import helpers as h
import seaborn as sns

In [2]:
# general code 

# get time series (ts)
# do smoothing function (ts_smooth)
# subtract (ts - ts_smooth)
# derivative d/dt(ts - ts_smooth), and second deriv d/dt(ts - ts_smooth)
# look at distributions, calculate standard deviations
# calculate accuracy of guesses 

In [3]:
def take_derivative(series):
    return pd.Series(h.normalize(np.gradient(series)))

def plot_many(fig, time, series, names, ):
    for name, ser in zip(names, series):
            fig.add_trace(go.Scatter(x=time, y=ser, name=name, mode='lines'))
    return fig

In [63]:
class Window_Detect2:
    def __init__(self, df):
        self.time = df["DateTime"]
        # magnitude of time passed in seconds 
        time_mag = self.time - self.time.min()
        self.time_seconds = time_mag.dt.total_seconds()

        self.window = df["Window Open"]
        self.window_norm = h.normalize(self.window)

        self.temp = df["Temp C"]
        self.temp_norm = h.normalize(self.temp)


    def analyze_window_change(self, smooth_fx, sim_smooth=None):
        if sim_smooth is not None:
            self.smooth_series = h.normalize(sim_smooth)
        else:
            self.smooth_series = h.normalize(smooth_fx(self.temp_norm, self.time_seconds))

        # TODO: mean largest - mean smallest?
        self.dif = h.normalize(self.temp_norm - self.smooth_series)
        self.deriv = take_derivative(self.dif)
        self.deriv2 = take_derivative(self.deriv)
        self.std, self.std2 = self.deriv.std(), self.deriv2.std()
        self.zscore, self.zscore2 = h.calc_zscore(self.deriv), h.calc_zscore(self.deriv2)


    def plot_analysis(self):
        fig = go.Figure()

        series = [self.window_norm, self.temp_norm, self.smooth_series, self.dif, self.deriv, self.deriv2]
        names = ["Window", "Observed", "Smoothed", "Difference", "Deriv1", "Deriv2"]

        return plot_many(fig, self.time, series, names)


    def plot_distributions(self, marker_width=0.1, bin_size=0.003):
        print(f"Std 1 = {self.std}, Std 2 = {self.std2}")
        fig = go.Figure()

        for ix, ser in enumerate([self.deriv2, self.deriv]):
            opacity = 0.9 if ix == 0 else 1
            fig.add_trace(go.Histogram(
            x=ser, histnorm='probability', name=f' Deriv{2 - ix}', opacity=opacity, marker_line=dict(width=marker_width ,color='black'), xbins=dict( size=bin_size),))

        fig.update_layout(barmode="stack")

        return fig
        

    def plot_zscore(self):
        fig = go.Figure()

        series = [self.window_norm, self.zscore, self.zscore2]
        names = ["Window", "Z-Score 1", "Z-Score 2"]

        return plot_many(fig, self.time, series, names)


    def plot_guesses(self, timedelta=15*2):
        # TODO change to make and plot or split function
        guess_mask = (self.zscore > 2) | (self.zscore <= -2)
        guess_times = self.time[guess_mask]

        # remove guesses that are due to bouncing
        clean_mask = guess_times.diff() >= pd.Timedelta(minutes=timedelta) # TODO make this a function of the time lag in the dataframe 
        self.guess_times = guess_times[clean_mask]

        # plot guesses in terms of zscore2
        self.zscore2_norm = h.normalize(self.zscore2)
        self.guess_values = self.zscore2_norm[self.guess_times.index] 

        fig = go.Figure()
        fig.add_trace(go.Scatter(x=self.time, y=self.window_norm, name="Window", mode='lines'))
        fig.add_trace(go.Scatter(x=self.guess_times, y=self.guess_values, name="Guess ~ Z-Score 2", mode='markers'))

        return fig


    def plot_analysis_and_distributions(self):
        # TODO make into subplots fig = make_subplots(rows=2, cols=1, shared_xaxes=True)
        fig1 = self.plot_analysis()
        fig2 = self.plot_distributions()
        fig1.show()
        fig2.show()



In [48]:
def make_stl_smooth(series, time, model="additive", ):
    # TODO make flexible for period 
    n_samples = 4 # 15 minute intervals (4/hour)
    #n_samples = 12 # 5 minute intervals (4/hour)
    seasonality_period = 24 # 24 hour temperature period 
    period = n_samples*seasonality_period
    result = seasonal_decompose(series, model=model, period=period)
    return result.seasonal

def make_sin_smooth(series, time):
    fit = h.fit_sin(time, series)
    return fit["fitfunc"](time)

def make_ewm_smooth(series, time, level=4):
    return series.ewm(level).mean()



## simulation data 

In [44]:
## simulation data 
sim_data_path = "../../data/energy_model/230307.pkl"
sim_data = pd.read_pickle(sim_data_path)
sim_data

Unnamed: 0,DateTime,Always Closed,Varied,Window Open
0,2017-07-20 00:00:00,22.067189,18.177904,0.0
1,2017-07-20 00:15:00,22.050305,18.467071,0.0
2,2017-07-20 00:30:00,22.035163,18.684117,0.0
3,2017-07-20 00:45:00,22.020159,18.979786,0.0
4,2017-07-20 01:00:00,22.001794,19.203599,0.0
...,...,...,...,...
475,2017-07-24 22:45:00,23.603685,21.835224,0.0
476,2017-07-24 23:00:00,23.583059,21.821459,0.0
477,2017-07-24 23:15:00,23.561852,21.807069,0.0
478,2017-07-24 23:30:00,23.540176,21.792163,0.0


In [49]:
sim_smooth = make_stl_smooth(series=sim_data["Always Closed"], time=None, model="additive")
sim_smooth


0      0.001057
1     -0.024202
2     -0.050251
3     -0.076964
4     -0.103318
         ...   
475    0.115005
476    0.093226
477    0.071135
478    0.048547
479    0.025322
Name: seasonal, Length: 480, dtype: float64

In [51]:

# rename label in simulation dataframe to match 
sim_data.rename(columns={"Varied": "Temp C"}, inplace=True)

In [64]:

w1 = Window_Detect2(sim_data)
# w1.analyze_window_change(smooth_fx=None, sim_smooth=sim_smooth)
# w1.plot_analysis_and_distributions()

In [65]:
w1.analyze_window_change(smooth_fx=None, sim_smooth=sim_smooth)
w1.plot_analysis_and_distributions()

Std 1 = 0.10808391757404515, Std 2 = 0.09142910977527131


In [66]:
fig1 = w1.plot_zscore()
fig1.show()
fig2 = w1.plot_guesses(timedelta=30)
fig2.show()

## real data 

In [6]:
a00, a01 = h.import_desired_data("A", "15T")
print(a00["Window Open"].unique(), a01["Window Open"].unique(), a00.columns)
# use 15 min intervals or higher bc that's what used in sumulation? 

[1.] [0. 1.] Index(['DateTime', 'Temp C', 'RH %', 'Room', 'Ambient Temp', 'Ambient RH',
       'Window Open'],
      dtype='object')


In [7]:
a00, a01 = h.import_desired_data("A", "5T")
print(a00["Window Open"].unique(), a01["Window Open"].unique(), a00.columns)

[1.] [0. 1.] Index(['DateTime', 'Temp C', 'RH %', 'Room', 'Ambient Temp', 'Ambient RH',
       'Window Open'],
      dtype='object')


In [24]:
w1 = Window_Detect2(a01)
w1.analyze_window_change(make_stl_smooth)
# fig1 = w1.plot_zscore()
# fig1.show()
fig2 = w1.plot_guesses(timedelta=30)
fig2.show()
# w1.plot_analysis_and_distributions()

In [27]:
w1.guess_times.index

Int64Index([  17,   43,   62,   76,  124,  133,  182,  307,  415,  472,  569,
             583,  598,  608,  671,  763,  874, 1015, 1099],
           dtype='int64')

In [29]:
h.calc_win_change_dist(a01, w1.guess_times.index[5])

(False, 182, 49)

In [None]:
mask = (w1.zscore2 > 2) | (w1.zscore2 <= -2)
m_times = w1.time[mask]
clean_m = m_times.diff() != pd.Timedelta(minutes=15)

clean_mtimes = m_times[clean_m]
w1.deriv[clean_mtimes.index]



In [31]:
w1 = Window_Detect2(a01)
w1.analyze_window_change(make_sin_smooth)



# w1.plot_analysis_and_distributions()
# w1.plot_zscore()
w1.plot_guesses()

In [43]:
pd.DataFrame(w1.guess_times).apply(lambda x: h.calc_win_change_dist(a01, x.name), axis=1)
# h.calc_win_change_dist(a01, x)
# TODO would prefer this fx to give guess time, nearest true time, and the difference 

17        (False, 6, -11)
62          (True, 62, 0)
133      (False, 182, 49)
182        (True, 182, 0)
416      (False, 410, -6)
473        (True, 473, 0)
488     (False, 473, -15)
583      (False, 582, -1)
599     (False, 582, -17)
608      (False, 622, 14)
671       (False, 680, 9)
763       (False, 765, 2)
911    (False, 765, -146)
dtype: object

In [30]:

h.calc_win_change_dist(a01, w1.guess_times.index[5])

(False, 182, 49)

In [26]:
w1 = Window_Detect2(a01)
w1.analyze_window_change(make_ewm_smooth)
# w1.plot_zscore()
# w1.plot_guesses()
w1.plot_analysis_and_distributions()
w1.plot_zscore()
w1.plot_guesses()

Std 1 = 0.0843056303947691, Std 2 = 0.06624811100663054
