# Exploratory analysis examining day-to-day trends
### Jan 5, 2023

In [63]:
import json
import datetime
import pandas as pd
import numpy as np
import math
import sys
import os
from collections import OrderedDict

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import seaborn as sns
import seaborn.objects as so
# Apply the default theme
sns.set_theme()
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../scripts")
from helpers import *

In [64]:
# Read the room/ambient sensor log and convert its timestamp column to
# pandas datetimes in a single chained expression.
df = pd.read_csv("../constants/td_ambient_102022.csv").assign(
    DateTime=lambda frame: pd.to_datetime(frame["DateTime"])
)
df.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


## comparison of point values

In [65]:
# Time-indexed copy of the readings: `assign` works on a copy, so `df` itself
# is left untouched; the parsed timestamps are then promoted to the index.
dftime = (
    df.assign(DateTime=pd.to_datetime(df["DateTime"]))
    .set_index("DateTime")
)


In [66]:

# Load the window-treatment schedule, keyed by experiment id.
with open('../constants/window_treatment.json') as f:
    window_treatment = json.load(f)

# End-of-experiment cutoffs for the two experiments, converted by the
# `str2dt` helper (presumably string -> datetime; see ../scripts/helpers).
exp_a_end, exp_b_end = (
    str2dt(window_treatment[exp_id]["cutoff_times"]["end"])
    for exp_id in ("072522", "081622")
)

# Restrict the analysis to experiment A: rows strictly before its end time.
before_a_end = df["DateTime"] < exp_a_end
df_a = df[before_a_end]
df_a.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


In [67]:
# Split experiment A by room. Grouping on the boolean `Room < 1` yields the
# False group (Room >= 1) first and the True group (Room < 1) second, hence
# the df1, df0 unpacking order. Each frame gets a fresh 0-based index.
room_groups = df_a.groupby(df_a['Room'] < 1)
df1, df0 = (frame.reset_index(drop=True) for _, frame in room_groups)



In [68]:

# Compare the two rooms' temperatures over consecutive, non-overlapping
# windows of n samples; each window is keyed by its first timestamp.
n = 30
a = len(df1)/n
eff_d = {}
delta_d = {}

temps1, temps0 = df1["Temp C"], df0["Temp C"]
stamps1, stamps0 = df1["DateTime"], df0["DateTime"]

# Step directly through the window start offsets; a trailing partial window
# (fewer than n samples) is skipped because only int(a) full windows are used.
for start in range(0, int(a) * n, n):
    window = slice(start, start + n)
    arr1, arr2 = temps1.iloc[window], temps0.iloc[window]
    eff_size = cohend(arr1, arr2)
    # The rooms are paired by position, so their windows must start at the
    # same timestamp for the comparison to be meaningful.
    stamp = stamps1.iloc[start]
    assert stamp == stamps0.iloc[start]
    eff_d[stamp] = eff_size
    delta_d[stamp] = rmse(arr1, arr2)

# One row per window start, with the metric in a single column labelled 0.
eff_df = pd.DataFrame(eff_d, index=[0]).T
rmse_df = pd.DataFrame(delta_d, index=[0]).T


In [81]:
def day_split(arr_of_dfs):
    """Split each DataFrame into a list of per-calendar-day DataFrames.

    A frame is grouped by the dates of its index when the index exposes a
    ``.date`` attribute (e.g. a ``DatetimeIndex``); otherwise it is grouped
    by the dates of its ``"DateTime"`` column.

    Args:
        arr_of_dfs: Iterable of DataFrames to split.

    Returns:
        A list with one entry per input frame, in input order. Each entry is
        a list of that frame's per-day sub-frames, ordered by date (groupby
        sorts its keys by default).

    Raises:
        KeyError: If a frame's index has no ``.date`` attribute and the frame
            has no ``"DateTime"`` column.
    """
    arr_of_split_dfs = []
    for arr in arr_of_dfs:
        # Only the index-date lookup is guarded, and only for the one error
        # that signals "this index is not date-capable"; any other failure
        # (including errors raised by groupby itself) now propagates instead
        # of being silently swallowed by a bare except.
        try:
            day_keys = arr.index.date
        except AttributeError:
            day_keys = arr["DateTime"].dt.date
        arr_of_split_dfs.append([day_df for _, day_df in arr.groupby(day_keys)])
    return arr_of_split_dfs

In [84]:
# Per-day pieces of, in order: the windowed RMSE, Room 0 and Room 1.
split_arrs = day_split([rmse_df, df0, df1])
# All three series must cover the same number of days to be plotted together.
assert len({len(per_day) for per_day in split_arrs}) == 1

In [88]:
# One subplot row per day, so the figure adapts to however many days were
# split out instead of assuming exactly five (make_subplots needs a positive
# row count, hence the floor of 1).
n_days = len(split_arrs[0])
fig = make_subplots(rows=max(n_days, 1), cols=1, shared_xaxes=True,)

# The loop names are deliberately distinct from the notebook-level df0 / df1 /
# rmse_df: reusing those names here would overwrite the full-length frames
# with the last day's slice and corrupt any cell that plots them afterwards.
# Room 1 is not plotted in this figure, so its per-day frames are not iterated.
for row, (day_rmse, day_df0) in enumerate(zip(split_arrs[0], split_arrs[1]), start=1):

    # NOTE(review): x here is time-of-day, while the temperature trace below
    # uses full datetimes -- confirm this mix is intended on a shared x-axis.
    fig.add_trace(go.Scatter(
        x=day_rmse.index.time,
        y=day_rmse[0],
        name="RMSE of Temperature in Rooms (15 min intervals)"
    ), row=row, col=1)

    fig.add_trace(go.Scatter(
        x=day_df0["DateTime"],
        y=normalize(day_df0["Temp C"]),
        name="Normalized Room 0 Temp"
    ), row=row, col=1)

fig.show()


In [68]:
# Single overview figure: windowed RMSE, each room's window state, and each
# room's normalized temperature, all against time.
fig = go.Figure()

# (x values, y values, legend label) for every line, in drawing order.
trace_specs = [
    (rmse_df.index, rmse_df[0], "RMSE of Temperature in Rooms (15 min intervals)"),
    (df0["DateTime"], df0["Window Open"], "Room 0 Window Open"),
    (df1["DateTime"], df1["Window Open"], "Room 1 Window Open"),
    (df0["DateTime"], normalize(df0["Temp C"]), "Normalized Room 0 Temp"),
    (df1["DateTime"], normalize(df1["Temp C"]), "Normalized Room 1 Temp"),
]
for xs, ys, label in trace_specs:
    fig.add_trace(go.Scatter(x=xs, y=ys, name=label))

# Kept as the final expression so the notebook renders the returned figure.
fig.update_layout(
    title='Temperature, RMSE Comparison between Rooms',
    xaxis_title='Dates',
    yaxis_title='RMSE',
)

## comparison of averages 

In [41]:
# Build a frame at "15T" frequency with the make_df_with_freq helper (from
# ../scripts/helpers). The displayed head shows a (Room, DateTime) row index
# stepping in 15-minute increments.
# NOTE(review): presumably the helper averages each column per bin -- confirm
# against its definition. If "15T" is passed through to pandas, note that
# recent pandas versions prefer the "15min" spelling.
df_15min = make_df_with_freq(df, "15T")
df_15min.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
Room,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2022-07-20 07:15:00,23.1806,51.4211,0.0,14.333333,30.555556,1.0
0,2022-07-20 07:30:00,23.3258,51.759333,0.0,14.333333,30.555556,1.0
0,2022-07-20 07:45:00,23.4242,51.619233,0.0,14.333333,30.555556,1.0
0,2022-07-20 08:00:00,23.5314,51.2181,0.0,14.666667,30.555556,1.0
0,2022-07-20 08:15:00,23.6314,50.950367,0.0,14.666667,30.555556,1.0
