# Exploratory analysis examining day-to-day trends
### Jan 5, 2023
### Update Jan 14, 2023

In [1]:
import json
import datetime
import pandas as pd
import numpy as np
import math
import sys
import os
from collections import OrderedDict

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import seaborn as sns
import seaborn.objects as so
# Apply the default theme
sns.set_theme()
import matplotlib.pyplot as plt

import sys
# Put the sibling ../scripts directory first on the module search path so that
# the project's helpers module can be imported from this notebook.
sys.path.insert(0, "../scripts")
# NOTE(review): the star import hides where names come from. It presumably supplies
# str2dt, cohend, rmse, normalize and make_df_with_freq, which are used below but
# not defined in the visible cells -- confirm, and consider importing them by name.
from helpers import *

In [2]:
# Read the room/ambient temperature-humidity log and parse the DateTime column
# into real pandas timestamps (the column keeps its original position).
df = (
    pd.read_csv("../constants/td_ambient_102022.csv")
    .assign(DateTime=lambda raw: pd.to_datetime(raw["DateTime"]))
)
df.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


## comparison of point values

In [3]:
# Time-indexed variant of df: DateTime is converted with pd.to_datetime and
# promoted to the index. df itself is left untouched.
dftime = df.assign(DateTime=pd.to_datetime(df["DateTime"])).set_index("DateTime")


In [4]:

with open('../constants/window_treatment.json') as f:
    window_treatment = json.load(f)

# "end" cutoff of each experiment entry, converted from its JSON string by str2dt
exp_a_end, exp_b_end = (
    str2dt(window_treatment[exp_key]["cutoff_times"]["end"])
    for exp_key in ("072522", "081622")
)

# only consider experiment a: keep the rows recorded before its end cutoff
df_a = df[df["DateTime"] < exp_a_end]
df_a.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


In [15]:
# Peek at the raw "end" cutoff string of entry "072522" -- this is the string form handed to str2dt.
window_treatment["072522"]["cutoff_times"]["end"]

'2022, 07, 24, 07, 20'

In [21]:
# window bounds: 12:00 on 2022-07-20 through 12:00 on 2022-07-23
noon_start, noon_end = (
    str2dt(stamp) for stamp in ('2022, 07, 20, 12, 00', '2022, 07, 23, 12, 00')
)

# only consider noon-to-noon data; between() keeps both end points, like >= / <=
mask = df['DateTime'].between(noon_start, noon_end)
df_b = df[mask].reset_index(drop=True)
df_b


Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 12:00:00,24.876,49.461,0,19.500000,21.111111,1
1,2022-07-20 12:00:30,24.876,49.461,0,19.500000,21.111111,1
2,2022-07-20 12:01:00,24.900,49.464,0,19.500000,21.111111,1
3,2022-07-20 12:01:30,24.876,49.493,0,19.500000,21.111111,1
4,2022-07-20 12:02:00,24.900,49.528,0,19.500000,21.111111,1
...,...,...,...,...,...,...,...
17277,2022-07-23 11:58:00,25.312,50.411,1,19.444444,18.333333,1
17278,2022-07-23 11:58:30,25.312,50.411,1,19.444444,18.333333,1
17279,2022-07-23 11:59:00,25.312,50.411,1,19.444444,18.333333,1
17280,2022-07-23 11:59:30,25.312,50.411,1,19.444444,18.333333,1


In [22]:
# Split the noon-to-noon data into one frame per room, each with a fresh 0..n-1 index.
# Explicit masks make it obvious which frame is which, instead of relying on the
# False-before-True ordering of a boolean groupby key.
is_room0 = df_b['Room'] < 1
df0 = df_b.loc[is_room0].reset_index(drop=True)
df1 = df_b.loc[~is_room0].reset_index(drop=True)
# Fail loudly (with a readable message) if either room has no rows in the window.
assert not df0.empty and not df1.empty, "expected rows for both rooms in df_b"

In [23]:

n = 30  # rows per window (readings are 30 s apart, so 30 rows span 15 minutes)
a = len(df1)/n
eff_d = {}
delta_d = {}

# Step through both rooms in lock-step, one complete n-row window at a time;
# a trailing partial window is ignored.
for start in range(0, int(a) * n, n):
    window = slice(start, start + n)
    room1_temp = df1["Temp C"].iloc[window]
    room0_temp = df0["Temp C"].iloc[window]
    window_effect = cohend(room1_temp, room0_temp)

    # both rooms must carry the same timestamp at the start of the window
    window_stamp = df1["DateTime"].iloc[start]
    assert window_stamp == df0["DateTime"].iloc[start]

    # results are keyed by the timestamp of the window's first row
    eff_d[window_stamp] = window_effect
    delta_d[window_stamp] = rmse(room1_temp, room0_temp)

# {timestamp: value} dicts -> frames indexed by timestamp with a single column named 0
eff_df, rmse_df = (
    pd.DataFrame(per_window, index=[0]).T for per_window in (eff_d, delta_d)
)


In [24]:
def day_split(arr_of_dfs):
    """Split every dataframe in ``arr_of_dfs`` into a list of per-day dataframes.

    Parameters
    ----------
    arr_of_dfs : iterable of pandas.DataFrame
        Each frame carries its timestamps either in a ``DatetimeIndex`` or in a
        datetime-typed column named ``"DateTime"``.

    Returns
    -------
    list of list of pandas.DataFrame
        One inner list per input frame, in input order. Each inner list holds
        that frame's rows grouped by calendar date, in ascending date order.

    Raises
    ------
    KeyError
        If a frame has no ``DatetimeIndex`` and no ``"DateTime"`` column.
    AttributeError
        If the ``"DateTime"`` column is not datetime-typed.
    """
    arr_of_split_dfs = []
    for frame in arr_of_dfs:
        # Decide explicitly where the timestamps live rather than using a bare
        # ``except`` (which would also swallow unrelated errors and interrupts).
        if isinstance(frame.index, pd.DatetimeIndex):
            day_key = frame.index.date
        else:
            day_key = frame["DateTime"].dt.date
        arr_of_split_dfs.append([day_df for _, day_df in frame.groupby(day_key)])

    return arr_of_split_dfs

In [25]:
# per-day pieces, in this order: [0] rmse_df, [1] df0, [2] df1
split_arrs = day_split([rmse_df, df0, df1])
# all three inputs must break into the same number of days
assert len({len(day_frames) for day_frames in split_arrs}) == 1

In [52]:
df0_split = split_arrs[1][0] # first day of df0 (split_arrs[1] holds the per-day pieces of df0)
df0_split["DateTime"].dt.time

0       12:00:00
1       12:00:30
2       12:01:00
3       12:01:30
4       12:02:00
          ...   
1435    23:57:30
1436    23:58:00
1437    23:58:30
1438    23:59:00
1439    23:59:30
Name: DateTime, Length: 1440, dtype: object

In [50]:
dfr_split = split_arrs[0][0] # first day of rmse_df (split_arrs[0] holds the per-day pieces of rmse_df, not df0)
dfr_split.index.time

array([datetime.time(12, 0), datetime.time(12, 15), datetime.time(12, 30),
       datetime.time(12, 45), datetime.time(13, 0), datetime.time(13, 15),
       datetime.time(13, 30), datetime.time(13, 45), datetime.time(14, 0),
       datetime.time(14, 15), datetime.time(14, 30),
       datetime.time(14, 45), datetime.time(15, 0), datetime.time(15, 15),
       datetime.time(15, 30), datetime.time(15, 45), datetime.time(16, 0),
       datetime.time(16, 15), datetime.time(16, 30),
       datetime.time(16, 45), datetime.time(17, 0), datetime.time(17, 15),
       datetime.time(17, 30), datetime.time(17, 45), datetime.time(18, 0),
       datetime.time(18, 15), datetime.time(18, 30),
       datetime.time(18, 45), datetime.time(19, 0), datetime.time(19, 15),
       datetime.time(19, 30), datetime.time(19, 45), datetime.time(20, 0),
       datetime.time(20, 15), datetime.time(20, 30),
       datetime.time(20, 45), datetime.time(21, 0), datetime.time(21, 15),
       datetime.time(21, 30), datetime

In [55]:
# Goal: line the 30-second room readings up with the RMSE windows by time of day.
# A direct element-wise comparison (df0_split["DateTime"].dt.time == dfr_split.index.time)
# is not usable because the two sides differ in length (1440 rows vs. one entry per RMSE window),

# so instead flag the df0_split rows whose time of day also appears in the RMSE index.
time_mask = df0_split["DateTime"].dt.time.isin(dfr_split.index.time)
time_mask

In [58]:
# keep only the first-day df0 rows whose time of day matches an RMSE window start
ch = df0_split[time_mask].reset_index(drop=True)
ch.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 12:00:00,24.876,49.461,0,19.5,21.111111,1
1,2022-07-20 12:15:00,24.924,49.754,0,19.5,21.111111,1
2,2022-07-20 12:30:00,24.973,49.888,0,19.5,21.111111,1
3,2022-07-20 12:45:00,24.9,49.974,0,19.5,21.111111,1
4,2022-07-20 13:00:00,24.9,49.624,0,20.5,18.888889,1


In [61]:
# sanity check: after filtering, ch and the first-day RMSE frame agree row for row on time of day
assert (ch["DateTime"].dt.time == dfr_split.index.time).all()

In [46]:
# One subplot row per day split, so the figure always has room for every day
# (a fixed row count would break as soon as the data spans more days than rows).
n_days = len(split_arrs[0])
fig = make_subplots(rows=n_days, cols=1, shared_xaxes=True)

# The loop variables are deliberately NOT named df0 / df1 / rmse_df: reusing those
# names would overwrite the full-period frames that other cells still rely on.
# split_arrs[0] holds the per-day RMSE frames, split_arrs[1] the per-day df0 frames.
for ix, (rmse_day, room0_day) in enumerate(zip(split_arrs[0], split_arrs[1])):

    # RMSE per window, plotted against time of day
    fig.add_trace(go.Scatter(
        x=rmse_day.index.time,
        y=rmse_day[0],
        name="RMSE of Temperature in Rooms (15 min intervals)",
        mode='markers'
    ), row=ix+1, col=1)

    # same day's room-0 temperature, passed through the helpers' normalize()
    fig.add_trace(go.Scatter(
        x=room0_day["DateTime"].dt.time,
        y=normalize(room0_day["Temp C"]),
        name="Normalized Room 0 Temp",
        mode='markers'
    ), row=ix+1, col=1)


fig.update_layout(
    xaxis_title='Dates',
)
fig.show()


In [27]:
fig = go.Figure()

# (x values, y values, legend label) for every line, in drawing order
trace_specs = [
    (rmse_df.index, rmse_df[0], "RMSE of Temperature in Rooms (15 min intervals)"),
    (df0["DateTime"], df0["Window Open"], "Room 0 Window Open"),
    (df1["DateTime"], df1["Window Open"], "Room 1 Window Open"),
    (df0["DateTime"], normalize(df0["Temp C"]), "Normalized Room 0 Temp"),
    (df1["DateTime"], normalize(df1["Temp C"]), "Normalized Room 1 Temp"),
]

for trace_x, trace_y, trace_name in trace_specs:
    fig.add_trace(go.Scatter(x=trace_x, y=trace_y, name=trace_name))

# update_layout returns the figure, so leaving it as the last expression displays it
fig.update_layout(
    title='Temperature, RMSE Comparison between Rooms',
    xaxis_title='Dates',
    yaxis_title='RMSE',
)

## comparison of averages 

In [28]:
# NOTE(review): make_df_with_freq is not defined in the visible cells (presumably from helpers).
# Judging by the output below, "15T" yields rows per Room at 15-minute spacing -- confirm against the helper.
df_15min = make_df_with_freq(df, "15T")
df_15min.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
Room,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2022-07-20 07:15:00,23.1806,51.4211,0.0,14.333333,30.555556,1.0
0,2022-07-20 07:30:00,23.3258,51.759333,0.0,14.333333,30.555556,1.0
0,2022-07-20 07:45:00,23.4242,51.619233,0.0,14.333333,30.555556,1.0
0,2022-07-20 08:00:00,23.5314,51.2181,0.0,14.666667,30.555556,1.0
0,2022-07-20 08:15:00,23.6314,50.950367,0.0,14.666667,30.555556,1.0
