# Exploratory analysis examining day-to-day trends
* Jan 5, 2023
* Update Jan 14, 2023

In [3]:
import json
import datetime
import pandas as pd
import numpy as np
import math
import sys
import os
from collections import OrderedDict

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import seaborn as sns
import seaborn.objects as so
# Apply the default theme
sns.set_theme()
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../scripts")
from helpers import *

In [4]:
# Load the room/ambient readings and convert the timestamp column to pandas
# datetimes so later cells can compare against and group by it.
ambient_csv = "../constants/td_ambient_102022.csv"
df = pd.read_csv(ambient_csv)
df = df.assign(DateTime=pd.to_datetime(df["DateTime"]))
df.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


In [5]:
# Time-indexed variant of the readings: a separate frame built from `df` with
# the parsed "DateTime" column promoted to the index. `df` itself is unchanged.
dftime = (
    df
    .assign(DateTime=lambda frame: pd.to_datetime(frame["DateTime"]))
    .set_index("DateTime")
)


In [6]:

# Read the window-treatment metadata; entries are looked up by the keys
# "072522" and "081622", each holding "cutoff_times" with an "end" value.
treatment_path = '../constants/window_treatment.json'
with open(treatment_path) as f:
    window_treatment = json.load(f)

# End cutoffs of the two experiments, converted by the `str2dt` helper.
exp_a_end, exp_b_end = (
    str2dt(window_treatment[key]["cutoff_times"]["end"])
    for key in ("072522", "081622")
)

# Experiment A only: keep rows recorded strictly before its end cutoff.
before_a_end = df["DateTime"] < exp_a_end
df_a = df.loc[before_a_end]
df_a.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


In [30]:
# Restrict the data to a noon-to-noon span inside the first experiment's time frame.
noon_start, noon_end = (
    str2dt(stamp)
    for stamp in ('2022, 07, 20, 12, 00', '2022, 07, 23, 12, 00')
)

# Both bounds are inclusive: a row stamped exactly at either noon is kept.
on_or_after_start = df['DateTime'] >= noon_start
on_or_before_end = df['DateTime'] <= noon_end
mask = on_or_after_start & on_or_before_end

# Renumber the rows from zero so positions in the filtered frame are contiguous.
df_b = df.loc[mask].reset_index(drop=True)
df_b


Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 12:00:00,24.876,49.461,0,19.500000,21.111111,1
1,2022-07-20 12:00:30,24.876,49.461,0,19.500000,21.111111,1
2,2022-07-20 12:01:00,24.900,49.464,0,19.500000,21.111111,1
3,2022-07-20 12:01:30,24.876,49.493,0,19.500000,21.111111,1
4,2022-07-20 12:02:00,24.900,49.528,0,19.500000,21.111111,1
...,...,...,...,...,...,...,...
17277,2022-07-23 11:58:00,25.312,50.411,1,19.444444,18.333333,1
17278,2022-07-23 11:58:30,25.312,50.411,1,19.444444,18.333333,1
17279,2022-07-23 11:59:00,25.312,50.411,1,19.444444,18.333333,1
17280,2022-07-23 11:59:30,25.312,50.411,1,19.444444,18.333333,1


In [31]:
# Separate the readings into one frame per room. Grouping on the boolean
# `Room < 1` yields the False group first (-> df1) and the True group second
# (-> df0); each frame is re-indexed from zero so positions line up.
df1, df0 = (
    room_frame.reset_index(drop=True)
    for _, room_frame in df_b.groupby(df_b['Room'] < 1)
)

n = 30  # rows per window; readings are 30 s apart, so 30 rows span 15 minutes
a = len(df1)/n  # number of windows in df1 (fractional; truncated below)

# Per-window results, keyed by the timestamp of the window's first row.
eff_d = {}
delta_d = {}
mbe_d = {}

# Step through the frames in consecutive, non-overlapping windows of n rows.
for start in range(0, int(a) * n, n):
    stop = start + n
    window_start = df1["DateTime"].iloc[start]

    # Both rooms must begin the window at the same timestamp before comparing.
    assert window_start == df0["DateTime"].iloc[start]

    temps_room1 = df1["Temp C"].iloc[start:stop]
    temps_room0 = df0["Temp C"].iloc[start:stop]

    # Metrics come from the project helpers, called as (room 1, room 0).
    eff_d[window_start] = cohend(temps_room1, temps_room0)
    delta_d[window_start] = rmse(temps_room1, temps_room0)
    mbe_d[window_start] = mbe(temps_room1, temps_room0)

# Turn each dict into a frame indexed by window start, with one column named 0.
eff_df, rmse_df, mbe_df = (
    pd.DataFrame(metric_by_window, index=[0]).T
    for metric_by_window in (eff_d, delta_d, mbe_d)
)


In [32]:
def day_split(arr_of_dfs):
    """Split every DataFrame in ``arr_of_dfs`` into one DataFrame per calendar day.

    The timestamps used for grouping are taken from the frame's index when it
    is a ``DatetimeIndex``; otherwise from its ``"DateTime"`` column.

    Parameters
    ----------
    arr_of_dfs : iterable of pandas.DataFrame
        Frames to split.

    Returns
    -------
    list of list of pandas.DataFrame
        One inner list per input frame, in input order, holding that frame's
        per-day pieces in ascending date order.

    Raises
    ------
    KeyError
        If a frame has neither a ``DatetimeIndex`` nor a ``"DateTime"`` column.
    """
    arr_of_split_dfs = []
    for frame in arr_of_dfs:
        # Decide explicitly where the timestamps live instead of relying on a
        # bare `except`, which would also hide unrelated errors.
        if isinstance(frame.index, pd.DatetimeIndex):
            day_keys = frame.index.date
        else:
            day_keys = frame["DateTime"].dt.date
        arr_of_split_dfs.append([day_df for _, day_df in frame.groupby(day_keys)])

    return arr_of_split_dfs

## faceted plot

In [33]:
# Per-day pieces of each frame, in the order: RMSE windows, room 0, room 1.
split_arrs = day_split([rmse_df, df0, df1])

# All three frames must break into the same number of days.
assert len({len(day_frames) for day_frames in split_arrs}) == 1

In [36]:
# Faceted view: one subplot row per day, all rows sharing the same x-axis
# (time of day). The row count follows the data instead of being hard-coded.
n_days = len(split_arrs[0])
fig = make_subplots(rows=max(n_days, 1), cols=1, shared_xaxes=True)

# NOTE: the loop variables carry a `_day` suffix on purpose. Reusing the names
# `df0`, `df1` and `rmse_df` here would overwrite the full-range frames of the
# same name, leaving only the final day's slice for any cell that runs later.
for ix, (rmse_day, df0_day, df1_day) in enumerate(
    zip(split_arrs[0], split_arrs[1], split_arrs[2])
):
    row = ix + 1

    # Show legend entries for a single row only (the last day) so each trace
    # name appears once rather than once per subplot.
    showlegendbool = ix == n_days - 1

    fig.add_trace(go.Scatter(
        x=rmse_day.index.time,
        y=rmse_day[0],
        name="RMSE of Temperature in Rooms (15 min intervals)",
        mode='markers',
        marker_color="black",
        showlegend=showlegendbool,
    ), row=row, col=1)

    # Both rooms get the same treatment; only the source frame, legend name
    # and marker colour differ.
    room_traces = (
        (df0_day, "Normalized Room 0 Temp", "darkviolet"),
        (df1_day, "Normalized Room 1 Temp", "green"),
    )
    for room_day, trace_name, colour in room_traces:
        # Keep only the readings whose time of day matches an RMSE window
        # start, so the temperature points line up with the RMSE points.
        time_mask = room_day["DateTime"].dt.time.isin(rmse_day.index.time)
        room_day_masked = room_day.loc[time_mask].reset_index(drop=True)

        fig.add_trace(go.Scatter(
            x=room_day_masked["DateTime"].dt.time,
            y=normalize(room_day_masked["Temp C"]),
            name=trace_name,
            mode='markers',
            marker_symbol="x",
            marker_color=colour,
            showlegend=showlegendbool,
        ), row=row, col=1)

fig.show()


## plot across days

In [35]:
# Single figure spanning the whole date range. This cell reads `rmse_df`,
# `df0` and `df1` as they currently exist in the kernel; they must hold the
# full date range for the x-axis to cover every day.
fig = go.Figure()

# (x values, y values, legend name) for each line, in drawing order.
trace_specs = [
    (rmse_df.index, rmse_df[0], "RMSE of Temperature in Rooms (15 min intervals)"),
    (df0["DateTime"], df0["Window Open"], "Room 0 Window Open"),
    (df1["DateTime"], df1["Window Open"], "Room 1 Window Open"),
    (df0["DateTime"], normalize(df0["Temp C"]), "Normalized Room 0 Temp"),
    (df1["DateTime"], normalize(df1["Temp C"]), "Normalized Room 1 Temp"),
]

for x_values, y_values, trace_name in trace_specs:
    fig.add_trace(go.Scatter(x=x_values, y=y_values, name=trace_name))

# `update_layout` returns the figure, so leaving it as the cell's last
# expression also renders the plot.
fig.update_layout(
    title='Temperature, RMSE Comparison between Rooms',
    xaxis_title='Dates',
    yaxis_title='RMSE',
)