# Refining analysis of day-to-day trends
* Jan 18, 2023

In [1]:
import json
import datetime
import pandas as pd
import numpy as np
import math
import sys
import os
from collections import OrderedDict

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import seaborn as sns
import seaborn.objects as so
# Apply the default theme
sns.set_theme()
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../scripts")
from helpers import *

In [2]:
# Load the CSV and turn its "DateTime" column into pandas datetimes.
df = pd.read_csv("../constants/td_ambient_102022.csv")
df["DateTime"] = pd.to_datetime(df["DateTime"])

# dftime is a separate frame indexed by timestamp; df itself keeps
# "DateTime" as an ordinary column. The column is already datetime-typed
# at this point, so it can be used as the index directly.
dftime = df.set_index("DateTime")

In [3]:
# Narrow the scope to the first experiment, from noon on the first day to
# noon on the last day.

# Per-experiment settings (including cutoff times) live in a JSON file.
with open('../constants/window_treatment.json') as fp:
    window_treatment = json.load(fp)

# End timestamps of the two experiments, parsed with the helpers' str2dt.
exp_a_end = str2dt(window_treatment["072522"]["cutoff_times"]["end"])
exp_b_end = str2dt(window_treatment["081622"]["cutoff_times"]["end"])

# Experiment a only: every row recorded strictly before its end time.
df_a = df[df["DateTime"] < exp_a_end]

# Noon-to-noon bounds inside the first experiment's time frame.
noon_start = str2dt('2022, 07, 20, 12, 00')
noon_end = str2dt('2022, 07, 23, 12, 00')

# Both bounds are inclusive; the selected rows are renumbered from zero.
mask = df['DateTime'].between(noon_start, noon_end)
df_b = df[mask].reset_index(drop=True)
df_b



Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 12:00:00,24.876,49.461,0,19.500000,21.111111,1
1,2022-07-20 12:00:30,24.876,49.461,0,19.500000,21.111111,1
2,2022-07-20 12:01:00,24.900,49.464,0,19.500000,21.111111,1
3,2022-07-20 12:01:30,24.876,49.493,0,19.500000,21.111111,1
4,2022-07-20 12:02:00,24.900,49.528,0,19.500000,21.111111,1
...,...,...,...,...,...,...,...
17277,2022-07-23 11:58:00,25.312,50.411,1,19.444444,18.333333,1
17278,2022-07-23 11:58:30,25.312,50.411,1,19.444444,18.333333,1
17279,2022-07-23 11:59:00,25.312,50.411,1,19.444444,18.333333,1
17280,2022-07-23 11:59:30,25.312,50.411,1,19.444444,18.333333,1


In [4]:
# Split df_b into one frame per room. The grouping key is the boolean
# `Room < 1`; groupby yields its groups in sorted key order (False, then
# True), so the first group is bound to df1 and the second to df0.
room_groups = df_b.groupby(df_b['Room'] < 1)
df1, df0 = (grp.reset_index(drop=True) for _, grp in room_groups)

In [5]:
# Line plot of the measured temperature in each room over time.
fig = go.Figure()

# One line trace per room, added in room order.
for trace_label, room_df in (("room 0 ", df0), ("room 1 ", df1)):
    fig.add_trace(go.Scatter(
        x=room_df["DateTime"],
        y=room_df["Temp C"],
        mode='lines',
        name=trace_label,
    ))

# Kept as the final expression so the notebook renders the figure.
fig.update_layout(
    title='Room Data',
    xaxis_title='Dates',
    yaxis_title='Temperature (ºC)',
)


In [6]:
# Inspect df1, the group for which `Room < 1` is False in the split above.
df1

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 12:00:00,25.070,51.205,1,19.500000,21.111111,1
1,2022-07-20 12:00:30,25.045,51.233,1,19.500000,21.111111,1
2,2022-07-20 12:01:00,25.045,51.265,1,19.500000,21.111111,1
3,2022-07-20 12:01:30,25.070,51.332,1,19.500000,21.111111,1
4,2022-07-20 12:02:00,25.070,51.332,1,19.500000,21.111111,1
...,...,...,...,...,...,...,...
8636,2022-07-23 11:58:00,25.312,50.411,1,19.444444,18.333333,1
8637,2022-07-23 11:58:30,25.312,50.411,1,19.444444,18.333333,1
8638,2022-07-23 11:59:00,25.312,50.411,1,19.444444,18.333333,1
8639,2022-07-23 11:59:30,25.312,50.411,1,19.444444,18.333333,1


In [7]:
# Split df_b into one frame per room. groupby yields the groups of the
# boolean key `Room < 1` in sorted order (False, then True), which is why
# the unpacking order is "df1, df0".
df1, df0 = [x.reset_index(drop=True) for _, x in df_b.groupby(df_b['Room'] < 1)]

# normalize the data (normalize comes from the helpers module)
df0["Norm Temp C"] = normalize(df0["Temp C"])
df1["Norm Temp C"] = normalize(df1["Temp C"])

n = 30  # rows per window -- 15 minute intervals in the data

# Only count windows that are complete in BOTH rooms: if one frame has
# fewer rows than the other, sizing the loop from a single frame would
# either index past the end of the shorter one or compare a partial window
# against a full one.
n_windows = min(len(df0), len(df1)) // n

# initialize dictionaries for collecting data, keyed by window start time
eff_d = {}
delta_d = {}
mbe_d = {}
mean_temps = {}

for i in range(n_windows):
    # rows [k, k+n) form the i-th window in both dataframes
    k = n * i
    arr0 = df0["Norm Temp C"].iloc[k:k + n]
    arr1 = df1["Norm Temp C"].iloc[k:k + n]

    # ensure datetime alignment before computing metrics: both rooms'
    # windows must start at the same timestamp
    window_start = df1["DateTime"].iloc[k]
    room0_start = df0["DateTime"].iloc[k]
    assert window_start == room0_start, (
        f"window {i} starts at {room0_start} in room 0 "
        f"but at {window_start} in room 1"
    )

    # per-window comparison metrics (cohend, rmse, mbe come from helpers)
    eff_d[window_start] = cohend(arr0, arr1)
    delta_d[window_start] = rmse(arr0, arr1)
    mbe_d[window_start] = mbe(arr0, arr1)

    # mean value per room in this window, for plotting purposes
    mean_temps[window_start] = [np.mean(arr0), np.mean(arr1)]

# transform dictionaries to dataframes: one row per window start; the metric
# frames have a single column 0, norm_temp_df has column 0 (room 0) and
# column 1 (room 1)
eff_df = pd.DataFrame(eff_d, index=[0]).T
rmse_df = pd.DataFrame(delta_d, index=[0]).T
mbe_df = pd.DataFrame(mbe_d, index=[0]).T
norm_temp_df = pd.DataFrame(mean_temps).T


In [10]:
# Run the helpers' day_split over the RMSE table and the two per-room mean
# columns (column 0 = room 0, column 1 = room 1).
series_to_split = [rmse_df, norm_temp_df[0], norm_temp_df[1]]
split_arrs = day_split(series_to_split)

# All three inputs must have been cut into the same number of pieces.
rmse_chunks, room0_chunks, room1_chunks = split_arrs[0], split_arrs[1], split_arrs[2]
assert len(rmse_chunks) == len(room0_chunks) == len(room1_chunks)

In [18]:
# split_arrs[1][0]

In [23]:
# Stacked subplots sharing one x axis: each chunk produced by day_split gets
# its own row, showing the windowed RMSE together with both rooms' mean
# values, plotted against time of day.
# NOTE(review): rows=5 is hard-coded; a sixth chunk would target a row that
# does not exist -- confirm day_split never returns more than five.
fig = make_subplots(rows=5, cols=1, shared_xaxes=True,)

# split_arrs[0] holds the RMSE chunks, [1] room 0 and [2] room 1.
# The loop variables deliberately do NOT reuse the names df0 / df1 / rmse_df,
# so the notebook-level frames with those names stay intact for other cells.
day_chunks = zip(split_arrs[0], split_arrs[1], split_arrs[2])

for ix, (rmse_day, room0_day, room1_day) in enumerate(day_chunks):
    # The same three trace names repeat in every row; only the chunk at
    # index 3 contributes legend entries (presumably to avoid duplicates).
    show_legend = ix == 3
    subplot_row = ix + 1  # plotly subplot rows are 1-based

    fig.add_trace(go.Scatter(
        x=rmse_day.index.time,
        y=rmse_day[0],
        name="RMSE of Temperature in Rooms (15 min intervals)",
        mode='markers',
        marker_color="black",
        showlegend=show_legend,
    ), row=subplot_row, col=1)

    fig.add_trace(go.Scatter(
        x=room0_day.index.time,
        y=room0_day,
        name="Room 0 Temp",
        mode='markers',
        marker_color="darkviolet",
        showlegend=show_legend,
    ), row=subplot_row, col=1)

    fig.add_trace(go.Scatter(
        x=room1_day.index.time,
        y=room1_day,
        name="Room 1 Temp",
        mode='markers',
        marker_color="green",
        showlegend=show_legend,
    ), row=subplot_row, col=1)


fig.show()

In [14]:
# Mean of whatever arr0 currently holds. In the cells shown here arr0 is only
# assigned inside the windowing loop, so after that loop it is the last
# window's room-0 slice.
# NOTE(review): the execution counts are out of order, so the value displayed
# below may come from an earlier kernel state -- confirm by re-running.
np.mean(arr0)

25.312800000000003

In [13]:
# Inspect the RMSE table: a single column 0, indexed by window start timestamp.
rmse_df

Unnamed: 0,0
2022-07-20 12:00:00,0.171922
2022-07-20 12:15:00,0.166212
2022-07-20 12:30:00,0.161429
2022-07-20 12:45:00,0.173168
2022-07-20 13:00:00,0.160652
...,...
2022-07-23 10:45:00,0.538074
2022-07-23 11:00:00,0.526625
2022-07-23 11:15:00,0.511596
2022-07-23 11:30:00,0.508483


In [None]:
# Overlay the "Temp C" readings of both rooms as line traces on one figure.
room0_trace = go.Scatter(
    x=df0["DateTime"],
    y=df0["Temp C"],
    mode='lines',
    name="room 0 ",
)
room1_trace = go.Scatter(
    x=df1["DateTime"],
    y=df1["Temp C"],
    mode='lines',
    name="room 1 ",
)
fig = go.Figure(data=[room0_trace, room1_trace])

# Final expression so the notebook renders the figure.
fig

In [None]:
# normalize the measured temperature data and calculate a rolling average