In [16]:
import json
import datetime
import pandas as pd
import numpy as np
import math
import sys
import os

import plotly.express as px
import plotly.graph_objects as go

import seaborn as sns
import seaborn.objects as so
# Apply the default theme
sns.set_theme()
import matplotlib.pyplot as plt

import sys
sys.path.insert(0, "../scripts")
from helpers import *

In [2]:
from numpy import mean
from numpy import var
from math import sqrt

In [3]:
# Read the combined indoor/ambient sensor log and parse its timestamp column
# so that later cells can compare "DateTime" against datetime cutoffs.
df = (
    pd.read_csv("../constants/td_ambient_102022.csv")
    .assign(DateTime=lambda frame: pd.to_datetime(frame["DateTime"]))
)
df.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


## comparison of point values

In [4]:
# group by room (0 or 1)
# group into 15 minute intervals (30 samples at the 30 s sampling rate)

In [5]:
# Build a separate, timestamp-indexed frame from `df`: "DateTime" is parsed to
# pandas datetimes and becomes the index. `df` itself is not modified.
dftime = df.assign(DateTime=pd.to_datetime(df["DateTime"])).set_index("DateTime")

# split into two dataframes for each room
# compare values over 15 minute intervals in each room

In [7]:

# Load the window-treatment metadata (keyed by experiment id).
with open('../constants/window_treatment.json') as f:
    window_treatment = json.load(f)

# End cutoffs of the two experiments, converted with the helpers' str2dt
# (presumably string -> datetime; confirm in ../scripts/helpers).
exp_a_end, exp_b_end = (
    str2dt(window_treatment[exp_key]["cutoff_times"]["end"])
    for exp_key in ("072522", "081622")
)

# only consider experiment a: keep rows strictly before its end cutoff
df_a = df[df["DateTime"] < exp_a_end]
df_a.head()

Unnamed: 0,DateTime,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
0,2022-07-20 07:20:00,23.189,51.268,0,14.333333,30.555556,1
1,2022-07-20 07:20:30,23.189,51.362,0,14.333333,30.555556,1
2,2022-07-20 07:21:00,23.189,51.488,0,14.333333,30.555556,1
3,2022-07-20 07:21:30,23.189,51.457,0,14.333333,30.555556,1
4,2022-07-20 07:22:00,23.165,51.517,0,14.333333,30.555556,1


In [8]:
# Split experiment-a rows by room, each with a fresh 0..N-1 index.
# The group key is the boolean `Room < 1`, and groupby sorts keys False -> True,
# so df1 holds the rows with Room >= 1 and df2 holds the rows with Room < 1 (room 0).
df1, df2 = (
    room_frame.reset_index(drop=True)
    for _, room_frame in df_a.groupby(df_a['Room'] < 1)
)

In [11]:
# Row counts: experiment-a subset vs. the full log
print(df_a.shape[0], df.shape[0])

23040 123606


In [19]:
# function to calculate Cohen's d for independent samples
def cohend(d1, d2):
    """Return Cohen's d effect size between two independent samples.

    The result is ``(mean(d1) - mean(d2)) / s`` where ``s`` is the pooled
    standard deviation built from the unbiased (``ddof=1``) sample variances.

    Parameters
    ----------
    d1, d2 : sequences of numbers accepted by ``len``, ``numpy.var`` and
        ``numpy.mean`` (e.g. lists, arrays, pandas Series).

    Returns
    -------
    Signed effect size; positive when ``d1`` has the larger mean.

    Notes
    -----
    There is no guard for a zero pooled deviation (both samples constant) or
    for ``len(d1) + len(d2) == 2``; the result is then not a finite number.
    """
    size_1 = len(d1)
    size_2 = len(d2)
    # unbiased sample variances
    var_1 = var(d1, ddof=1)
    var_2 = var(d2, ddof=1)
    # pooled standard deviation: variances weighted by their degrees of freedom
    pooled_dof = size_1 + size_2 - 2
    pooled_std = sqrt(((size_1 - 1) * var_1 + (size_2 - 1) * var_2) / pooled_dof)
    # difference of means expressed in units of the pooled deviation
    return (mean(d1) - mean(d2)) / pooled_std

In [20]:
def rmse(arr1, arr2):
    """Return the root-mean-square error between two equally shaped inputs.

    Parameters
    ----------
    arr1, arr2 : array-likes accepted by ``numpy.subtract`` (lists, arrays,
        pandas Series).

    Returns
    -------
    float
        Square root of the mean of the squared element-wise differences.
    """
    residuals = np.subtract(arr1, arr2)
    mean_squared_error = np.square(residuals).mean()
    return math.sqrt(mean_squared_error)

In [21]:
# Compare the two rooms window by window.
# n is the window length in samples; the preview of the log shows one sample
# every 30 s, so 30 samples span 15 minutes.
n = 30
a = len(df1)/n
eff_d = {}    # window start time -> Cohen's d of "Temp C" (df1 vs df2)
delta_d = {}  # window start time -> RMSE of "Temp C" (df1 vs df2)

# Only complete windows of df1 are visited (int() truncates the count).
for i in range(int(a)):
    k = n*i
    window = slice(k, k + n)
    arr1 = df1["Temp C"].iloc[window]
    arr2 = df2["Temp C"].iloc[window]
    eff_size = cohend(arr1, arr2)
    # Both frames must start this window at the same timestamp, otherwise the
    # row-wise comparison would pair up different moments in time.
    window_start = df1["DateTime"].iloc[k]
    assert window_start == df2["DateTime"].iloc[k]
    eff_d[window_start] = eff_size
    delta_d[window_start] = rmse(arr1, arr2)

# One row per window start, single column named 0.
eff_df = pd.DataFrame(eff_d, index=[0]).transpose()
rmse_df = pd.DataFrame(delta_d, index=[0]).transpose()
rmse_df

Unnamed: 0,0
2022-07-20 07:20:00,0.230367
2022-07-20 07:35:00,0.055771
2022-07-20 07:50:00,0.018067
2022-07-20 08:05:00,0.030984
2022-07-20 08:20:00,0.028818
...,...
2022-07-24 06:05:00,0.107242
2022-07-24 06:20:00,0.099824
2022-07-24 06:35:00,0.091704
2022-07-24 06:50:00,0.073843


## comparison of averages 

In [9]:
# Aggregate the full log with the helpers' make_df_with_freq.
# "15T" is the pandas offset alias for 15 minutes; the displayed result is
# indexed by (Room, DateTime) -- presumably per-room 15-minute means, confirm
# in ../scripts/helpers.
resample_freq = "15T"
df_15min = make_df_with_freq(df, resample_freq)
df_15min.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Temp C,RH %,Room,Ambient Temp,Ambient RH,Window Open
Room,DateTime,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,2022-07-20 07:15:00,23.1806,51.4211,0.0,14.333333,30.555556,1.0
0,2022-07-20 07:30:00,23.3258,51.759333,0.0,14.333333,30.555556,1.0
0,2022-07-20 07:45:00,23.4242,51.619233,0.0,14.333333,30.555556,1.0
0,2022-07-20 08:00:00,23.5314,51.2181,0.0,14.666667,30.555556,1.0
0,2022-07-20 08:15:00,23.6314,50.950367,0.0,14.666667,30.555556,1.0
