# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the Connecticut January 2017 month file (path is relative to the
# notebook's working directory).
jan_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2017-jan-day-CT.csv",
)

In [3]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
jan_2017.drop(
    index=jan_2017.loc[
        jan_2017['TemperatureExpectedCool'].ge(850)
        | jan_2017['TemperatureExpectedHeat'].ge(850)
        | jan_2017['TemperatureExpectedCool'].le(600)
        | jan_2017['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9b51dbf3db985732926fa306564f61277b9b6bfa,2017-01-28 19:50:00 UTC,heat,hold,674,704,704,CT,Orange,45,False,False,False,Gas
5,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2017-01-24 17:25:00 UTC,heat,hold,639,650,627,CT,Windsor Locks,30,False,False,False,Gas
6,90311ac9900b2ba8a03e5f6348c84a144db460b1,2017-01-31 19:30:00 UTC,heat,hold,623,650,635,CT,Mansfield,20,True,False,True,Electric
7,79c1501945e02406136a1cfaba35a338a2727fc5,2017-01-07 17:25:00 UTC,heat,auto,668,792,606,CT,killingworth,15,False,False,False,Gas
8,ecc6ddba35a42fcb31e3c9649a554a96037d34b5,2017-01-27 12:40:00 UTC,heat,hold,668,800,605,CT,Redding,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118741,628c5a78fbbbace955cfd5e8c0e384c28e9c0fd7,2017-01-19 14:05:00 UTC,heat,hold,758,760,760,CT,Norwalk,5,False,False,False,Gas
118742,628c5a78fbbbace955cfd5e8c0e384c28e9c0fd7,2017-01-27 12:50:00 UTC,heat,hold,757,760,760,CT,Norwalk,5,False,False,False,Gas
118743,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2017-01-19 11:30:00 UTC,heat,hold,760,760,760,CT,New Milford,15,False,False,False,Gas
118744,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2017-01-19 12:15:00 UTC,heat,hold,752,760,760,CT,New Milford,15,False,False,False,Gas


In [4]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01b5776ab24d14ace90a8296c49941a4ba8b0173,Jan,2017,auto,hold,New MIlford,683.484731,800.000000,685.509677,5.0,False,False,False
05d474a41ce90b51ae9627f1dc2a5214d67b2741,Jan,2017,heat,auto,Guilford,630.937500,650.000000,620.000000,35.0,False,False,False
0a751de00d076714c9f1c136f2203fae474b7ace,Jan,2017,heat,auto,East Hampton,652.627729,680.000000,640.000000,0.0,False,False,False
0b3c9b534cf69257d66a1102858406468dd61627,Jan,2017,heat,auto,Greenwich,665.114754,665.004918,664.940984,0.0,False,False,False
0c0744b5e162dc1ad0a5e47d53e8650ba8ade8cf,Jan,2017,heat,auto,Hamden,693.103448,760.000000,680.462644,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
f91e491da4d3c5ed8917b116ec11e55dfd5104b8,Jan,2017,heat,auto,killingworth,654.150000,753.300000,626.350000,0.0,False,False,False
f91e491da4d3c5ed8917b116ec11e55dfd5104b8,Jan,2017,heat,hold,killingworth,658.326087,668.195652,663.173913,0.0,False,False,False
fb4ba55769c3bfb68a844bfd68c7b38ca7e1d559,Jan,2017,heat,hold,Wilton,671.000000,660.000000,660.000000,0.0,False,False,False
fc58d08fd7038c94b9cfcb3287a2bbb6e095745f,Jan,2017,auto,auto,Shelton,684.416667,745.416667,693.416667,25.0,False,False,False


In [7]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
jan_2017_ave.to_csv(
    "data/day/CT/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the Connecticut January 2018 month file (path is relative to the
# notebook's working directory).
jan_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2018-jan-day-CT.csv",
)

In [9]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
jan_2018.drop(
    index=jan_2018.loc[
        jan_2018['TemperatureExpectedCool'].ge(850)
        | jan_2018['TemperatureExpectedHeat'].ge(850)
        | jan_2018['TemperatureExpectedCool'].le(600)
        | jan_2018['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,0382f14f425de2d9f9645def17ee3cfeada55f78,2018-01-25 12:20:00 UTC,heat,hold,700,705,705,CT,Waterford,25,False,False,True,Electric
3,0382f14f425de2d9f9645def17ee3cfeada55f78,2018-01-25 12:50:00 UTC,heat,hold,705,705,705,CT,Waterford,25,False,False,True,Electric
5,4c5caa1e348acf2719f577d8a4727442ca33b60d,2018-01-02 14:15:00 UTC,auto,hold,690,740,682,CT,Barkhamsted,30,False,False,False,Gas
7,0382f14f425de2d9f9645def17ee3cfeada55f78,2018-01-25 13:05:00 UTC,heat,hold,703,705,705,CT,Waterford,25,False,False,True,Electric
8,0b8e511ebb362a92c2b02469a1887fd2a1f86998,2018-01-13 15:55:00 UTC,heat,auto,650,745,613,CT,Monroe,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371224,81b9f64b6f86312ce534fc4a17536a498b3d48a6,2018-01-06 16:30:00 UTC,heat,auto,734,760,760,CT,East Hartford,50,False,False,False,Gas
371225,81b9f64b6f86312ce534fc4a17536a498b3d48a6,2018-01-06 19:10:00 UTC,heat,auto,764,760,760,CT,East Hartford,50,False,False,False,Gas
371226,c6e64f267606c1a1b657786ba82cc328dc7722d6,2018-01-02 13:25:00 UTC,heat,hold,758,760,760,CT,Groton,66,False,False,False,Gas
371227,4eb272b24b720ab73a573b5d6c69346a7ccac79f,2018-01-15 17:10:00 UTC,heat,hold,748,760,760,CT,Greenwich,10,False,False,False,Gas


In [10]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
jan_2018_ave.to_csv(
    "data/day/CT/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the Connecticut January 2019 month file (path is relative to the
# notebook's working directory).
jan_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2019-jan-day-CT.csv",
)

In [15]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
jan_2019.drop(
    index=jan_2019.loc[
        jan_2019['TemperatureExpectedCool'].ge(850)
        | jan_2019['TemperatureExpectedHeat'].ge(850)
        | jan_2019['TemperatureExpectedCool'].le(600)
        | jan_2019['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c73238644b4d7d318233868c0fbae5a18eda1467,2019-01-30 15:30:00 UTC,heat,hold,695,699,699,CT,Wallingford,10,False,False,False,Gas
1,4a8a1907ceccf4b74bd4173088eeeb88faf2a18b,2019-01-22 18:20:00 UTC,heat,hold,703,701,701,CT,Windsor,10,False,False,False,Gas
2,7570fc772e83a3fe72dd8ba13bcc3e56410e40fb,2019-01-27 16:45:00 UTC,auto,hold,669,715,675,CT,Norwich,5,False,False,False,Gas
3,4a8a1907ceccf4b74bd4173088eeeb88faf2a18b,2019-01-12 15:05:00 UTC,heat,hold,684,681,681,CT,Windsor,10,False,False,False,Gas
4,c73238644b4d7d318233868c0fbae5a18eda1467,2019-01-24 19:25:00 UTC,heat,hold,712,701,701,CT,Wallingford,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
546302,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2019-01-15 12:00:00 UTC,auto,hold,753,820,760,CT,Ridgefield,50,False,False,False,Gas
546303,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2019-01-13 11:05:00 UTC,auto,auto,752,820,760,CT,Ridgefield,50,False,False,False,Gas
546304,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2019-01-20 19:00:00 UTC,auto,hold,743,820,760,CT,Ridgefield,50,False,False,False,Gas
546305,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2019-01-15 11:05:00 UTC,auto,hold,741,820,760,CT,Ridgefield,50,False,False,False,Gas


In [16]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
jan_2019_ave.to_csv(
    "data/day/CT/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the Connecticut January 2020 month file (path is relative to the
# notebook's working directory).
jan_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2020-jan-day-CT.csv",
)

In [21]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
jan_2020.drop(
    index=jan_2020.loc[
        jan_2020['TemperatureExpectedCool'].ge(850)
        | jan_2020['TemperatureExpectedHeat'].ge(850)
        | jan_2020['TemperatureExpectedCool'].le(600)
        | jan_2020['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2020-01-28 17:50:00 UTC,heat,hold,707,715,715,CT,Windsor,10,False,False,False,Gas
4,c2f1a84ed18ee42d39add99fe5e0e1c50fd288e3,2020-01-06 13:55:00 UTC,heat,hold,674,799,629,CT,Mystic,116,False,False,False,Gas
5,1c0a777272f175574b611258b25e2b096fdac9df,2020-01-11 16:35:00 UTC,heat,auto,656,696,654,CT,Deep River,120,False,False,False,Gas
6,4a8a1907ceccf4b74bd4173088eeeb88faf2a18b,2020-01-23 12:15:00 UTC,heat,hold,704,685,685,CT,Windsor,10,False,False,False,Gas
7,f4091c17f6d7dfa998f53a3e0159dbb214355c7f,2020-01-11 12:35:00 UTC,heat,auto,731,705,705,CT,Enfield,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544564,f8378dd5e363e435e2bb8f46ebd81e9e6f23af76,2020-01-30 07:50:00 UTC,heat,auto,740,780,760,CT,Wilton,47,False,False,False,Gas
544565,f8378dd5e363e435e2bb8f46ebd81e9e6f23af76,2020-01-30 08:35:00 UTC,heat,auto,743,780,760,CT,Wilton,47,False,False,False,Gas
544566,f8378dd5e363e435e2bb8f46ebd81e9e6f23af76,2020-01-30 08:00:00 UTC,heat,auto,741,780,760,CT,Wilton,47,False,False,False,Gas
544567,f8378dd5e363e435e2bb8f46ebd81e9e6f23af76,2020-01-30 07:25:00 UTC,heat,auto,736,780,760,CT,Wilton,47,False,False,False,Gas


In [22]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
jan_2020_ave.to_csv(
    "data/day/CT/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the Connecticut January 2021 month file (path is relative to the
# notebook's working directory).
jan_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2021-jan-day-CT.csv",
)

In [27]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
jan_2021.drop(
    index=jan_2021.loc[
        jan_2021['TemperatureExpectedCool'].ge(850)
        | jan_2021['TemperatureExpectedHeat'].ge(850)
        | jan_2021['TemperatureExpectedCool'].le(600)
        | jan_2021['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3e7dd224b06e8259cc6c22ad07c6531900f383c3,2021-01-04 19:15:00 UTC,heat,hold,639,650,644,CT,cromwell,30,False,False,False,Gas
1,214f28562b2c6edb923553c2da33e34308417be2,2021-01-07 15:45:00 UTC,auto,hold,719,769,719,CT,Colchester,10,False,False,False,Gas
2,ced1ff247136fb45b6f827deb5cb0c894d162471,2021-01-18 17:30:00 UTC,heat,hold,703,699,699,CT,Prospect,65,False,False,False,Gas
3,b237ed0ef6f08e768c0cf6b7f91fbee3555fcd85,2021-01-08 14:25:00 UTC,heat,hold,669,677,677,CT,Enfield,35,True,False,False,Gas
4,dd42deba452d2cea8e2ddb632972e864194a15b9,2021-01-21 19:45:00 UTC,heat,hold,643,650,643,CT,Madison,50,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383790,251bf868c241659af914a268398fee21565c16c4,2021-01-21 15:00:00 UTC,heat,hold,764,760,760,CT,Naugatuck,30,False,False,False,Gas
383791,251bf868c241659af914a268398fee21565c16c4,2021-01-21 19:45:00 UTC,heat,hold,756,760,760,CT,Naugatuck,30,False,False,False,Gas
383792,251bf868c241659af914a268398fee21565c16c4,2021-01-29 14:55:00 UTC,heat,hold,731,760,760,CT,Naugatuck,30,False,False,False,Gas
383793,251bf868c241659af914a268398fee21565c16c4,2021-01-21 18:45:00 UTC,heat,hold,765,760,760,CT,Naugatuck,30,False,False,False,Gas


In [28]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
jan_2021_ave.to_csv(
    "data/day/CT/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year January CSV files in the output folder. os.listdir returns
# entries in arbitrary order, so sort them to make the combined row order
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/CT/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on each
# iteration and starts from an empty frame, which newer pandas warns about.
frames = []
for file in files:
    df = pd.read_csv("data/day/CT/jan/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the original result (an empty
# frame) when no files were found.
CT_jan = pd.concat(frames) if frames else pd.DataFrame()

CT_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01b5776ab24d14ace90a8296c49941a4ba8b0173,Jan,2017,auto,hold,New MIlford,683.484731,800.000000,685.509677,5.0,False,False,False
1,05d474a41ce90b51ae9627f1dc2a5214d67b2741,Jan,2017,heat,auto,Guilford,630.937500,650.000000,620.000000,35.0,False,False,False
2,0a751de00d076714c9f1c136f2203fae474b7ace,Jan,2017,heat,auto,East Hampton,652.627729,680.000000,640.000000,0.0,False,False,False
3,0b3c9b534cf69257d66a1102858406468dd61627,Jan,2017,heat,auto,Greenwich,665.114754,665.004918,664.940984,0.0,False,False,False
4,0c0744b5e162dc1ad0a5e47d53e8650ba8ade8cf,Jan,2017,heat,auto,Hamden,693.103448,760.000000,680.462644,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
421,fc58d08fd7038c94b9cfcb3287a2bbb6e095745f,Jan,2021,auto,hold,Shelton,695.602740,749.904110,699.904110,25.0,False,False,False
422,fc92f3d7ab046b1e0482835f5cf10e18372dc434,Jan,2021,auto,hold,Fairfield,700.048077,750.918269,697.716346,40.0,True,False,True
423,fef94e274991646e0f28d91d1aaea3cd709d8825,Jan,2021,heat,hold,New Canaan,623.134771,650.000000,611.832884,0.0,False,False,False
424,ff498c11fdbb5d698084e0f03a29b69e76ee8566,Jan,2021,heat,hold,North Haven,695.836999,692.338292,692.338292,70.0,False,False,False


In [34]:
# Save the combined January frame. The row index is only each source file's
# own row counter, so it is left out of the output.
CT_jan.to_csv(
    "Scraper_Output/State_Month_Day/CT/CT_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the Connecticut February 2017 month file (path is relative to the
# notebook's working directory).
feb_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2017-feb-day-CT.csv",
)

In [36]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
feb_2017.drop(
    index=feb_2017.loc[
        feb_2017['TemperatureExpectedCool'].ge(850)
        | feb_2017['TemperatureExpectedHeat'].ge(850)
        | feb_2017['TemperatureExpectedCool'].le(600)
        | feb_2017['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,ecc6ddba35a42fcb31e3c9649a554a96037d34b5,2017-02-06 12:30:00 UTC,heat,hold,666,650,631,CT,Redding,30,True,False,False,Gas
5,215c2dfd21213f343402901d6a5bee6dae1c5718,2017-02-11 19:15:00 UTC,auto,hold,676,780,657,CT,Orange,0,True,False,False,Gas
6,90311ac9900b2ba8a03e5f6348c84a144db460b1,2017-02-04 19:10:00 UTC,heat,hold,638,676,644,CT,Mansfield,20,True,False,True,Electric
8,f1c7933114855923a9cf2d59719743f601e2da8c,2017-02-04 17:40:00 UTC,heat,auto,699,686,686,CT,Norwich,45,False,False,False,Gas
9,edddbdf7c078f31fcbb10678a197397afebd2601,2017-02-17 17:55:00 UTC,heat,hold,692,665,665,CT,Old Mystic,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113459,c6e64f267606c1a1b657786ba82cc328dc7722d6,2017-02-10 19:05:00 UTC,heat,hold,758,760,760,CT,Groton,66,False,False,False,Gas
113460,c6e64f267606c1a1b657786ba82cc328dc7722d6,2017-02-07 16:00:00 UTC,heat,auto,761,760,760,CT,Groton,66,False,False,False,Gas
113461,c6e64f267606c1a1b657786ba82cc328dc7722d6,2017-02-07 14:40:00 UTC,heat,auto,755,760,760,CT,Groton,66,False,False,False,Gas
113462,c6e64f267606c1a1b657786ba82cc328dc7722d6,2017-02-10 18:00:00 UTC,heat,hold,758,760,760,CT,Groton,66,False,False,False,Gas


In [37]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
# NOTE(review): the January cells label the month "Jan" while this uses
# lowercase "feb" - confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
feb_2017_ave.to_csv(
    "data/day/CT/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the Connecticut February 2018 month file (path is relative to the
# notebook's working directory).
feb_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2018-feb-day-CT.csv",
)

In [42]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
feb_2018.drop(
    index=feb_2018.loc[
        feb_2018['TemperatureExpectedCool'].ge(850)
        | feb_2018['TemperatureExpectedHeat'].ge(850)
        | feb_2018['TemperatureExpectedCool'].le(600)
        | feb_2018['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2018-02-24 15:00:00 UTC,auto,auto,636,685,635,CT,Windsor Locks,30,False,False,False,Gas
3,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2018-02-26 19:45:00 UTC,auto,auto,637,685,635,CT,Windsor Locks,30,False,False,False,Gas
5,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2018-02-24 11:40:00 UTC,auto,auto,635,685,635,CT,Windsor Locks,30,False,False,False,Gas
8,f4091c17f6d7dfa998f53a3e0159dbb214355c7f,2018-02-11 15:35:00 UTC,heat,auto,738,802,667,CT,Enfield,60,False,False,False,Gas
9,e62b302fe4554f54348b4c85776c7983d5d1ce5e,2018-02-27 11:35:00 UTC,heat,hold,718,716,716,CT,Farmington,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352385,e41a3c5004a67cafeafb255716f89a431046249d,2018-02-24 17:20:00 UTC,heat,hold,761,760,760,CT,Greenwich,10,False,False,False,Gas
352386,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2018-02-16 16:15:00 UTC,heat,hold,757,760,760,CT,New Milford,15,False,False,False,Gas
352387,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2018-02-16 15:10:00 UTC,heat,hold,756,760,760,CT,New Milford,15,False,False,False,Gas
352388,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2018-02-16 11:10:00 UTC,heat,hold,760,760,760,CT,New Milford,15,False,False,False,Gas


In [43]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
# NOTE(review): the January cells label the month "Jan" while this uses
# lowercase "feb" - confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
feb_2018_ave.to_csv(
    "data/day/CT/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the Connecticut February 2019 month file (path is relative to the
# notebook's working directory).
feb_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2019-feb-day-CT.csv",
)

In [48]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
feb_2019.drop(
    index=feb_2019.loc[
        feb_2019['TemperatureExpectedCool'].ge(850)
        | feb_2019['TemperatureExpectedHeat'].ge(850)
        | feb_2019['TemperatureExpectedCool'].le(600)
        | feb_2019['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c73238644b4d7d318233868c0fbae5a18eda1467,2019-02-04 18:25:00 UTC,heat,hold,719,709,709,CT,Wallingford,10,False,False,False,Gas
2,c73238644b4d7d318233868c0fbae5a18eda1467,2019-02-01 13:45:00 UTC,heat,hold,701,699,699,CT,Wallingford,10,False,False,False,Gas
3,7570fc772e83a3fe72dd8ba13bcc3e56410e40fb,2019-02-03 13:30:00 UTC,auto,auto,656,705,665,CT,Norwich,5,False,False,False,Gas
4,c73238644b4d7d318233868c0fbae5a18eda1467,2019-02-15 18:40:00 UTC,heat,hold,695,699,699,CT,Wallingford,10,False,False,False,Gas
5,528482e17661477da3fa31d0b3d158c6fb72e9aa,2019-02-12 13:55:00 UTC,heat,hold,641,672,672,CT,Woodstock,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
367059,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2019-02-28 14:45:00 UTC,heat,hold,758,760,760,CT,New Milford,15,False,False,False,Gas
367060,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2019-02-28 13:45:00 UTC,heat,hold,758,760,760,CT,New Milford,15,False,False,False,Gas
367061,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2019-02-28 18:00:00 UTC,heat,hold,761,760,760,CT,New Milford,15,False,False,False,Gas
367062,0d4871b896fc2a5b93007ca0a3a0799970fbf168,2019-02-28 17:55:00 UTC,heat,hold,758,760,760,CT,New Milford,15,False,False,False,Gas


In [49]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
# NOTE(review): the January cells label the month "Jan" while this uses
# lowercase "feb" - confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
feb_2019_ave.to_csv(
    "data/day/CT/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the Connecticut February 2020 month file (path is relative to the
# notebook's working directory).
feb_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2020-feb-day-CT.csv",
)

In [54]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
feb_2020.drop(
    index=feb_2020.loc[
        feb_2020['TemperatureExpectedCool'].ge(850)
        | feb_2020['TemperatureExpectedHeat'].ge(850)
        | feb_2020['TemperatureExpectedCool'].le(600)
        | feb_2020['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f4091c17f6d7dfa998f53a3e0159dbb214355c7f,2020-02-09 14:00:00 UTC,heat,hold,710,711,711,CT,Enfield,60,False,False,False,Gas
2,b5fdea0595d25b4775cbef1b9dac1f2857940c7d,2020-02-11 12:55:00 UTC,auto,auto,656,780,627,CT,Tolland,25,False,False,False,Gas
4,a8bf398ef2aaff7fe69277442a23274c472e658e,2020-02-18 18:50:00 UTC,heat,hold,708,702,702,CT,East haven,65,False,False,False,Gas
11,b9eb493652b04bdbd9a25c27c17a34f3e00646cd,2020-02-14 15:00:00 UTC,heat,auto,691,770,708,CT,Orange,55,False,False,False,Gas
12,105d4bcd5c19ff6242a31be48b095281b1f41eec,2020-02-15 18:15:00 UTC,heat,hold,702,705,705,CT,Watertown,25,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500242,764deab9260532acb2bb9a882b120eb77973625c,2020-02-18 19:15:00 UTC,auto,hold,723,820,760,CT,Fairfield,0,True,False,False,Gas
500243,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2020-02-15 19:40:00 UTC,auto,hold,751,810,760,CT,Ridgefield,50,False,False,False,Gas
500244,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2020-02-15 18:40:00 UTC,auto,hold,747,810,760,CT,Ridgefield,50,False,False,False,Gas
500245,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2020-02-15 18:55:00 UTC,auto,hold,761,810,760,CT,Ridgefield,50,False,False,False,Gas


In [55]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
# NOTE(review): the January cells label the month "Jan" while this uses
# lowercase "feb" - confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
feb_2020_ave.to_csv(
    "data/day/CT/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the Connecticut February 2021 month file (path is relative to the
# notebook's working directory).
feb_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2021-feb-day-CT.csv",
)

In [60]:
# Drop setpoint outliers before aggregating: a row is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree F (850 ~ 85.0F) - confirm.
feb_2021.drop(
    index=feb_2021.loc[
        feb_2021['TemperatureExpectedCool'].ge(850)
        | feb_2021['TemperatureExpectedHeat'].ge(850)
        | feb_2021['TemperatureExpectedCool'].le(600)
        | feb_2021['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ced1ff247136fb45b6f827deb5cb0c894d162471,2021-02-15 17:55:00 UTC,heat,hold,699,702,702,CT,Prospect,65,False,False,False,Gas
1,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2021-02-10 18:55:00 UTC,heat,hold,730,735,735,CT,Windsor,10,False,False,False,Gas
2,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2021-02-22 11:40:00 UTC,heat,hold,723,725,725,CT,Windsor,10,False,False,False,Gas
3,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2021-02-01 14:40:00 UTC,heat,hold,724,748,748,CT,Windsor,10,False,False,False,Gas
4,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2021-02-20 15:15:00 UTC,heat,hold,710,715,715,CT,Windsor,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350768,251bf868c241659af914a268398fee21565c16c4,2021-02-23 08:45:00 UTC,heat,hold,750,760,760,CT,Naugatuck,30,False,False,False,Gas
350769,d844acbe1727e39f17d98fc6505c5afb592cb458,2021-02-26 09:10:00 UTC,heat,hold,746,760,760,CT,Torrington,15,False,False,False,Gas
350770,d844acbe1727e39f17d98fc6505c5afb592cb458,2021-02-26 09:00:00 UTC,heat,hold,760,760,760,CT,Torrington,15,False,False,False,Gas
350771,d844acbe1727e39f17d98fc6505c5afb592cb458,2021-02-26 08:05:00 UTC,heat,hold,752,760,760,CT,Torrington,15,False,False,False,Gas


In [61]:
# Tag every row with its source period so rows stay identifiable once the
# yearly files are combined. Year is kept as a string.
# NOTE(review): the January cells label the month "Jan" while this uses
# lowercase "feb" - confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output is already labelled as averages.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for each thermostat / mode / event / city
# combination within the month. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead of
# silently dropping them as older versions did.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Write the monthly averages to disk. The group keys live in the index, so the
# index is written to keep Identifier/Month/Year/... in the file.
feb_2021_ave.to_csv(
    "data/day/CT/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year February CSV files in the output folder. os.listdir returns
# entries in arbitrary order, so sort them to make the combined row order
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/CT/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows on each
# iteration and starts from an empty frame, which newer pandas warns about.
frames = []
for file in files:
    df = pd.read_csv("data/day/CT/feb/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the original result (an empty
# frame) when no files were found.
CT_feb = pd.concat(frames) if frames else pd.DataFrame()

CT_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01b5776ab24d14ace90a8296c49941a4ba8b0173,feb,2017,auto,hold,New MIlford,688.446360,800.000000,690.000000,5.0,False,False,False
1,05d474a41ce90b51ae9627f1dc2a5214d67b2741,feb,2017,heat,hold,Guilford,689.846154,695.000000,695.000000,35.0,False,False,False
2,0a751de00d076714c9f1c136f2203fae474b7ace,feb,2017,heat,auto,East Hampton,646.242312,680.000000,638.308733,0.0,False,False,False
3,0b3c9b534cf69257d66a1102858406468dd61627,feb,2017,heat,auto,Greenwich,669.248031,670.000000,670.000000,0.0,False,False,False
4,0c0744b5e162dc1ad0a5e47d53e8650ba8ade8cf,feb,2017,heat,auto,Hamden,701.683333,760.000000,680.491667,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
411,fc58d08fd7038c94b9cfcb3287a2bbb6e095745f,feb,2021,auto,hold,Shelton,694.914634,750.000000,700.000000,25.0,False,False,False
412,fc92f3d7ab046b1e0482835f5cf10e18372dc434,feb,2021,heat,hold,Fairfield,674.650000,699.900000,699.316667,40.0,True,False,True
413,fef94e274991646e0f28d91d1aaea3cd709d8825,feb,2021,heat,hold,New Canaan,613.409683,650.000000,610.355680,0.0,False,False,False
414,ff498c11fdbb5d698084e0f03a29b69e76ee8566,feb,2021,heat,hold,North Haven,693.165198,689.720264,689.720264,70.0,False,False,False


In [67]:
# Write the combined February file. to_csv does not create missing folders,
# so make sure the output directory exists first.
Path("Scraper_Output/State_Month_Day/CT").mkdir(parents=True, exist_ok=True)

# index=False: the index is only the per-file row number carried over by concat.
CT_feb.to_csv("Scraper_Output/State_Month_Day/CT/CT_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" CSV for CT from the large-data folder.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2017-jun-day-CT.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jun_2017.drop(index=jun_2017[is_outlier].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a83e1ff380a8949f1ca87840e41db535065e5120,2017-06-16 17:30:00 UTC,auto,auto,739,735,685,CT,Suffield,20,False,False,False,Gas
1,a83e1ff380a8949f1ca87840e41db535065e5120,2017-06-11 14:45:00 UTC,auto,auto,737,735,685,CT,Suffield,20,False,False,False,Gas
2,565c7e8b066f34db999e142b69b61c1a6c189829,2017-06-22 16:10:00 UTC,cool,hold,783,817,662,CT,Killingworth,15,False,False,False,Gas
3,b237ed0ef6f08e768c0cf6b7f91fbee3555fcd85,2017-06-13 09:05:00 UTC,auto,auto,729,725,675,CT,Enfield,35,True,False,False,Gas
4,a83e1ff380a8949f1ca87840e41db535065e5120,2017-06-14 19:45:00 UTC,auto,auto,739,735,685,CT,Suffield,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170851,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-06-02 11:25:00 UTC,cool,hold,698,760,760,CT,wethersfield,100,False,False,True,Electric
170852,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-06-02 14:10:00 UTC,cool,hold,711,760,760,CT,wethersfield,100,False,False,True,Electric
170853,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-06-02 13:25:00 UTC,cool,hold,709,760,760,CT,wethersfield,100,False,False,True,Electric
170854,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-06-23 18:30:00 UTC,cool,hold,764,760,760,CT,wethersfield,100,False,False,True,Electric


In [70]:
# Stamp every row with its year and month (both stored as strings).
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jun_2017 = jun_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jun").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jun_2017_ave.to_csv("data/day/CT/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the June 2018 "day" CSV for CT from the large-data folder.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2018-jun-day-CT.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jun_2018.drop(index=jun_2018[is_outlier].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-06-20 13:30:00 UTC,cool,hold,699,782,782,CT,Woodstock,10,False,False,False,Gas
1,e9870d26f289a344a43ab52d729cb869f3baaa62,2018-06-26 17:10:00 UTC,auto,auto,710,725,675,CT,Broad Brook,60,False,False,False,Gas
2,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-06-01 17:15:00 UTC,auto,hold,691,692,632,CT,Woodstock,10,False,False,False,Gas
3,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-06-29 15:55:00 UTC,cool,hold,703,702,702,CT,Woodstock,10,False,False,False,Gas
4,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-06-18 17:55:00 UTC,cool,hold,717,742,742,CT,Woodstock,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
325820,966ada58bdafef93eef95ff34ccb024ef8052f62,2018-06-22 16:30:00 UTC,cool,hold,732,760,760,CT,wethersfield,100,False,False,True,Electric
325821,966ada58bdafef93eef95ff34ccb024ef8052f62,2018-06-25 17:20:00 UTC,cool,hold,739,760,760,CT,wethersfield,100,False,False,True,Electric
325822,966ada58bdafef93eef95ff34ccb024ef8052f62,2018-06-27 12:05:00 UTC,cool,auto,707,760,760,CT,wethersfield,100,False,False,True,Electric
325823,966ada58bdafef93eef95ff34ccb024ef8052f62,2018-06-17 19:25:00 UTC,cool,hold,743,760,760,CT,wethersfield,100,False,False,True,Electric


In [76]:
# Stamp every row with its year and month (both stored as strings).
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jun_2018 = jun_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jun").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jun_2018_ave.to_csv("data/day/CT/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the June 2019 "day" CSV for CT from the large-data folder.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2019-jun-day-CT.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jun_2019.drop(index=jun_2019[is_outlier].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,564c6aec11077a013c08daef6f741cea54a99227,2019-06-02 12:55:00 UTC,cool,hold,684,685,685,CT,Southbury,40,False,False,False,Gas
1,564c6aec11077a013c08daef6f741cea54a99227,2019-06-14 10:00:00 UTC,cool,hold,678,745,732,CT,Southbury,40,False,False,False,Gas
2,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-06-20 17:20:00 UTC,cool,hold,708,705,705,CT,Waterford,25,False,False,True,Electric
3,e9870d26f289a344a43ab52d729cb869f3baaa62,2019-06-01 11:20:00 UTC,auto,auto,694,725,675,CT,Broad Brook,60,False,False,False,Gas
4,f84fcb0cde327263283f23cea32fdd90e0e0d48e,2019-06-29 17:20:00 UTC,cool,hold,726,724,724,CT,Wethersfield,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416637,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2019-06-29 10:15:00 UTC,cool,hold,762,760,760,CT,West Hartford,75,True,False,False,Gas
416638,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2019-06-29 19:35:00 UTC,cool,hold,763,760,760,CT,West Hartford,75,True,False,False,Gas
416639,e84cfd4b5dfd576f4ba539b81c92c56b6584367a,2019-06-29 11:00:00 UTC,cool,hold,732,760,760,CT,North Grosvenordale,80,False,False,False,Gas
416640,e84cfd4b5dfd576f4ba539b81c92c56b6584367a,2019-06-29 11:05:00 UTC,cool,hold,732,760,760,CT,North Grosvenordale,80,False,False,False,Gas


In [82]:
# Stamp every row with its year and month (both stored as strings).
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jun_2019 = jun_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jun").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jun_2019_ave.to_csv("data/day/CT/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the June 2020 "day" CSV for CT from the large-data folder.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2020-jun-day-CT.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jun_2020.drop(index=jun_2020[is_outlier].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0090bb6fc40c84b7f42c125689e22350e6128e3e,2020-06-03 17:25:00 UTC,cool,hold,732,740,731,CT,Monroe,0,False,False,False,Gas
1,e9870d26f289a344a43ab52d729cb869f3baaa62,2020-06-02 11:10:00 UTC,auto,auto,685,735,685,CT,Broad Brook,60,False,False,False,Gas
2,b9eb493652b04bdbd9a25c27c17a34f3e00646cd,2020-06-30 19:55:00 UTC,cool,auto,732,730,664,CT,Orange,55,False,False,False,Gas
3,0090bb6fc40c84b7f42c125689e22350e6128e3e,2020-06-24 15:45:00 UTC,cool,hold,749,750,731,CT,Monroe,0,False,False,False,Gas
4,edd88b332aea2fee543048ec1b27d65c159e4bab,2020-06-01 18:55:00 UTC,heat,hold,710,715,715,CT,Newtown,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
478213,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2020-06-06 17:25:00 UTC,cool,hold,762,760,760,CT,West Hartford,75,True,False,False,Gas
478214,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2020-06-06 11:15:00 UTC,cool,auto,762,760,760,CT,West Hartford,75,True,False,False,Gas
478215,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2020-06-06 16:20:00 UTC,cool,hold,758,760,760,CT,West Hartford,75,True,False,False,Gas
478216,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2020-06-06 12:15:00 UTC,cool,auto,760,760,760,CT,West Hartford,75,True,False,False,Gas


In [88]:
# Stamp every row with its year and month (both stored as strings).
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jun_2020 = jun_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jun").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jun_2020_ave.to_csv("data/day/CT/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the June 2021 "day" CSV for CT from the large-data folder.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2021-jun-day-CT.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jun_2021.drop(index=jun_2021[is_outlier].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,71b7a96cb3b5e2af708eb1e817f3e24a0023cb51,2021-06-25 11:35:00 UTC,heat,hold,721,723,723,CT,Farmington,20,True,False,False,Gas
1,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2021-06-23 18:35:00 UTC,cool,hold,718,715,665,CT,Windsor,10,False,False,False,Gas
2,bd36ac1884d1b47040031d82aecd4533604135d8,2021-06-22 17:20:00 UTC,heat,hold,674,650,642,CT,Tolland,29,False,False,False,Gas
3,0fa0b4c084347c87c094d0ff966f45f13fa5b251,2021-06-18 19:40:00 UTC,cool,hold,768,762,756,CT,Hartford,90,False,False,False,Gas
4,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2021-06-12 17:55:00 UTC,cool,hold,760,715,665,CT,Windsor,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309764,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-06-04 18:05:00 UTC,cool,hold,729,760,760,CT,wethersfield,100,False,False,True,Electric
309765,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-06-05 18:50:00 UTC,cool,hold,729,760,760,CT,wethersfield,100,False,False,True,Electric
309766,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-06-05 18:30:00 UTC,cool,hold,726,760,760,CT,wethersfield,100,False,False,True,Electric
309767,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-06-05 19:30:00 UTC,cool,hold,735,760,760,CT,wethersfield,100,False,False,True,Electric


In [94]:
# Stamp every row with its year and month (both stored as strings).
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jun_2021 = jun_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jun").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jun_2021_ave.to_csv("data/day/CT/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV files
1. Read in every per-year average CSV from this state's folder for the month
2. Export them as one combined CSV

In [98]:
# List the CSV files in the June folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/CT/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file into a list and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/CT/jun/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the result an empty DataFrame
# when the folder holds no CSV files.
CT_jun = pd.concat(frames) if frames else pd.DataFrame()

CT_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,007f9b37f1ac5cfe9806f9a1a47809855e7e31b8,jun,2017,cool,hold,South Windsor,717.464162,732.698266,660.000000,20.0,False,False,False
1,0094e2c467552f4b15765889040931605fb22890,jun,2017,cool,auto,Norwalk,739.454545,709.272727,692.727273,27.0,False,False,False
2,0094e2c467552f4b15765889040931605fb22890,jun,2017,cool,hold,Norwalk,765.000000,699.000000,700.000000,27.0,False,False,False
3,0382f14f425de2d9f9645def17ee3cfeada55f78,jun,2017,cool,auto,Waterford,699.173669,700.000000,686.504202,25.0,False,False,True
4,0382f14f425de2d9f9645def17ee3cfeada55f78,jun,2017,cool,hold,Waterford,708.696148,708.810271,708.770328,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
446,fc58d08fd7038c94b9cfcb3287a2bbb6e095745f,jun,2021,cool,hold,Shelton,711.339921,710.000000,710.000000,25.0,False,False,False
447,fc92f3d7ab046b1e0482835f5cf10e18372dc434,jun,2021,cool,hold,Fairfield,694.968354,687.664557,687.145570,40.0,True,False,True
448,fef94e274991646e0f28d91d1aaea3cd709d8825,jun,2021,cool,hold,New Canaan,650.846603,647.909746,647.802334,0.0,False,False,False
449,ff498c11fdbb5d698084e0f03a29b69e76ee8566,jun,2021,cool,hold,North Haven,715.425000,722.812500,722.812500,70.0,False,False,False


In [100]:
# Write the combined June file. to_csv does not create missing folders,
# so make sure the output directory exists first.
Path("Scraper_Output/State_Month_Day/CT").mkdir(parents=True, exist_ok=True)

# index=False: the index is only the per-file row number carried over by concat.
CT_jun.to_csv("Scraper_Output/State_Month_Day/CT/CT_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" CSV for CT from the large-data folder.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2017-jul-day-CT.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jul_2017.drop(index=jul_2017[is_outlier].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e32eeab5de758aca829b12a6c53604bdbeba9734,2017-07-01 15:55:00 UTC,cool,hold,751,747,747,CT,Litchfield,60,True,False,True,Electric
1,bb815c20941dfc2c09da049e6d22845484811dd3,2017-07-16 16:45:00 UTC,cool,auto,724,748,667,CT,Easton,66,False,False,True,Electric
2,7570fc772e83a3fe72dd8ba13bcc3e56410e40fb,2017-07-29 15:00:00 UTC,cool,auto,703,725,638,CT,Norwich,5,False,False,False,Gas
3,582e46f241d8ee21335889156509339ff2e12628,2017-07-28 15:35:00 UTC,cool,auto,733,772,708,CT,Killingworth,15,False,False,False,Gas
4,565c7e8b066f34db999e142b69b61c1a6c189829,2017-07-16 12:45:00 UTC,cool,auto,717,780,772,CT,Killingworth,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226220,7f429ed3e777b71b73c74afb5d188105933ebab4,2017-07-23 15:15:00 UTC,cool,hold,760,760,760,CT,East Hampton,0,False,False,False,Gas
226221,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-07-05 17:35:00 UTC,cool,hold,743,760,760,CT,wethersfield,100,False,False,True,Electric
226222,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-07-12 16:40:00 UTC,cool,hold,754,760,760,CT,wethersfield,100,False,False,True,Electric
226223,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-07-23 19:00:00 UTC,cool,hold,743,760,760,CT,wethersfield,100,False,False,True,Electric


In [103]:
# Stamp every row with its year and month (both stored as strings).
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jul_2017 = jul_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jul").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jul_2017_ave.to_csv("data/day/CT/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the July 2018 "day" CSV for CT from the large-data folder.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2018-jul-day-CT.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jul_2018.drop(index=jul_2018[is_outlier].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4c5caa1e348acf2719f577d8a4727442ca33b60d,2018-07-02 11:40:00 UTC,cool,hold,758,755,755,CT,Barkhamsted,30,False,False,False,Gas
2,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-07-20 17:10:00 UTC,cool,hold,713,712,712,CT,Woodstock,10,False,False,False,Gas
3,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2018-07-27 16:20:00 UTC,cool,auto,774,770,755,CT,Windsor,10,False,False,False,Gas
4,7e57d8be21393036f10df57e000165b31fed3576,2018-07-05 10:15:00 UTC,cool,hold,824,830,810,CT,West Haven,20,True,False,True,Electric
5,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-07-06 14:20:00 UTC,cool,hold,725,762,762,CT,Woodstock,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427059,966ada58bdafef93eef95ff34ccb024ef8052f62,2018-07-21 19:10:00 UTC,cool,hold,746,760,760,CT,wethersfield,100,False,False,True,Electric
427060,6c05c45d90fd4a9c36fbe9a37f7d0f7aacc2d6eb,2018-07-25 19:20:00 UTC,cool,hold,753,760,760,CT,West Hartford,68,False,False,False,Gas
427061,6c05c45d90fd4a9c36fbe9a37f7d0f7aacc2d6eb,2018-07-05 17:35:00 UTC,cool,auto,760,770,760,CT,West Hartford,68,False,False,False,Gas
427062,6c05c45d90fd4a9c36fbe9a37f7d0f7aacc2d6eb,2018-07-25 19:35:00 UTC,cool,hold,757,760,760,CT,West Hartford,68,False,False,False,Gas


In [109]:
# Stamp every row with its year and month (both stored as strings).
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jul_2018 = jul_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jul").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jul_2018_ave.to_csv("data/day/CT/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the July 2019 "day" CSV for CT from the large-data folder.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2019-jul-day-CT.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jul_2019.drop(index=jul_2019[is_outlier].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7d20b90cb0a2faa613c7635d9bb1b6e86d22cc13,2019-07-06 18:00:00 UTC,auto,hold,762,764,713,CT,Monroe,10,False,False,False,Gas
1,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-07-31 17:30:00 UTC,cool,hold,720,715,715,CT,Waterford,25,False,False,True,Electric
2,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-07-07 15:35:00 UTC,cool,hold,705,705,705,CT,Waterford,25,False,False,True,Electric
3,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-07-11 13:25:00 UTC,cool,hold,705,715,715,CT,Waterford,25,False,False,True,Electric
4,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-07-21 13:35:00 UTC,cool,hold,725,725,725,CT,Waterford,25,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500663,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2019-07-21 17:30:00 UTC,cool,hold,760,760,760,CT,West Hartford,75,True,False,False,Gas
500664,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2019-07-21 18:30:00 UTC,cool,hold,763,760,760,CT,West Hartford,75,True,False,False,Gas
500665,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2019-07-04 17:05:00 UTC,cool,hold,760,760,760,CT,West Hartford,75,True,False,False,Gas
500666,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2019-07-14 19:55:00 UTC,cool,auto,764,760,760,CT,West Hartford,75,True,False,False,Gas


In [115]:
# Stamp every row with its year and month (both stored as strings).
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jul_2019 = jul_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jul").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jul_2019_ave.to_csv("data/day/CT/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the July 2020 "day" CSV for CT from the large-data folder.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2020-jul-day-CT.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jul_2020.drop(index=jul_2020[is_outlier].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b3c0ff17ca5abe920bd4b20969fcf37806cd940d,2020-07-11 18:55:00 UTC,cool,hold,735,715,715,CT,Ellington,20,False,False,False,Gas
1,c147c2e1cbe076bc976db8e788fa285e351f07db,2020-07-08 15:25:00 UTC,cool,auto,749,744,732,CT,Durham,20,False,False,False,Gas
2,81b9f64b6f86312ce534fc4a17536a498b3d48a6,2020-07-01 17:50:00 UTC,cool,auto,737,727,713,CT,East Hartford,50,False,False,False,Gas
3,97b4e7f717297d6303e705e50cd9e5b83035da86,2020-07-27 16:10:00 UTC,cool,auto,741,760,731,CT,Monroe,39,True,False,False,Gas
5,1d643e73a2c013cb3b79b099d91d3e32d25ebf7f,2020-07-25 17:20:00 UTC,cool,hold,776,731,731,CT,Stonington,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
527109,af6867edfa2849e1718a669e29fb2ae5f04852fd,2020-07-16 17:10:00 UTC,cool,auto,715,760,760,CT,West Hartford,56,False,False,False,Gas
527110,467bba8253f6e67dc1327f0ac0d681f5c7115c83,2020-07-16 19:45:00 UTC,cool,auto,748,760,760,CT,West Hartford,56,False,False,False,Gas
527111,af6867edfa2849e1718a669e29fb2ae5f04852fd,2020-07-11 19:45:00 UTC,cool,auto,754,760,760,CT,West Hartford,56,False,False,False,Gas
527112,af6867edfa2849e1718a669e29fb2ae5f04852fd,2020-07-16 15:45:00 UTC,cool,auto,706,760,760,CT,West Hartford,56,False,False,False,Gas


In [121]:
# Stamp every row with its year and month (both stored as strings).
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jul_2020 = jul_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jul").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jul_2020_ave.to_csv("data/day/CT/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the July 2021 "day" CSV for CT from the large-data folder.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/CT-day/2021-jul-day-CT.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop by index label, in place, so later cells keep working on the same frame.
jul_2021.drop(index=jul_2021[is_outlier].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,215c2dfd21213f343402901d6a5bee6dae1c5718,2021-07-17 18:40:00 UTC,auto,hold,777,770,675,CT,Orange,0,True,False,False,Gas
1,e9ecbea95473c9c90df38e5c762652f6d9ca5c4c,2021-07-13 17:00:00 UTC,cool,hold,710,705,705,CT,Easton,60,False,False,True,Electric
2,215c2dfd21213f343402901d6a5bee6dae1c5718,2021-07-17 19:20:00 UTC,auto,hold,783,770,675,CT,Orange,0,True,False,False,Gas
3,71b7a96cb3b5e2af708eb1e817f3e24a0023cb51,2021-07-21 11:30:00 UTC,cool,hold,715,731,731,CT,Farmington,20,True,False,False,Gas
4,215c2dfd21213f343402901d6a5bee6dae1c5718,2021-07-19 13:00:00 UTC,auto,hold,752,770,675,CT,Orange,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
323907,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-07-02 17:20:00 UTC,cool,hold,742,760,760,CT,wethersfield,100,False,False,True,Electric
323908,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-07-07 19:50:00 UTC,cool,hold,745,760,760,CT,wethersfield,100,False,False,True,Electric
323909,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-07-02 18:20:00 UTC,cool,hold,740,760,760,CT,wethersfield,100,False,False,True,Electric
323910,966ada58bdafef93eef95ff34ccb024ef8052f62,2021-07-16 18:10:00 UTC,cool,hold,762,760,760,CT,wethersfield,100,False,False,True,Electric


In [127]:
# Stamp every row with its year and month (both stored as strings).
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" ahead of the aggregation
# so the averaged output carries self-describing names.
jul_2021 = jul_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is spelled out because the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists before writing.
Path("data/day/CT/jul").mkdir(parents=True, exist_ok=True)

# index=True keeps the group keys, which live in the index after the groupby.
jul_2021_ave.to_csv("data/day/CT/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV files
1. Read in every per-year average CSV from this state's folder for the month
2. Export them as one combined CSV

In [131]:
# List the CSV files in the July folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/CT/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file into a list and concatenate once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all earlier rows each pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/CT/jul/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the result an empty DataFrame
# when the folder holds no CSV files.
CT_jul = pd.concat(frames) if frames else pd.DataFrame()

CT_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,007f9b37f1ac5cfe9806f9a1a47809855e7e31b8,jul,2017,cool,hold,South Windsor,721.520971,727.560706,660.000000,20.0,False,False,False
1,0094e2c467552f4b15765889040931605fb22890,jul,2017,auto,auto,Norwalk,721.624161,763.489933,642.751678,27.0,False,False,False
2,0094e2c467552f4b15765889040931605fb22890,jul,2017,cool,auto,Norwalk,720.050556,740.758342,651.882710,27.0,False,False,False
3,0094e2c467552f4b15765889040931605fb22890,jul,2017,cool,hold,Norwalk,728.125000,719.375000,713.875000,27.0,False,False,False
4,011dae903f5512427598b14bbb3f02baa8813745,jul,2017,auto,auto,Shelton,765.666667,732.000000,650.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
350,fc92f3d7ab046b1e0482835f5cf10e18372dc434,jul,2021,cool,hold,Fairfield,700.558824,690.397059,689.794118,40.0,True,False,True
351,fdce2932511e0bf91c9516b4f56bba7ca424f147,jul,2021,auto,hold,Shelton,745.000000,716.666667,663.333333,5.0,False,False,False
352,fdda1e11626a48045c202219e652ae8b195c85b1,jul,2021,cool,hold,Sandy Hook,722.970588,721.176471,721.176471,20.0,False,False,False
353,fef94e274991646e0f28d91d1aaea3cd709d8825,jul,2021,cool,hold,New Canaan,658.626189,655.972733,655.955295,0.0,False,False,False


In [133]:
# Write the combined July table; index=False leaves the row labels out.
CT_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/CT/CT_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the CT "day" extract for August 2017 into a DataFrame.
aug_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2017-aug-day-CT.csv"
)

In [135]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
aug_2017 = aug_2017.loc[~is_outlier].copy()

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2017-08-27 10:30:00 UTC,auto,hold,713,724,674,CT,Woodbury,30,False,False,False,Gas
1,b4061b7ca02df9d17e37c1919645ce998c0b568d,2017-08-27 17:00:00 UTC,auto,hold,712,734,684,CT,Woodbury,0,False,False,False,Gas
2,b4061b7ca02df9d17e37c1919645ce998c0b568d,2017-08-29 15:25:00 UTC,auto,hold,705,734,684,CT,Woodbury,0,False,False,False,Gas
3,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2017-08-28 12:30:00 UTC,auto,hold,718,734,674,CT,Woodbury,30,False,False,False,Gas
4,564c6aec11077a013c08daef6f741cea54a99227,2017-08-14 11:10:00 UTC,cool,auto,680,704,746,CT,Southbury,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215915,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-08-02 19:35:00 UTC,cool,hold,747,760,760,CT,wethersfield,100,False,False,True,Electric
215916,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-08-23 19:45:00 UTC,cool,hold,744,760,760,CT,wethersfield,100,False,False,True,Electric
215917,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-08-09 17:00:00 UTC,cool,hold,745,760,760,CT,wethersfield,100,False,False,True,Electric
215918,966ada58bdafef93eef95ff34ccb024ef8052f62,2017-08-09 16:10:00 UTC,cool,hold,739,760,760,CT,wethersfield,100,False,False,True,Electric


In [136]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
aug_2017 = aug_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
aug_2017_ave.to_csv(
    path_or_buf="data/day/CT/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the CT "day" extract for August 2018 into a DataFrame.
aug_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2018-aug-day-CT.csv"
)

In [141]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
aug_2018 = aug_2018.loc[~is_outlier].copy()

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e9870d26f289a344a43ab52d729cb869f3baaa62,2018-08-28 18:30:00 UTC,auto,auto,726,725,675,CT,Broad Brook,60,False,False,False,Gas
1,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2018-08-15 12:40:00 UTC,auto,hold,749,755,685,CT,Woodbury,30,False,False,False,Gas
2,7e57d8be21393036f10df57e000165b31fed3576,2018-08-26 07:10:00 UTC,cool,hold,828,840,810,CT,West Haven,20,True,False,True,Electric
3,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-08-28 18:45:00 UTC,cool,hold,728,752,752,CT,Woodstock,10,False,False,False,Gas
4,e9870d26f289a344a43ab52d729cb869f3baaa62,2018-08-21 12:35:00 UTC,auto,auto,711,725,675,CT,Broad Brook,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417989,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2018-08-16 10:30:00 UTC,cool,hold,761,760,760,CT,West Hartford,75,True,False,False,Gas
417990,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2018-08-17 10:00:00 UTC,cool,auto,759,760,760,CT,West Hartford,75,True,False,False,Gas
417991,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2018-08-17 18:55:00 UTC,cool,auto,763,760,760,CT,West Hartford,75,True,False,False,Gas
417992,7d4a998520c8f7836cbde9e5d1321bbe2f945076,2018-08-20 12:15:00 UTC,cool,auto,750,760,760,CT,West Hartford,75,True,False,False,Gas


In [142]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
aug_2018 = aug_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
aug_2018_ave.to_csv(
    path_or_buf="data/day/CT/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the CT "day" extract for August 2019 into a DataFrame.
aug_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2019-aug-day-CT.csv"
)

In [147]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
aug_2019 = aug_2019.loc[~is_outlier].copy()

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-08-25 13:25:00 UTC,cool,hold,724,715,715,CT,Waterford,25,False,False,True,Electric
1,0382f14f425de2d9f9645def17ee3cfeada55f78,2019-08-30 18:10:00 UTC,cool,hold,702,685,685,CT,Waterford,25,False,False,True,Electric
2,b9eb493652b04bdbd9a25c27c17a34f3e00646cd,2019-08-26 19:40:00 UTC,auto,auto,722,737,687,CT,Orange,55,False,False,False,Gas
3,d4db52a911b76755478ebb3cd77a5cfea9e3007b,2019-08-19 12:45:00 UTC,cool,hold,710,712,712,CT,Bloomfield,30,True,False,True,Electric
4,249b0f3298602e0d392735958afe0b536033998b,2019-08-11 15:15:00 UTC,cool,hold,756,780,778,CT,Cromwell,35,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455061,889dcbf43cea92e9262ed9971030d156a71de5f5,2019-08-10 14:35:00 UTC,cool,hold,711,700,700,CT,Mystic,49,False,False,False,Gas
455062,6530ef0335c4d41524283ecfa5d1fc0e1fd25f55,2019-08-05 12:40:00 UTC,auto,hold,805,800,700,CT,Killingworth,15,False,False,False,Gas
455063,b92711525879af78592f41c536de1ace727e4dc2,2019-08-23 10:00:00 UTC,cool,auto,703,700,700,CT,Ellington,15,False,False,False,Gas
455064,528482e17661477da3fa31d0b3d158c6fb72e9aa,2019-08-04 13:55:00 UTC,cool,hold,703,700,700,CT,Woodstock,10,False,False,False,Gas


In [148]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
aug_2019 = aug_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
aug_2019_ave.to_csv(
    path_or_buf="data/day/CT/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the CT "day" extract for August 2020 into a DataFrame.
aug_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2020-aug-day-CT.csv"
)

In [153]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
aug_2020 = aug_2020.loc[~is_outlier].copy()

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2020-08-10 14:55:00 UTC,auto,hold,689,685,635,CT,Windsor Locks,30,False,False,False,Gas
1,1c755336958b8023df4fceca6edfabb466deaef5,2020-08-01 15:10:00 UTC,cool,hold,628,632,632,CT,Mansfield Center,0,True,False,True,Electric
2,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2020-08-27 13:45:00 UTC,auto,hold,687,685,635,CT,Windsor Locks,30,False,False,False,Gas
3,cdc4e0fa4b56e51585b5a93bae44b51150ca4860,2020-08-15 10:25:00 UTC,auto,hold,689,685,635,CT,Windsor Locks,30,False,False,False,Gas
4,0090bb6fc40c84b7f42c125689e22350e6128e3e,2020-08-28 19:05:00 UTC,cool,hold,746,740,731,CT,Monroe,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
462249,966ada58bdafef93eef95ff34ccb024ef8052f62,2020-08-22 16:45:00 UTC,cool,hold,737,760,760,CT,wethersfield,100,False,False,True,Electric
462250,af6867edfa2849e1718a669e29fb2ae5f04852fd,2020-08-25 17:15:00 UTC,cool,auto,734,760,760,CT,West Hartford,56,False,False,False,Gas
462251,af6867edfa2849e1718a669e29fb2ae5f04852fd,2020-08-25 17:35:00 UTC,cool,auto,738,760,760,CT,West Hartford,56,False,False,False,Gas
462252,af6867edfa2849e1718a669e29fb2ae5f04852fd,2020-08-25 17:25:00 UTC,cool,auto,736,760,760,CT,West Hartford,56,False,False,False,Gas


In [154]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
aug_2020 = aug_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
aug_2020_ave.to_csv(
    path_or_buf="data/day/CT/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [158]:
# Collect the names of the CSV files in the August folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/CT/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file named in `files`, then stack them with a single pd.concat.
# Collecting the frames first avoids re-copying the accumulated rows on each
# iteration and avoids concatenating onto an empty DataFrame, which recent
# pandas releases warn about.
monthly_frames = [pd.read_csv("data/day/CT/aug/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the folder holds no CSV files.
CT_aug = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

CT_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,007f9b37f1ac5cfe9806f9a1a47809855e7e31b8,aug,2017,cool,hold,South Windsor,720.473373,720.727811,660.000000,20.0,False,False,False
1,0094e2c467552f4b15765889040931605fb22890,aug,2017,cool,auto,Norwalk,720.490196,718.400000,650.200000,27.0,False,False,False
2,0094e2c467552f4b15765889040931605fb22890,aug,2017,cool,hold,Norwalk,721.850000,710.000000,710.000000,27.0,False,False,False
3,011dae903f5512427598b14bbb3f02baa8813745,aug,2017,auto,auto,Shelton,784.868922,780.002114,620.002114,5.0,False,False,False
4,01440e0a70900810dceaace98a7965a75bc818dc,aug,2017,cool,auto,Guilford,727.825439,740.000000,730.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
613,fe0e87faf88e04d7b4bbaaa926867d237a8c0b80,aug,2020,cool,hold,Westport,742.250000,725.000000,725.000000,40.0,False,False,False
614,fef94e274991646e0f28d91d1aaea3cd709d8825,aug,2020,cool,hold,New Canaan,676.085106,673.591380,619.991271,0.0,False,False,False
615,fefe9a1e7cdca41851d594cf74b2d28a98aca1de,aug,2020,cool,hold,Plantsville,720.607143,705.511905,704.988095,10.0,False,False,False
616,ff498c11fdbb5d698084e0f03a29b69e76ee8566,aug,2020,cool,auto,North Haven,749.138889,751.601852,751.601852,70.0,False,False,False


In [160]:
# Write the combined August table; index=False leaves the row labels out.
CT_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/CT/CT_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the CT "day" extract for December 2017 into a DataFrame.
dec_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2017-dec-day-CT.csv"
)

In [162]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
dec_2017 = dec_2017.loc[~is_outlier].copy()

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2017-12-27 10:35:00 UTC,heat,hold,699,705,705,CT,Woodbury,30,False,False,False,Gas
1,1e6ca6fd677aff1dd39019c596be93cefd8a2ac7,2017-12-14 16:05:00 UTC,heat,hold,660,665,665,CT,Ansonia,25,False,False,False,Gas
3,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2017-12-04 13:25:00 UTC,auto,hold,704,755,705,CT,Woodbury,30,False,False,False,Gas
4,cd8d4e49e9275cb2b70333d2e4f6feab86b3c681,2017-12-19 13:30:00 UTC,heat,auto,674,820,624,CT,killingworth,20,False,False,False,Gas
6,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2017-12-14 10:55:00 UTC,auto,hold,700,755,705,CT,Woodbury,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355910,628c5a78fbbbace955cfd5e8c0e384c28e9c0fd7,2017-12-31 11:30:00 UTC,heat,auto,753,760,760,CT,Norwalk,5,False,False,False,Gas
355911,628c5a78fbbbace955cfd5e8c0e384c28e9c0fd7,2017-12-31 12:40:00 UTC,heat,auto,759,760,760,CT,Norwalk,5,False,False,False,Gas
355912,628c5a78fbbbace955cfd5e8c0e384c28e9c0fd7,2017-12-30 11:35:00 UTC,heat,auto,759,760,760,CT,Norwalk,5,False,False,False,Gas
355913,628c5a78fbbbace955cfd5e8c0e384c28e9c0fd7,2017-12-31 12:15:00 UTC,heat,auto,760,760,760,CT,Norwalk,5,False,False,False,Gas


In [163]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
dec_2017 = dec_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
dec_2017_ave.to_csv(
    path_or_buf="data/day/CT/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the CT "day" extract for December 2018 into a DataFrame.
dec_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2018-dec-day-CT.csv"
)

In [168]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
dec_2018 = dec_2018.loc[~is_outlier].copy()

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e9870d26f289a344a43ab52d729cb869f3baaa62,2018-12-31 14:45:00 UTC,auto,auto,672,725,675,CT,Broad Brook,60,False,False,False,Gas
1,e9870d26f289a344a43ab52d729cb869f3baaa62,2018-12-26 16:50:00 UTC,auto,auto,675,725,675,CT,Broad Brook,60,False,False,False,Gas
2,528482e17661477da3fa31d0b3d158c6fb72e9aa,2018-12-08 16:35:00 UTC,heat,hold,657,662,662,CT,Woodstock,10,False,False,False,Gas
3,e9870d26f289a344a43ab52d729cb869f3baaa62,2018-12-21 17:05:00 UTC,auto,auto,685,725,675,CT,Broad Brook,60,False,False,False,Gas
4,476a76536d2ba66f48e71fa98aeb0ede4d434c87,2018-12-27 12:25:00 UTC,auto,hold,700,775,705,CT,Woodbury,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
486076,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2018-12-27 18:50:00 UTC,auto,hold,749,820,760,CT,Ridgefield,50,False,False,False,Gas
486077,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2018-12-30 17:40:00 UTC,auto,hold,748,820,760,CT,Ridgefield,50,False,False,False,Gas
486078,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2018-12-30 19:10:00 UTC,auto,hold,750,820,760,CT,Ridgefield,50,False,False,False,Gas
486079,a6a33cee140a4ee6cc5c9b875fe15765800fea90,2018-12-27 19:50:00 UTC,auto,hold,762,820,760,CT,Ridgefield,50,False,False,False,Gas


In [169]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
dec_2018 = dec_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
dec_2018_ave.to_csv(
    path_or_buf="data/day/CT/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the CT "day" extract for December 2019 into a DataFrame.
dec_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2019-dec-day-CT.csv"
)

In [174]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
dec_2019 = dec_2019.loc[~is_outlier].copy()

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d4db52a911b76755478ebb3cd77a5cfea9e3007b,2019-12-12 10:50:00 UTC,heat,hold,673,682,682,CT,Bloomfield,30,True,False,True,Electric
1,edd88b332aea2fee543048ec1b27d65c159e4bab,2019-12-12 14:55:00 UTC,heat,hold,703,722,686,CT,Newtown,20,False,False,False,Gas
4,d4db52a911b76755478ebb3cd77a5cfea9e3007b,2019-12-11 18:55:00 UTC,heat,hold,681,682,682,CT,Bloomfield,30,True,False,True,Electric
5,214f28562b2c6edb923553c2da33e34308417be2,2019-12-20 15:30:00 UTC,auto,hold,697,766,702,CT,Colchester,10,False,False,False,Gas
7,35b533b57d9a9b1f34214bef2b0e4373ebce9450,2019-12-13 11:50:00 UTC,heat,hold,627,725,725,CT,Windsor,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
535491,2cc84ecbecd2da4c226c1569268fb8196a51343a,2019-12-19 18:20:00 UTC,heat,hold,757,760,760,CT,Norwalk,80,False,False,False,Gas
535492,2cc84ecbecd2da4c226c1569268fb8196a51343a,2019-12-19 16:00:00 UTC,heat,hold,761,760,760,CT,Norwalk,80,False,False,False,Gas
535493,d844acbe1727e39f17d98fc6505c5afb592cb458,2019-12-23 13:55:00 UTC,heat,hold,695,760,760,CT,Torrington,15,False,False,False,Gas
535494,d844acbe1727e39f17d98fc6505c5afb592cb458,2019-12-23 14:05:00 UTC,heat,hold,694,760,760,CT,Torrington,15,False,False,False,Gas


In [175]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
dec_2019 = dec_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
dec_2019_ave.to_csv(
    path_or_buf="data/day/CT/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the CT "day" extract for December 2020 into a DataFrame.
dec_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CT-day/2020-dec-day-CT.csv"
)

In [180]:
# Discard predetermined outliers before aggregating: a row is removed when
# either expected-temperature column is >= 850 or <= 600.
setpoint_cols = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoint_cols >= 850) | (setpoint_cols <= 600)).any(axis=1)

# .copy() makes the filtered frame independent, so adding columns to it later
# does not trigger SettingWithCopyWarning.
dec_2020 = dec_2020.loc[~is_outlier].copy()

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dce9e39382dc10df015dc1225d61261879f8b3c6,2020-12-26 17:30:00 UTC,heat,hold,685,761,657,CT,Prospect,65,False,False,False,Gas
1,f4091c17f6d7dfa998f53a3e0159dbb214355c7f,2020-12-03 17:30:00 UTC,heat,hold,700,691,691,CT,Enfield,60,False,False,False,Gas
2,564c6aec11077a013c08daef6f741cea54a99227,2020-12-20 12:20:00 UTC,heat,hold,704,703,703,CT,Southbury,40,False,False,False,Gas
3,ced1ff247136fb45b6f827deb5cb0c894d162471,2020-12-07 17:00:00 UTC,heat,hold,689,685,685,CT,Prospect,65,False,False,False,Gas
4,dd42deba452d2cea8e2ddb632972e864194a15b9,2020-12-08 15:35:00 UTC,heat,auto,648,650,644,CT,Madison,50,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
507855,880c33e614e002f07f231be13c494d208eccd723,2020-12-04 14:35:00 UTC,heat,auto,700,760,760,CT,Hamden,50,True,False,False,Gas
507856,5b1787be6399cd216b5d7b415423412db93e0395,2020-12-02 13:05:00 UTC,heat,hold,698,760,760,CT,Fairfield,90,False,False,False,Gas
507857,5b1787be6399cd216b5d7b415423412db93e0395,2020-12-02 14:45:00 UTC,heat,hold,711,760,760,CT,Fairfield,90,False,False,False,Gas
507858,5b1787be6399cd216b5d7b415423412db93e0395,2020-12-02 13:40:00 UTC,heat,hold,706,760,760,CT,Fairfield,90,False,False,False,Gas


In [181]:
# Label every row with its source year and month; both labels are used as
# grouping keys when the readings are averaged.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# makes clear that these values are averages.
dec_2020 = dec_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged: pandas < 2.0 dropped them silently, pandas >= 2.0 raises TypeError.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Save the grouped averages. index=True writes the group keys (held in the
# index) as the leading columns of the file.
dec_2020_ave.to_csv(
    path_or_buf="data/day/CT/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [185]:
# Collect the names of the CSV files in the December folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/CT/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file named in `files`, then stack them with a single pd.concat.
# Collecting the frames first avoids re-copying the accumulated rows on each
# iteration and avoids concatenating onto an empty DataFrame, which recent
# pandas releases warn about.
monthly_frames = [pd.read_csv("data/day/CT/dec/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the folder holds no CSV files.
CT_dec = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

CT_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,007f9b37f1ac5cfe9806f9a1a47809855e7e31b8,dec,2017,heat,hold,South Windsor,648.299346,650.928105,649.856863,20.0,False,False,False
1,011dae903f5512427598b14bbb3f02baa8813745,dec,2017,auto,auto,Shelton,637.965116,779.924419,620.148256,5.0,False,False,False
2,011dae903f5512427598b14bbb3f02baa8813745,dec,2017,auto,hold,Shelton,660.357143,760.000000,660.000000,5.0,False,False,False
3,01440e0a70900810dceaace98a7965a75bc818dc,dec,2017,heat,auto,Guilford,651.019200,702.430400,648.147520,0.0,False,False,False
4,01440e0a70900810dceaace98a7965a75bc818dc,dec,2017,heat,hold,Guilford,672.093750,698.343750,697.718750,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
696,fdce2932511e0bf91c9516b4f56bba7ca424f147,dec,2020,auto,auto,Shelton,669.600000,760.000000,640.000000,5.0,False,False,False
697,fdce2932511e0bf91c9516b4f56bba7ca424f147,dec,2020,auto,hold,Shelton,668.000000,750.000000,678.500000,5.0,False,False,False
698,fef94e274991646e0f28d91d1aaea3cd709d8825,dec,2020,heat,hold,New Canaan,625.842541,650.000000,616.223757,0.0,False,False,False
699,ff498c11fdbb5d698084e0f03a29b69e76ee8566,dec,2020,heat,auto,North Haven,692.695067,693.360987,693.360987,70.0,False,False,False


In [187]:
# Write the combined December table; index=False leaves the row labels out.
CT_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/CT/CT_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the names of the combined month CSV files in the state folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible between runs.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/CT/") if f.endswith(".csv")
)

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file named in `files`, then stack them with a single pd.concat.
# Collecting the frames first avoids re-copying the accumulated rows on each
# iteration and avoids concatenating onto an empty DataFrame, which recent
# pandas releases warn about.
month_frames = [
    pd.read_csv("Scraper_Output/State_Month_Day/CT/" + file) for file in files
]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the folder holds no CSV files.
CT_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

CT_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,007f9b37f1ac5cfe9806f9a1a47809855e7e31b8,aug,2017,cool,hold,South Windsor,720.473373,720.727811,660.000000,20.0,False,False,False
1,0094e2c467552f4b15765889040931605fb22890,aug,2017,cool,auto,Norwalk,720.490196,718.400000,650.200000,27.0,False,False,False
2,0094e2c467552f4b15765889040931605fb22890,aug,2017,cool,hold,Norwalk,721.850000,710.000000,710.000000,27.0,False,False,False
3,011dae903f5512427598b14bbb3f02baa8813745,aug,2017,auto,auto,Shelton,784.868922,780.002114,620.002114,5.0,False,False,False
4,01440e0a70900810dceaace98a7965a75bc818dc,aug,2017,cool,auto,Guilford,727.825439,740.000000,730.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2738,fc58d08fd7038c94b9cfcb3287a2bbb6e095745f,jun,2021,cool,hold,Shelton,711.339921,710.000000,710.000000,25.0,False,False,False
2739,fc92f3d7ab046b1e0482835f5cf10e18372dc434,jun,2021,cool,hold,Fairfield,694.968354,687.664557,687.145570,40.0,True,False,True
2740,fef94e274991646e0f28d91d1aaea3cd709d8825,jun,2021,cool,hold,New Canaan,650.846603,647.909746,647.802334,0.0,False,False,False
2741,ff498c11fdbb5d698084e0f03a29b69e76ee8566,jun,2021,cool,hold,North Haven,715.425000,722.812500,722.812500,70.0,False,False,False


In [190]:
# Write the all-months table for the state; index=False leaves the row
# labels out.
CT_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/CT_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery
# SQL queries: print the distinct ProvinceState values of every monthly frame.
checked_frames = [
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
]

# One line per frame, in the order listed above.
for label, frame in checked_frames:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['CT']
Unique jan_2018: ['CT']
Unique jan_2019: ['CT']
Unique jan_2020: ['CT']
Unique jan_2021: ['CT']
Unique feb_2017: ['CT']
Unique feb_2018: ['CT']
Unique feb_2019: ['CT']
Unique feb_2020: ['CT']
Unique feb_2021: ['CT']
Unique jun_2017: ['CT']
Unique jun_2018: ['CT']
Unique jun_2019: ['CT']
Unique jun_2020: ['CT']
Unique jun_2021: ['CT']
Unique jul_2017: ['CT']
Unique jul_2018: ['CT']
Unique jul_2019: ['CT']
Unique jul_2020: ['CT']
Unique jul_2021: ['CT']
Unique aug_2017: ['CT']
Unique aug_2018: ['CT']
Unique aug_2019: ['CT']
Unique aug_2020: ['CT']
Unique dec_2017: ['CT']
Unique dec_2018: ['CT']
Unique dec_2019: ['CT']
Unique dec_2020: ['CT']
