# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 day-period CSV for DE.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2017-jan-day-DE.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = jan_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2017 = jan_2017.loc[~out_of_range].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-01-05 16:20:00 UTC,heat,auto,661,653,653,DE,Newark,15,False,False,False,Gas
2,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-01-03 19:30:00 UTC,heat,auto,666,653,653,DE,Newark,15,False,False,False,Gas
4,e33f9f93ce853244445f8a18ad1de745868d5126,2017-01-14 18:55:00 UTC,heat,hold,661,665,665,DE,Hockessin,25,False,False,False,Gas
6,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-01-03 14:05:00 UTC,heat,auto,654,653,653,DE,Newark,15,False,False,False,Gas
7,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-01-14 19:00:00 UTC,heat,hold,651,671,671,DE,Newark,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43353,c9c0cbe0ff333388b6c45df3a06ec1d707f372aa,2017-01-13 16:55:00 UTC,heat,auto,722,750,750,DE,Townsend,10,False,False,False,Gas
43354,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-01-10 18:40:00 UTC,heat,auto,745,750,750,DE,Townsend,5,False,False,False,Gas
43355,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-01-10 18:45:00 UTC,heat,auto,756,750,750,DE,Townsend,5,False,False,False,Gas
43356,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-01-10 19:15:00 UTC,heat,auto,748,750,750,DE,Townsend,5,False,False,False,Gas


In [4]:
# Tag every row with its year and month; both become group keys in the averaging step.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
jan_2017_ave = jan_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
21928216c72d02f4395d9585af619b190c11ed47,Jan,2017,auxHeatOnly,auto,Claymont,719.223399,711.618199,711.618199,115.0,False,False,True
21928216c72d02f4395d9585af619b190c11ed47,Jan,2017,auxHeatOnly,hold,Claymont,714.428571,707.804608,707.804608,115.0,False,False,True
21a8021fbd2bc2e837a130d42a96082c5b166d78,Jan,2017,heat,hold,Middletown,699.868852,735.393443,684.245902,30.0,False,False,False
2a56b6a9368fcce30b408077c4ac2287cc8ba96b,Jan,2017,heat,auto,Camden Wyoming,698.738235,699.814706,698.867647,10.0,False,False,False
2a56b6a9368fcce30b408077c4ac2287cc8ba96b,Jan,2017,heat,hold,Camden Wyoming,698.835616,708.452055,704.013699,10.0,False,False,False
321ba24265dfd2afb6e168f39f5d428954b286c9,Jan,2017,heat,auto,Townsend,708.089243,707.898438,707.898438,5.0,False,False,False
321ba24265dfd2afb6e168f39f5d428954b286c9,Jan,2017,heat,hold,Townsend,727.963235,729.988971,729.988971,5.0,False,False,False
35b56a84063caa816fc10756456f40b63b8ba7b6,Jan,2017,heat,auto,Middletown,663.133333,693.2,669.466667,16.0,False,False,False
37b36edf77b0ca8d1b343f751c62092cff706c5d,Jan,2017,heat,auto,Hockessin,657.956522,697.950311,660.68323,55.0,False,False,True
3eeccb5f11385e084be41f643b2dbdf9f75be0d1,Jan,2017,auto,auto,Long Neck,646.722222,780.0,650.0,15.0,True,False,False


In [7]:
# Write the January 2017 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
jan_2017_ave.to_csv(path_or_buf="data/day/DE/jan/jan_2017_ave.csv", index=True, header=True)

### 2018 January Day

In [8]:
# Load the January 2018 day-period CSV for DE.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2018-jan-day-DE.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = jan_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2018 = jan_2018.loc[~out_of_range].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,789215502796ec24f3fa4468f7ae8fa003aaf5bd,2018-01-14 16:20:00 UTC,auto,auto,714,765,715,DE,Middletown,25,False,False,False,Gas
1,789215502796ec24f3fa4468f7ae8fa003aaf5bd,2018-01-14 15:00:00 UTC,auto,auto,708,765,715,DE,Middletown,25,False,False,False,Gas
2,f98c03de48bff902800d8778506362f25763a3b0,2018-01-14 16:05:00 UTC,heat,auto,685,688,688,DE,Bethany Beach,48,False,False,False,Gas
3,8390aeb500634a103c845d1be7bbcec9c32524a8,2018-01-23 17:05:00 UTC,heat,auto,662,810,665,DE,Wilmington,65,False,False,False,Gas
4,cab48140b2481c3ec5f722d2b8eb5712fca9525f,2018-01-15 14:20:00 UTC,heat,hold,723,797,692,DE,Milford,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97036,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2018-01-05 13:50:00 UTC,auxHeatOnly,hold,651,750,750,DE,Claymont,0,False,False,True,Electric
97037,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2018-01-05 14:15:00 UTC,auxHeatOnly,hold,655,750,750,DE,Claymont,0,False,False,True,Electric
97038,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2018-01-05 15:50:00 UTC,auxHeatOnly,hold,681,750,750,DE,Claymont,0,False,False,True,Electric
97039,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2018-01-05 19:35:00 UTC,auxHeatOnly,hold,741,750,750,DE,Claymont,0,False,False,True,Electric


In [10]:
# Tag every row with its year and month; both become group keys in the averaging step.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
jan_2018_ave = jan_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [13]:
# Write the January 2018 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
jan_2018_ave.to_csv(path_or_buf="data/day/DE/jan/jan_2018_ave.csv", index=True, header=True)

### 2019 January Day

In [14]:
# Load the January 2019 day-period CSV for DE.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2019-jan-day-DE.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = jan_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2019 = jan_2019.loc[~out_of_range].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c06e749f8d636d36f2b8403770643682ba3a1dd7,2019-01-04 19:05:00 UTC,auto,hold,716,765,715,DE,Rehoboth Beach,10,False,False,False,Gas
1,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-01-27 18:10:00 UTC,heat,auto,731,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
2,c06e749f8d636d36f2b8403770643682ba3a1dd7,2019-01-04 14:20:00 UTC,auto,hold,711,765,715,DE,Rehoboth Beach,10,False,False,False,Gas
3,c06e749f8d636d36f2b8403770643682ba3a1dd7,2019-01-05 18:35:00 UTC,auto,hold,714,765,715,DE,Rehoboth Beach,10,False,False,False,Gas
4,6c0521ad14d0a889e4eda839e4b802054c4f5831,2019-01-28 14:50:00 UTC,heat,auto,690,689,689,DE,Newark,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132934,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2019-01-13 11:50:00 UTC,auto,auto,744,810,750,DE,Ocean View,5,False,False,False,Gas
132935,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2019-01-13 12:45:00 UTC,auto,auto,744,810,750,DE,Ocean View,5,False,False,False,Gas
132936,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2019-01-13 12:30:00 UTC,auto,auto,744,810,750,DE,Ocean View,5,False,False,False,Gas
132937,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2019-01-01 18:35:00 UTC,auto,auto,722,770,750,DE,Seaford,5,True,False,True,Electric


In [16]:
# Tag every row with its year and month; both become group keys in the averaging step.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
jan_2019_ave = jan_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [19]:
# Write the January 2019 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
jan_2019_ave.to_csv(path_or_buf="data/day/DE/jan/jan_2019_ave.csv", index=True, header=True)

### 2020 January Day

In [20]:
# Load the January 2020 day-period CSV for DE.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2020-jan-day-DE.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = jan_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2020 = jan_2020.loc[~out_of_range].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2020-01-18 17:25:00 UTC,heat,auto,727,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
1,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2020-01-27 13:25:00 UTC,heat,auto,711,738,702,DE,Camden Wyoming,10,False,False,False,Gas
2,0ad826a253a907f5946f8402db4a56a04d9f0e69,2020-01-18 15:40:00 UTC,auto,auto,671,783,678,DE,Wilmington,48,False,False,False,Gas
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2020-01-18 14:10:00 UTC,heat,auto,723,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
4,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2020-01-18 19:10:00 UTC,heat,auto,730,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151688,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2020-01-31 19:15:00 UTC,heat,hold,752,760,760,DE,Wilmington,70,False,False,False,Gas
151689,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2020-01-11 14:00:00 UTC,heat,auto,754,720,760,DE,Wilmington,70,False,False,False,Gas
151690,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2020-01-10 17:55:00 UTC,heat,auto,755,720,760,DE,Wilmington,70,False,False,False,Gas
151691,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2020-01-30 15:55:00 UTC,heat,hold,758,760,760,DE,Wilmington,70,False,False,False,Gas


In [22]:
# Tag every row with its year and month; both become group keys in the averaging step.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
jan_2020_ave = jan_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [25]:
# Write the January 2020 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
jan_2020_ave.to_csv(path_or_buf="data/day/DE/jan/jan_2020_ave.csv", index=True, header=True)

### 2021 January Day

In [26]:
# Load the January 2021 day-period CSV for DE.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2021-jan-day-DE.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = jan_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2021 = jan_2021.loc[~out_of_range].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,eb43e2be5e191587815d3ec7c8f0fe6c8210dcf5,2021-01-23 09:55:00 UTC,heat,hold,673,700,675,DE,Dover,25,False,False,False,Gas
1,ab1ae2944a11c23b331a730b140a343e2172018c,2021-01-10 14:45:00 UTC,heat,hold,673,675,675,DE,Houston,20,False,False,True,Electric
2,eb43e2be5e191587815d3ec7c8f0fe6c8210dcf5,2021-01-12 19:30:00 UTC,heat,hold,670,700,675,DE,Dover,25,False,False,False,Gas
3,eb43e2be5e191587815d3ec7c8f0fe6c8210dcf5,2021-01-29 11:50:00 UTC,heat,hold,672,700,675,DE,Dover,25,False,False,False,Gas
4,ab1ae2944a11c23b331a730b140a343e2172018c,2021-01-23 17:25:00 UTC,heat,hold,674,675,675,DE,Houston,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85054,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2021-01-29 15:20:00 UTC,auxHeatOnly,hold,648,750,750,DE,Claymont,0,False,False,True,Electric
85055,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2021-01-29 14:15:00 UTC,auxHeatOnly,hold,644,750,750,DE,Claymont,0,False,False,True,Electric
85056,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2021-01-29 15:00:00 UTC,auxHeatOnly,hold,643,750,750,DE,Claymont,0,False,False,True,Electric
85057,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,2021-01-29 14:30:00 UTC,auxHeatOnly,hold,645,750,750,DE,Claymont,0,False,False,True,Electric


In [28]:
# Tag every row with its year and month; both become group keys in the averaging step.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
jan_2021_ave = jan_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [31]:
# Write the January 2021 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
jan_2021_ave.to_csv(path_or_buf="data/day/DE/jan/jan_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV files present in the January output folder.
# os.listdir returns names in arbitrary order, so sort them to make the
# combined row order reproducible across runs and platforms.
files = sorted(f for f in os.listdir("data/day/DE/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file and concatenate them in a single call; growing a
# frame with pd.concat inside a loop re-copies the accumulated rows each pass.
frames = [pd.read_csv("data/day/DE/jan/" + file) for file in files]
# pd.concat raises on an empty list, so fall back to an empty frame.
DE_jan = pd.concat(frames) if frames else pd.DataFrame()

DE_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,21928216c72d02f4395d9585af619b190c11ed47,Jan,2017,auxHeatOnly,auto,Claymont,719.223399,711.618199,711.618199,115.0,False,False,True
1,21928216c72d02f4395d9585af619b190c11ed47,Jan,2017,auxHeatOnly,hold,Claymont,714.428571,707.804608,707.804608,115.0,False,False,True
2,21a8021fbd2bc2e837a130d42a96082c5b166d78,Jan,2017,heat,hold,Middletown,699.868852,735.393443,684.245902,30.0,False,False,False
3,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,Jan,2017,heat,auto,Camden Wyoming,698.738235,699.814706,698.867647,10.0,False,False,False
4,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,Jan,2017,heat,hold,Camden Wyoming,698.835616,708.452055,704.013699,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,Jan,2021,auxHeatOnly,hold,Claymont,679.007789,713.659565,678.625659,0.0,False,False,True
85,f1c31e2900d9c0cadbba9552cffcdeca139f6d71,Jan,2021,heat,hold,Claymont,649.333333,734.000000,726.000000,0.0,False,False,True
86,f4adc8f627afc3d4f0facd64851ab8bd720435b7,Jan,2021,heat,hold,Wilmington,688.054312,690.833730,690.823726,50.0,False,False,False
87,f8e940d02995305f21d82e5169cd1cd514fe7cfc,Jan,2021,auto,hold,Hockessin,737.218182,820.000000,740.000000,7.0,False,False,False


In [34]:
# Save the combined January averages; index=False leaves the row index out of the file.
DE_jan.to_csv(path_or_buf="Scraper_Output/State_Month_Day/DE/DE_jan.csv", index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 day-period CSV for DE.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2017-feb-day-DE.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = feb_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2017 = feb_2017.loc[~out_of_range].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-02-11 12:50:00 UTC,heat,auto,685,690,690,DE,Townsend,5,False,False,False,Gas
1,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-02-21 17:40:00 UTC,heat,auto,720,690,690,DE,Townsend,5,False,False,False,Gas
2,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-02-08 19:30:00 UTC,heat,auto,714,690,690,DE,Townsend,5,False,False,False,Gas
3,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-02-08 15:05:00 UTC,heat,auto,707,690,690,DE,Townsend,5,False,False,False,Gas
4,321ba24265dfd2afb6e168f39f5d428954b286c9,2017-02-11 19:50:00 UTC,heat,auto,689,690,690,DE,Townsend,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
36405,21928216c72d02f4395d9585af619b190c11ed47,2017-02-22 17:30:00 UTC,auxHeatOnly,auto,750,760,760,DE,Claymont,115,False,False,True,Electric
36406,21928216c72d02f4395d9585af619b190c11ed47,2017-02-23 13:05:00 UTC,auxHeatOnly,auto,756,760,760,DE,Claymont,115,False,False,True,Electric
36407,21928216c72d02f4395d9585af619b190c11ed47,2017-02-20 13:55:00 UTC,auxHeatOnly,auto,758,760,760,DE,Claymont,115,False,False,True,Electric
36408,21928216c72d02f4395d9585af619b190c11ed47,2017-02-21 18:55:00 UTC,auxHeatOnly,auto,765,760,760,DE,Claymont,115,False,False,True,Electric


In [37]:
# Tag every row with its year and month; both become group keys in the averaging step.
# NOTE(review): the January cells label the month "Jan" while the February
# cells use lowercase "feb" — confirm downstream consumers expect the mixed casing.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
feb_2017_ave = feb_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [40]:
# Write the February 2017 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
feb_2017_ave.to_csv(path_or_buf="data/day/DE/feb/feb_2017_ave.csv", index=True, header=True)

### 2018 February Day

In [41]:
# Load the February 2018 day-period CSV for DE.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2018-feb-day-DE.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = feb_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2018 = feb_2018.loc[~out_of_range].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3eeccb5f11385e084be41f643b2dbdf9f75be0d1,2018-02-01 13:45:00 UTC,auto,auto,662,800,603,DE,Long Neck,15,True,False,False,Gas
1,5ffed153364c0d5a1c1b6e3c82bd141912d4b0c7,2018-02-04 19:50:00 UTC,auto,hold,658,735,665,DE,Middletown,10,False,False,False,Gas
2,0f4f6f6ec2bd31e06c5897e74c6b06e789bbd188,2018-02-11 14:40:00 UTC,heat,auto,675,702,676,DE,Newark,25,False,False,False,Gas
4,18f7ccf6b435622cb968d6da44cda99f382171bf,2018-02-16 19:55:00 UTC,heat,auto,691,766,669,DE,Camden Wyoming,27,False,False,True,Electric
7,5ffed153364c0d5a1c1b6e3c82bd141912d4b0c7,2018-02-05 15:00:00 UTC,auto,hold,700,735,665,DE,Middletown,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94334,05f17d52e1db8e1d1c118b4d867161f3429fce55,2018-02-23 17:20:00 UTC,heat,auto,687,690,690,DE,New Castle,20,False,False,False,Gas
94335,05f17d52e1db8e1d1c118b4d867161f3429fce55,2018-02-07 19:30:00 UTC,heat,auto,684,690,690,DE,New Castle,20,False,False,False,Gas
94336,05f17d52e1db8e1d1c118b4d867161f3429fce55,2018-02-23 17:35:00 UTC,heat,auto,685,690,690,DE,New Castle,20,False,False,False,Gas
94337,05f17d52e1db8e1d1c118b4d867161f3429fce55,2018-02-07 19:45:00 UTC,heat,auto,689,690,690,DE,New Castle,20,False,False,False,Gas


In [43]:
# Tag every row with its year and month; both become group keys in the averaging step.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
feb_2018_ave = feb_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [46]:
# Write the February 2018 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
feb_2018_ave.to_csv(path_or_buf="data/day/DE/feb/feb_2018_ave.csv", index=True, header=True)

### 2019 February Day

In [47]:
# Load the February 2019 day-period CSV for DE.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2019-feb-day-DE.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = feb_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2019 = feb_2019.loc[~out_of_range].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-02-15 19:00:00 UTC,heat,auto,728,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
1,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-02-15 17:05:00 UTC,heat,auto,729,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
2,6c0521ad14d0a889e4eda839e4b802054c4f5831,2019-02-06 19:15:00 UTC,heat,auto,696,689,707,DE,Newark,15,False,False,False,Gas
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-02-16 16:55:00 UTC,heat,auto,731,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
4,21a8021fbd2bc2e837a130d42a96082c5b166d78,2019-02-05 12:55:00 UTC,heat,auto,677,745,675,DE,Middletown,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94392,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-02-25 14:50:00 UTC,auto,hold,756,820,760,DE,Wilmington,70,False,False,False,Gas
94393,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-02-25 12:10:00 UTC,auto,hold,754,820,760,DE,Wilmington,70,False,False,False,Gas
94394,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-02-25 13:05:00 UTC,auto,hold,761,820,760,DE,Wilmington,70,False,False,False,Gas
94395,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-02-25 12:20:00 UTC,auto,hold,758,820,760,DE,Wilmington,70,False,False,False,Gas


In [49]:
# Tag every row with its year and month; both become group keys in the averaging step.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
feb_2019_ave = feb_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [52]:
# Write the February 2019 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
feb_2019_ave.to_csv(path_or_buf="data/day/DE/feb/feb_2019_ave.csv", index=True, header=True)

### 2020 February Day

In [53]:
# Load the February 2020 day-period CSV for DE.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2020-feb-day-DE.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = feb_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2020 = feb_2020.loc[~out_of_range].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2020-02-04 16:25:00 UTC,heat,auto,696,729,711,DE,Camden Wyoming,10,False,False,False,Gas
3,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2020-02-22 19:15:00 UTC,heat,auto,733,732,732,DE,Camden Wyoming,10,False,False,False,Gas
5,26e12cb4e44a02ae943a9d8a3595e598f11750a4,2020-02-15 14:55:00 UTC,heat,auto,629,650,604,DE,Newark,80,False,False,False,Gas
6,c9c0cbe0ff333388b6c45df3a06ec1d707f372aa,2020-02-15 15:50:00 UTC,heat,hold,717,682,682,DE,Townsend,10,False,False,False,Gas
11,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2020-02-25 16:25:00 UTC,heat,auto,717,765,675,DE,Camden Wyoming,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150197,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2020-02-16 17:55:00 UTC,heat,hold,700,700,700,DE,Wilmington,50,False,False,False,Gas
150198,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2020-02-16 17:00:00 UTC,heat,hold,698,700,700,DE,Wilmington,50,False,False,False,Gas
150199,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2020-02-02 19:15:00 UTC,heat,hold,694,700,700,DE,Wilmington,50,False,False,False,Gas
150200,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2020-02-16 18:05:00 UTC,heat,hold,696,700,700,DE,Wilmington,50,False,False,False,Gas


In [55]:
# Tag every row with its year and month; both become group keys in the averaging step.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
feb_2020_ave = feb_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [58]:
# Write the February 2020 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
feb_2020_ave.to_csv(path_or_buf="data/day/DE/feb/feb_2020_ave.csv", index=True, header=True)

### 2021 February Day

In [59]:
# Load the February 2021 day-period CSV for DE.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/DE-day/2021-feb-day-DE.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected_temps = feb_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2021 = feb_2021.loc[~out_of_range].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,22d1d3b362a7b650679adf4fc2c4b9c8eedee8ac,2021-02-24 17:30:00 UTC,heat,hold,659,650,625,DE,Newark,40,True,False,True,Electric
2,22d1d3b362a7b650679adf4fc2c4b9c8eedee8ac,2021-02-24 15:55:00 UTC,heat,hold,663,650,625,DE,Newark,40,True,False,True,Electric
3,e22503ef543b569f72199160f7ecfc0f59eef587,2021-02-13 19:45:00 UTC,heat,hold,673,675,675,DE,Dover,10,False,False,True,Electric
10,22d1d3b362a7b650679adf4fc2c4b9c8eedee8ac,2021-02-23 19:15:00 UTC,heat,hold,692,650,635,DE,Newark,40,True,False,True,Electric
14,22d1d3b362a7b650679adf4fc2c4b9c8eedee8ac,2021-02-24 16:55:00 UTC,heat,hold,659,650,625,DE,Newark,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72887,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2021-02-23 18:50:00 UTC,heat,hold,750,750,750,DE,Wilmington,70,False,False,False,Gas
72888,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2021-02-24 19:10:00 UTC,heat,hold,749,750,750,DE,Wilmington,70,False,False,False,Gas
72889,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2021-02-23 18:00:00 UTC,heat,hold,747,750,750,DE,Wilmington,70,False,False,False,Gas
72890,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2021-02-24 11:55:00 UTC,heat,hold,745,750,750,DE,Wilmington,70,False,False,False,Gas


In [61]:
# Tag every row with its year and month; both become group keys in the averaging step.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" to label them as aggregates.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them instead of silently leaving them out of the mean.
feb_2021_ave = feb_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [64]:
# Write the February 2021 averages to disk; index=True keeps the group keys
# (held in the index) as columns of the file.
feb_2021_ave.to_csv(path_or_buf="data/day/DE/feb/feb_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV files present in the February output folder.
# os.listdir returns names in arbitrary order, so sort them to make the
# combined row order reproducible across runs and platforms.
files = sorted(f for f in os.listdir("data/day/DE/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file and concatenate them in a single call; growing a
# frame with pd.concat inside a loop re-copies the accumulated rows each pass.
frames = [pd.read_csv("data/day/DE/feb/" + file) for file in files]
# pd.concat raises on an empty list, so fall back to an empty frame.
DE_feb = pd.concat(frames) if frames else pd.DataFrame()

DE_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,21928216c72d02f4395d9585af619b190c11ed47,feb,2017,auxHeatOnly,auto,Claymont,747.299229,752.005957,752.005957,115.0,False,False,True
1,21a8021fbd2bc2e837a130d42a96082c5b166d78,feb,2017,heat,auto,Middletown,669.035714,670.000000,670.000000,30.0,False,False,False
2,21a8021fbd2bc2e837a130d42a96082c5b166d78,feb,2017,heat,hold,Middletown,676.230769,675.256410,675.256410,30.0,False,False,False
3,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,feb,2017,heat,auto,Camden Wyoming,699.158416,705.227723,704.633663,10.0,False,False,False
4,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,feb,2017,heat,hold,Camden Wyoming,694.493243,698.425676,697.209459,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
82,f4adc8f627afc3d4f0facd64851ab8bd720435b7,feb,2021,heat,hold,Wilmington,676.325797,679.231304,678.749565,50.0,False,False,False
83,f8e940d02995305f21d82e5169cd1cd514fe7cfc,feb,2021,auto,hold,Hockessin,736.000000,820.000000,747.000000,7.0,False,False,False
84,f912367502ddecab35f132f60dfa4a4f8ee8aa9f,feb,2021,heat,hold,Newark,670.525355,679.937120,679.811359,60.0,False,False,False
85,f93eb6907201bce993eca365e1f674e535452109,feb,2021,heat,hold,Ocean View,697.753425,679.424658,678.520548,15.0,False,False,True


In [67]:
# Write the combined February frame for DE; the row index is left out.
DE_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DE/DE_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 readings for DE.
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2017-jun-day-DE.csv",
)

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017.loc[
        jun_2017['TemperatureExpectedCool'].ge(850)
        | jun_2017['TemperatureExpectedHeat'].ge(850)
        | jun_2017['TemperatureExpectedCool'].le(600)
        | jun_2017['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
5,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-06-25 17:30:00 UTC,cool,hold,795,797,797,DE,Newark,15,False,False,False,Gas
12,4228fa99f653c336280a15a438be7cc17a035794,2017-06-28 16:25:00 UTC,auto,auto,719,724,666,DE,Middletown,10,False,False,False,Gas
21,9d30d04feccfcdd6bf548779873d9ac979298cff,2017-06-18 18:45:00 UTC,cool,hold,735,731,722,DE,Camden Wyoming,27,False,False,False,Gas
54,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-06-25 18:55:00 UTC,cool,hold,791,797,797,DE,Newark,15,False,False,False,Gas
79,d9c2a9a0efed7e21cd5774c78067eb7b94fd883e,2017-06-17 17:50:00 UTC,cool,hold,761,779,684,DE,Hockessin,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58040,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-06-02 18:45:00 UTC,cool,hold,764,760,760,DE,Seaford,5,True,False,True,Electric
58041,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-06-02 18:35:00 UTC,cool,hold,760,760,760,DE,Seaford,5,True,False,True,Electric
58042,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-06-23 19:00:00 UTC,cool,auto,762,760,760,DE,Seaford,5,True,False,True,Electric
58043,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-06-02 18:20:00 UTC,cool,hold,755,760,760,DE,Seaford,5,True,False,True,Electric


In [70]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2017"), ("Month", "jun")):
    jun_2017[label] = value

In [71]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jun_2017 = jun_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [72]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Save the June 2017 averages; the index is written because it holds the
# grouping keys.
jun_2017_ave.to_csv(
    path_or_buf="data/day/DE/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw June 2018 readings for DE.
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2018-jun-day-DE.csv",
)

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018.loc[
        jun_2018['TemperatureExpectedCool'].ge(850)
        | jun_2018['TemperatureExpectedHeat'].ge(850)
        | jun_2018['TemperatureExpectedCool'].le(600)
        | jun_2018['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-06-22 17:20:00 UTC,cool,auto,725,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
1,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-06-22 13:40:00 UTC,cool,auto,724,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
2,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-06-16 13:05:00 UTC,cool,auto,710,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
3,e74d2913ea65e856bd587ec7e59018c162cac2e8,2018-06-28 12:45:00 UTC,auto,auto,723,722,672,DE,Newark,30,False,False,False,Gas
4,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-06-21 18:20:00 UTC,cool,auto,735,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139556,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2018-06-14 11:20:00 UTC,cool,auto,739,760,760,DE,Wilmington,50,False,False,False,Gas
139557,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2018-06-11 11:55:00 UTC,cool,auto,690,760,760,DE,Wilmington,50,False,False,False,Gas
139558,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2018-06-17 11:45:00 UTC,cool,auto,739,760,760,DE,Wilmington,50,False,False,False,Gas
139559,f4adc8f627afc3d4f0facd64851ab8bd720435b7,2018-06-15 12:10:00 UTC,cool,auto,734,760,760,DE,Wilmington,50,False,False,False,Gas


In [76]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2018"), ("Month", "jun")):
    jun_2018[label] = value

In [77]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jun_2018 = jun_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [78]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Save the June 2018 averages; the index is written because it holds the
# grouping keys.
jun_2018_ave.to_csv(
    path_or_buf="data/day/DE/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw June 2019 readings for DE.
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2019-jun-day-DE.csv",
)

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019.loc[
        jun_2019['TemperatureExpectedCool'].ge(850)
        | jun_2019['TemperatureExpectedHeat'].ge(850)
        | jun_2019['TemperatureExpectedCool'].le(600)
        | jun_2019['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c06e749f8d636d36f2b8403770643682ba3a1dd7,2019-06-08 11:30:00 UTC,cool,hold,721,735,735,DE,Rehoboth Beach,10,False,False,False,Gas
1,31c53ad9cd3a77a664496763de2bd953f64719be,2019-06-10 11:35:00 UTC,cool,auto,710,713,663,DE,Newark,20,False,False,False,Gas
2,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-06-29 19:05:00 UTC,cool,auto,738,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-06-24 11:10:00 UTC,cool,auto,718,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
4,e22503ef543b569f72199160f7ecfc0f59eef587,2019-06-30 19:35:00 UTC,cool,hold,722,715,715,DE,Dover,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172143,ffc31260f43f93640ab078897b9d432400345a5e,2019-06-09 12:40:00 UTC,cool,auto,740,760,760,DE,Bear,17,False,False,False,Gas
172144,ffc31260f43f93640ab078897b9d432400345a5e,2019-06-09 12:05:00 UTC,cool,auto,738,760,760,DE,Bear,17,False,False,False,Gas
172145,ffc31260f43f93640ab078897b9d432400345a5e,2019-06-22 12:05:00 UTC,cool,auto,762,760,760,DE,Bear,17,False,False,False,Gas
172146,ffc31260f43f93640ab078897b9d432400345a5e,2019-06-22 18:55:00 UTC,cool,auto,758,760,760,DE,Bear,17,False,False,False,Gas


In [82]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2019"), ("Month", "jun")):
    jun_2019[label] = value

In [83]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jun_2019 = jun_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [84]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Save the June 2019 averages; the index is written because it holds the
# grouping keys.
jun_2019_ave.to_csv(
    path_or_buf="data/day/DE/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw June 2020 readings for DE.
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2020-jun-day-DE.csv",
)

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020.loc[
        jun_2020['TemperatureExpectedCool'].ge(850)
        | jun_2020['TemperatureExpectedHeat'].ge(850)
        | jun_2020['TemperatureExpectedCool'].le(600)
        | jun_2020['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d7515e02bf55abc53e97df87785c4162654a4bcc,2020-06-07 18:15:00 UTC,cool,auto,761,780,752,DE,Lewes,10,False,False,False,Gas
1,31c53ad9cd3a77a664496763de2bd953f64719be,2020-06-06 16:20:00 UTC,auto,hold,702,705,655,DE,Newark,20,False,False,False,Gas
2,657103d68361e7b714a3c66ad663a2bd137b246e,2020-06-10 18:50:00 UTC,cool,auto,721,727,727,DE,Wilmington,0,False,False,True,Electric
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2020-06-26 14:15:00 UTC,cool,auto,737,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
4,0ad826a253a907f5946f8402db4a56a04d9f0e69,2020-06-28 18:35:00 UTC,auto,auto,752,750,635,DE,Wilmington,48,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170061,ffc31260f43f93640ab078897b9d432400345a5e,2020-06-26 12:15:00 UTC,cool,auto,758,760,760,DE,Bear,17,False,False,False,Gas
170062,ffc31260f43f93640ab078897b9d432400345a5e,2020-06-26 12:45:00 UTC,cool,auto,760,760,760,DE,Bear,17,False,False,False,Gas
170063,ffc31260f43f93640ab078897b9d432400345a5e,2020-06-26 11:10:00 UTC,cool,auto,763,760,760,DE,Bear,17,False,False,False,Gas
170064,ffc31260f43f93640ab078897b9d432400345a5e,2020-06-26 11:25:00 UTC,cool,auto,764,760,760,DE,Bear,17,False,False,False,Gas


In [88]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2020"), ("Month", "jun")):
    jun_2020[label] = value

In [89]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jun_2020 = jun_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [90]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Save the June 2020 averages; the index is written because it holds the
# grouping keys.
jun_2020_ave.to_csv(
    path_or_buf="data/day/DE/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw June 2021 readings for DE.
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2021-jun-day-DE.csv",
)

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021.loc[
        jun_2021['TemperatureExpectedCool'].ge(850)
        | jun_2021['TemperatureExpectedHeat'].ge(850)
        | jun_2021['TemperatureExpectedCool'].le(600)
        | jun_2021['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
3,0f4f6f6ec2bd31e06c5897e74c6b06e789bbd188,2021-06-05 15:55:00 UTC,cool,hold,758,767,725,DE,Newark,25,False,False,False,Gas
7,5fc5e608146c199640f67a236dd5b54c73318273,2021-06-04 18:50:00 UTC,cool,hold,757,742,742,DE,Middletown,19,True,False,False,Gas
8,86c381a01ef84668d0d99768a25da77f538e42c7,2021-06-10 16:15:00 UTC,cool,hold,756,757,757,DE,Wilmington,5,False,False,False,Gas
9,614a56aa2f892b40ccdc5835b04952bfca2d746e,2021-06-27 15:30:00 UTC,cool,hold,750,760,751,DE,Smyrna,20,True,False,False,Gas
13,eb43e2be5e191587815d3ec7c8f0fe6c8210dcf5,2021-06-22 17:20:00 UTC,auto,hold,756,760,665,DE,Dover,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71341,ffc31260f43f93640ab078897b9d432400345a5e,2021-06-17 11:25:00 UTC,cool,hold,736,760,760,DE,Bear,17,False,False,False,Gas
71342,ffc31260f43f93640ab078897b9d432400345a5e,2021-06-17 13:55:00 UTC,cool,hold,742,760,760,DE,Bear,17,False,False,False,Gas
71343,ffc31260f43f93640ab078897b9d432400345a5e,2021-06-16 14:25:00 UTC,cool,hold,745,760,760,DE,Bear,17,False,False,False,Gas
71344,ffc31260f43f93640ab078897b9d432400345a5e,2021-06-16 17:45:00 UTC,cool,hold,754,760,760,DE,Bear,17,False,False,False,Gas


In [94]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2021"), ("Month", "jun")):
    jun_2021[label] = value

In [95]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jun_2021 = jun_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [96]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Save the June 2021 averages; the index is written because it holds the
# grouping keys.
jun_2021_ave.to_csv(
    path_or_buf="data/day/DE/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Collect the per-year June CSV exports. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep the row order of the
# combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/DE/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and concatenate them in a single call.
# Calling pd.concat inside the loop recopies all earlier rows on every
# pass, and seeding it with an empty DataFrame relies on dtype handling
# that recent pandas releases deprecate.
yearly_frames = [pd.read_csv("data/day/DE/jun/" + file) for file in files]

# pd.concat raises ValueError on an empty list; fall back to an empty
# frame, which is what the loop produced when no CSV files were present.
DE_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

DE_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,18f7ccf6b435622cb968d6da44cda99f382171bf,jun,2017,cool,auto,Camden Wyoming,738.119688,755.197745,664.636600,27.0,False,False,True
1,18f7ccf6b435622cb968d6da44cda99f382171bf,jun,2017,cool,hold,Camden Wyoming,689.675105,688.881857,688.706751,27.0,False,False,True
2,21928216c72d02f4395d9585af619b190c11ed47,jun,2017,cool,auto,Claymont,736.624831,730.000000,700.000000,115.0,False,False,True
3,21a8021fbd2bc2e837a130d42a96082c5b166d78,jun,2017,cool,hold,Middletown,703.552910,703.809524,703.809524,30.0,False,False,False
4,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,jun,2017,cool,auto,Camden Wyoming,756.018868,747.943396,692.015723,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,f4adc8f627afc3d4f0facd64851ab8bd720435b7,jun,2021,auto,hold,Wilmington,703.146417,705.000000,645.000000,50.0,False,False,False
101,f4adc8f627afc3d4f0facd64851ab8bd720435b7,jun,2021,cool,hold,Wilmington,700.879630,700.000000,700.000000,50.0,False,False,False
102,f8e940d02995305f21d82e5169cd1cd514fe7cfc,jun,2021,auto,hold,Hockessin,801.165909,796.904545,720.000000,7.0,False,False,False
103,f912367502ddecab35f132f60dfa4a4f8ee8aa9f,jun,2021,cool,hold,Newark,743.333333,740.000000,740.000000,60.0,False,False,False


In [100]:
# Write the combined June frame for DE; the row index is left out.
DE_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DE/DE_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 readings for DE.
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2017-jul-day-DE.csv",
)

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017.loc[
        jul_2017['TemperatureExpectedCool'].ge(850)
        | jul_2017['TemperatureExpectedHeat'].ge(850)
        | jul_2017['TemperatureExpectedCool'].le(600)
        | jul_2017['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,2def1e7802bce77e54325f0fea8af43d6a73f830,2017-07-31 12:45:00 UTC,cool,hold,711,712,698,DE,Selbyville,5,False,False,False,Gas
15,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-07-22 16:10:00 UTC,cool,hold,776,788,788,DE,Newark,15,False,False,False,Gas
18,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-07-09 15:10:00 UTC,cool,hold,719,715,715,DE,Middletown,30,False,False,False,Gas
20,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-07-30 13:55:00 UTC,cool,hold,713,715,715,DE,Middletown,30,False,False,False,Gas
32,6c0521ad14d0a889e4eda839e4b802054c4f5831,2017-07-23 15:25:00 UTC,cool,hold,779,779,779,DE,Newark,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73598,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-07-13 15:55:00 UTC,cool,hold,767,760,760,DE,Seaford,5,True,False,True,Electric
73599,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-07-13 13:50:00 UTC,cool,hold,760,760,760,DE,Seaford,5,True,False,True,Electric
73600,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-07-13 19:35:00 UTC,cool,hold,762,760,760,DE,Seaford,5,True,False,True,Electric
73601,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2017-07-13 17:15:00 UTC,cool,hold,761,760,760,DE,Seaford,5,True,False,True,Electric


In [103]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2017"), ("Month", "jul")):
    jul_2017[label] = value

In [104]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jul_2017 = jul_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [105]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Save the July 2017 averages; the index is written because it holds the
# grouping keys.
jul_2017_ave.to_csv(
    path_or_buf="data/day/DE/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw July 2018 readings for DE.
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2018-jul-day-DE.csv",
)

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018.loc[
        jul_2018['TemperatureExpectedCool'].ge(850)
        | jul_2018['TemperatureExpectedHeat'].ge(850)
        | jul_2018['TemperatureExpectedCool'].le(600)
        | jul_2018['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-07-31 11:50:00 UTC,cool,auto,732,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
1,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-07-30 15:25:00 UTC,cool,auto,732,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
4,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-07-28 15:15:00 UTC,cool,auto,733,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
5,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-07-28 14:45:00 UTC,cool,auto,733,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
6,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-07-21 15:50:00 UTC,cool,auto,727,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
163705,ffc31260f43f93640ab078897b9d432400345a5e,2018-07-11 18:45:00 UTC,cool,hold,761,760,760,DE,Bear,17,False,False,False,Gas
163706,ffc31260f43f93640ab078897b9d432400345a5e,2018-07-11 19:05:00 UTC,cool,hold,762,760,760,DE,Bear,17,False,False,False,Gas
163707,ffc31260f43f93640ab078897b9d432400345a5e,2018-07-11 15:40:00 UTC,cool,hold,768,760,760,DE,Bear,17,False,False,False,Gas
163708,ffc31260f43f93640ab078897b9d432400345a5e,2018-07-11 16:10:00 UTC,cool,hold,760,760,760,DE,Bear,17,False,False,False,Gas


In [109]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2018"), ("Month", "jul")):
    jul_2018[label] = value

In [110]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jul_2018 = jul_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [111]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Save the July 2018 averages; the index is written because it holds the
# grouping keys.
jul_2018_ave.to_csv(
    path_or_buf="data/day/DE/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw July 2019 readings for DE.
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2019-jul-day-DE.csv",
)

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019.loc[
        jul_2019['TemperatureExpectedCool'].ge(850)
        | jul_2019['TemperatureExpectedHeat'].ge(850)
        | jul_2019['TemperatureExpectedCool'].le(600)
        | jul_2019['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-07-03 12:55:00 UTC,cool,auto,734,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
1,e22503ef543b569f72199160f7ecfc0f59eef587,2019-07-26 15:10:00 UTC,cool,hold,711,665,665,DE,Dover,10,False,False,True,Electric
2,7380d8eb099826a845e4b560d1c1b0550365b5ca,2019-07-03 11:25:00 UTC,auto,hold,761,759,645,DE,New Castle,10,False,False,False,Gas
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-07-29 10:40:00 UTC,cool,auto,728,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
4,e22503ef543b569f72199160f7ecfc0f59eef587,2019-07-30 12:35:00 UTC,cool,hold,663,665,665,DE,Dover,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191420,f93eb6907201bce993eca365e1f674e535452109,2019-07-16 16:50:00 UTC,cool,auto,759,760,760,DE,Ocean View,15,False,False,True,Electric
191421,f93eb6907201bce993eca365e1f674e535452109,2019-07-16 16:40:00 UTC,cool,auto,765,760,760,DE,Ocean View,15,False,False,True,Electric
191422,f93eb6907201bce993eca365e1f674e535452109,2019-07-16 11:05:00 UTC,cool,auto,760,760,760,DE,Ocean View,15,False,False,True,Electric
191423,f93eb6907201bce993eca365e1f674e535452109,2019-07-16 19:40:00 UTC,cool,auto,759,760,760,DE,Ocean View,15,False,False,True,Electric


In [115]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2019"), ("Month", "jul")):
    jul_2019[label] = value

In [116]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jul_2019 = jul_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [117]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Save the July 2019 averages; the index is written because it holds the
# grouping keys.
jul_2019_ave.to_csv(
    path_or_buf="data/day/DE/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw July 2020 readings for DE.
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2020-jul-day-DE.csv",
)

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020.loc[
        jul_2020['TemperatureExpectedCool'].ge(850)
        | jul_2020['TemperatureExpectedHeat'].ge(850)
        | jul_2020['TemperatureExpectedCool'].le(600)
        | jul_2020['TemperatureExpectedHeat'].le(600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,11d84820d97292fd2ac228b5170ac611815a10b3,2020-07-22 17:35:00 UTC,cool,auto,749,750,650,DE,Wilmington,57,False,False,False,Gas
1,11d84820d97292fd2ac228b5170ac611815a10b3,2020-07-28 12:20:00 UTC,cool,auto,701,700,650,DE,Wilmington,57,False,False,False,Gas
2,11d84820d97292fd2ac228b5170ac611815a10b3,2020-07-28 16:15:00 UTC,cool,auto,744,740,650,DE,Wilmington,57,False,False,False,Gas
3,11d84820d97292fd2ac228b5170ac611815a10b3,2020-07-22 15:00:00 UTC,cool,auto,743,740,650,DE,Wilmington,57,False,False,False,Gas
4,11d84820d97292fd2ac228b5170ac611815a10b3,2020-07-28 12:25:00 UTC,cool,auto,700,700,650,DE,Wilmington,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178897,104114e5ae7f9080668d70b6b16a992e4cb6b0e9,2020-07-23 13:10:00 UTC,cool,hold,692,690,690,DE,Laurel,0,True,False,True,Electric
178898,05f17d52e1db8e1d1c118b4d867161f3429fce55,2020-07-09 10:35:00 UTC,cool,hold,692,690,690,DE,New Castle,20,False,False,False,Gas
178899,5ffed153364c0d5a1c1b6e3c82bd141912d4b0c7,2020-07-26 18:15:00 UTC,cool,auto,690,690,690,DE,Middletown,10,False,False,False,Gas
178900,104114e5ae7f9080668d70b6b16a992e4cb6b0e9,2020-07-29 11:30:00 UTC,cool,hold,690,690,690,DE,Laurel,0,True,False,True,Electric


In [121]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2020"), ("Month", "jul")):
    jul_2020[label] = value

In [122]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jul_2020 = jul_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [123]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Save the July 2020 averages; the index is written because it holds the
# grouping keys.
jul_2020_ave.to_csv(
    path_or_buf="data/day/DE/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw July 2021 readings for DE.
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/DE-day/2021-jul-day-DE.csv",
)

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: drop, in one pass,
# every row whose expected cool or expected heat temperature is
# >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021.loc[
        jul_2021['TemperatureExpectedCool'].ge(850)
        | jul_2021['TemperatureExpectedHeat'].ge(850)
        | jul_2021['TemperatureExpectedHeat'].le(600)
        | jul_2021['TemperatureExpectedCool'].le(600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3528ce1a3af5153bd73944decb2974512a40fc78,2021-07-29 14:20:00 UTC,auto,hold,731,730,650,DE,Townsend,67,False,False,True,Electric
1,3528ce1a3af5153bd73944decb2974512a40fc78,2021-07-26 15:30:00 UTC,auto,hold,731,730,650,DE,Townsend,67,False,False,True,Electric
2,3528ce1a3af5153bd73944decb2974512a40fc78,2021-07-26 17:15:00 UTC,auto,hold,734,730,650,DE,Townsend,67,False,False,True,Electric
3,3528ce1a3af5153bd73944decb2974512a40fc78,2021-07-28 19:40:00 UTC,auto,hold,766,730,650,DE,Townsend,67,False,False,True,Electric
4,3528ce1a3af5153bd73944decb2974512a40fc78,2021-07-28 17:15:00 UTC,auto,hold,733,730,650,DE,Townsend,67,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80595,dce89c01ae0cb8450c0d919804d036d5f997204b,2021-07-13 13:30:00 UTC,cool,hold,758,760,760,DE,Townsend,20,False,False,False,Gas
80596,dce89c01ae0cb8450c0d919804d036d5f997204b,2021-07-13 13:40:00 UTC,cool,hold,760,760,760,DE,Townsend,20,False,False,False,Gas
80597,dce89c01ae0cb8450c0d919804d036d5f997204b,2021-07-13 12:35:00 UTC,cool,hold,751,760,760,DE,Townsend,20,False,False,False,Gas
80598,dce89c01ae0cb8450c0d919804d036d5f997204b,2021-07-13 12:50:00 UTC,cool,hold,751,760,760,DE,Townsend,20,False,False,False,Gas


In [127]:
# Stamp each row with its year and month; both are used as grouping keys
# in the aggregation step.
for label, value in (("Year", "2021"), ("Month", "jul")):
    jul_2021[label] = value

In [128]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as holding averages.
jul_2021 = jul_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [129]:
# Average the numeric readings per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is explicit because the frame still carries text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas
# >= 2.0 raises TypeError when asked to average them, whereas older
# releases dropped them silently.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Save the July 2021 averages; the index is written because it holds the
# grouping keys.
jul_2021_ave.to_csv(
    path_or_buf="data/day/DE/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Collect the per-year July CSV exports. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep the row order of the
# combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/DE/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and concatenate them in a single call.
# Calling pd.concat inside the loop recopies all earlier rows on every
# pass, and seeding it with an empty DataFrame relies on dtype handling
# that recent pandas releases deprecate.
yearly_frames = [pd.read_csv("data/day/DE/jul/" + file) for file in files]

# pd.concat raises ValueError on an empty list; fall back to an empty
# frame, which is what the loop produced when no CSV files were present.
DE_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

DE_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,070839ef58c74b72d673dd533e6f35aec7ce8f27,jul,2017,cool,auto,Houston,772.148828,749.730377,745.947503,15.0,True,False,True
1,18f7ccf6b435622cb968d6da44cda99f382171bf,jul,2017,cool,auto,Camden Wyoming,745.919858,764.386525,660.070922,27.0,False,False,True
2,21928216c72d02f4395d9585af619b190c11ed47,jul,2017,cool,auto,Claymont,739.692308,701.923077,710.000000,115.0,False,False,True
3,21928216c72d02f4395d9585af619b190c11ed47,jul,2017,cool,hold,Claymont,734.560732,730.000000,730.000000,115.0,False,False,True
4,21a8021fbd2bc2e837a130d42a96082c5b166d78,jul,2017,cool,hold,Middletown,716.729730,715.202703,714.695946,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
101,f8e940d02995305f21d82e5169cd1cd514fe7cfc,jul,2021,auto,hold,Hockessin,806.146388,803.585551,720.000000,7.0,False,False,False
102,f912367502ddecab35f132f60dfa4a4f8ee8aa9f,jul,2021,cool,hold,Newark,747.133455,745.505005,745.505005,60.0,False,False,False
103,f93eb6907201bce993eca365e1f674e535452109,jul,2021,auto,hold,Ocean View,727.824490,721.636735,667.755102,15.0,False,False,True
104,f93eb6907201bce993eca365e1f674e535452109,jul,2021,cool,hold,Ocean View,706.708000,691.372000,691.352000,15.0,False,False,True


In [133]:
# Write the combined July table; row labels are omitted (index=False).
DE_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DE/DE_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 "day" extract for DE.
aug_2017_csv = "../data_large/DE-day/2017-aug-day-DE.csv"
aug_2017 = pd.read_csv(aug_2017_csv)

In [135]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (aug_2017['TemperatureExpectedCool'] >= 850)
    | (aug_2017['TemperatureExpectedHeat'] >= 850)
    | (aug_2017['TemperatureExpectedCool'] <= 600)
    | (aug_2017['TemperatureExpectedHeat'] <= 600)
)
aug_2017.drop(aug_2017[outlier_rows].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-08-21 10:35:00 UTC,cool,hold,716,715,715,DE,Middletown,30,False,False,False,Gas
3,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-08-20 12:30:00 UTC,cool,hold,719,715,715,DE,Middletown,30,False,False,False,Gas
6,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-08-21 10:30:00 UTC,cool,hold,716,715,715,DE,Middletown,30,False,False,False,Gas
9,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-08-21 11:30:00 UTC,cool,hold,716,715,715,DE,Middletown,30,False,False,False,Gas
14,21a8021fbd2bc2e837a130d42a96082c5b166d78,2017-08-23 19:20:00 UTC,cool,hold,719,715,715,DE,Middletown,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83356,e1d0e7745fb8ca0ba9780a576a110604b08d59e5,2017-08-18 15:50:00 UTC,cool,auto,762,770,710,DE,Milton,15,False,False,False,Gas
83357,e1d0e7745fb8ca0ba9780a576a110604b08d59e5,2017-08-14 15:15:00 UTC,cool,auto,772,790,710,DE,Milton,15,False,False,False,Gas
83358,e1d0e7745fb8ca0ba9780a576a110604b08d59e5,2017-08-04 14:45:00 UTC,cool,auto,791,800,710,DE,Milton,15,False,False,False,Gas
83359,e1d0e7745fb8ca0ba9780a576a110604b08d59e5,2017-08-04 15:15:00 UTC,cool,auto,799,800,710,DE,Milton,15,False,False,False,Gas


In [136]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_labels)

In [138]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2017_ave = aug_2017.groupby(group_keys).mean(numeric_only=True)

In [139]:
# Save the August 2017 aggregate; index=True writes the index columns
# alongside the averaged values.
aug_2017_ave.to_csv(
    path_or_buf="data/day/DE/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the August 2018 "day" extract for DE.
aug_2018_csv = "../data_large/DE-day/2018-aug-day-DE.csv"
aug_2018 = pd.read_csv(aug_2018_csv)

In [141]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (aug_2018['TemperatureExpectedCool'] >= 850)
    | (aug_2018['TemperatureExpectedHeat'] >= 850)
    | (aug_2018['TemperatureExpectedCool'] <= 600)
    | (aug_2018['TemperatureExpectedHeat'] <= 600)
)
aug_2018.drop(aug_2018[outlier_rows].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f98c03de48bff902800d8778506362f25763a3b0,2018-08-04 16:50:00 UTC,cool,auto,697,715,715,DE,Bethany Beach,48,False,False,False,Gas
1,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-08-22 12:25:00 UTC,cool,auto,732,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
2,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-08-17 15:50:00 UTC,cool,auto,731,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-08-20 12:20:00 UTC,cool,auto,734,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
4,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2018-08-16 17:50:00 UTC,cool,auto,741,730,721,DE,Rehoboth Beach,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149449,f98c03de48bff902800d8778506362f25763a3b0,2018-08-27 14:20:00 UTC,cool,hold,694,690,690,DE,Bethany Beach,48,False,False,False,Gas
149450,f98c03de48bff902800d8778506362f25763a3b0,2018-08-07 17:05:00 UTC,cool,auto,691,690,690,DE,Bethany Beach,48,False,False,False,Gas
149451,bd23344059260caeaa283a944059b8358bf4dee7,2018-08-27 19:30:00 UTC,cool,hold,729,690,690,DE,Bear,0,False,False,False,Gas
149452,77177ccbc4eb7e89fa0e4ce628d7911e4b4bb531,2018-08-13 12:35:00 UTC,cool,hold,694,690,690,DE,Dewey Beach,0,True,False,True,Electric


In [142]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_labels)

In [144]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2018_ave = aug_2018.groupby(group_keys).mean(numeric_only=True)

In [145]:
# Save the August 2018 aggregate; index=True writes the index columns
# alongside the averaged values.
aug_2018_ave.to_csv(
    path_or_buf="data/day/DE/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the August 2019 "day" extract for DE.
aug_2019_csv = "../data_large/DE-day/2019-aug-day-DE.csv"
aug_2019 = pd.read_csv(aug_2019_csv)

In [147]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (aug_2019['TemperatureExpectedCool'] >= 850)
    | (aug_2019['TemperatureExpectedHeat'] >= 850)
    | (aug_2019['TemperatureExpectedCool'] <= 600)
    | (aug_2019['TemperatureExpectedHeat'] <= 600)
)
aug_2019.drop(aug_2019[outlier_rows].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,22d1d3b362a7b650679adf4fc2c4b9c8eedee8ac,2019-08-05 17:40:00 UTC,cool,hold,740,735,735,DE,Newark,40,True,False,True,Electric
1,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-08-14 19:40:00 UTC,cool,auto,736,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
2,a72502e0010aa97de1ad5dc6bfea5bd5f1bc46ba,2019-08-02 18:25:00 UTC,cool,hold,760,686,686,DE,Milton,99,False,False,False,Gas
3,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-08-05 17:50:00 UTC,cool,auto,736,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
4,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-08-30 13:40:00 UTC,cool,auto,731,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187086,b7700aa6c467694f28b83690983d25a58dee0c94,2019-08-08 14:40:00 UTC,cool,auto,756,760,760,DE,Wilmington,45,False,False,False,Gas
187087,b7700aa6c467694f28b83690983d25a58dee0c94,2019-08-08 12:05:00 UTC,cool,auto,758,760,760,DE,Wilmington,45,False,False,False,Gas
187088,b7700aa6c467694f28b83690983d25a58dee0c94,2019-08-28 14:10:00 UTC,cool,hold,750,760,760,DE,Wilmington,45,False,False,False,Gas
187089,b7700aa6c467694f28b83690983d25a58dee0c94,2019-08-08 12:45:00 UTC,cool,auto,759,760,760,DE,Wilmington,45,False,False,False,Gas


In [148]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_labels)

In [150]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2019_ave = aug_2019.groupby(group_keys).mean(numeric_only=True)

In [151]:
# Save the August 2019 aggregate; index=True writes the index columns
# alongside the averaged values.
aug_2019_ave.to_csv(
    path_or_buf="data/day/DE/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the August 2020 "day" extract for DE.
aug_2020_csv = "../data_large/DE-day/2020-aug-day-DE.csv"
aug_2020 = pd.read_csv(aug_2020_csv)

In [153]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (aug_2020['TemperatureExpectedCool'] >= 850)
    | (aug_2020['TemperatureExpectedHeat'] >= 850)
    | (aug_2020['TemperatureExpectedCool'] <= 600)
    | (aug_2020['TemperatureExpectedHeat'] <= 600)
)
aug_2020.drop(aug_2020[outlier_rows].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3967bc72fcf03c6f8de73795bb135539aaf341d9,2020-08-01 12:40:00 UTC,cool,auto,753,750,748,DE,Milford,15,False,False,False,Gas
1,14fe57f4a2dced9bbf296dcd0280277c39f5c2f6,2020-08-28 09:45:00 UTC,cool,hold,778,780,764,DE,Millsboro,20,False,False,False,Gas
2,14fe57f4a2dced9bbf296dcd0280277c39f5c2f6,2020-08-28 09:25:00 UTC,cool,hold,778,780,764,DE,Millsboro,20,False,False,False,Gas
3,14fe57f4a2dced9bbf296dcd0280277c39f5c2f6,2020-08-29 19:45:00 UTC,cool,hold,778,780,764,DE,Millsboro,20,False,False,False,Gas
4,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2020-08-24 11:15:00 UTC,cool,auto,720,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186473,104114e5ae7f9080668d70b6b16a992e4cb6b0e9,2020-08-26 11:00:00 UTC,cool,hold,694,690,690,DE,Laurel,0,True,False,True,Electric
186474,5ffed153364c0d5a1c1b6e3c82bd141912d4b0c7,2020-08-20 13:35:00 UTC,cool,auto,688,690,690,DE,Middletown,10,False,False,False,Gas
186475,5ffed153364c0d5a1c1b6e3c82bd141912d4b0c7,2020-08-20 15:50:00 UTC,cool,auto,688,690,690,DE,Middletown,10,False,False,False,Gas
186476,5ffed153364c0d5a1c1b6e3c82bd141912d4b0c7,2020-08-20 15:20:00 UTC,cool,auto,691,690,690,DE,Middletown,10,False,False,False,Gas


In [154]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_labels)

In [156]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2020_ave = aug_2020.groupby(group_keys).mean(numeric_only=True)

In [157]:
# Save the August 2020 aggregate; index=True writes the index columns
# alongside the averaged values.
aug_2020_ave.to_csv(
    path_or_buf="data/day/DE/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files for the month
2. Export as combined CSV

In [158]:
# Collect the CSV files in the August folder. os.listdir() returns names in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/DE/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; growing a frame
# inside a loop re-copies all earlier rows on each pass. Each file's own row
# labels are kept (no ignore_index).
yearly_frames = [pd.read_csv("data/day/DE/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
DE_aug = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

DE_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,070839ef58c74b72d673dd533e6f35aec7ce8f27,aug,2017,cool,auto,Houston,776.114583,750.000000,750.000000,15.0,True,False,True
1,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,auto,auto,Wilmington,799.000000,780.000000,690.000000,57.0,False,False,False
2,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,auto,hold,Wilmington,766.769231,810.000000,690.000000,57.0,False,False,False
3,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,cool,auto,Wilmington,756.220924,801.545406,661.949549,57.0,False,False,False
4,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,cool,hold,Wilmington,768.335038,789.915601,783.317136,57.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
173,f912367502ddecab35f132f60dfa4a4f8ee8aa9f,aug,2020,cool,hold,Newark,712.394393,710.000000,710.000000,60.0,False,False,False
174,f93eb6907201bce993eca365e1f674e535452109,aug,2020,cool,auto,Ocean View,760.597569,775.047941,775.047941,15.0,False,False,True
175,f93eb6907201bce993eca365e1f674e535452109,aug,2020,cool,hold,Ocean View,719.491094,710.524173,710.524173,15.0,False,False,True
176,ffc31260f43f93640ab078897b9d432400345a5e,aug,2020,cool,auto,Bear,753.297373,750.454991,750.065849,17.0,False,False,False


In [160]:
# Write the combined August table; row labels are omitted (index=False).
DE_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DE/DE_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 "day" extract for DE.
dec_2017_csv = "../data_large/DE-day/2017-dec-day-DE.csv"
dec_2017 = pd.read_csv(dec_2017_csv)

In [162]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (dec_2017['TemperatureExpectedCool'] >= 850)
    | (dec_2017['TemperatureExpectedHeat'] >= 850)
    | (dec_2017['TemperatureExpectedCool'] <= 600)
    | (dec_2017['TemperatureExpectedHeat'] <= 600)
)
dec_2017.drop(dec_2017[outlier_rows].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f8e940d02995305f21d82e5169cd1cd514fe7cfc,2017-12-20 18:20:00 UTC,auto,hold,671,725,655,DE,Hockessin,7,False,False,False,Gas
1,cab48140b2481c3ec5f722d2b8eb5712fca9525f,2017-12-25 12:00:00 UTC,heat,hold,746,737,737,DE,Milford,57,False,False,False,Gas
2,37b36edf77b0ca8d1b343f751c62092cff706c5d,2017-12-05 18:15:00 UTC,heat,hold,649,675,675,DE,Hockessin,55,False,False,True,Electric
3,3eeccb5f11385e084be41f643b2dbdf9f75be0d1,2017-12-21 16:00:00 UTC,auto,hold,656,815,635,DE,Long Neck,15,True,False,False,Gas
4,3eeccb5f11385e084be41f643b2dbdf9f75be0d1,2017-12-21 14:00:00 UTC,auto,hold,649,815,635,DE,Long Neck,15,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85294,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2017-12-31 19:30:00 UTC,heat,hold,755,760,760,DE,Ocean View,5,False,False,False,Gas
85295,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2017-12-31 15:50:00 UTC,heat,hold,757,760,760,DE,Ocean View,5,False,False,False,Gas
85296,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2017-12-31 13:30:00 UTC,heat,hold,757,760,760,DE,Ocean View,5,False,False,False,Gas
85297,715ee5f833d0e6a31457bd5746c4dca0295d0e6d,2017-12-31 12:00:00 UTC,heat,hold,758,760,760,DE,Ocean View,5,False,False,False,Gas


In [163]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_labels)

In [165]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2017_ave = dec_2017.groupby(group_keys).mean(numeric_only=True)

In [166]:
# Save the December 2017 aggregate; index=True writes the index columns
# alongside the averaged values.
dec_2017_ave.to_csv(
    path_or_buf="data/day/DE/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the December 2018 "day" extract for DE.
dec_2018_csv = "../data_large/DE-day/2018-dec-day-DE.csv"
dec_2018 = pd.read_csv(dec_2018_csv)

In [168]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (dec_2018['TemperatureExpectedCool'] >= 850)
    | (dec_2018['TemperatureExpectedHeat'] >= 850)
    | (dec_2018['TemperatureExpectedCool'] <= 600)
    | (dec_2018['TemperatureExpectedHeat'] <= 600)
)
dec_2018.drop(dec_2018[outlier_rows].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2018-12-30 12:35:00 UTC,heat,auto,634,670,664,DE,Camden Wyoming,10,False,False,False,Gas
1,de91bd0d484dad4be3cfd707e5f0327b3429da88,2018-12-27 10:45:00 UTC,auto,auto,717,765,715,DE,SMYRNA,15,True,False,False,Gas
2,e22503ef543b569f72199160f7ecfc0f59eef587,2018-12-28 19:50:00 UTC,heat,hold,703,665,665,DE,Dover,10,False,False,True,Electric
3,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2018-12-13 13:25:00 UTC,heat,auto,690,756,672,DE,Camden Wyoming,10,False,False,False,Gas
4,a4fbf90402b9d144b6dd8b7d37f2811f509e8253,2018-12-27 10:55:00 UTC,auto,auto,715,765,715,DE,SMYRNA,15,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130031,2e0bf3d56d5cc793e2be36a8125b79af99133ca5,2018-12-30 18:25:00 UTC,heat,hold,684,750,750,DE,Townsend,10,False,False,False,Gas
130032,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2018-12-28 19:25:00 UTC,auto,hold,744,770,750,DE,Seaford,5,True,False,True,Electric
130033,8e12babe8e6ece4bf6d2eaef55fb89d48b024bed,2018-12-28 18:40:00 UTC,auto,hold,743,770,750,DE,Seaford,5,True,False,True,Electric
130034,c8faab854e0e3c9e858bfd28f97dfc4067c4bb1a,2018-12-20 18:55:00 UTC,heat,hold,748,750,750,DE,Hockessin,5,False,False,False,Gas


In [169]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_labels)

In [171]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2018_ave = dec_2018.groupby(group_keys).mean(numeric_only=True)

In [172]:
# Save the December 2018 aggregate; index=True writes the index columns
# alongside the averaged values.
dec_2018_ave.to_csv(
    path_or_buf="data/day/DE/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the December 2019 "day" extract for DE.
dec_2019_csv = "../data_large/DE-day/2019-dec-day-DE.csv"
dec_2019 = pd.read_csv(dec_2019_csv)

In [174]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (dec_2019['TemperatureExpectedCool'] >= 850)
    | (dec_2019['TemperatureExpectedHeat'] >= 850)
    | (dec_2019['TemperatureExpectedCool'] <= 600)
    | (dec_2019['TemperatureExpectedHeat'] <= 600)
)
dec_2019.drop(dec_2019[outlier_rows].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-12-28 15:30:00 UTC,heat,auto,728,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
1,21a8021fbd2bc2e837a130d42a96082c5b166d78,2019-12-26 11:20:00 UTC,heat,hold,652,655,655,DE,Middletown,30,False,False,False,Gas
2,92754c6c6de54feeb0d425fc6c55742b9671e5b8,2019-12-28 17:20:00 UTC,heat,auto,731,730,731,DE,Rehoboth Beach,20,False,False,False,Gas
3,8ba8ccbf4b4d72b0c0cee2baba8fefca3ae3e728,2019-12-01 12:25:00 UTC,auto,hold,723,785,725,DE,Wilmington,89,True,False,False,Gas
4,1f6a98a58b7cbdbb109e2f2f9c6677760e612690,2019-12-22 16:55:00 UTC,auto,auto,663,720,657,DE,Georgetown,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162753,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-12-11 16:25:00 UTC,heat,auto,745,810,760,DE,Wilmington,70,False,False,False,Gas
162754,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-12-15 13:35:00 UTC,heat,auto,756,810,760,DE,Wilmington,70,False,False,False,Gas
162755,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-12-10 18:55:00 UTC,heat,auto,757,810,760,DE,Wilmington,70,False,False,False,Gas
162756,36187b5af5abca4bd8e3ecae2cb8916e3582ed2a,2019-12-07 17:40:00 UTC,heat,auto,752,810,760,DE,Wilmington,70,False,False,False,Gas


In [175]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_labels)

In [177]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2019_ave = dec_2019.groupby(group_keys).mean(numeric_only=True)

In [178]:
# Save the December 2019 aggregate; index=True writes the index columns
# alongside the averaged values.
dec_2019_ave.to_csv(
    path_or_buf="data/day/DE/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the December 2020 "day" extract for DE.
dec_2020_csv = "../data_large/DE-day/2020-dec-day-DE.csv"
dec_2020 = pd.read_csv(dec_2020_csv)

In [180]:
# Remove predetermined outliers before aggregating: any row where either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with a missing value in those columns are kept, because comparisons
# against NaN evaluate to False.
outlier_rows = (
    (dec_2020['TemperatureExpectedCool'] >= 850)
    | (dec_2020['TemperatureExpectedHeat'] >= 850)
    | (dec_2020['TemperatureExpectedCool'] <= 600)
    | (dec_2020['TemperatureExpectedHeat'] <= 600)
)
dec_2020.drop(dec_2020[outlier_rows].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e422babdcbf5dcc98c3ac35caae35ad9f3dfaf39,2020-12-04 13:25:00 UTC,auto,auto,618,756,636,DE,Rehoboth Beach,20,True,False,True,Electric
1,e22503ef543b569f72199160f7ecfc0f59eef587,2020-12-10 13:00:00 UTC,heat,hold,668,675,675,DE,Dover,10,False,False,True,Electric
2,2a56b6a9368fcce30b408077c4ac2287cc8ba96b,2020-12-14 19:25:00 UTC,heat,auto,731,721,721,DE,Camden Wyoming,10,False,False,False,Gas
3,86c381a01ef84668d0d99768a25da77f538e42c7,2020-12-04 14:55:00 UTC,auto,hold,729,785,735,DE,Wilmington,5,False,False,False,Gas
4,ab1ae2944a11c23b331a730b140a343e2172018c,2020-12-25 13:35:00 UTC,heat,hold,721,675,675,DE,Houston,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145592,67ad59f874225d51d91bae4479aa1e4fad6b8b75,2020-12-15 17:30:00 UTC,auto,hold,764,780,760,DE,Dover,10,True,False,False,Gas
145593,67ad59f874225d51d91bae4479aa1e4fad6b8b75,2020-12-15 18:55:00 UTC,auto,hold,759,780,760,DE,Dover,10,True,False,False,Gas
145594,67ad59f874225d51d91bae4479aa1e4fad6b8b75,2020-12-15 16:40:00 UTC,auto,hold,736,780,760,DE,Dover,10,True,False,False,Gas
145595,86c381a01ef84668d0d99768a25da77f538e42c7,2020-12-17 19:20:00 UTC,auto,auto,738,810,760,DE,Wilmington,5,False,False,False,Gas


In [181]:
# Tag every row with its year and month; both are used as group keys when
# the frame is aggregated.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_labels)

In [183]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on non-numeric columns (e.g. the date_time strings) instead of
# silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2020_ave = dec_2020.groupby(group_keys).mean(numeric_only=True)

In [184]:
# Save the December 2020 aggregate; index=True writes the index columns
# alongside the averaged values.
dec_2020_ave.to_csv(
    path_or_buf="data/day/DE/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files for the month
2. Export as combined CSV

In [185]:
# Collect the CSV files in the December folder. os.listdir() returns names in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/DE/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; growing a frame
# inside a loop re-copies all earlier rows on each pass. Each file's own row
# labels are kept (no ignore_index).
yearly_frames = [pd.read_csv("data/day/DE/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
DE_dec = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

DE_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,05f17d52e1db8e1d1c118b4d867161f3429fce55,dec,2017,heat,auto,New Castle,650.000000,721.000000,719.000000,20.0,False,False,False
1,05f17d52e1db8e1d1c118b4d867161f3429fce55,dec,2017,heat,hold,New Castle,681.647059,691.367647,691.367647,20.0,False,False,False
2,070839ef58c74b72d673dd533e6f35aec7ce8f27,dec,2017,heat,auto,Houston,644.760000,789.880000,640.240000,15.0,True,False,True
3,0763b11c276e25fc64ca052d8bd55899e0e340bc,dec,2017,heat,auto,Wilmington,700.563452,703.461929,703.461929,10.0,False,False,False
4,0763b11c276e25fc64ca052d8bd55899e0e340bc,dec,2017,heat,hold,Wilmington,696.773163,699.760383,699.760383,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,f4adc8f627afc3d4f0facd64851ab8bd720435b7,dec,2020,heat,auto,Wilmington,667.340807,666.028401,666.028401,50.0,False,False,False
171,f4adc8f627afc3d4f0facd64851ab8bd720435b7,dec,2020,heat,hold,Wilmington,688.301051,691.513020,691.513020,50.0,False,False,False
172,f8e940d02995305f21d82e5169cd1cd514fe7cfc,dec,2020,auto,hold,Hockessin,733.944444,820.000000,743.388889,7.0,False,False,False
173,ffc31260f43f93640ab078897b9d432400345a5e,dec,2020,heat,auto,Bear,704.075000,700.116667,700.116667,17.0,False,False,False


In [187]:
# Write the combined December table; row labels are omitted (index=False).
DE_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DE/DE_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files for the state
2. Export as combined CSV

In [188]:
# Collect the combined monthly CSV files for DE. os.listdir() returns names in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/DE/") if f.endswith(".csv")
)

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every monthly file once and concatenate in a single call; growing a
# frame inside a loop re-copies all earlier rows on each pass. Each file's own
# row labels are kept (no ignore_index).
monthly_frames = [
    pd.read_csv("Scraper_Output/State_Month_Day/DE/" + file) for file in files
]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
DE_all = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

DE_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,070839ef58c74b72d673dd533e6f35aec7ce8f27,aug,2017,cool,auto,Houston,776.114583,750.000000,750.000000,15.0,True,False,True
1,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,auto,auto,Wilmington,799.000000,780.000000,690.000000,57.0,False,False,False
2,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,auto,hold,Wilmington,766.769231,810.000000,690.000000,57.0,False,False,False
3,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,cool,auto,Wilmington,756.220924,801.545406,661.949549,57.0,False,False,False
4,11d84820d97292fd2ac228b5170ac611815a10b3,aug,2017,cool,hold,Wilmington,768.335038,789.915601,783.317136,57.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
751,f4adc8f627afc3d4f0facd64851ab8bd720435b7,jun,2021,auto,hold,Wilmington,703.146417,705.000000,645.000000,50.0,False,False,False
752,f4adc8f627afc3d4f0facd64851ab8bd720435b7,jun,2021,cool,hold,Wilmington,700.879630,700.000000,700.000000,50.0,False,False,False
753,f8e940d02995305f21d82e5169cd1cd514fe7cfc,jun,2021,auto,hold,Hockessin,801.165909,796.904545,720.000000,7.0,False,False,False
754,f912367502ddecab35f132f60dfa4a4f8ee8aa9f,jun,2021,cool,hold,Newark,743.333333,740.000000,740.000000,60.0,False,False,False


In [190]:
# Write the all-months table for DE; row labels are omitted (index=False).
DE_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DE_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL
# queries: print the distinct ProvinceState values found in each frame.
frames_to_check = (
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
)

for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['DE']
Unique jan_2018: ['DE']
Unique jan_2019: ['DE']
Unique jan_2020: ['DE']
Unique jan_2021: ['DE']
Unique feb_2017: ['DE']
Unique feb_2018: ['DE']
Unique feb_2019: ['DE']
Unique feb_2020: ['DE']
Unique feb_2021: ['DE']
Unique jun_2017: ['DE']
Unique jun_2018: ['DE']
Unique jun_2019: ['DE']
Unique jun_2020: ['DE']
Unique jun_2021: ['DE']
Unique jul_2017: ['DE']
Unique jul_2018: ['DE']
Unique jul_2019: ['DE']
Unique jul_2020: ['DE']
Unique jul_2021: ['DE']
Unique aug_2017: ['DE']
Unique aug_2018: ['DE']
Unique aug_2019: ['DE']
Unique aug_2020: ['DE']
Unique dec_2017: ['DE']
Unique dec_2018: ['DE']
Unique dec_2019: ['DE']
Unique dec_2020: ['DE']
