# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" extract for DC into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2017-jan-day-DC.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
jan_2017.drop(
    index=jan_2017.loc[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-01-23 12:25:00 UTC,auto,hold,698,752,702,DC,Washington,0,False,False,True,Electric
1,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-01-23 16:30:00 UTC,auto,hold,699,752,702,DC,Washington,0,False,False,True,Electric
2,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-01-23 16:35:00 UTC,auto,hold,697,752,702,DC,Washington,0,False,False,True,Electric
3,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-01-18 14:20:00 UTC,auto,hold,699,752,702,DC,Washington,0,False,False,True,Electric
4,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-01-17 18:15:00 UTC,auto,hold,703,752,702,DC,Washington,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45649,744113fa6489b733b94ee8c68516b7320449eeff,2017-01-22 13:00:00 UTC,heat,hold,756,760,760,DC,Washington,35,False,False,False,Gas
45650,744113fa6489b733b94ee8c68516b7320449eeff,2017-01-22 09:35:00 UTC,heat,hold,759,760,760,DC,Washington,35,False,False,False,Gas
45651,744113fa6489b733b94ee8c68516b7320449eeff,2017-01-22 16:40:00 UTC,heat,hold,765,760,760,DC,Washington,35,False,False,False,Gas
45652,744113fa6489b733b94ee8c68516b7320449eeff,2017-01-22 15:35:00 UTC,heat,hold,775,760,760,DC,Washington,35,False,False,False,Gas


In [4]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0731de82edef8853f48e57b7e3af32f7db039935,Jan,2017,auto,auto,Washington,667.196078,750.000000,670.000000,100.0,False,False,True
0731de82edef8853f48e57b7e3af32f7db039935,Jan,2017,auto,hold,Washington,687.852941,750.000000,683.680392,100.0,False,False,True
0dab5564077db11d375f2c8138456bd970432b9a,Jan,2017,auto,auto,Washington,701.679389,776.664122,703.282443,99.0,False,False,False
0dab5564077db11d375f2c8138456bd970432b9a,Jan,2017,auto,hold,Washington,680.566667,780.000000,660.000000,99.0,False,False,False
0ef03d5ff6a191d52626482c35b8ca26a346f033,Jan,2017,heat,hold,Washington,691.162698,693.214286,691.182540,120.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
f8a87afcbb709ecea1b7d451fbf3a7923d1db59d,Jan,2017,heat,hold,Washington,712.433911,713.945305,713.871468,0.0,False,False,True
f9d96e77e0340890004b7e89150034e20d210cc7,Jan,2017,auto,auto,Washington,701.259542,750.832061,700.618321,80.0,False,False,False
f9d96e77e0340890004b7e89150034e20d210cc7,Jan,2017,heat,auto,Washington,693.780000,700.653333,700.035556,80.0,False,False,False
fb940e0ecbb352c3a8721d44ffdafe2753a39bfc,Jan,2017,auto,hold,Washington,724.095238,790.000000,740.000000,120.0,False,False,False


In [7]:
# Write the January 2017 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
jan_2017_ave.to_csv(
    path_or_buf="data/day/DC/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the 2018 January "day" extract for DC into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2018-jan-day-DC.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
jan_2018.drop(
    index=jan_2018.loc[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,aba2a96d74b366b3b1e882bc69717211fb1d46e8,2018-01-02 13:25:00 UTC,auto,hold,755,825,755,DC,Washington,20,False,False,True,Electric
1,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2018-01-17 18:35:00 UTC,heat,hold,752,752,752,DC,Washington,20,False,False,False,Gas
2,aba2a96d74b366b3b1e882bc69717211fb1d46e8,2018-01-26 17:55:00 UTC,auto,hold,744,805,745,DC,Washington,20,False,False,True,Electric
3,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-01-23 18:55:00 UTC,heat,hold,733,684,684,DC,Washington,0,False,False,True,Electric
4,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2018-01-18 17:00:00 UTC,heat,hold,749,752,752,DC,Washington,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125633,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2018-01-04 11:55:00 UTC,auto,hold,732,810,760,DC,Washington,5,False,False,True,Electric
125634,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2018-01-04 19:15:00 UTC,auto,hold,759,810,760,DC,Washington,5,False,False,True,Electric
125635,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2018-01-05 18:30:00 UTC,auto,hold,739,810,760,DC,Washington,5,False,False,True,Electric
125636,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2018-01-05 19:15:00 UTC,auto,hold,743,810,760,DC,Washington,5,False,False,True,Electric


In [10]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Write the January 2018 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
jan_2018_ave.to_csv(
    path_or_buf="data/day/DC/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the 2019 January "day" extract for DC into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2019-jan-day-DC.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
jan_2019.drop(
    index=jan_2019.loc[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2db691a18f43b6a0940805f9e0a6436a652eaf56,2019-01-14 19:35:00 UTC,heat,hold,757,731,731,DC,Washington,0,False,False,True,Electric
1,9b99bc3639b08576fbe7a553d86805aafe5c436c,2019-01-28 11:05:00 UTC,heat,hold,684,712,712,DC,Washington,50,False,False,True,Electric
2,2db691a18f43b6a0940805f9e0a6436a652eaf56,2019-01-13 14:40:00 UTC,heat,hold,732,731,731,DC,Washington,0,False,False,True,Electric
3,2c416acdac92290927562967243027a870ed8051,2019-01-09 16:15:00 UTC,auto,hold,681,768,653,DC,Washington,10,False,False,True,Electric
4,2c416acdac92290927562967243027a870ed8051,2019-01-01 14:40:00 UTC,auto,auto,716,824,644,DC,Washington,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167712,c2aabc2a819e03786a52add287f51b9cec167d39,2019-01-11 13:20:00 UTC,auto,hold,752,820,760,DC,Washington,0,False,False,True,Electric
167713,c2aabc2a819e03786a52add287f51b9cec167d39,2019-01-11 15:20:00 UTC,auto,hold,760,820,760,DC,Washington,0,False,False,True,Electric
167714,c2aabc2a819e03786a52add287f51b9cec167d39,2019-01-10 13:20:00 UTC,auto,hold,753,820,760,DC,Washington,0,False,False,True,Electric
167715,c2aabc2a819e03786a52add287f51b9cec167d39,2019-01-10 16:00:00 UTC,auto,hold,759,820,760,DC,Washington,0,False,False,True,Electric


In [16]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Write the January 2019 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
jan_2019_ave.to_csv(
    path_or_buf="data/day/DC/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the 2020 January "day" extract for DC into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2020-jan-day-DC.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
jan_2020.drop(
    index=jan_2020.loc[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,976c95bf0ecfd05b2d7e521a87f86500d6016654,2020-01-29 15:30:00 UTC,heat,auto,773,771,830,DC,Washington,79,False,False,False,Gas
1,1bb7b15da9cae77cb7ab5b9ec149bf4fb3546e2c,2020-01-23 19:45:00 UTC,auto,hold,713,772,722,DC,Washington,19,True,False,False,Gas
3,29c6ab4bdab51b4cff102ce7aef8582fc76bb363,2020-01-16 13:25:00 UTC,heat,hold,709,759,641,DC,Washington,29,False,False,False,Gas
4,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2020-01-28 18:50:00 UTC,heat,hold,711,713,713,DC,Washington,0,False,False,False,Gas
5,7e5bc97e4c6471d2d39a1eaeb1712bdd37e410ad,2020-01-24 10:25:00 UTC,heat,auto,709,745,709,DC,Washington,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178875,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-01-27 16:20:00 UTC,heat,auto,759,760,760,DC,Washington,0,True,False,False,Gas
178876,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-01-26 13:55:00 UTC,heat,auto,763,760,760,DC,Washington,0,True,False,False,Gas
178877,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-01-27 17:50:00 UTC,heat,auto,754,760,760,DC,Washington,0,True,False,False,Gas
178878,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-01-30 15:40:00 UTC,heat,auto,759,760,760,DC,Washington,0,True,False,False,Gas


In [22]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Write the January 2020 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
jan_2020_ave.to_csv(
    path_or_buf="data/day/DC/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the 2021 January "day" extract for DC into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2021-jan-day-DC.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
jan_2021.drop(
    index=jan_2021.loc[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,aa353292a5c313a3764433d8ae8d8044a6e6b947,2021-01-05 19:00:00 UTC,heat,hold,739,741,741,DC,Washington,5,False,False,False,Gas
1,cfb77b9dc1a5867970758fad84a638d56dfacb34,2021-01-01 19:50:00 UTC,auto,hold,729,712,692,DC,Washington,15,False,False,False,Gas
2,cfb77b9dc1a5867970758fad84a638d56dfacb34,2021-01-25 19:50:00 UTC,auto,hold,714,742,722,DC,Washington,15,False,False,False,Gas
3,cfb77b9dc1a5867970758fad84a638d56dfacb34,2021-01-01 18:35:00 UTC,auto,hold,721,712,692,DC,Washington,15,False,False,False,Gas
4,cfb77b9dc1a5867970758fad84a638d56dfacb34,2021-01-11 17:30:00 UTC,auto,hold,720,742,722,DC,Washington,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81357,ba7f8959bec2591fc20e7bf3a9eeed767a868c7b,2021-01-18 13:15:00 UTC,heat,hold,762,736,760,DC,Washington,5,False,False,False,Gas
81358,ba7f8959bec2591fc20e7bf3a9eeed767a868c7b,2021-01-18 16:55:00 UTC,heat,hold,762,736,760,DC,Washington,5,False,False,False,Gas
81359,ba7f8959bec2591fc20e7bf3a9eeed767a868c7b,2021-01-18 14:40:00 UTC,heat,hold,755,736,760,DC,Washington,5,False,False,False,Gas
81360,ba7f8959bec2591fc20e7bf3a9eeed767a868c7b,2021-01-17 17:30:00 UTC,heat,hold,752,736,760,DC,Washington,5,False,False,False,Gas


In [28]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Write the January 2021 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
jan_2021_ave.to_csv(
    path_or_buf="data/day/DC/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year CSV exports in the January folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/DC/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file and concatenate them in a single call. Growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows on each
# pass, and seeding it with an empty DataFrame can trigger a FutureWarning on
# recent pandas versions.
# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (the previous result) when the folder holds no CSV files.
DC_jan = (
    pd.concat([pd.read_csv("data/day/DC/jan/" + file) for file in files])
    if files
    else pd.DataFrame()
)

DC_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0731de82edef8853f48e57b7e3af32f7db039935,Jan,2017,auto,auto,Washington,667.196078,750.000000,670.000000,100.0,False,False,True
1,0731de82edef8853f48e57b7e3af32f7db039935,Jan,2017,auto,hold,Washington,687.852941,750.000000,683.680392,100.0,False,False,True
2,0dab5564077db11d375f2c8138456bd970432b9a,Jan,2017,auto,auto,Washington,701.679389,776.664122,703.282443,99.0,False,False,False
3,0dab5564077db11d375f2c8138456bd970432b9a,Jan,2017,auto,hold,Washington,680.566667,780.000000,660.000000,99.0,False,False,False
4,0ef03d5ff6a191d52626482c35b8ca26a346f033,Jan,2017,heat,hold,Washington,691.162698,693.214286,691.182540,120.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,f3200dcdf76d98f0843987ccd32ff8ee9f97bede,Jan,2021,heat,hold,Washington,758.742857,759.238095,759.238095,95.0,False,False,False
112,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,Jan,2021,auto,hold,Washington,715.440000,739.250000,689.250000,0.0,True,False,False
113,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,Jan,2021,heat,hold,Washington,707.896507,709.864166,709.152652,0.0,True,False,False
114,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,Jan,2021,auto,hold,Washington DC,674.894737,760.000000,680.763158,7.0,False,False,False


In [34]:
# Save the combined January table. index=False leaves out the row labels,
# which restart for each source file because the frames were concatenated
# without re-indexing.
DC_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/DC/DC_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" extract for DC into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2017-feb-day-DC.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
feb_2017.drop(
    index=feb_2017.loc[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,685d332caaec9952f04746f539416720ceeff813,2017-02-21 13:00:00 UTC,heat,auto,729,722,722,DC,Washington,5,False,False,False,Gas
1,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2017-02-09 17:10:00 UTC,heat,auto,725,751,713,DC,Washington,0,False,False,False,Gas
2,f9d96e77e0340890004b7e89150034e20d210cc7,2017-02-24 11:50:00 UTC,cool,auto,687,728,681,DC,Washington,80,False,False,False,Gas
3,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2017-02-11 14:10:00 UTC,heat,auto,677,693,693,DC,Washington,5,False,False,False,Gas
4,5189b21d5aeea9c4ce00019c92ccc8df14f3d956,2017-02-17 11:40:00 UTC,heat,auto,703,723,723,DC,Washington,105,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43481,744113fa6489b733b94ee8c68516b7320449eeff,2017-02-02 14:25:00 UTC,heat,hold,752,750,750,DC,Washington,35,False,False,False,Gas
43482,744113fa6489b733b94ee8c68516b7320449eeff,2017-02-03 18:30:00 UTC,heat,hold,748,750,750,DC,Washington,35,False,False,False,Gas
43483,744113fa6489b733b94ee8c68516b7320449eeff,2017-02-02 18:55:00 UTC,heat,hold,749,750,750,DC,Washington,35,False,False,False,Gas
43484,1a243af962525a82097a7d37d6b9c986d533cc59,2017-02-04 19:45:00 UTC,heat,auto,765,760,760,DC,Washington,30,False,False,False,Gas


In [37]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
# NOTE(review): the month label here is lowercase "feb" while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Write the February 2017 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
feb_2017_ave.to_csv(
    path_or_buf="data/day/DC/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the 2018 February "day" extract for DC into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2018-feb-day-DC.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
feb_2018.drop(
    index=feb_2018.loc[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-02-19 13:50:00 UTC,heat,hold,710,718,718,DC,Washington,0,False,False,True,Electric
1,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-02-12 15:35:00 UTC,heat,hold,713,659,659,DC,Washington,0,False,False,True,Electric
2,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-02-26 17:35:00 UTC,heat,hold,710,650,644,DC,Washington,0,False,False,True,Electric
3,cf8eca3f4f6057fc778c659c80abc0cceaa236b9,2018-02-19 17:20:00 UTC,heat,hold,665,698,698,DC,Washington,37,True,False,True,Electric
4,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-02-04 17:05:00 UTC,heat,hold,742,744,744,DC,Washington,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124198,1c984fbcf18181b26dd323cb0b5e0195e8a138f6,2018-02-26 18:25:00 UTC,heat,hold,783,760,760,DC,Washington,27,False,False,True,Electric
124199,1c984fbcf18181b26dd323cb0b5e0195e8a138f6,2018-02-26 18:45:00 UTC,heat,hold,771,760,760,DC,Washington,27,False,False,True,Electric
124200,1c984fbcf18181b26dd323cb0b5e0195e8a138f6,2018-02-24 17:30:00 UTC,heat,hold,762,760,760,DC,Washington,27,False,False,True,Electric
124201,1c984fbcf18181b26dd323cb0b5e0195e8a138f6,2018-02-24 17:25:00 UTC,heat,hold,764,760,760,DC,Washington,27,False,False,True,Electric


In [43]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
# NOTE(review): the month label here is lowercase "feb" while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Write the February 2018 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
feb_2018_ave.to_csv(
    path_or_buf="data/day/DC/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the 2019 February "day" extract for DC into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2019-feb-day-DC.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
feb_2019.drop(
    index=feb_2019.loc[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2c416acdac92290927562967243027a870ed8051,2019-02-28 15:40:00 UTC,heat,auto,710,689,689,DC,Washington,10,False,False,True,Electric
1,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2019-02-06 19:50:00 UTC,auto,hold,768,818,748,DC,Washington,20,False,False,False,Gas
2,9b99bc3639b08576fbe7a553d86805aafe5c436c,2019-02-28 11:30:00 UTC,heat,hold,669,657,657,DC,Washington,50,False,False,True,Electric
3,991f5268bd60fb62a055c4005bc9c9adfc6ac853,2019-02-03 16:50:00 UTC,auto,hold,667,715,665,DC,Washington,10,False,False,True,Electric
4,991f5268bd60fb62a055c4005bc9c9adfc6ac853,2019-02-04 14:50:00 UTC,heat,hold,665,715,665,DC,Washington,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118706,c2aabc2a819e03786a52add287f51b9cec167d39,2019-02-02 12:35:00 UTC,auto,auto,753,830,760,DC,Washington,0,False,False,True,Electric
118707,c2aabc2a819e03786a52add287f51b9cec167d39,2019-02-02 12:40:00 UTC,auto,auto,753,830,760,DC,Washington,0,False,False,True,Electric
118708,c2aabc2a819e03786a52add287f51b9cec167d39,2019-02-02 12:00:00 UTC,auto,auto,754,830,760,DC,Washington,0,False,False,True,Electric
118709,c2aabc2a819e03786a52add287f51b9cec167d39,2019-02-02 13:00:00 UTC,auto,auto,751,830,760,DC,Washington,0,False,False,True,Electric


In [49]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
# NOTE(review): the month label here is lowercase "feb" while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Write the February 2019 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
feb_2019_ave.to_csv(
    path_or_buf="data/day/DC/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the 2020 February "day" extract for DC into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2020-feb-day-DC.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
feb_2020.drop(
    index=feb_2020.loc[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2020-02-18 19:50:00 UTC,heat,hold,717,726,726,DC,Washington,5,False,False,False,Gas
1,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2020-02-19 14:35:00 UTC,heat,hold,715,716,716,DC,Washington,5,False,False,False,Gas
2,976c95bf0ecfd05b2d7e521a87f86500d6016654,2020-02-19 17:50:00 UTC,heat,auto,796,771,830,DC,Washington,79,False,False,False,Gas
3,8ba0fa51d656edd840fd13fe89587879c5154ce2,2020-02-22 18:20:00 UTC,heat,auto,707,762,712,DC,Washington,95,False,False,False,Gas
4,2a42b25f1bd2881ca47bd98c379a4033b820dee6,2020-02-05 16:40:00 UTC,heat,auto,723,737,702,DC,Washington,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148809,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-02-01 16:40:00 UTC,heat,auto,753,760,760,DC,Washington,0,True,False,False,Gas
148810,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-02-02 19:20:00 UTC,heat,auto,759,760,760,DC,Washington,0,True,False,False,Gas
148811,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-02-02 17:55:00 UTC,heat,auto,757,760,760,DC,Washington,0,True,False,False,Gas
148812,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,2020-02-01 19:25:00 UTC,heat,auto,756,760,760,DC,Washington,0,True,False,False,Gas


In [55]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
# NOTE(review): the month label here is lowercase "feb" while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Write the February 2020 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
feb_2020_ave.to_csv(
    path_or_buf="data/day/DC/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the 2021 February "day" extract for DC into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/DC-day/2021-feb-day-DC.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. All four conditions are combined into a single mask and
# the matching rows are dropped in place.
feb_2021.drop(
    index=feb_2021.loc[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-02-22 15:55:00 UTC,heat,hold,839,802,842,DC,Washington,79,False,False,False,Gas
1,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2021-02-07 19:50:00 UTC,auto,hold,709,752,702,DC,Washington,5,False,False,False,Gas
2,685d332caaec9952f04746f539416720ceeff813,2021-02-12 19:00:00 UTC,auto,hold,772,805,755,DC,Washington,5,False,False,False,Gas
3,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-02-28 19:35:00 UTC,heat,hold,820,802,822,DC,Washington,79,False,False,False,Gas
4,aa353292a5c313a3764433d8ae8d8044a6e6b947,2021-02-06 17:00:00 UTC,heat,hold,744,747,747,DC,Washington,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72164,ea02375c56a5287fde771be01aee81d3fd5543bf,2021-02-01 12:45:00 UTC,heat,hold,758,760,760,DC,Washington,0,False,False,False,Gas
72165,ea02375c56a5287fde771be01aee81d3fd5543bf,2021-02-01 14:20:00 UTC,heat,hold,756,760,760,DC,Washington,0,False,False,False,Gas
72166,ea02375c56a5287fde771be01aee81d3fd5543bf,2021-02-01 13:20:00 UTC,heat,hold,761,760,760,DC,Washington,0,False,False,False,Gas
72167,ea02375c56a5287fde771be01aee81d3fd5543bf,2021-02-01 13:55:00 UTC,heat,hold,761,760,760,DC,Washington,0,False,False,False,Gas


In [61]:
# Tag every row with its year and month; both are used as grouping keys in
# the aggregation step.
# NOTE(review): the month label here is lowercase "feb" while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so that, once aggregated,
# their names say they hold averages.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the readings per Identifier, HvacMode, CalendarEvent and City.
# Month and Year are constant for this frame; grouping on them keeps them as
# index levels of the result.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, while older versions dropped them
# silently.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Write the February 2021 averages to disk. index=True keeps the group keys,
# which live in the index, as leading columns of the CSV.
feb_2021_ave.to_csv(
    path_or_buf="data/day/DC/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year CSV exports in the February folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/DC/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file and concatenate them in a single call. Growing a
# DataFrame with pd.concat inside a loop re-copies all earlier rows on each
# pass, and seeding it with an empty DataFrame can trigger a FutureWarning on
# recent pandas versions.
# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (the previous result) when the folder holds no CSV files.
DC_feb = (
    pd.concat([pd.read_csv("data/day/DC/feb/" + file) for file in files])
    if files
    else pd.DataFrame()
)

DC_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0731de82edef8853f48e57b7e3af32f7db039935,feb,2017,auto,auto,Washington,679.007246,760.000000,680.123188,100.0,False,False,True
1,0731de82edef8853f48e57b7e3af32f7db039935,feb,2017,auto,hold,Washington,677.753719,752.231405,678.953719,100.0,False,False,True
2,0977612bebbbbc2ff5a90040050f2a55607e6004,feb,2017,heat,auto,Washington,662.360000,664.940000,664.580000,5.0,False,False,False
3,0977612bebbbbc2ff5a90040050f2a55607e6004,feb,2017,heat,hold,Washington,699.611940,707.149254,704.537313,5.0,False,False,False
4,0dab5564077db11d375f2c8138456bd970432b9a,feb,2017,auto,hold,Washington,694.752000,760.336000,699.664000,99.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,f3200dcdf76d98f0843987ccd32ff8ee9f97bede,feb,2021,heat,hold,Washington,764.159420,766.086957,766.086957,95.0,False,False,False
113,f6dfd57fc02263a4403ded7415b7a0a5fff0f3e7,feb,2021,heat,hold,Washington,735.674397,738.923933,738.923933,0.0,True,False,False
114,fc63ce0d6f6ebbd50a13655a38b42706779a276c,feb,2021,heat,hold,Washington,720.500000,720.000000,720.000000,85.0,True,False,False
115,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,feb,2021,auto,hold,Washington DC,678.285714,740.408163,680.000000,7.0,False,False,False


In [67]:
# Export the combined month file without the row index
os.makedirs("Scraper_Output/State_Month_Day/DC", exist_ok=True)  # to_csv does not create missing folders
DC_feb.to_csv("Scraper_Output/State_Month_Day/DC/DC_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw DC "day" readings for June 2017
jun_2017 = pd.read_csv(
    "../data_large/DC-day/2017-jun-day-DC.csv"
)

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jun_2017['TemperatureExpectedCool'] >= 850)
    | (jun_2017['TemperatureExpectedHeat'] >= 850)
    | (jun_2017['TemperatureExpectedCool'] <= 600)
    | (jun_2017['TemperatureExpectedHeat'] <= 600)
)
jun_2017.drop(jun_2017[outlier_rows].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2017-06-09 19:35:00 UTC,cool,hold,739,745,745,DC,Washington,5,False,False,False,Gas
1,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-06-18 12:15:00 UTC,cool,hold,712,707,707,DC,Washington,0,False,False,True,Electric
2,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2017-06-09 16:10:00 UTC,cool,hold,741,745,745,DC,Washington,5,False,False,False,Gas
3,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2017-06-09 14:40:00 UTC,cool,hold,738,745,745,DC,Washington,5,False,False,False,Gas
4,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-06-24 19:15:00 UTC,cool,hold,753,752,752,DC,Washington,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74895,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2017-06-23 19:10:00 UTC,cool,auto,749,770,760,DC,Washington,100,False,False,False,Gas
74896,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2017-06-19 19:20:00 UTC,cool,hold,758,760,760,DC,Washington,100,False,False,False,Gas
74897,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2017-06-24 15:50:00 UTC,cool,hold,771,760,760,DC,Washington,100,False,False,False,Gas
74898,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2017-06-25 17:50:00 UTC,cool,auto,768,760,760,DC,Washington,100,False,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings)
jun_2017["Year"], jun_2017["Month"] = "2017", "jun"

In [71]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=ave_labels)

In [72]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jun", exist_ok=True)  # to_csv does not create missing folders
jun_2017_ave.to_csv("data/day/DC/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the raw DC "day" readings for June 2018
jun_2018 = pd.read_csv(
    "../data_large/DC-day/2018-jun-day-DC.csv"
)

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jun_2018['TemperatureExpectedCool'] >= 850)
    | (jun_2018['TemperatureExpectedHeat'] >= 850)
    | (jun_2018['TemperatureExpectedCool'] <= 600)
    | (jun_2018['TemperatureExpectedHeat'] <= 600)
)
jun_2018.drop(jun_2018[outlier_rows].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,32ad8af95ae08ea2b18a6c19bc6ffd42c756d7f0,2018-06-07 15:50:00 UTC,cool,hold,784,800,800,DC,Washington,115,False,False,True,Electric
1,32ad8af95ae08ea2b18a6c19bc6ffd42c756d7f0,2018-06-04 15:00:00 UTC,cool,hold,788,800,800,DC,Washington,115,False,False,True,Electric
2,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2018-06-04 13:55:00 UTC,cool,hold,730,745,745,DC,Washington,20,False,False,False,Gas
3,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2018-06-02 15:35:00 UTC,cool,hold,758,755,755,DC,Washington,20,False,False,False,Gas
4,32ad8af95ae08ea2b18a6c19bc6ffd42c756d7f0,2018-06-04 14:25:00 UTC,cool,hold,783,800,800,DC,Washington,115,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149283,744113fa6489b733b94ee8c68516b7320449eeff,2018-06-07 11:35:00 UTC,cool,hold,754,760,760,DC,Washington,35,False,False,False,Gas
149284,744113fa6489b733b94ee8c68516b7320449eeff,2018-06-07 12:40:00 UTC,cool,hold,757,760,760,DC,Washington,35,False,False,False,Gas
149285,744113fa6489b733b94ee8c68516b7320449eeff,2018-06-11 10:50:00 UTC,cool,hold,754,760,760,DC,Washington,35,False,False,False,Gas
149286,744113fa6489b733b94ee8c68516b7320449eeff,2018-06-07 12:45:00 UTC,cool,hold,757,760,760,DC,Washington,35,False,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings)
jun_2018["Year"], jun_2018["Month"] = "2018", "jun"

In [77]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=ave_labels)

In [78]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jun", exist_ok=True)  # to_csv does not create missing folders
jun_2018_ave.to_csv("data/day/DC/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the raw DC "day" readings for June 2019
jun_2019 = pd.read_csv(
    "../data_large/DC-day/2019-jun-day-DC.csv"
)

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jun_2019['TemperatureExpectedCool'] >= 850)
    | (jun_2019['TemperatureExpectedHeat'] >= 850)
    | (jun_2019['TemperatureExpectedCool'] <= 600)
    | (jun_2019['TemperatureExpectedHeat'] <= 600)
)
jun_2019.drop(jun_2019[outlier_rows].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6cbcfe6a8ab1b88ca2eb371f82a17a6b23c7c6da,2019-06-21 18:05:00 UTC,cool,hold,747,745,745,DC,Washington,77,False,False,False,Gas
1,71dfd05ed9d25ad64653fc69999e3c2c7a8b7ff1,2019-06-08 16:30:00 UTC,cool,hold,754,749,749,DC,Washington,20,False,False,True,Electric
2,2c416acdac92290927562967243027a870ed8051,2019-06-28 13:55:00 UTC,auto,hold,685,681,624,DC,Washington,10,False,False,True,Electric
3,2c416acdac92290927562967243027a870ed8051,2019-06-25 12:45:00 UTC,auto,hold,689,681,624,DC,Washington,10,False,False,True,Electric
4,71dfd05ed9d25ad64653fc69999e3c2c7a8b7ff1,2019-06-07 19:40:00 UTC,cool,hold,737,739,739,DC,Washington,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173859,744113fa6489b733b94ee8c68516b7320449eeff,2019-06-22 14:15:00 UTC,cool,hold,761,760,760,DC,Washington,35,False,False,False,Gas
173860,744113fa6489b733b94ee8c68516b7320449eeff,2019-06-18 10:45:00 UTC,cool,hold,759,760,760,DC,Washington,35,False,False,False,Gas
173861,744113fa6489b733b94ee8c68516b7320449eeff,2019-06-25 12:05:00 UTC,cool,auto,748,760,760,DC,Washington,35,False,False,False,Gas
173862,7787b2d1ea119d7345162731e8532dd04667bd2d,2019-06-28 10:45:00 UTC,cool,hold,698,760,760,DC,Washington,77,False,False,True,Electric


In [82]:
# Tag every row with its year and month (both stored as strings)
jun_2019["Year"], jun_2019["Month"] = "2019", "jun"

In [83]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=ave_labels)

In [84]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jun", exist_ok=True)  # to_csv does not create missing folders
jun_2019_ave.to_csv("data/day/DC/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the raw DC "day" readings for June 2020
jun_2020 = pd.read_csv(
    "../data_large/DC-day/2020-jun-day-DC.csv"
)

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jun_2020['TemperatureExpectedCool'] >= 850)
    | (jun_2020['TemperatureExpectedHeat'] >= 850)
    | (jun_2020['TemperatureExpectedCool'] <= 600)
    | (jun_2020['TemperatureExpectedHeat'] <= 600)
)
jun_2020.drop(jun_2020[outlier_rows].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4b40b74a3a7ec2f625ba374897103f81f613c70c,2020-06-01 17:40:00 UTC,auto,auto,729,743,636,DC,Washington,90,True,False,False,Gas
1,7e5bc97e4c6471d2d39a1eaeb1712bdd37e410ad,2020-06-02 10:30:00 UTC,cool,auto,733,800,745,DC,Washington,60,False,False,False,Gas
2,7e5bc97e4c6471d2d39a1eaeb1712bdd37e410ad,2020-06-03 11:10:00 UTC,cool,auto,769,800,745,DC,Washington,60,False,False,False,Gas
3,65d581acdaa4edd8762f1a183c177e90c502f2c5,2020-06-17 18:00:00 UTC,cool,hold,756,759,759,DC,Washington,15,False,False,True,Electric
4,ff2ddd309a304ef6a61b7564029f29e271eb9685,2020-06-19 15:15:00 UTC,cool,hold,792,787,787,DC,Washington,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174066,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-06-18 13:20:00 UTC,cool,auto,761,760,760,DC,Washington,100,False,False,False,Gas
174067,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-06-29 11:40:00 UTC,cool,auto,765,760,760,DC,Washington,100,False,False,False,Gas
174068,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-06-15 11:45:00 UTC,cool,auto,748,760,760,DC,Washington,100,False,False,False,Gas
174069,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-06-07 15:50:00 UTC,cool,auto,762,760,760,DC,Washington,100,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings)
jun_2020["Year"], jun_2020["Month"] = "2020", "jun"

In [89]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=ave_labels)

In [90]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jun", exist_ok=True)  # to_csv does not create missing folders
jun_2020_ave.to_csv("data/day/DC/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the raw DC "day" readings for June 2021
jun_2021 = pd.read_csv(
    "../data_large/DC-day/2021-jun-day-DC.csv"
)

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jun_2021['TemperatureExpectedCool'] >= 850)
    | (jun_2021['TemperatureExpectedHeat'] >= 850)
    | (jun_2021['TemperatureExpectedCool'] <= 600)
    | (jun_2021['TemperatureExpectedHeat'] <= 600)
)
jun_2021.drop(jun_2021[outlier_rows].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b20f53f249509a313e694325a4433a15f5774d04,2021-06-19 14:40:00 UTC,cool,hold,725,731,609,DC,Washington,90,False,False,False,Gas
1,a89ed5c21dad08c99ec5b0d456d4b20064e1a8be,2021-06-05 19:10:00 UTC,auto,hold,705,705,655,DC,Washington,35,False,False,False,Gas
2,32ad8af95ae08ea2b18a6c19bc6ffd42c756d7f0,2021-06-25 13:40:00 UTC,cool,hold,780,800,800,DC,Washington,115,False,False,True,Electric
3,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-06-27 13:30:00 UTC,cool,hold,713,712,712,DC,Washington,79,False,False,False,Gas
4,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-06-17 15:50:00 UTC,cool,hold,731,737,737,DC,Washington,79,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92764,de380df23bb3ae110456aa06f5b261cf50ac0274,2021-06-23 15:25:00 UTC,cool,hold,696,690,690,DC,Washington,0,False,False,True,Electric
92765,de380df23bb3ae110456aa06f5b261cf50ac0274,2021-06-23 12:30:00 UTC,cool,hold,689,690,690,DC,Washington,0,False,False,True,Electric
92766,de380df23bb3ae110456aa06f5b261cf50ac0274,2021-06-25 13:20:00 UTC,cool,hold,689,690,690,DC,Washington,0,False,False,True,Electric
92767,de380df23bb3ae110456aa06f5b261cf50ac0274,2021-06-24 16:45:00 UTC,cool,hold,698,690,690,DC,Washington,0,False,False,True,Electric


In [94]:
# Tag every row with its year and month (both stored as strings)
jun_2021["Year"], jun_2021["Month"] = "2021", "jun"

In [95]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=ave_labels)

In [96]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jun", exist_ok=True)  # to_csv does not create missing folders
jun_2021_ave.to_csv("data/day/DC/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every per-year average CSV in the month's folder
2. Export them as a single combined CSV

In [98]:
# List the per-year average CSVs for the month. Sorted because os.listdir
# returns entries in arbitrary order and the combined rows should be stable.
files = sorted(f for f in os.listdir("data/day/DC/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file, then concatenate once (avoids re-copying the growing frame
# on every iteration and concatenating onto an empty DataFrame).
monthly_frames = []
for file in files:
    df = pd.read_csv("data/day/DC/jun/" + file)
    monthly_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame as before
DC_jun = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

DC_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0731de82edef8853f48e57b7e3af32f7db039935,jun,2017,auto,auto,Washington,742.603001,745.145975,647.407913,100.0,False,False,True
1,0731de82edef8853f48e57b7e3af32f7db039935,jun,2017,auto,hold,Washington,743.118644,749.384181,653.742938,100.0,False,False,True
2,0731de82edef8853f48e57b7e3af32f7db039935,jun,2017,cool,hold,Washington,793.529412,730.000000,680.000000,100.0,False,False,True
3,0977612bebbbbc2ff5a90040050f2a55607e6004,jun,2017,cool,auto,Washington,731.750000,723.765625,699.187500,5.0,False,False,False
4,0977612bebbbbc2ff5a90040050f2a55607e6004,jun,2017,cool,hold,Washington,727.072727,744.745455,738.618182,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,fc63ce0d6f6ebbd50a13655a38b42706779a276c,jun,2021,auto,hold,Washington,729.871212,727.651515,700.000000,85.0,True,False,False
125,fc7bd777fbf1d7b8333c39f07797ba2382ef456f,jun,2021,cool,hold,Washington,733.000000,720.000000,720.000000,90.0,False,False,False
126,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,jun,2021,auto,hold,Washington DC,752.420455,765.761364,653.750000,7.0,False,False,False
127,fe3fc705058e11bc89c6989ef320a003cf05b516,jun,2021,cool,hold,Washington,731.674699,732.903614,732.903614,40.0,True,False,True


In [100]:
# Export the combined month file without the row index
os.makedirs("Scraper_Output/State_Month_Day/DC", exist_ok=True)  # to_csv does not create missing folders
DC_jun.to_csv("Scraper_Output/State_Month_Day/DC/DC_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the raw DC "day" readings for July 2017
jul_2017 = pd.read_csv(
    "../data_large/DC-day/2017-jul-day-DC.csv"
)

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jul_2017['TemperatureExpectedCool'] >= 850)
    | (jul_2017['TemperatureExpectedHeat'] >= 850)
    | (jul_2017['TemperatureExpectedCool'] <= 600)
    | (jul_2017['TemperatureExpectedHeat'] <= 600)
)
jul_2017.drop(jul_2017[outlier_rows].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-07-29 18:00:00 UTC,cool,hold,744,757,757,DC,Washington,0,False,False,True,Electric
1,160417d105f2e89bd9614aa1380f0ac6f49fa360,2017-07-09 15:25:00 UTC,auto,auto,740,715,665,DC,Washington,10,False,False,False,Gas
2,ff2ddd309a304ef6a61b7564029f29e271eb9685,2017-07-07 18:30:00 UTC,cool,hold,771,769,769,DC,Washington,10,False,False,True,Electric
3,ff2ddd309a304ef6a61b7564029f29e271eb9685,2017-07-14 10:40:00 UTC,cool,hold,762,759,759,DC,Washington,10,False,False,True,Electric
4,2efaf48c36680e6cef59ad36615ea2c48369ce72,2017-07-02 12:00:00 UTC,cool,hold,726,732,727,DC,Washington,100,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86184,1a243af962525a82097a7d37d6b9c986d533cc59,2017-07-14 18:25:00 UTC,cool,hold,716,710,710,DC,Washington,30,False,False,False,Gas
86185,1a243af962525a82097a7d37d6b9c986d533cc59,2017-07-01 11:30:00 UTC,cool,hold,717,710,710,DC,Washington,30,False,False,False,Gas
86186,1a243af962525a82097a7d37d6b9c986d533cc59,2017-07-17 12:50:00 UTC,cool,auto,710,710,710,DC,Washington,30,False,False,False,Gas
86187,1a243af962525a82097a7d37d6b9c986d533cc59,2017-07-24 13:45:00 UTC,cool,auto,706,700,710,DC,Washington,30,False,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings)
jul_2017["Year"], jul_2017["Month"] = "2017", "jul"

In [104]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=ave_labels)

In [105]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jul", exist_ok=True)  # to_csv does not create missing folders
jul_2017_ave.to_csv("data/day/DC/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the raw DC "day" readings for July 2018
jul_2018 = pd.read_csv(
    "../data_large/DC-day/2018-jul-day-DC.csv"
)

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jul_2018['TemperatureExpectedCool'] >= 850)
    | (jul_2018['TemperatureExpectedHeat'] >= 850)
    | (jul_2018['TemperatureExpectedCool'] <= 600)
    | (jul_2018['TemperatureExpectedHeat'] <= 600)
)
jul_2018.drop(jul_2018[outlier_rows].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,32ad8af95ae08ea2b18a6c19bc6ffd42c756d7f0,2018-07-20 14:20:00 UTC,cool,hold,808,820,800,DC,Washington,115,False,False,True,Electric
1,f3200dcdf76d98f0843987ccd32ff8ee9f97bede,2018-07-13 11:55:00 UTC,cool,auto,736,736,686,DC,Washington,95,False,False,False,Gas
2,f3200dcdf76d98f0843987ccd32ff8ee9f97bede,2018-07-08 15:25:00 UTC,cool,hold,739,746,746,DC,Washington,95,False,False,False,Gas
3,32ad8af95ae08ea2b18a6c19bc6ffd42c756d7f0,2018-07-28 16:35:00 UTC,cool,hold,807,820,800,DC,Washington,115,False,False,True,Electric
4,71dfd05ed9d25ad64653fc69999e3c2c7a8b7ff1,2018-07-14 15:40:00 UTC,cool,hold,792,788,788,DC,Washington,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173088,c2aabc2a819e03786a52add287f51b9cec167d39,2018-07-27 17:50:00 UTC,cool,hold,760,760,760,DC,Washington,0,False,False,True,Electric
173089,c2aabc2a819e03786a52add287f51b9cec167d39,2018-07-29 13:10:00 UTC,cool,hold,739,760,760,DC,Washington,0,False,False,True,Electric
173090,c2aabc2a819e03786a52add287f51b9cec167d39,2018-07-05 12:45:00 UTC,cool,hold,745,760,760,DC,Washington,0,False,False,True,Electric
173091,c2aabc2a819e03786a52add287f51b9cec167d39,2018-07-11 18:45:00 UTC,cool,auto,764,760,760,DC,Washington,0,False,False,True,Electric


In [109]:
# Tag every row with its year and month (both stored as strings)
jul_2018["Year"], jul_2018["Month"] = "2018", "jul"

In [110]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=ave_labels)

In [111]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jul", exist_ok=True)  # to_csv does not create missing folders
jul_2018_ave.to_csv("data/day/DC/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the raw DC "day" readings for July 2019
jul_2019 = pd.read_csv(
    "../data_large/DC-day/2019-jul-day-DC.csv"
)

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jul_2019['TemperatureExpectedCool'] >= 850)
    | (jul_2019['TemperatureExpectedHeat'] >= 850)
    | (jul_2019['TemperatureExpectedCool'] <= 600)
    | (jul_2019['TemperatureExpectedHeat'] <= 600)
)
jul_2019.drop(jul_2019[outlier_rows].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,71dfd05ed9d25ad64653fc69999e3c2c7a8b7ff1,2019-07-16 17:40:00 UTC,cool,auto,766,760,749,DC,Washington,20,False,False,True,Electric
1,2c416acdac92290927562967243027a870ed8051,2019-07-12 12:05:00 UTC,auto,auto,676,680,626,DC,Washington,10,False,False,True,Electric
2,aa353292a5c313a3764433d8ae8d8044a6e6b947,2019-07-11 11:05:00 UTC,cool,auto,693,702,652,DC,Washington,5,False,False,False,Gas
3,aa353292a5c313a3764433d8ae8d8044a6e6b947,2019-07-09 11:10:00 UTC,cool,auto,690,702,652,DC,Washington,5,False,False,False,Gas
4,aa353292a5c313a3764433d8ae8d8044a6e6b947,2019-07-07 15:30:00 UTC,cool,auto,692,702,652,DC,Washington,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
193546,7d9d2d824408dd79f46a66dc1307c326cbbaa6a3,2019-07-14 17:25:00 UTC,cool,hold,719,675,675,DC,Washington,20,False,False,False,Gas
193547,7d9d2d824408dd79f46a66dc1307c326cbbaa6a3,2019-07-14 18:45:00 UTC,cool,hold,734,675,675,DC,Washington,20,False,False,False,Gas
193548,7d9d2d824408dd79f46a66dc1307c326cbbaa6a3,2019-07-14 19:30:00 UTC,cool,hold,730,675,675,DC,Washington,20,False,False,False,Gas
193549,7d9d2d824408dd79f46a66dc1307c326cbbaa6a3,2019-07-14 17:35:00 UTC,cool,hold,718,675,675,DC,Washington,20,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings)
jul_2019["Year"], jul_2019["Month"] = "2019", "jul"

In [116]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=ave_labels)

In [117]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jul", exist_ok=True)  # to_csv does not create missing folders
jul_2019_ave.to_csv("data/day/DC/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the raw DC "day" readings for July 2020
jul_2020 = pd.read_csv(
    "../data_large/DC-day/2020-jul-day-DC.csv"
)

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jul_2020['TemperatureExpectedCool'] >= 850)
    | (jul_2020['TemperatureExpectedHeat'] >= 850)
    | (jul_2020['TemperatureExpectedCool'] <= 600)
    | (jul_2020['TemperatureExpectedHeat'] <= 600)
)
jul_2020.drop(jul_2020[outlier_rows].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2020-07-20 18:40:00 UTC,auto,auto,766,763,673,DC,Washington,5,False,False,False,Gas
1,976c95bf0ecfd05b2d7e521a87f86500d6016654,2020-07-30 17:50:00 UTC,cool,hold,742,741,741,DC,Washington,79,False,False,False,Gas
2,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2020-07-24 14:55:00 UTC,auto,auto,757,763,673,DC,Washington,5,False,False,False,Gas
3,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2020-07-21 18:00:00 UTC,auto,auto,767,763,673,DC,Washington,5,False,False,False,Gas
4,9a1a0af8e0823c37e792039e1fd0609e7fb56494,2020-07-22 18:25:00 UTC,auto,auto,757,763,673,DC,Washington,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194663,bc7e8a02025a5ddd4a07d04b5207b402e47d89e9,2020-07-19 18:15:00 UTC,cool,hold,690,690,690,DC,Washington,70,False,False,False,Gas
194664,bc7e8a02025a5ddd4a07d04b5207b402e47d89e9,2020-07-19 19:15:00 UTC,cool,hold,693,690,690,DC,Washington,70,False,False,False,Gas
194665,bc7e8a02025a5ddd4a07d04b5207b402e47d89e9,2020-07-20 18:05:00 UTC,cool,hold,689,690,690,DC,Washington,70,False,False,False,Gas
194666,bc7e8a02025a5ddd4a07d04b5207b402e47d89e9,2020-07-20 13:50:00 UTC,cool,hold,688,690,690,DC,Washington,70,False,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings)
jul_2020["Year"], jul_2020["Month"] = "2020", "jul"

In [122]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=ave_labels)

In [123]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jul", exist_ok=True)  # to_csv does not create missing folders
jul_2020_ave.to_csv("data/day/DC/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the raw DC "day" readings for July 2021
jul_2021 = pd.read_csv(
    "../data_large/DC-day/2021-jul-day-DC.csv"
)

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
outlier_rows = (
    (jul_2021['TemperatureExpectedCool'] >= 850)
    | (jul_2021['TemperatureExpectedHeat'] >= 850)
    | (jul_2021['TemperatureExpectedHeat'] <= 600)
    | (jul_2021['TemperatureExpectedCool'] <= 600)
)
jul_2021.drop(jul_2021[outlier_rows].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-07-27 18:25:00 UTC,cool,hold,754,752,752,DC,Washington,79,False,False,False,Gas
1,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-07-25 19:35:00 UTC,cool,hold,714,712,712,DC,Washington,79,False,False,False,Gas
2,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-07-21 15:15:00 UTC,cool,hold,763,762,762,DC,Washington,79,False,False,False,Gas
3,976c95bf0ecfd05b2d7e521a87f86500d6016654,2021-07-11 19:40:00 UTC,cool,hold,742,742,742,DC,Washington,79,False,False,False,Gas
4,cfb77b9dc1a5867970758fad84a638d56dfacb34,2021-07-15 14:10:00 UTC,cool,hold,705,707,707,DC,Washington,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
91773,7d1119c8c13452f61be6aa202ea4303e82d20cfc,2021-07-11 13:05:00 UTC,cool,hold,719,720,720,DC,Washington,15,False,False,False,Gas
91774,7d1119c8c13452f61be6aa202ea4303e82d20cfc,2021-07-06 12:30:00 UTC,cool,hold,723,720,720,DC,Washington,15,False,False,False,Gas
91775,7d1119c8c13452f61be6aa202ea4303e82d20cfc,2021-07-01 13:15:00 UTC,cool,hold,723,720,720,DC,Washington,15,False,False,False,Gas
91776,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2021-07-06 18:10:00 UTC,cool,hold,729,720,720,DC,Washington,20,False,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings)
jul_2021["Year"], jul_2021["Month"] = "2021", "jul"

In [128]:
# Label the three temperature columns as averages ahead of the groupby mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=ave_labels)

In [129]:
# Mean of the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City. numeric_only=True keeps this working on pandas >= 2.0,
# where mean() raises on the text columns (e.g. date_time, ProvinceState)
# instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file (index=True writes the groupby keys as leading columns)
os.makedirs("data/day/DC/jul", exist_ok=True)  # to_csv does not create missing folders
jul_2021_ave.to_csv("data/day/DC/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every per-year average CSV in the month's folder
2. Export them as a single combined CSV

In [131]:
# List the per-year average CSVs for the month. Sorted because os.listdir
# returns entries in arbitrary order and the combined rows should be stable.
files = sorted(f for f in os.listdir("data/day/DC/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file, then concatenate once (avoids re-copying the growing frame
# on every iteration and concatenating onto an empty DataFrame).
monthly_frames = []
for file in files:
    df = pd.read_csv("data/day/DC/jul/" + file)
    monthly_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame as before
DC_jul = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

DC_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0977612bebbbbc2ff5a90040050f2a55607e6004,jul,2017,cool,auto,Washington,742.368421,749.578947,694.105263,5.0,False,False,False
1,0977612bebbbbc2ff5a90040050f2a55607e6004,jul,2017,cool,hold,Washington,729.575472,724.566038,723.962264,5.0,False,False,False
2,0ef03d5ff6a191d52626482c35b8ca26a346f033,jul,2017,auto,hold,Washington,750.675000,747.575000,687.725000,120.0,False,False,False
3,0ef03d5ff6a191d52626482c35b8ca26a346f033,jul,2017,cool,auto,Washington,730.833333,730.916667,708.833333,120.0,False,False,False
4,0ef03d5ff6a191d52626482c35b8ca26a346f033,jul,2017,cool,hold,Washington,743.405405,789.117117,781.972973,120.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
104,fc63ce0d6f6ebbd50a13655a38b42706779a276c,jul,2021,auto,hold,Washington,733.853403,735.926702,699.748691,85.0,True,False,False
105,fc7bd777fbf1d7b8333c39f07797ba2382ef456f,jul,2021,cool,hold,Washington,748.666667,749.000000,746.500000,90.0,False,False,False
106,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,jul,2021,auto,hold,Washington DC,745.008108,748.394595,658.189189,7.0,False,False,False
107,fe3fc705058e11bc89c6989ef320a003cf05b516,jul,2021,cool,hold,Washington,724.658730,715.206349,714.928571,40.0,True,False,True


In [133]:
# Write the combined July table; index=False leaves the row labels out of the file.
DC_jul_out = Path("Scraper_Output/State_Month_Day/DC/DC_jul.csv")
DC_jul_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
DC_jul.to_csv(DC_jul_out, header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the raw 2017 August day CSV for DC.
aug_2017_csv = "../data_large/DC-day/2017-aug-day-DC.csv"
aug_2017 = pd.read_csv(aug_2017_csv)

In [135]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2017['TemperatureExpectedCool']
expected_heat = aug_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
aug_2017 = aug_2017.loc[~is_outlier].copy()

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2efaf48c36680e6cef59ad36615ea2c48369ce72,2017-08-26 17:55:00 UTC,cool,hold,738,757,757,DC,Washington,100,False,False,True,Electric
1,7aff4e2495493970ca1744d2b9bc4de19bdddfc0,2017-08-26 17:10:00 UTC,cool,auto,745,741,684,DC,Washington,95,False,False,False,Gas
2,ff2ddd309a304ef6a61b7564029f29e271eb9685,2017-08-07 12:40:00 UTC,cool,hold,765,764,764,DC,Washington,10,False,False,True,Electric
3,ff2ddd309a304ef6a61b7564029f29e271eb9685,2017-08-07 14:40:00 UTC,cool,hold,765,784,784,DC,Washington,10,False,False,True,Electric
4,e10c2a6bc935212ce3623ee51bd4c6da3a456d57,2017-08-08 10:00:00 UTC,cool,auto,692,708,688,DC,Washington,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88019,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2017-08-24 15:30:00 UTC,cool,hold,724,720,720,DC,Washington,0,False,False,False,Gas
88020,e10c2a6bc935212ce3623ee51bd4c6da3a456d57,2017-08-16 15:15:00 UTC,cool,hold,733,730,720,DC,Washington,10,False,False,True,Electric
88021,2efaf48c36680e6cef59ad36615ea2c48369ce72,2017-08-27 12:10:00 UTC,cool,hold,720,720,720,DC,Washington,100,False,False,True,Electric
88022,e10c2a6bc935212ce3623ee51bd4c6da3a456d57,2017-08-19 13:10:00 UTC,cool,auto,707,700,720,DC,Washington,10,False,False,True,Electric


In [136]:
# Stamp every row with the year and month this file covers (both stored as text).
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_column_names)

In [138]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
aug_2017_ave = (
    aug_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [139]:
# Export CSV file. The group keys live in the index, so the index is written too.
aug_2017_out = Path("data/day/DC/aug/aug_2017_ave.csv")
aug_2017_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
aug_2017_ave.to_csv(aug_2017_out, header=True, index=True)

### 2018 August Day

In [140]:
# Load the raw 2018 August day CSV for DC.
aug_2018_csv = "../data_large/DC-day/2018-aug-day-DC.csv"
aug_2018 = pd.read_csv(aug_2018_csv)

In [141]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2018['TemperatureExpectedCool']
expected_heat = aug_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
aug_2018 = aug_2018.loc[~is_outlier].copy()

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0dab5564077db11d375f2c8138456bd970432b9a,2018-08-05 19:05:00 UTC,auto,hold,720,715,665,DC,Washington,99,False,False,False,Gas
1,5189b21d5aeea9c4ce00019c92ccc8df14f3d956,2018-08-19 18:10:00 UTC,cool,hold,757,761,756,DC,Washington,105,True,False,True,Electric
2,007b9ae939161adcf55e644a3a31bed6f9545465,2018-08-04 19:05:00 UTC,cool,hold,729,737,722,DC,Washington,98,False,False,False,Gas
3,f3200dcdf76d98f0843987ccd32ff8ee9f97bede,2018-08-23 11:55:00 UTC,cool,auto,741,740,686,DC,Washington,95,False,False,False,Gas
4,aba2a96d74b366b3b1e882bc69717211fb1d46e8,2018-08-26 17:30:00 UTC,auto,hold,773,785,755,DC,Washington,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162991,eb673e8a430846067e9311ff669f15fc53664b34,2018-08-16 16:40:00 UTC,cool,hold,760,760,760,DC,Washington,95,False,False,False,Gas
162992,eb673e8a430846067e9311ff669f15fc53664b34,2018-08-16 13:50:00 UTC,cool,hold,758,760,760,DC,Washington,95,False,False,False,Gas
162993,eb673e8a430846067e9311ff669f15fc53664b34,2018-08-16 15:50:00 UTC,cool,hold,762,760,760,DC,Washington,95,False,False,False,Gas
162994,eb673e8a430846067e9311ff669f15fc53664b34,2018-08-16 13:15:00 UTC,cool,hold,764,760,760,DC,Washington,95,False,False,False,Gas


In [142]:
# Stamp every row with the year and month this file covers (both stored as text).
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_column_names)

In [144]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
aug_2018_ave = (
    aug_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [145]:
# Export CSV file. The group keys live in the index, so the index is written too.
aug_2018_out = Path("data/day/DC/aug/aug_2018_ave.csv")
aug_2018_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
aug_2018_ave.to_csv(aug_2018_out, header=True, index=True)

### 2019 August Day

In [146]:
# Load the raw 2019 August day CSV for DC.
aug_2019_csv = "../data_large/DC-day/2019-aug-day-DC.csv"
aug_2019 = pd.read_csv(aug_2019_csv)

In [147]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2019['TemperatureExpectedCool']
expected_heat = aug_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
aug_2019 = aug_2019.loc[~is_outlier].copy()

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,71dfd05ed9d25ad64653fc69999e3c2c7a8b7ff1,2019-08-02 15:00:00 UTC,cool,auto,750,750,749,DC,Washington,20,False,False,True,Electric
1,92a16d6abc820d0745122441c2414ad222ae3464,2019-08-22 19:05:00 UTC,cool,auto,719,716,716,DC,Washington,0,True,False,True,Electric
2,aa353292a5c313a3764433d8ae8d8044a6e6b947,2019-08-31 15:35:00 UTC,cool,auto,696,702,652,DC,Washington,5,False,False,False,Gas
3,92a16d6abc820d0745122441c2414ad222ae3464,2019-08-02 12:50:00 UTC,cool,auto,797,734,734,DC,Washington,0,True,False,True,Electric
4,2c416acdac92290927562967243027a870ed8051,2019-08-04 15:25:00 UTC,auto,auto,695,689,626,DC,Washington,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182060,ee4246f4a8557f95b8ffcf6c22b39c65a2ebdd42,2019-08-11 18:25:00 UTC,cool,hold,759,760,760,DC,Washington,90,True,False,False,Gas
182061,ee4246f4a8557f95b8ffcf6c22b39c65a2ebdd42,2019-08-11 12:00:00 UTC,cool,hold,757,760,760,DC,Washington,90,True,False,False,Gas
182062,ee4246f4a8557f95b8ffcf6c22b39c65a2ebdd42,2019-08-11 10:35:00 UTC,cool,hold,763,760,760,DC,Washington,90,True,False,False,Gas
182063,ee4246f4a8557f95b8ffcf6c22b39c65a2ebdd42,2019-08-11 17:35:00 UTC,cool,hold,761,760,760,DC,Washington,90,True,False,False,Gas


In [148]:
# Stamp every row with the year and month this file covers (both stored as text).
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_column_names)

In [150]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
aug_2019_ave = (
    aug_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [151]:
# Export CSV file. The group keys live in the index, so the index is written too.
aug_2019_out = Path("data/day/DC/aug/aug_2019_ave.csv")
aug_2019_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
aug_2019_ave.to_csv(aug_2019_out, header=True, index=True)

### 2020 August Day

In [152]:
# Load the raw 2020 August day CSV for DC.
aug_2020_csv = "../data_large/DC-day/2020-aug-day-DC.csv"
aug_2020 = pd.read_csv(aug_2020_csv)

In [153]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2020['TemperatureExpectedCool']
expected_heat = aug_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
aug_2020 = aug_2020.loc[~is_outlier].copy()

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2020-08-25 19:40:00 UTC,cool,hold,743,723,723,DC,Washington,0,False,False,False,Gas
1,825401e34b1fc30e64ef1180849cc4ae5ded349d,2020-08-30 11:10:00 UTC,cool,auto,686,700,676,DC,Washington,0,True,False,True,Electric
2,825401e34b1fc30e64ef1180849cc4ae5ded349d,2020-08-16 12:30:00 UTC,cool,auto,687,700,676,DC,Washington,0,True,False,True,Electric
3,e32927514b1e47ea5e4c1c6204660fbc22c7b572,2020-08-10 17:05:00 UTC,cool,hold,746,745,745,DC,Washington,0,False,False,False,Gas
4,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2020-08-21 16:30:00 UTC,cool,hold,741,743,743,DC,Washington,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190434,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-08-21 11:55:00 UTC,cool,hold,761,760,760,DC,Washington,100,False,False,False,Gas
190435,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-08-18 18:55:00 UTC,cool,hold,769,760,760,DC,Washington,100,False,False,False,Gas
190436,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-08-18 19:30:00 UTC,cool,hold,755,760,760,DC,Washington,100,False,False,False,Gas
190437,ed20fac1913ced227fdf66c8b2a0c3aa49316eaa,2020-08-13 11:35:00 UTC,cool,hold,764,760,760,DC,Washington,100,False,False,False,Gas


In [154]:
# Stamp every row with the year and month this file covers (both stored as text).
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_column_names)

In [156]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
aug_2020_ave = (
    aug_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [157]:
# Export CSV file. The group keys live in the index, so the index is written too.
aug_2020_out = Path("data/day/DC/aug/aug_2020_ave.csv")
aug_2020_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
aug_2020_ave.to_csv(aug_2020_out, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [158]:
# List the per-year CSV files for this month.
# sorted() because os.listdir returns entries in arbitrary order, which would make
# the row order of the combined file differ between machines and runs.
files = sorted(f for f in os.listdir("data/day/DC/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once. Growing a frame inside the
# loop re-copies all earlier rows on each pass and starts from an empty frame,
# which recent pandas versions warn about.
aug_frames = [pd.read_csv("data/day/DC/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
DC_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

DC_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0977612bebbbbc2ff5a90040050f2a55607e6004,aug,2017,cool,hold,Washington,730.315789,726.842105,717.315789,5.0,False,False,False
1,0dab5564077db11d375f2c8138456bd970432b9a,aug,2017,auto,auto,Washington,728.435484,730.306452,670.000000,99.0,False,False,False
2,0dab5564077db11d375f2c8138456bd970432b9a,aug,2017,auto,hold,Washington,730.600000,730.000000,670.000000,99.0,False,False,False
3,0ef03d5ff6a191d52626482c35b8ca26a346f033,aug,2017,cool,hold,Washington,745.354232,753.081505,752.877743,120.0,False,False,False
4,12a74288cc6f74cdf4d8fc4641d806a8e4f3a7b2,aug,2017,cool,auto,Washington,769.537653,771.073555,744.128722,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,aug,2020,auto,auto,Washington DC,748.212500,746.875000,663.006250,7.0,False,False,False
246,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,aug,2020,auto,hold,Washington DC,745.779412,753.220588,657.735294,7.0,False,False,False
247,fe3fc705058e11bc89c6989ef320a003cf05b516,aug,2020,cool,auto,Washington,722.083333,720.375000,695.000000,40.0,True,False,True
248,ff2ddd309a304ef6a61b7564029f29e271eb9685,aug,2020,cool,hold,Washington,805.266667,806.806061,788.890909,10.0,False,False,True


In [160]:
# Write the combined August table; index=False leaves the row labels out of the file.
DC_aug_out = Path("Scraper_Output/State_Month_Day/DC/DC_aug.csv")
DC_aug_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
DC_aug.to_csv(DC_aug_out, header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw 2017 December day CSV for DC.
dec_2017_csv = "../data_large/DC-day/2017-dec-day-DC.csv"
dec_2017 = pd.read_csv(dec_2017_csv)

In [162]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2017['TemperatureExpectedCool']
expected_heat = dec_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
dec_2017 = dec_2017.loc[~is_outlier].copy()

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2017-12-19 14:45:00 UTC,heat,hold,759,751,751,DC,Washington,20,False,False,False,Gas
1,939b8cfb049fd0e08b2627f0fb1a166ad697ac6c,2017-12-18 16:40:00 UTC,heat,hold,766,751,751,DC,Washington,20,False,False,False,Gas
2,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-12-24 17:20:00 UTC,heat,hold,737,704,704,DC,Washington,0,False,False,True,Electric
3,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-12-18 18:25:00 UTC,heat,hold,748,678,678,DC,Washington,0,False,False,True,Electric
4,2db691a18f43b6a0940805f9e0a6436a652eaf56,2017-12-19 19:55:00 UTC,heat,hold,765,650,648,DC,Washington,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124755,9d8430a2d40456e91f7cd084563b8bd23a03c115,2017-12-27 14:40:00 UTC,heat,hold,751,760,760,DC,Washington,105,False,False,False,Gas
124756,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2017-12-03 12:10:00 UTC,auto,auto,750,810,760,DC,Washington,5,False,False,True,Electric
124757,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2017-12-03 12:45:00 UTC,auto,auto,753,810,760,DC,Washington,5,False,False,True,Electric
124758,bcd834bcc1865fd141bb5652d54649a98bcca5eb,2017-12-03 11:45:00 UTC,auto,auto,766,810,760,DC,Washington,5,False,False,True,Electric


In [163]:
# Stamp every row with the year and month this file covers (both stored as text).
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_column_names)

In [165]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
dec_2017_ave = (
    dec_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [166]:
# Export CSV file. The group keys live in the index, so the index is written too.
dec_2017_out = Path("data/day/DC/dec/dec_2017_ave.csv")
dec_2017_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
dec_2017_ave.to_csv(dec_2017_out, header=True, index=True)

### 2018 December Day

In [167]:
# Load the raw 2018 December day CSV for DC.
dec_2018_csv = "../data_large/DC-day/2018-dec-day-DC.csv"
dec_2018 = pd.read_csv(dec_2018_csv)

In [168]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2018['TemperatureExpectedCool']
expected_heat = dec_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
dec_2018 = dec_2018.loc[~is_outlier].copy()

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,9b99bc3639b08576fbe7a553d86805aafe5c436c,2018-12-10 11:10:00 UTC,heat,hold,641,650,639,DC,Washington,50,False,False,True,Electric
2,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-12-30 11:30:00 UTC,heat,auto,696,722,722,DC,Washington,0,False,False,True,Electric
3,b50cf208f44d5b0c4182daceaa2aaca1cfe01e33,2018-12-09 18:10:00 UTC,heat,auto,652,809,636,DC,Washington,9,False,False,False,Gas
4,2db691a18f43b6a0940805f9e0a6436a652eaf56,2018-12-18 12:35:00 UTC,heat,hold,733,738,738,DC,Washington,0,False,False,True,Electric
6,2c416acdac92290927562967243027a870ed8051,2018-12-30 16:25:00 UTC,auto,auto,718,824,644,DC,Washington,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170631,26423c0824d6b079eaf5544722bb80137eca53a4,2018-12-04 11:45:00 UTC,heat,auto,697,760,710,DC,Washington,0,False,False,False,Gas
170632,26423c0824d6b079eaf5544722bb80137eca53a4,2018-12-28 13:50:00 UTC,heat,auto,709,680,710,DC,Washington,0,False,False,False,Gas
170633,26423c0824d6b079eaf5544722bb80137eca53a4,2018-12-28 17:05:00 UTC,heat,auto,708,680,710,DC,Washington,0,False,False,False,Gas
170634,26423c0824d6b079eaf5544722bb80137eca53a4,2018-12-28 14:10:00 UTC,heat,auto,706,680,710,DC,Washington,0,False,False,False,Gas


In [169]:
# Stamp every row with the year and month this file covers (both stored as text).
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_column_names)

In [171]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
dec_2018_ave = (
    dec_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [172]:
# Export CSV file. The group keys live in the index, so the index is written too.
dec_2018_out = Path("data/day/DC/dec/dec_2018_ave.csv")
dec_2018_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
dec_2018_ave.to_csv(dec_2018_out, header=True, index=True)

### 2019 December Day

In [173]:
# Load the raw 2019 December day CSV for DC.
dec_2019_csv = "../data_large/DC-day/2019-dec-day-DC.csv"
dec_2019 = pd.read_csv(dec_2019_csv)

In [174]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2019['TemperatureExpectedCool']
expected_heat = dec_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
dec_2019 = dec_2019.loc[~is_outlier].copy()

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2019-12-10 17:45:00 UTC,heat,hold,720,713,713,DC,Washington,0,False,False,False,Gas
1,e10c2a6bc935212ce3623ee51bd4c6da3a456d57,2019-12-21 13:35:00 UTC,heat,hold,726,734,734,DC,Washington,10,False,False,True,Electric
2,60dfd6c936d793d8ad3a6753c94cc3c4c943bcf8,2019-12-21 16:30:00 UTC,heat,auto,661,692,662,DC,Washington,115,True,False,False,Gas
3,9b99bc3639b08576fbe7a553d86805aafe5c436c,2019-12-06 16:10:00 UTC,heat,hold,699,669,669,DC,Washington,50,False,False,True,Electric
4,dff9dfa40600bd94e5aee4ea5faeb1be8c59af64,2019-12-05 17:35:00 UTC,heat,hold,706,713,713,DC,Washington,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176012,ea02375c56a5287fde771be01aee81d3fd5543bf,2019-12-15 14:00:00 UTC,heat,auto,746,760,760,DC,Washington,0,False,False,False,Gas
176013,ea02375c56a5287fde771be01aee81d3fd5543bf,2019-12-15 14:15:00 UTC,heat,auto,758,760,760,DC,Washington,0,False,False,False,Gas
176014,ea02375c56a5287fde771be01aee81d3fd5543bf,2019-12-15 14:05:00 UTC,heat,auto,751,760,760,DC,Washington,0,False,False,False,Gas
176015,ea02375c56a5287fde771be01aee81d3fd5543bf,2019-12-15 14:40:00 UTC,heat,auto,756,760,760,DC,Washington,0,False,False,False,Gas


In [175]:
# Stamp every row with the year and month this file covers (both stored as text).
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_column_names)

In [177]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
dec_2019_ave = (
    dec_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [178]:
# Export CSV file. The group keys live in the index, so the index is written too.
dec_2019_out = Path("data/day/DC/dec/dec_2019_ave.csv")
dec_2019_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
dec_2019_ave.to_csv(dec_2019_out, header=True, index=True)

### 2020 December Day

In [179]:
# Load the raw 2020 December day CSV for DC.
dec_2020_csv = "../data_large/DC-day/2020-dec-day-DC.csv"
dec_2020 = pd.read_csv(dec_2020_csv)

In [180]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2020['TemperatureExpectedCool']
expected_heat = dec_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)

# Surviving rows keep their original index labels; copy() makes the result an independent frame.
dec_2020 = dec_2020.loc[~is_outlier].copy()

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,976c95bf0ecfd05b2d7e521a87f86500d6016654,2020-12-07 17:15:00 UTC,heat,hold,733,732,732,DC,Washington,79,False,False,False,Gas
1,e32927514b1e47ea5e4c1c6204660fbc22c7b572,2020-12-14 15:25:00 UTC,heat,hold,738,745,745,DC,Washington,0,False,False,False,Gas
2,976c95bf0ecfd05b2d7e521a87f86500d6016654,2020-12-03 18:35:00 UTC,heat,hold,802,802,812,DC,Washington,79,False,False,False,Gas
3,46b88f1ad72a8c73d44ba5e10c99a69f81224b40,2020-12-11 13:05:00 UTC,heat,hold,718,717,717,DC,Washington,0,True,False,True,Electric
4,29c6ab4bdab51b4cff102ce7aef8582fc76bb363,2020-12-19 17:50:00 UTC,auto,hold,746,808,758,DC,Washington,29,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135546,bb6acfb59961111ff231ff63250e82353cb49112,2020-12-20 18:40:00 UTC,heat,auto,757,760,760,DC,Washington,55,False,False,False,Gas
135547,bb6acfb59961111ff231ff63250e82353cb49112,2020-12-21 14:30:00 UTC,heat,auto,756,760,760,DC,Washington,55,False,False,False,Gas
135548,bb6acfb59961111ff231ff63250e82353cb49112,2020-12-21 13:55:00 UTC,heat,auto,760,760,760,DC,Washington,55,False,False,False,Gas
135549,bb6acfb59961111ff231ff63250e82353cb49112,2020-12-21 10:35:00 UTC,heat,auto,754,760,760,DC,Washington,55,False,False,False,Gas


In [181]:
# Stamp every row with the year and month this file covers (both stored as text).
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Label the three temperature columns as averages ahead of the group-by mean.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_column_names)

In [183]:
# Average the numeric columns for each thermostat / mode / calendar-event / city group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
dec_2020_ave = (
    dec_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [184]:
# Export CSV file. The group keys live in the index, so the index is written too.
dec_2020_out = Path("data/day/DC/dec/dec_2020_ave.csv")
dec_2020_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
dec_2020_ave.to_csv(dec_2020_out, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [185]:
# List the per-year CSV files for this month.
# sorted() because os.listdir returns entries in arbitrary order, which would make
# the row order of the combined file differ between machines and runs.
files = sorted(f for f in os.listdir("data/day/DC/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once. Growing a frame inside the
# loop re-copies all earlier rows on each pass and starts from an empty frame,
# which recent pandas versions warn about.
dec_frames = [pd.read_csv("data/day/DC/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
DC_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

DC_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,08049f898acb8591a4cad8c0900db78ad73b307f,dec,2017,heat,auto,Washington,692.950307,691.926298,691.926298,90.0,False,False,False
1,08049f898acb8591a4cad8c0900db78ad73b307f,dec,2017,heat,hold,Washington,697.379404,691.791328,691.791328,90.0,False,False,False
2,0c64b17b991767ef275de42088e69d0986620c82,dec,2017,auto,hold,Washington,697.824561,760.000000,700.000000,0.0,True,False,False
3,0dab5564077db11d375f2c8138456bd970432b9a,dec,2017,auto,auto,Washington,696.205128,749.230769,699.205128,99.0,False,False,False
4,0dab5564077db11d375f2c8138456bd970432b9a,dec,2017,auto,hold,Washington,695.690909,754.236364,699.490909,99.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
234,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,dec,2020,auto,auto,Washington DC,669.802120,747.904594,672.134276,7.0,False,False,False
235,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,dec,2020,auto,hold,Washington DC,653.777778,710.555556,660.555556,7.0,False,False,False
236,fe3fc705058e11bc89c6989ef320a003cf05b516,dec,2020,heat,auto,Washington,728.104651,734.034884,734.034884,40.0,True,False,True
237,fe3fc705058e11bc89c6989ef320a003cf05b516,dec,2020,heat,hold,Washington,730.308357,734.512968,734.403458,40.0,True,False,True


In [187]:
# Write the combined December table; index=False leaves the row labels out of the file.
DC_dec_out = Path("Scraper_Output/State_Month_Day/DC/DC_dec.csv")
DC_dec_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
DC_dec.to_csv(DC_dec_out, header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [188]:
# List the combined per-month CSV files for this state.
# sorted() because os.listdir returns entries in arbitrary order, which would make
# the row order of the combined file differ between machines and runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/DC/") if f.endswith(".csv"))

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed month file first, then concatenate once. Growing a frame inside
# the loop re-copies all earlier rows on each pass and starts from an empty frame,
# which recent pandas versions warn about.
month_frames = [pd.read_csv("Scraper_Output/State_Month_Day/DC/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
DC_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

DC_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0977612bebbbbc2ff5a90040050f2a55607e6004,aug,2017,cool,hold,Washington,730.315789,726.842105,717.315789,5.0,False,False,False
1,0dab5564077db11d375f2c8138456bd970432b9a,aug,2017,auto,auto,Washington,728.435484,730.306452,670.000000,99.0,False,False,False
2,0dab5564077db11d375f2c8138456bd970432b9a,aug,2017,auto,hold,Washington,730.600000,730.000000,670.000000,99.0,False,False,False
3,0ef03d5ff6a191d52626482c35b8ca26a346f033,aug,2017,cool,hold,Washington,745.354232,753.081505,752.877743,120.0,False,False,False
4,12a74288cc6f74cdf4d8fc4641d806a8e4f3a7b2,aug,2017,cool,auto,Washington,769.537653,771.073555,744.128722,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1013,fc63ce0d6f6ebbd50a13655a38b42706779a276c,jun,2021,auto,hold,Washington,729.871212,727.651515,700.000000,85.0,True,False,False
1014,fc7bd777fbf1d7b8333c39f07797ba2382ef456f,jun,2021,cool,hold,Washington,733.000000,720.000000,720.000000,90.0,False,False,False
1015,fd5f6d27152f3a3139dd17d1997eb3fd1a51e913,jun,2021,auto,hold,Washington DC,752.420455,765.761364,653.750000,7.0,False,False,False
1016,fe3fc705058e11bc89c6989ef320a003cf05b516,jun,2021,cool,hold,Washington,731.674699,732.903614,732.903614,40.0,True,False,True


In [190]:
# Write the all-months table for the state one level above the per-month folder, so it
# is not picked up when that folder is listed again. index=False leaves the row labels out.
DC_all_out = Path("Scraper_Output/State_Month_Day/DC_all_day.csv")
DC_all_out.parent.mkdir(parents=True, exist_ok=True)  # to_csv will not create a missing folder
DC_all.to_csv(DC_all_out, header=True, index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery SQL queries:
# print the distinct ProvinceState values of every monthly frame loaded above.
# Each line is expected to show only ['DC'].
monthly_frames = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    # August and December have no 2021 file in this notebook.
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# One loop instead of 28 copy-pasted prints; the printed text is unchanged.
for frame_name, frame in monthly_frames.items():
    print(f"Unique {frame_name}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['DC']
Unique jan_2018: ['DC']
Unique jan_2019: ['DC']
Unique jan_2020: ['DC']
Unique jan_2021: ['DC']
Unique feb_2017: ['DC']
Unique feb_2018: ['DC']
Unique feb_2019: ['DC']
Unique feb_2020: ['DC']
Unique feb_2021: ['DC']
Unique jun_2017: ['DC']
Unique jun_2018: ['DC']
Unique jun_2019: ['DC']
Unique jun_2020: ['DC']
Unique jun_2021: ['DC']
Unique jul_2017: ['DC']
Unique jul_2018: ['DC']
Unique jul_2019: ['DC']
Unique jul_2020: ['DC']
Unique jul_2021: ['DC']
Unique aug_2017: ['DC']
Unique aug_2018: ['DC']
Unique aug_2019: ['DC']
Unique aug_2020: ['DC']
Unique dec_2017: ['DC']
Unique dec_2018: ['DC']
Unique dec_2019: ['DC']
Unique dec_2020: ['DC']
