# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017-2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw Georgia (GA) day-level CSV for January 2017.
jan_2017 = pd.read_csv(
    "../data_large/GA-day/2017-jan-day-GA.csv",
)

In [3]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = jan_2017['TemperatureExpectedCool']
heat = jan_2017['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jan_2017.drop(jan_2017.index[is_outlier], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e3fc274caeac0d041bead29ab1fd3fb454b8a523,2017-01-26 11:55:00 UTC,heat,hold,726,687,680,GA,Union City,5,False,False,False,Gas
1,6452313525e6760469a7a2c12e80eaf85e2668f9,2017-01-13 12:05:00 UTC,auto,hold,719,685,605,GA,Atlanta,15,False,False,False,Gas
2,619f5d52683639debdaa9365aaad8c60fa11cb99,2017-01-09 15:15:00 UTC,auto,auto,710,743,683,GA,Marietta,40,False,False,False,Gas
3,3009c359e0a7f7aa816219e7c5834883da2c3730,2017-01-21 18:20:00 UTC,cool,auto,662,698,660,GA,Atlanta,10,False,False,True,Electric
5,c02e1c9775ceb9a612a637d2b70b1598f32eec6b,2017-01-01 19:05:00 UTC,heat,hold,708,729,690,GA,Roswell,35,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
625064,7c75ad2f1beb586a32104e6d4847ffb81cb355e3,2017-01-25 17:35:00 UTC,auto,hold,729,765,715,GA,Kennesaw,0,False,False,False,Gas
625065,9d03761562004cd91c052230a4b4cf88c0cfde1f,2017-01-27 11:30:00 UTC,auto,hold,685,765,685,GA,Powder Springs,15,False,False,False,Gas
625066,5f49bae2010b6d49b4ef769567d59cc7d81ab2ee,2017-01-29 14:45:00 UTC,auto,hold,715,765,715,GA,Alpharetta,0,False,False,False,Gas
625067,367fb020bf09db4f53251613927d3d204e3846af,2017-01-21 12:05:00 UTC,auto,hold,713,765,675,GA,Cumming,0,False,False,False,Gas


In [4]:
# Stamp every row with its year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_labels)

In [6]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,Jan,2017,auto,auto,Duluth,749.800539,780.000000,750.000000,0.0,False,False,False
0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,Jan,2017,auto,hold,Duluth,767.396825,799.896825,769.896825,0.0,False,False,False
007b945172cb726b360ffa1ab95f88d706fe99ac,Jan,2017,heat,auto,Marietta,671.083333,687.750000,680.666667,20.0,False,False,False
007b945172cb726b360ffa1ab95f88d706fe99ac,Jan,2017,heat,hold,Marietta,739.000000,787.000000,705.000000,20.0,False,False,False
00ab57c6dcdcfd4036692fd4a461a692798a8fde,Jan,2017,auto,hold,Marietta,674.736842,730.000000,680.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
ff5f0a70bfb5b807ae2c907a3077cf0abf8e27da,Jan,2017,auto,hold,Woodstock,672.867368,750.000000,660.000000,25.0,False,False,False
ff5f561cae741505744575815926d82451bbaf1d,Jan,2017,auto,auto,Atlanta,686.881356,770.000000,690.000000,10.0,False,False,False
ff5f561cae741505744575815926d82451bbaf1d,Jan,2017,heat,auto,Atlanta,696.729167,699.272059,699.272059,10.0,False,False,False
ffda88b274cf9dca9794aba9e9859bcecf5107ff,Jan,2017,heat,auto,Atlanta,643.521277,651.308511,650.095745,5.0,True,False,True


In [7]:
# Write the January 2017 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
jan_2017_ave.to_csv(
    "data/day/GA/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the raw Georgia (GA) day-level CSV for January 2018.
jan_2018 = pd.read_csv(
    "../data_large/GA-day/2018-jan-day-GA.csv",
)

In [9]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = jan_2018['TemperatureExpectedCool']
heat = jan_2018['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jan_2018.drop(jan_2018.index[is_outlier], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c41b76e0462a86a7915f50d2e788c39ae4408822,2018-01-17 13:10:00 UTC,heat,hold,687,705,705,GA,Mableton,7,False,False,False,Gas
1,4781e68218762066f365919713eaaf5240cdf8cb,2018-01-25 13:45:00 UTC,heat,hold,686,685,685,GA,Griffin,90,False,False,True,Electric
3,c816c0ef957e164d7da833860bf59368eba81ecf,2018-01-30 18:30:00 UTC,auto,hold,660,825,665,GA,Brookhaven,50,True,False,False,Gas
4,741b36a583db2fcd324be4721eb4114f737fc723,2018-01-24 18:25:00 UTC,auto,hold,695,795,695,GA,Grovetown,5,False,False,True,Electric
5,c226015427cd36486814133a469d094d60820be3,2018-01-23 19:30:00 UTC,heat,hold,687,676,676,GA,Dunwoody,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2259630,05e165a23357cfc35b2e9f5b8f5e8514e5f4acc0,2018-01-31 16:15:00 UTC,auto,hold,718,765,715,GA,Cumming,10,False,False,False,Gas
2259631,73ef505de9becbdddfb20f959adfe251c298f45c,2018-01-27 15:20:00 UTC,auto,auto,729,765,715,GA,Sandy Springs,50,False,False,False,Gas
2259632,3d8ac39030e617827d155b71abb7270eb9da2cfc,2018-01-08 16:45:00 UTC,auto,hold,710,765,715,GA,Grovetown,0,False,False,True,Electric
2259633,6e19fed3a019855b90f38ba350a04570e002e569,2018-01-02 11:30:00 UTC,auto,hold,724,765,725,GA,Smyrna,0,False,False,False,Gas


In [10]:
# Stamp every row with its year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_labels)

In [12]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Write the January 2018 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
jan_2018_ave.to_csv(
    "data/day/GA/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the raw Georgia (GA) day-level CSV for January 2019.
jan_2019 = pd.read_csv(
    "../data_large/GA-day/2019-jan-day-GA.csv",
)

In [15]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = jan_2019['TemperatureExpectedCool']
heat = jan_2019['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jan_2019.drop(jan_2019.index[is_outlier], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7088bb90c59304d5547f08f4bdc152cf691c480f,2019-01-04 12:20:00 UTC,heat,auto,656,640,640,GA,Peachtree City,30,False,False,False,Gas
1,c5f71dd08df2140b43dfcd73b3f48bc0c2d096e9,2019-01-11 12:20:00 UTC,heat,hold,660,665,665,GA,Sandy Springs,30,False,False,False,Gas
2,4e78eb56b7126d1ae1360f477dc446a6a339bb61,2019-01-08 17:30:00 UTC,heat,hold,679,675,675,GA,Cumming,0,True,False,False,Gas
3,5921e2cd8ccf0178ad4326070da1709e5effdb91,2019-01-31 08:35:00 UTC,auto,hold,681,793,693,GA,Snellville,70,False,False,False,Gas
4,50534e04c61fc044fe49335622cf5bd88578e41e,2019-01-29 17:00:00 UTC,auto,hold,680,757,682,GA,Cumming,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3277178,f196f285122fba460dfcbd6c473296d479b1bd24,2019-01-01 07:05:00 UTC,auto,hold,686,765,675,GA,Powder Springs,10,False,False,False,Gas
3277179,159b692d7b9d45291eade350e1d61666becf3056,2019-01-14 11:25:00 UTC,auto,hold,710,765,715,GA,cumming,0,True,False,False,Gas
3277180,ad94649994fbc595d39e1450b820b504bb6ea8c8,2019-01-31 11:55:00 UTC,heat,hold,765,765,765,GA,Fairburn,0,True,False,False,Gas
3277181,741b36a583db2fcd324be4721eb4114f737fc723,2019-01-06 15:20:00 UTC,auto,hold,705,765,695,GA,Grovetown,5,False,False,True,Electric


In [16]:
# Stamp every row with its year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_labels)

In [18]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Write the January 2019 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
jan_2019_ave.to_csv(
    "data/day/GA/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the raw Georgia (GA) day-level CSV for January 2020.
jan_2020 = pd.read_csv(
    "../data_large/GA-day/2020-jan-day-GA.csv",
)

In [21]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = jan_2020['TemperatureExpectedCool']
heat = jan_2020['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jan_2020.drop(jan_2020.index[is_outlier], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,53fd9c9230cadd5d3439e5a8b132853ce88ba90d,2020-01-28 12:30:00 UTC,auto,hold,644,747,647,GA,Atlanta,20,True,False,False,Gas
1,185b963f58cb0315556d6460c4b82b197a88174e,2020-01-09 19:35:00 UTC,heat,auto,724,709,709,GA,McDonough,17,False,False,False,Gas
2,b55cc89c832cc4c4df99c61f186537b1e232dac1,2020-01-12 15:25:00 UTC,auto,hold,707,748,698,GA,Atlanta,0,False,False,False,Gas
3,ad94649994fbc595d39e1450b820b504bb6ea8c8,2020-01-27 17:55:00 UTC,auto,hold,721,825,725,GA,Fairburn,0,True,False,False,Gas
4,489b27ccdac8d5cf0dd6203dd25c6cc503a3b3d2,2020-01-27 16:20:00 UTC,heat,hold,689,685,685,GA,Atlanta,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3450828,d58cf21512cd8a01f57436398baff28e6d22a2f1,2020-01-02 15:05:00 UTC,auto,auto,720,765,685,GA,Atlanta,10,False,False,False,Gas
3450829,8b044472dcf7fa89297a7dd2e35544f3c97e4002,2020-01-04 14:10:00 UTC,cool,hold,662,765,765,GA,Monroe,0,False,False,False,Gas
3450830,70610c65ec2f08e9eaee26107e56c3f7cd024aa9,2020-01-08 13:40:00 UTC,auto,hold,758,765,665,GA,Woodstock,17,False,False,False,Gas
3450831,7d00322ef51f649541b8ceab3147405df1a32a81,2020-01-12 16:35:00 UTC,auto,hold,713,765,715,GA,Atlanta,30,False,False,False,Gas


In [22]:
# Stamp every row with its year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_labels)

In [24]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Write the January 2020 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
jan_2020_ave.to_csv(
    "data/day/GA/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the raw Georgia (GA) day-level CSV for January 2021.
jan_2021 = pd.read_csv(
    "../data_large/GA-day/2021-jan-day-GA.csv",
)

In [27]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = jan_2021['TemperatureExpectedCool']
heat = jan_2021['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jan_2021.drop(jan_2021.index[is_outlier], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e3f74fdfefe090635ec8b640bb16e73b41383d67,2021-01-17 19:25:00 UTC,auto,hold,705,748,680,GA,Atlanta,0,True,False,False,Gas
1,e0401bebb572c83e43118eea3a0504f81e80cf55,2021-01-30 14:50:00 UTC,heat,hold,638,694,640,GA,Dunwoody,40,True,False,True,Electric
2,36b213740806664da66909844d16e63e7abc876c,2021-01-11 15:05:00 UTC,auto,hold,714,775,725,GA,Atlanta,0,False,False,False,Gas
3,5921e2cd8ccf0178ad4326070da1709e5effdb91,2021-01-01 12:20:00 UTC,auto,hold,687,723,683,GA,Snellville,70,False,False,False,Gas
4,0871396e0fa765d670fdf39c7bef94b7178f453f,2021-01-08 17:20:00 UTC,auto,hold,689,830,670,GA,Carrollton,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019835,36b623dfa0826bada482b8ec3ce7e6cee9ac8a9f,2021-01-10 13:35:00 UTC,auto,hold,708,765,715,GA,Marietta,0,False,False,True,Electric
2019836,bfc9884685a595c28430d33aeb2a3ceeba335d2c,2021-01-28 19:30:00 UTC,auto,hold,695,765,695,GA,Lithonia,37,False,False,False,Gas
2019837,3fb4644e10d2fb657bd17c114673722585fc3cc7,2021-01-22 07:10:00 UTC,heat,hold,764,765,765,GA,alpharetta,0,False,False,False,Gas
2019838,decd045d6362ebc420aa99152fc9e687bd0f1c6a,2021-01-30 18:45:00 UTC,auto,hold,678,765,685,GA,Braselton,10,True,False,True,Electric


In [28]:
# Stamp every row with its year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_labels)

In [30]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Write the January 2021 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
jan_2021_ave.to_csv(
    "data/day/GA/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year January aggregate CSVs in the output folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/GA/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year aggregate, then combine them with a single concat call.
# Growing a DataFrame inside the loop re-copies all earlier rows on each pass,
# and concatenating onto an empty frame triggers a FutureWarning in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/GA/jan/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (this matches what the loop-based version produced).
GA_jan = pd.concat(frames) if frames else pd.DataFrame()

GA_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,Jan,2017,auto,auto,Duluth,749.800539,780.000000,750.000000,0.0,False,False,False
1,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,Jan,2017,auto,hold,Duluth,767.396825,799.896825,769.896825,0.0,False,False,False
2,007b945172cb726b360ffa1ab95f88d706fe99ac,Jan,2017,heat,auto,Marietta,671.083333,687.750000,680.666667,20.0,False,False,False
3,007b945172cb726b360ffa1ab95f88d706fe99ac,Jan,2017,heat,hold,Marietta,739.000000,787.000000,705.000000,20.0,False,False,False
4,00ab57c6dcdcfd4036692fd4a461a692798a8fde,Jan,2017,auto,hold,Marietta,674.736842,730.000000,680.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2471,ff5f0a70bfb5b807ae2c907a3077cf0abf8e27da,Jan,2021,auto,hold,Woodstock,701.657179,780.000000,687.419355,25.0,False,False,False
2472,ff6c0f4ac2f05ac6dfb24841435f0280d9b4d7f1,Jan,2021,auto,hold,Sandy Springs,702.607143,752.000000,702.000000,20.0,False,False,False
2473,ffdee847f984151b669c018253c331e880297aa1,Jan,2021,auto,hold,Lyons,675.416667,770.388889,686.833333,40.0,False,False,True
2474,fffe0f5f2d321824ebf3f44ea4dc49031251879a,Jan,2021,cool,hold,Atlanta,726.833333,738.000000,738.000000,10.0,True,False,False


In [34]:
# Save the combined January frame; the row index is not written to the file.
GA_jan.to_csv(
    "Scraper_Output/State_Month_Day/GA/GA_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the raw Georgia (GA) day-level CSV for February 2017.
feb_2017 = pd.read_csv(
    "../data_large/GA-day/2017-feb-day-GA.csv",
)

In [36]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = feb_2017['TemperatureExpectedCool']
heat = feb_2017['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
feb_2017.drop(feb_2017.index[is_outlier], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,31f3bd0092cd7cc4baffedf6fc2dffbb535ad20c,2017-02-20 11:55:00 UTC,heat,hold,707,709,709,GA,Dunwoody,40,False,False,False,Gas
3,197b8ff8d22d4bf4977d318f21975402277972b0,2017-02-26 19:55:00 UTC,cool,hold,734,697,697,GA,Austell,10,False,False,False,Gas
4,55efb643a26ea0da09359799ec79b4608b468601,2017-02-05 12:30:00 UTC,heat,hold,649,620,610,GA,Atlanta,0,False,False,False,Gas
5,c6d4c45794cd1069bdb285b05ba6662d6cbd6980,2017-02-25 12:10:00 UTC,auto,auto,746,758,708,GA,Johns Creek,25,False,False,False,Gas
7,cb82cea11d12052d0ab7540a1acadcce1f3a03e5,2017-02-27 15:05:00 UTC,heat,hold,673,677,677,GA,Forsyth,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
579567,91686f211ea979ec2212016fe91d2175b966d462,2017-02-03 18:15:00 UTC,auto,auto,708,760,710,GA,Atlanta,35,False,False,False,Gas
579568,08572c78fb39d29bf173fc792c96f414edcd1945,2017-02-02 18:05:00 UTC,auto,auto,716,760,710,GA,Sugar Hill,30,False,False,False,Gas
579569,de534a57c5f4726bebbe221a823cb6366f461df7,2017-02-07 10:40:00 UTC,auto,hold,708,760,710,GA,Cartersville,20,False,False,False,Gas
579570,d54ca053f2dfab7da49e7c39e1690135b1c9a37e,2017-02-10 13:40:00 UTC,heat,hold,746,760,760,GA,Covington,20,True,False,False,Gas


In [37]:
# Stamp every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - kept as-is; confirm which casing downstream
# consumers expect before normalising.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_labels)

In [39]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Write the February 2017 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
feb_2017_ave.to_csv(
    "data/day/GA/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the raw Georgia (GA) day-level CSV for February 2018.
feb_2018 = pd.read_csv(
    "../data_large/GA-day/2018-feb-day-GA.csv",
)

In [42]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = feb_2018['TemperatureExpectedCool']
heat = feb_2018['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
feb_2018.drop(feb_2018.index[is_outlier], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0b225419cfbba8957536e9d6460fd5345866cb64,2018-02-04 14:25:00 UTC,heat,auto,705,630,710,GA,Alpharetta,20,False,False,False,Gas
1,2c4ab2bf2b06d3cf0f871c7efe501f04646b827b,2018-02-12 16:55:00 UTC,heat,hold,695,655,655,GA,Atlanta,30,False,False,False,Gas
2,c226015427cd36486814133a469d094d60820be3,2018-02-14 16:40:00 UTC,heat,hold,700,658,658,GA,Dunwoody,10,True,False,True,Electric
3,5363b8dc74865cfaa1395328d2c3d6a5def8378c,2018-02-23 18:15:00 UTC,cool,hold,694,699,699,GA,Keysville,20,True,False,True,Electric
4,c816c0ef957e164d7da833860bf59368eba81ecf,2018-02-13 12:20:00 UTC,auto,hold,664,825,655,GA,Brookhaven,50,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2009844,ecde66de925df08bfb4d42fa516b229c61486682,2018-02-09 19:35:00 UTC,auto,hold,717,765,705,GA,Clarkesville,110,False,False,False,Gas
2009845,ecde66de925df08bfb4d42fa516b229c61486682,2018-02-23 15:00:00 UTC,auto,hold,738,765,705,GA,Clarkesville,110,False,False,False,Gas
2009846,e1fafb1b86ed23f35725452dba2f2fc6b0734535,2018-02-27 14:25:00 UTC,auto,hold,711,765,715,GA,Cumming,5,False,False,False,Gas
2009847,e1fafb1b86ed23f35725452dba2f2fc6b0734535,2018-02-15 19:20:00 UTC,auto,hold,715,765,715,GA,Cumming,5,False,False,False,Gas


In [43]:
# Stamp every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - kept as-is; confirm which casing downstream
# consumers expect before normalising.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_labels)

In [45]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Write the February 2018 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
feb_2018_ave.to_csv(
    "data/day/GA/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the raw Georgia (GA) day-level CSV for February 2019.
feb_2019 = pd.read_csv(
    "../data_large/GA-day/2019-feb-day-GA.csv",
)

In [48]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = feb_2019['TemperatureExpectedCool']
heat = feb_2019['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
feb_2019.drop(feb_2019.index[is_outlier], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,735a4424f34939b0ff79d6f2145d6931c97a8a14,2019-02-17 18:35:00 UTC,heat,hold,724,729,729,GA,Kennesaw,15,False,False,False,Gas
1,a31ab2d4cf0c2bc46f1376ae5d4d559961e996b7,2019-02-12 18:10:00 UTC,heat,hold,718,685,685,GA,Lilburn,0,False,False,False,Gas
2,e02e10109bb4a8326d9689d7ac48c9a16586dc38,2019-02-21 11:50:00 UTC,heat,hold,683,689,689,GA,palmetto,10,False,False,False,Gas
3,fd5a57ca6afac3349382738491387f3385003cea,2019-02-08 18:25:00 UTC,heat,hold,716,716,716,GA,Watkinsville,0,False,False,False,Gas
4,b57315f94b024cb53daa526d9a33a91b1d2a5832,2019-02-04 15:15:00 UTC,auto,auto,615,842,608,GA,Marietta,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2210008,159b692d7b9d45291eade350e1d61666becf3056,2019-02-01 09:40:00 UTC,auto,hold,710,765,715,GA,cumming,0,True,False,False,Gas
2210009,199f617281a6e0c96d29b26a9746a9f99a5718e4,2019-02-18 19:05:00 UTC,auto,hold,712,765,715,GA,Dunwoody,50,True,False,False,Gas
2210010,159b692d7b9d45291eade350e1d61666becf3056,2019-02-27 17:25:00 UTC,auto,hold,719,765,715,GA,cumming,0,True,False,False,Gas
2210011,159b692d7b9d45291eade350e1d61666becf3056,2019-02-08 10:10:00 UTC,auto,hold,751,765,715,GA,cumming,0,True,False,False,Gas


In [49]:
# Stamp every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - kept as-is; confirm which casing downstream
# consumers expect before normalising.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_labels)

In [51]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Write the February 2019 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
feb_2019_ave.to_csv(
    "data/day/GA/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the raw Georgia (GA) day-level CSV for February 2020.
feb_2020 = pd.read_csv(
    "../data_large/GA-day/2020-feb-day-GA.csv",
)

In [54]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = feb_2020['TemperatureExpectedCool']
heat = feb_2020['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
feb_2020.drop(feb_2020.index[is_outlier], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9b58ebba3cf9da14528f79cf0845f4fd768519ab,2020-02-08 12:50:00 UTC,heat,hold,649,652,652,GA,Ringgold,0,False,False,True,Electric
1,6c2774b85d606a6465b258e2659923f409220985,2020-02-02 14:20:00 UTC,heat,hold,643,653,653,GA,Senoia,20,False,False,False,Gas
2,baa3cf3c1a06e908e893e177418328311bc4b6a2,2020-02-28 15:35:00 UTC,heat,hold,637,630,630,GA,Marietta,56,False,False,False,Gas
3,8afd40744cef26a6a766104821a6148047d25747,2020-02-06 19:45:00 UTC,auto,auto,684,736,686,GA,Alpharetta,30,False,False,False,Gas
4,cdd89b887b38e5ad7687a647a636ed9ab82253cb,2020-02-02 13:10:00 UTC,heat,hold,602,610,610,GA,Hartwell,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3063004,e0da5e7cd2951636f39030f87c02d960e35fcf20,2020-02-21 15:00:00 UTC,auto,auto,715,765,715,GA,Newnan,20,False,False,False,Gas
3063005,6705343948419b4f039262c0647d200d3f76d0c4,2020-02-10 19:05:00 UTC,auto,hold,704,765,705,GA,Cumming,0,False,False,False,Gas
3063006,f196f285122fba460dfcbd6c473296d479b1bd24,2020-02-10 19:25:00 UTC,auto,hold,683,765,685,GA,Powder Springs,10,False,False,False,Gas
3063007,1aa32c760783f788bc8d2c698fb5cbf4f294b0da,2020-02-06 18:15:00 UTC,auto,hold,685,765,685,GA,Woodstock,25,False,False,False,Gas


In [55]:
# Stamp every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - kept as-is; confirm which casing downstream
# consumers expect before normalising.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_labels)

In [57]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Write the February 2020 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
feb_2020_ave.to_csv(
    "data/day/GA/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the raw Georgia (GA) day-level CSV for February 2021.
feb_2021 = pd.read_csv(
    "../data_large/GA-day/2021-feb-day-GA.csv",
)

In [60]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
cool = feb_2021['TemperatureExpectedCool']
heat = feb_2021['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
feb_2021.drop(feb_2021.index[is_outlier], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ce437dd94f67f7086dc6c4c7e77ba30381dda63e,2021-02-12 13:50:00 UTC,auto,hold,718,771,721,GA,Smyrna,35,False,False,False,Gas
1,489b27ccdac8d5cf0dd6203dd25c6cc503a3b3d2,2021-02-07 17:10:00 UTC,heat,hold,681,685,685,GA,Atlanta,10,False,False,False,Gas
2,87b1f4feea9db14b21208b04dc75bbfbfb980dfb,2021-02-03 17:05:00 UTC,auto,hold,718,785,685,GA,Atlanta,10,True,False,True,Electric
3,5921e2cd8ccf0178ad4326070da1709e5effdb91,2021-02-17 12:50:00 UTC,auto,hold,685,733,693,GA,Snellville,70,False,False,False,Gas
4,ce437dd94f67f7086dc6c4c7e77ba30381dda63e,2021-02-11 14:05:00 UTC,auto,hold,716,771,721,GA,Smyrna,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1747856,c264475f61444f3c6d14cd7d4a1436d624ffa97e,2021-02-11 16:50:00 UTC,auto,hold,713,765,715,GA,Peachtree Corners,30,False,False,True,Electric
1747857,decd045d6362ebc420aa99152fc9e687bd0f1c6a,2021-02-27 17:10:00 UTC,auto,hold,683,765,685,GA,Braselton,10,True,False,True,Electric
1747858,7332f8f84060ff2eda74fa9602b72876c5586051,2021-02-21 17:40:00 UTC,auto,hold,712,765,685,GA,Suwanee,17,False,False,False,Gas
1747859,8cf1090b892d4f86e8cd06c0584f189023f79463,2021-02-08 14:55:00 UTC,auto,hold,728,765,715,GA,Dahlonega,19,True,False,True,Electric


In [61]:
# Stamp every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - kept as-is; confirm which casing downstream
# consumers expect before normalising.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_labels)

In [63]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Write the February 2021 averages to CSV; index=True keeps the group keys,
# which live in the index, as columns of the file.
feb_2021_ave.to_csv(
    "data/day/GA/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year February aggregate CSVs in the output folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/GA/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year aggregate, then combine them with a single concat call.
# Growing a DataFrame inside the loop re-copies all earlier rows on each pass,
# and concatenating onto an empty frame triggers a FutureWarning in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/GA/feb/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (this matches what the loop-based version produced).
GA_feb = pd.concat(frames) if frames else pd.DataFrame()

GA_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,feb,2017,auto,auto,Duluth,746.645833,770.000000,740.000000,0.0,False,False,False
1,007b945172cb726b360ffa1ab95f88d706fe99ac,feb,2017,heat,hold,Marietta,694.777778,723.666667,713.222222,20.0,False,False,False
2,0179392d99e6ae71c82458839d275a267b128999,feb,2017,auto,auto,Augusta,719.711679,779.912409,723.259124,0.0,False,False,False
3,0179392d99e6ae71c82458839d275a267b128999,feb,2017,auto,hold,Augusta,708.320896,771.761194,664.746269,0.0,False,False,False
4,02637cf52ec847f0847fa2d4fcf7e74395b2d72c,feb,2017,auto,auto,Newnan,693.083333,744.916667,694.791667,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2306,ff5f0a70bfb5b807ae2c907a3077cf0abf8e27da,feb,2021,auto,hold,Woodstock,700.212334,780.000000,680.445690,25.0,False,False,False
2307,ff6c0f4ac2f05ac6dfb24841435f0280d9b4d7f1,feb,2021,auto,hold,Sandy Springs,702.972992,751.311634,701.311634,20.0,False,False,False
2308,ffdee847f984151b669c018253c331e880297aa1,feb,2021,auto,hold,Lyons,677.274074,770.000000,682.688889,40.0,False,False,True
2309,fffe0f5f2d321824ebf3f44ea4dc49031251879a,feb,2021,cool,hold,Atlanta,728.941176,729.000000,729.000000,10.0,True,False,False


In [67]:
# Write the combined February frame without the row index
GA_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/GA/GA_feb.csv", index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the 2017 June day CSV for GA
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2017-jun-day-GA.csv")

In [69]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2017 = jun_2017.loc[~is_outlier].copy()

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c2ae01d684878434da4c4d0877b1a7646b5925a6,2017-06-24 13:35:00 UTC,auto,hold,718,720,670,GA,Hull,15,False,False,True,Electric
2,951ba3f8b40a39278a5c27df4cabf2852af034d2,2017-06-30 17:05:00 UTC,auto,auto,734,730,680,GA,Flowery Branch,10,False,False,False,Gas
3,a2c1feea553c98c8381564dd4531c92d98b591ba,2017-06-23 16:25:00 UTC,cool,auto,759,770,710,GA,Cataula,7,True,False,True,Electric
4,1563a13f8fe60f53c9e0c37b2788c32a8e7253fb,2017-06-13 18:00:00 UTC,cool,hold,781,780,780,GA,Augusta,0,False,False,False,Gas
5,02a368c803cfdae5ef4acb7cac1448b266d0ba95,2017-06-23 10:45:00 UTC,cool,hold,737,740,740,GA,Poulan,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1127818,0938e73064f099d2ba02b3b223813a974d2452ab,2017-06-21 18:50:00 UTC,auto,hold,788,800,610,GA,Saint Simons Island,0,False,False,True,Electric
1127819,0938e73064f099d2ba02b3b223813a974d2452ab,2017-06-26 13:45:00 UTC,auto,hold,793,800,690,GA,Saint Simons Island,0,False,False,True,Electric
1127820,55464350cd65ed791d262513c8b24b97800b76b0,2017-06-02 14:30:00 UTC,auto,hold,751,750,670,GA,Saint Simons Island,0,False,False,True,Electric
1127821,55464350cd65ed791d262513c8b24b97800b76b0,2017-06-15 11:20:00 UTC,auto,auto,741,748,680,GA,Saint Simons Island,0,False,False,True,Electric


In [70]:
# Tag every row with its year and month (both stored as strings)
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jun_2017 = jun_2017.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [72]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jun_2017_ave.to_csv(path_or_buf="data/day/GA/jun/jun_2017_ave.csv", index=True)

### 2018 June Day

In [74]:
# Load the 2018 June day CSV for GA
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2018-jun-day-GA.csv")

In [75]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2018 = jun_2018.loc[~is_outlier].copy()

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,509c68bbabdbc325200f71d70aa2689990829038,2018-06-30 13:10:00 UTC,cool,hold,742,745,745,GA,Tucker,0,False,False,False,Gas
2,214a9b0173553b8fc6ebef77c198ff7bfee81ac1,2018-06-03 12:35:00 UTC,cool,hold,701,698,698,GA,Atlanta,10,False,False,False,Gas
3,33417dfa9ffb0684a8e1614dcd5912cedd3d4ae3,2018-06-10 12:15:00 UTC,cool,hold,709,708,708,GA,Atlanta,0,True,False,True,Electric
4,7056bc4740437e3a510f95c10c739d5d0fe011fe,2018-06-24 18:10:00 UTC,cool,hold,738,715,715,GA,Augusta,40,False,False,False,Gas
5,1a1644663228c88071aff89a1a949ea408ab5521,2018-06-30 18:15:00 UTC,auto,hold,763,764,644,GA,Cumming,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2771509,fc04b380ba1777c1c3d66ea3fa283f37f61a00e8,2018-06-27 14:35:00 UTC,cool,hold,752,760,760,GA,Alpharetta,40,False,False,False,Gas
2771510,82503c764936fa15126a8a554c5945355974274b,2018-06-07 16:40:00 UTC,cool,hold,757,760,760,GA,Tucker,0,False,False,False,Gas
2771511,df15da13efcddbdc26013044f401790152e55b66,2018-06-16 17:05:00 UTC,cool,hold,764,760,760,GA,Duluth,20,False,False,False,Gas
2771512,a2c1feea553c98c8381564dd4531c92d98b591ba,2018-06-04 11:45:00 UTC,cool,auto,752,760,760,GA,Cataula,7,True,False,True,Electric


In [76]:
# Tag every row with its year and month (both stored as strings)
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jun_2018 = jun_2018.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [78]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jun_2018_ave.to_csv(path_or_buf="data/day/GA/jun/jun_2018_ave.csv", index=True)

### 2019 June Day

In [80]:
# Load the 2019 June day CSV for GA
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2019-jun-day-GA.csv")

In [81]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2019 = jun_2019.loc[~is_outlier].copy()

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8987146de21e34af888a6a3d349885314bc89cf3,2019-06-21 13:30:00 UTC,cool,auto,708,705,655,GA,Buford,0,True,False,False,Gas
1,10c62cdc07f4a331e1894dc5896a09ca52268881,2019-06-22 17:15:00 UTC,auto,hold,818,815,653,GA,Marietta,0,True,False,True,Electric
2,360fee5dae2f19fdf7178a9edea045f1eca651b0,2019-06-27 12:50:00 UTC,auto,hold,745,743,684,GA,Lawrenceville,35,True,False,False,Gas
4,e02e10109bb4a8326d9689d7ac48c9a16586dc38,2019-06-16 15:00:00 UTC,cool,hold,782,779,779,GA,palmetto,10,False,False,False,Gas
6,e1fafb1b86ed23f35725452dba2f2fc6b0734535,2019-06-30 14:10:00 UTC,auto,hold,714,715,615,GA,Cumming,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3817136,2e446a2f12c00539d2e18d2cbbb1cf765bf868e1,2019-06-15 14:10:00 UTC,cool,auto,736,760,760,GA,Statesboro,45,True,False,True,Electric
3817137,584c4376b126a506c75e349df9ac82a40aee7dfb,2019-06-20 11:15:00 UTC,cool,auto,716,760,760,GA,Suwanee,10,False,False,False,Gas
3817138,14b84fbb56ee21317fc37d2e7399f419b439b946,2019-06-21 09:45:00 UTC,cool,hold,763,760,760,GA,Hinesville,15,False,False,False,Gas
3817139,5e90e1e9cb4ed99911a6f2590cf5385c79a9a154,2019-06-15 18:30:00 UTC,cool,hold,763,760,760,GA,Savannah,65,True,False,True,Electric


In [82]:
# Tag every row with its year and month (both stored as strings)
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jun_2019 = jun_2019.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [84]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jun_2019_ave.to_csv(path_or_buf="data/day/GA/jun/jun_2019_ave.csv", index=True)

### 2020 June Day

In [86]:
# Load the 2020 June day CSV for GA
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2020-jun-day-GA.csv")

In [87]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2020 = jun_2020.loc[~is_outlier].copy()

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,006f827019c7979541901360ed5bbdf1ef265887,2020-06-19 16:30:00 UTC,cool,auto,799,800,800,GA,Grovetown,10,True,False,True,Electric
1,18242fc3b11de8fa4673d46ee0f66d435cb3c556,2020-06-15 17:50:00 UTC,cool,hold,740,800,800,GA,Rockmart,5,True,False,True,Electric
4,10d226b1a165ef4fed852d9aa32c33e892ea308a,2020-06-20 11:15:00 UTC,cool,hold,694,715,715,GA,Cumming,19,True,False,False,Gas
5,870c00900f501dc61a00f22eb1c4abda1f8d87d9,2020-06-27 18:55:00 UTC,cool,auto,778,790,746,GA,columbus,40,False,False,False,Gas
6,e2700d05d28a866a860afed9c6d4b4940c61fa74,2020-06-14 19:45:00 UTC,cool,hold,751,749,749,GA,Atlanta,99,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3474503,40efb586959ae01573ae15d3b6164d16f09cb0db,2020-06-10 09:20:00 UTC,cool,hold,763,760,760,GA,Cumming,0,True,False,True,Electric
3474504,640446cb7525a8c54b9bebe192b5a6647308481b,2020-06-28 16:25:00 UTC,cool,hold,736,760,760,GA,Leesburg,0,True,False,True,Electric
3474505,ef6324a357f8cf35070c29c3825d1ac2bddf6ceb,2020-06-11 14:25:00 UTC,cool,auto,749,760,760,GA,Atlanta,0,False,False,True,Electric
3474506,00f7421db7f18ca88fc2278349eb2b907da9ac5e,2020-06-24 07:10:00 UTC,cool,auto,759,760,760,GA,Dacula,10,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings)
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jun_2020 = jun_2020.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [90]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jun_2020_ave.to_csv(path_or_buf="data/day/GA/jun/jun_2020_ave.csv", index=True)

### 2021 June Day

In [92]:
# Load the 2021 June day CSV for GA
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2021-jun-day-GA.csv")

In [93]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2021 = jun_2021.loc[~is_outlier].copy()

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,6a702dc1da07fa6fa1078d8d7bad3157a4cabf30,2021-06-12 08:15:00 UTC,auto,hold,700,699,649,GA,Shiloh Woods,5,True,False,False,Gas
2,b46447e9c04ca13489ff3a794999f6b8a645fc49,2021-06-26 14:45:00 UTC,cool,hold,736,735,735,GA,Savannah,35,True,False,True,Electric
3,3daee6c20501374936ac4c8a3023793e252b7f93,2021-06-18 16:00:00 UTC,auto,hold,735,732,682,GA,Gainesville,5,False,False,True,Electric
5,217c6274601706865723a44e05f4079ea54e3ba0,2021-06-19 19:25:00 UTC,cool,hold,723,725,725,GA,Atlanta,0,True,False,True,Electric
6,3d8385db8dc055716eec16f7d1ddd7bf147d5e88,2021-06-20 13:30:00 UTC,cool,hold,721,713,713,GA,Austell,65,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2179078,8daf6781147f70bd778076d12886d878be4e9960,2021-06-10 11:45:00 UTC,cool,hold,696,760,760,GA,Sandy Springs,0,False,False,True,Electric
2179079,5cf851b19db43bb0d7846132e2132e36366a50b4,2021-06-22 13:40:00 UTC,cool,hold,759,760,760,GA,Duluth,10,False,False,False,Gas
2179080,8aef5804caa77a472dd120c25659f29cc79ffe12,2021-06-25 10:50:00 UTC,cool,hold,744,760,760,GA,Mableton,17,False,False,False,Gas
2179081,736157f7e4767929455bd6861b768d9b48c94c89,2021-06-18 12:25:00 UTC,cool,hold,743,760,760,GA,McDonough,15,False,False,False,Gas


In [94]:
# Tag every row with its year and month (both stored as strings)
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jun_2021 = jun_2021.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [96]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jun_2021_ave.to_csv(path_or_buf="data/day/GA/jun/jun_2021_ave.csv", index=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export as combined CSV

In [98]:
# Collect the CSV file names in the June folder.
# Sorted because os.listdir returns names in arbitrary order, which would make
# the row order of the combined file depend on the filesystem.
files = sorted(f for f in os.listdir("data/day/GA/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV named in `files`, then concatenate once at the end
# (re-concatenating inside the loop copies the accumulated rows each time).
frames = []
for file in files:
    df = pd.read_csv("data/day/GA/jun/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
GA_jun = pd.concat(frames) if frames else pd.DataFrame()

GA_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00474e9c5a4122916b24150ad706014b0c17eb90,jun,2017,cool,auto,Newnan,717.648618,717.312212,731.566820,10.0,False,False,False
1,00474e9c5a4122916b24150ad706014b0c17eb90,jun,2017,cool,hold,Newnan,720.780879,720.925178,720.925178,10.0,False,False,False
2,007b945172cb726b360ffa1ab95f88d706fe99ac,jun,2017,cool,hold,Marietta,765.645833,775.000000,775.000000,20.0,False,False,False
3,00ab57c6dcdcfd4036692fd4a461a692798a8fde,jun,2017,auto,hold,Marietta,709.585366,701.341463,651.341463,5.0,False,False,False
4,00dcce15115ae3f87a915d5f4d9076f78723c331,jun,2017,auto,auto,Holly Springs,734.000000,727.000000,697.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2255,ff5f0a70bfb5b807ae2c907a3077cf0abf8e27da,jun,2021,auto,hold,Woodstock,742.629023,753.586110,680.000000,25.0,False,False,False
2256,ff6c0f4ac2f05ac6dfb24841435f0280d9b4d7f1,jun,2021,auto,hold,Sandy Springs,721.126225,720.000000,670.000000,20.0,False,False,False
2257,ff7ae718457c9b0af5824fb517a5b42acac8a30a,jun,2021,auto,hold,Villa Rica,764.938547,760.000000,680.000000,0.0,False,False,False
2258,ffdee847f984151b669c018253c331e880297aa1,jun,2021,auto,hold,Lyons,751.886598,748.659794,660.000000,40.0,False,False,True


In [100]:
# Write the combined June frame without the row index
GA_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/GA/GA_jun.csv", index=False)

---

## July

### 2017 July Day

In [101]:
# Load the 2017 July day CSV for GA
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2017-jul-day-GA.csv")

In [102]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2017 = jul_2017.loc[~is_outlier].copy()

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b945db1e5908fd78558e19c9096066f8c9382911,2017-07-09 16:05:00 UTC,cool,hold,768,770,770,GA,Rome,20,False,False,False,Gas
1,c9f3f0c6270c71769c6a96ca7f4d538899be345d,2017-07-13 18:55:00 UTC,cool,auto,776,770,750,GA,Woolsey,9,True,False,True,Electric
2,1a0dbc7268713cb31ce672eeab67bb21bed7c4a5,2017-07-07 19:50:00 UTC,cool,hold,844,840,790,GA,Rutledge,15,False,False,True,Electric
3,d358aa20a8f915ee33b7b7055823236c71531e74,2017-07-30 12:20:00 UTC,auto,hold,724,720,650,GA,Leesburg,25,False,False,False,Gas
4,354b98aa832796a0a5123abe3d6a037e62e53a19,2017-07-13 19:50:00 UTC,auto,auto,768,780,690,GA,Maysville,90,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1332524,55464350cd65ed791d262513c8b24b97800b76b0,2017-07-23 14:40:00 UTC,auto,hold,783,780,670,GA,Saint Simons Island,0,False,False,True,Electric
1332525,f7b7f5c48925de12c130bacbd2f99704a8fd85e5,2017-07-10 13:25:00 UTC,auto,hold,752,750,620,GA,Saint Simons Island,0,False,False,True,Electric
1332526,0938e73064f099d2ba02b3b223813a974d2452ab,2017-07-21 18:30:00 UTC,auto,hold,796,795,685,GA,Saint Simons Island,0,False,False,True,Electric
1332527,e0fd67595c41c49edc24e00a3ca232e679cd84b4,2017-07-29 17:45:00 UTC,cool,hold,780,760,720,GA,Saint Simons Island,20,False,False,True,Electric


In [103]:
# Tag every row with its year and month (both stored as strings)
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jul_2017 = jul_2017.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [105]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jul_2017_ave.to_csv(path_or_buf="data/day/GA/jul/jul_2017_ave.csv", index=True)

### 2018 July Day

In [107]:
# Load the 2018 July day CSV for GA
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2018-jul-day-GA.csv")

In [108]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2018 = jul_2018.loc[~is_outlier].copy()

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1a2ccfb8d0b5b659682680ab452821799bcbb496,2018-07-01 18:40:00 UTC,auto,auto,760,755,705,GA,Atlanta,10,False,False,False,Gas
1,45a2fdace5a7f3febac1ae612ff9c81330d34056,2018-07-01 17:05:00 UTC,cool,hold,753,751,751,GA,McDonough,17,False,False,False,Gas
2,c226015427cd36486814133a469d094d60820be3,2018-07-19 17:05:00 UTC,cool,hold,725,738,738,GA,Dunwoody,10,True,False,True,Electric
3,1a1644663228c88071aff89a1a949ea408ab5521,2018-07-07 15:05:00 UTC,auto,hold,746,754,644,GA,Cumming,0,True,False,False,Gas
4,d25d87fff30f79fafc37afcc5b2894fc30100ef3,2018-07-06 11:00:00 UTC,cool,hold,757,775,775,GA,Decatur,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3114579,9c6a7ac50a81b92153dc01eede1c6d0658ac11b0,2018-07-02 15:40:00 UTC,cool,auto,763,760,760,GA,White,10,False,False,False,Gas
3114580,0bdf68ae09c3f8a93eee7e4d7d1cab55cb549da8,2018-07-29 11:35:00 UTC,cool,auto,751,760,760,GA,Canton,5,False,False,True,Electric
3114581,92c33bc308f931973f1afdccd09a01108cdb0d53,2018-07-10 17:45:00 UTC,cool,auto,790,760,760,GA,Lawrenceville,20,False,False,False,Gas
3114582,b2306cae9cd30671ee91d4840406424411b540db,2018-07-31 18:30:00 UTC,cool,hold,760,760,760,GA,Johns Creek,20,False,False,False,Gas


In [109]:
# Tag every row with its year and month (both stored as strings)
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jul_2018 = jul_2018.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [111]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jul_2018_ave.to_csv(path_or_buf="data/day/GA/jul/jul_2018_ave.csv", index=True)

### 2019 July Day

In [113]:
# Load the 2019 July day CSV for GA
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2019-jul-day-GA.csv")

In [114]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2019 = jul_2019.loc[~is_outlier].copy()

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,67156386734685e33c0a2fc99e002480cd12cf12,2019-07-18 10:50:00 UTC,cool,hold,752,750,750,GA,Augusta,0,False,False,False,Gas
1,05e165a23357cfc35b2e9f5b8f5e8514e5f4acc0,2019-07-29 11:10:00 UTC,cool,hold,744,745,745,GA,Cumming,10,False,False,False,Gas
2,955466db4fb092f0b8ff58350ab158b3dfc687dc,2019-07-07 17:55:00 UTC,cool,hold,751,745,745,GA,Hampton,7,False,False,False,Gas
3,6069a6d0c07dbccf20bd469fb5f6e9238fc7ce26,2019-07-24 11:55:00 UTC,cool,auto,754,750,750,GA,Columbus,49,False,False,False,Gas
4,add68be7be34440666692dd35fdbfba1abc4c516,2019-07-02 17:45:00 UTC,auto,hold,739,730,650,GA,Marietta,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3971707,cc996c7848145692a412faaf83aa675921405397,2019-07-11 16:00:00 UTC,auto,hold,676,670,610,GA,Warner Robins,10,False,False,True,Electric
3971708,624a42352aef3609bf9a9c5f869a3673877c2484,2019-07-15 10:45:00 UTC,cool,auto,740,740,740,GA,Stockbridge,20,True,False,False,Gas
3971709,a6245e9885390e2a1cb910c4543921df2dec22c7,2019-07-07 14:20:00 UTC,cool,hold,705,707,707,GA,Stone Mountain,35,False,False,False,Gas
3971710,c6d735bf903c036583949ab177079ff1cefbc27e,2019-07-30 13:25:00 UTC,auto,auto,761,760,710,GA,Marietta,0,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings)
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jul_2019 = jul_2019.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [117]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jul_2019_ave.to_csv(path_or_buf="data/day/GA/jul/jul_2019_ave.csv", index=True)

### 2020 July Day

In [119]:
# Load the 2020 July day CSV for GA
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2020-jul-day-GA.csv")

In [120]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2020 = jul_2020.loc[~is_outlier].copy()

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3db60354932fd7732fac5991a82ea9ff3aa8c226,2020-07-21 16:45:00 UTC,auto,hold,700,725,635,GA,Decatur,0,False,False,False,Gas
2,e3e73b8aacaa9a502137fd72f1ddddb6088213f3,2020-07-05 18:05:00 UTC,cool,hold,744,735,665,GA,Marietta,35,True,False,False,Gas
4,f990188497ba1901e39503d3f8d5e6c9eec7e38a,2020-07-27 16:10:00 UTC,cool,hold,770,770,703,GA,Loganville,0,False,False,False,Gas
5,e3f74fdfefe090635ec8b640bb16e73b41383d67,2020-07-07 16:10:00 UTC,cool,hold,704,734,734,GA,Atlanta,0,True,False,False,Gas
6,264cf0f6aa20b39f69dfda3e7f532fedbd436ca5,2020-07-31 14:50:00 UTC,cool,hold,750,745,745,GA,Senoia,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3658211,15fcdcac75447bb898d6e923095221a8822eed2c,2020-07-23 19:10:00 UTC,cool,hold,773,760,760,GA,Marietta,45,True,False,True,Electric
3658212,fb85fe6446f6c40404e5fd3d5d9312a131d61c32,2020-07-13 17:35:00 UTC,cool,hold,755,760,760,GA,Peachtree Corners,47,False,False,False,Gas
3658213,f8a730bae814196e852554ed2cd2a9667fb2dd2c,2020-07-05 18:50:00 UTC,cool,hold,763,760,760,GA,Smyrna,0,False,False,False,Gas
3658214,4422851eaab3da089c81e84089f8f837e8efe7f9,2020-07-01 14:05:00 UTC,cool,auto,737,760,760,GA,Atlanta,0,False,False,True,Electric


In [121]:
# Tag every row with its year and month (both stored as strings)
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jul_2020 = jul_2020.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [123]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jul_2020_ave.to_csv(path_or_buf="data/day/GA/jul/jul_2020_ave.csv", index=True)

### 2021 July Day

In [125]:
# Load the 2021 July day CSV for GA
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/GA-day/2021-jul-day-GA.csv")

In [126]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# All four conditions are evaluated as one boolean mask.
# Rows where a value is missing compare False on every test and are kept.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2021 = jul_2021.loc[~is_outlier].copy()

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f28d4f049d537297371a6a8687be323b068f1905,2021-07-17 19:35:00 UTC,auto,hold,725,717,667,GA,Cumming,0,False,False,False,Gas
1,d9050965a90de098af2278b96b35bfe812aec331,2021-07-08 13:05:00 UTC,auto,hold,691,689,629,GA,Rockmart,5,True,False,True,Electric
2,decd045d6362ebc420aa99152fc9e687bd0f1c6a,2021-07-16 18:20:00 UTC,auto,hold,713,715,605,GA,Braselton,10,True,False,True,Electric
3,e874ba039bb21430ac5e8787d382fbc92e5edc71,2021-07-27 10:10:00 UTC,auto,hold,708,712,662,GA,Roswell,30,False,False,False,Gas
4,c8ecef540b0fb480c5548d3bb034b0aa4aef685f,2021-07-05 12:55:00 UTC,auto,hold,722,712,662,GA,Tybee Island,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2045355,a886239ea0ab2a8731284260ffb753773a8944f1,2021-07-20 13:20:00 UTC,cool,hold,738,760,760,GA,Covington,0,True,False,True,Electric
2045356,545eb94f188a3a31a526a8402dcae17a45aea106,2021-07-04 14:35:00 UTC,cool,hold,753,760,760,GA,Cedartown,15,False,False,True,Electric
2045357,d2fbedbf7d3496628f606cb58d5a63787412ac7f,2021-07-24 11:25:00 UTC,cool,hold,758,760,760,GA,Atlanta,0,False,False,False,Gas
2045358,e6b63f7261a97db7e3088779565221001b27c684,2021-07-06 11:40:00 UTC,cool,hold,798,800,760,GA,Blackshear,19,True,False,True,Electric


In [127]:
# Tag every row with its year and month (both stored as strings)
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" to label the aggregates
jul_2021 = jul_2021.rename(
    columns={
        col: col + "_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [129]:
# Mean of each numeric/boolean column per
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and pandas >= 2.0
# raises TypeError on them instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Export CSV file; the index holds the groupby keys, so it is written as well
jul_2021_ave.to_csv(path_or_buf="data/day/GA/jul/jul_2021_ave.csv", index=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export as combined CSV

In [131]:
# Collect the CSV file names in the July folder.
# Sorted because os.listdir returns names in arbitrary order, which would make
# the row order of the combined file depend on the filesystem.
files = sorted(f for f in os.listdir("data/day/GA/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV named in `files`, then concatenate once at the end
# (re-concatenating inside the loop copies the accumulated rows each time).
frames = []
for file in files:
    df = pd.read_csv("data/day/GA/jul/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
GA_jul = pd.concat(frames) if frames else pd.DataFrame()

GA_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00474e9c5a4122916b24150ad706014b0c17eb90,jul,2017,cool,auto,Newnan,717.281707,714.209756,726.791463,10.0,False,False,False
1,00474e9c5a4122916b24150ad706014b0c17eb90,jul,2017,cool,hold,Newnan,720.139646,717.453943,717.450258,10.0,False,False,False
2,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,jul,2017,auto,auto,Duluth,742.645833,712.729167,682.729167,0.0,False,False,False
3,007b945172cb726b360ffa1ab95f88d706fe99ac,jul,2017,cool,auto,Marietta,803.983425,824.883978,656.850829,20.0,False,False,False
4,007b945172cb726b360ffa1ab95f88d706fe99ac,jul,2017,cool,hold,Marietta,789.791667,789.930556,789.097222,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2137,ff5b52b08f80c8e01b773889c2a0fd7968f61fc0,jul,2021,auto,hold,Rome,715.812500,698.625000,643.437500,10.0,False,False,True
2138,ff5f0a70bfb5b807ae2c907a3077cf0abf8e27da,jul,2021,auto,hold,Woodstock,761.983597,766.014581,671.385176,25.0,False,False,False
2139,ff7ae718457c9b0af5824fb517a5b42acac8a30a,jul,2021,auto,hold,Villa Rica,748.291692,744.409642,680.000000,0.0,False,False,False
2140,ffdee847f984151b669c018253c331e880297aa1,jul,2021,auto,hold,Lyons,744.725490,739.921569,659.921569,40.0,False,False,True


In [133]:
# Write the combined July frame; index=False leaves the row index out of the file.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day/GA", exist_ok=True)
GA_jul.to_csv("Scraper_Output/State_Month_Day/GA/GA_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Read in the August 2017 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
aug_2017 = pd.read_csv("../data_large/GA-day/2017-aug-day-GA.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
aug_2017.drop(aug_2017[out_of_range].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,61df4e9496fea21bb45b32c77381b3de7370b8c1,2017-08-21 19:05:00 UTC,auto,auto,744,740,660,GA,Waleska,0,False,False,True,Electric
1,6dc9416be77469058d48504af2f6099744d0f927,2017-08-06 19:15:00 UTC,auto,hold,715,710,650,GA,Maysville,90,False,False,False,Gas
2,465fdbd93ad62e84d0060a82d27742a8b5441a3e,2017-08-08 10:40:00 UTC,cool,hold,752,750,750,GA,Jasper,9,True,False,True,Electric
3,6b152bee489d7c4f9361d162ce47573242f33106,2017-08-30 19:00:00 UTC,cool,hold,780,780,780,GA,Greensboro,20,True,False,True,Electric
4,0ac8db4bd78c277b10d32077965afb7322f274b8,2017-08-11 15:30:00 UTC,cool,hold,734,740,740,GA,Waleska,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1434966,f7b7f5c48925de12c130bacbd2f99704a8fd85e5,2017-08-18 12:35:00 UTC,auto,hold,755,760,620,GA,Saint Simons Island,0,False,False,True,Electric
1434967,0938e73064f099d2ba02b3b223813a974d2452ab,2017-08-21 16:20:00 UTC,auto,auto,798,800,690,GA,Saint Simons Island,0,False,False,True,Electric
1434968,55464350cd65ed791d262513c8b24b97800b76b0,2017-08-22 19:15:00 UTC,auto,hold,757,760,670,GA,Saint Simons Island,0,False,False,True,Electric
1434969,f7b7f5c48925de12c130bacbd2f99704a8fd85e5,2017-08-14 12:05:00 UTC,auto,hold,763,760,620,GA,Saint Simons Island,0,False,False,True,Electric


In [136]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/aug", exist_ok=True)
aug_2017_ave.to_csv("data/day/GA/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Read in the August 2018 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
aug_2018 = pd.read_csv("../data_large/GA-day/2018-aug-day-GA.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
aug_2018.drop(aug_2018[out_of_range].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9dbe30720814518d27c6b92ac3cb58f18dc8261c,2018-08-02 14:55:00 UTC,cool,hold,785,784,784,GA,Brookhaven,0,False,False,True,Electric
1,20549055fd4e52d7a007c090f162c66310fd2888,2018-08-07 16:55:00 UTC,auto,hold,711,705,655,GA,Suwanee,10,False,False,False,Gas
2,67cb75a15948e4f8098d0a953e64604ea0c95c73,2018-08-16 12:15:00 UTC,cool,hold,747,745,745,GA,Johns Creek,20,False,False,False,Gas
5,ba402161e30dc0ae96d9717efbcd90bf4b17a356,2018-08-21 08:10:00 UTC,cool,auto,759,750,719,GA,Newnan,20,True,False,True,Electric
6,eefe1ff9239e19ecad94f0607f14e5e9d9226a91,2018-08-23 12:55:00 UTC,auto,hold,701,700,610,GA,Atlanta,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3135789,d64a6665ec41d701fb3103713368ace35e248f32,2018-08-24 13:55:00 UTC,cool,hold,759,760,760,GA,Atlanta,15,False,False,False,Gas
3135790,7e2b8fae295c797aba4f2abe015ec4b31f32c2a1,2018-08-05 18:45:00 UTC,cool,hold,766,760,760,GA,Gainesville,10,True,False,True,Electric
3135791,74e7f4b955fa08fc35c382d073c2e983cc24e652,2018-08-10 18:00:00 UTC,cool,hold,764,760,760,GA,Fayetteville,0,False,False,False,Gas
3135792,de96070aeefbee5c13fa3a36b169037ed2534159,2018-08-05 17:55:00 UTC,cool,auto,755,750,760,GA,Dahlonega,27,True,False,True,Electric


In [142]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/aug", exist_ok=True)
aug_2018_ave.to_csv("data/day/GA/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Read in the August 2019 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
aug_2019 = pd.read_csv("../data_large/GA-day/2019-aug-day-GA.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
aug_2019.drop(aug_2019[out_of_range].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,24758ca4338caa5b9e9391bfbb31ce6c7b769158,2019-08-25 12:55:00 UTC,cool,auto,768,770,770,GA,Newnan,5,True,False,True,Electric
2,f34b58281937fcf0d2669b51fdb7dc4352fad1f4,2019-08-13 14:10:00 UTC,cool,auto,749,820,640,GA,Evans,28,False,False,False,Gas
3,c6f5ea9871eae63f00a3a9f3ee288da457dfade8,2019-08-26 16:25:00 UTC,cool,hold,708,705,705,GA,Suwanee,25,False,False,False,Gas
4,8903c9b00f3ab5849cba86e93856c31060032f8f,2019-08-28 11:45:00 UTC,cool,auto,719,720,720,GA,Evans,10,False,False,False,Gas
5,e538c75b7250fa8d7dfadcb8f2d9784923553ba2,2019-08-31 15:00:00 UTC,auto,auto,755,770,640,GA,Hoschton,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914122,d30e8ea358ffb94645a09eb876f678057d278955,2019-08-09 16:00:00 UTC,cool,auto,722,720,720,GA,Marietta,0,False,False,False,Gas
3914123,1ff29445b1ea316076da79700952722c18734c44,2019-08-12 09:05:00 UTC,cool,hold,749,750,750,GA,Dunwoody,45,False,False,False,Gas
3914124,676efc2e93c3acc69f1e50532f8c51da80e558b5,2019-08-17 16:00:00 UTC,auto,auto,707,700,620,GA,Taylorsville,0,False,False,True,Electric
3914125,9a8a6fc22b82f74fe226e18ffc753425bfbc548d,2019-08-18 14:10:00 UTC,cool,hold,783,800,790,GA,Alpharetta,20,True,False,True,Electric


In [148]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/aug", exist_ok=True)
aug_2019_ave.to_csv("data/day/GA/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Read in the August 2020 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
aug_2020 = pd.read_csv("../data_large/GA-day/2020-aug-day-GA.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
aug_2020.drop(aug_2020[out_of_range].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d8859c53435f90238f97b6d465a9cc60f82e6d2a,2020-08-11 19:20:00 UTC,cool,auto,722,720,695,GA,Duluth,0,False,False,False,Gas
1,cff2f73a2d134e52e6b18cba7883999c7c313634,2020-08-09 12:35:00 UTC,cool,hold,735,735,735,GA,Alpharetta,30,True,False,False,Gas
2,5b067b0ae2f621e2734703b5d88637c02c38b475,2020-08-08 17:15:00 UTC,auto,hold,707,705,655,GA,Savannah,5,False,False,True,Electric
3,dc5bf0343db1409e5e761f44967a231bc6d7a768,2020-08-04 13:35:00 UTC,auto,auto,744,760,665,GA,LaGrange,0,True,False,True,Electric
4,e4266d6799bdd854cf95d3045ca8a637b679a520,2020-08-04 15:30:00 UTC,auto,hold,695,682,632,GA,Atlanta,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3618926,e469d2731c542981d79b7b73e4623465316bc6d7,2020-08-22 11:40:00 UTC,cool,hold,749,760,760,GA,Atlanta,0,False,False,True,Electric
3618927,55fd15d57cd3d850f050525f7a987328d8bec7c9,2020-08-26 16:50:00 UTC,cool,hold,759,760,760,GA,Holly Springs,30,False,False,False,Gas
3618928,855f63bc95e066071341ed48a0b2ac2160161ace,2020-08-31 19:15:00 UTC,cool,auto,762,760,760,GA,Columbus,10,False,False,False,Gas
3618929,e6fe03e4b4dc888166d0125bfe03454a0bd984f2,2020-08-25 18:50:00 UTC,cool,hold,762,760,760,GA,Jesup,25,True,False,True,Electric


In [154]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/aug", exist_ok=True)
aug_2020_ave.to_csv("data/day/GA/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [158]:
# Collect the per-year average CSVs written for August.
# os.listdir returns entries in arbitrary order, so sort the names to keep
# the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/GA/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/GA/aug/" + file)
    frames.append(df)

# pd.concat raises on an empty list; keep the original empty-frame result
# when the folder holds no CSV files.
GA_aug = pd.concat(frames) if frames else pd.DataFrame()

GA_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00474e9c5a4122916b24150ad706014b0c17eb90,aug,2017,cool,auto,Newnan,748.895105,755.160839,688.379953,10.0,False,False,False
1,00474e9c5a4122916b24150ad706014b0c17eb90,aug,2017,cool,hold,Newnan,729.837896,723.516371,723.521202,10.0,False,False,False
2,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,aug,2017,auto,auto,Duluth,755.000000,746.000000,716.000000,0.0,False,False,False
3,007b945172cb726b360ffa1ab95f88d706fe99ac,aug,2017,cool,hold,Marietta,782.460674,786.101124,782.842697,20.0,False,False,False
4,00ab57c6dcdcfd4036692fd4a461a692798a8fde,aug,2017,auto,hold,Marietta,711.742268,702.835052,652.835052,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3846,ffbd8ef5abdea98e61589e1b9431e38cc6ca6f67,aug,2020,auto,hold,Bremen,687.222308,685.000000,635.000000,0.0,True,False,True
3847,ffcb0a4adc362407fafb10122c71be8ccb03178a,aug,2020,auto,auto,Lawrenceville,778.750000,760.000000,680.000000,20.0,False,False,False
3848,ffcb0a4adc362407fafb10122c71be8ccb03178a,aug,2020,auto,hold,Lawrenceville,764.083333,760.083333,680.000000,20.0,False,False,False
3849,fffe0f5f2d321824ebf3f44ea4dc49031251879a,aug,2020,cool,auto,Atlanta,753.521739,798.782609,764.695652,10.0,True,False,False


In [160]:
# Write the combined August frame; index=False leaves the row index out of the file.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day/GA", exist_ok=True)
GA_aug.to_csv("Scraper_Output/State_Month_Day/GA/GA_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in the December 2017 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
dec_2017 = pd.read_csv("../data_large/GA-day/2017-dec-day-GA.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
dec_2017.drop(dec_2017[out_of_range].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,e283694447fc3fcc00b84e232ba9a31be30338d6,2017-12-10 11:40:00 UTC,auto,hold,724,805,725,GA,Lilburn,0,True,False,False,Gas
3,548c6b17457e7771ba2c99a820ae506e530d5b19,2017-12-03 19:55:00 UTC,heat,hold,724,695,695,GA,Mableton,0,False,False,False,Gas
4,62cdca49b50198ee59b16522db70c95799d36a89,2017-12-19 17:35:00 UTC,heat,hold,751,747,747,GA,Conyers,0,False,False,False,Gas
5,d645503f669829d01430ca435b69334133c42fe4,2017-12-28 15:15:00 UTC,heat,auto,692,703,703,GA,Duluth,20,True,False,False,Gas
6,6a67ebaad83a6abcc4bb087995d1ce54f9ac320f,2017-12-16 15:25:00 UTC,auto,hold,736,785,735,GA,Athens,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1971024,f5616adf62adb10e6d72d09897aafb5ebd9c9f52,2017-12-29 13:45:00 UTC,auto,hold,709,765,715,GA,Decatur,70,False,False,False,Gas
1971025,0c68bf2b2bb5cd3923f24fdfa7cbf91991870073,2017-12-19 17:45:00 UTC,auto,auto,717,765,715,GA,Newnan,7,False,False,True,Electric
1971026,390f0a7b227a3e6e68890f1cd35ac68ce578cdee,2017-12-03 13:15:00 UTC,auto,auto,714,765,715,GA,Newnan,10,False,False,True,Electric
1971027,37a4b4b526466c15e47d8f256ecad5798184b8e9,2017-12-02 16:40:00 UTC,auto,hold,718,765,715,GA,Manchester,30,True,False,True,Electric


In [163]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/dec", exist_ok=True)
dec_2017_ave.to_csv("data/day/GA/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Read in the December 2018 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
dec_2018 = pd.read_csv("../data_large/GA-day/2018-dec-day-GA.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
dec_2018.drop(dec_2018[out_of_range].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f9ddfa19ae543fb45e868a5852aeb90ad93d6f9c,2018-12-21 18:45:00 UTC,heat,hold,701,692,692,GA,Valdosta,45,True,False,True,Electric
1,eb3b807663ef2316b4e98dc443daf4758869523c,2018-12-26 12:30:00 UTC,auto,auto,747,805,755,GA,Grovetown,0,False,False,True,Electric
2,8dd724ac10924b6e5611a4d8d1b7eb7f08f89dbb,2018-12-27 12:35:00 UTC,heat,hold,665,718,668,GA,Peachtree City,20,True,False,False,Gas
3,8b044472dcf7fa89297a7dd2e35544f3c97e4002,2018-12-27 18:15:00 UTC,cool,hold,625,785,785,GA,Monroe,0,False,False,False,Gas
4,9e471bf0881842e37326efb0ce0e2e53007f0a15,2018-12-23 14:55:00 UTC,heat,hold,691,695,695,GA,Decatur,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3265183,c9a16de7c564a6ea0ba88e97f02dd487c73c6331,2018-12-13 13:25:00 UTC,auto,hold,714,765,715,GA,Cartersville,0,False,False,False,Gas
3265184,decd045d6362ebc420aa99152fc9e687bd0f1c6a,2018-12-23 17:40:00 UTC,auto,hold,693,765,695,GA,Braselton,10,True,False,True,Electric
3265185,f196f285122fba460dfcbd6c473296d479b1bd24,2018-12-20 10:05:00 UTC,auto,hold,713,765,715,GA,Powder Springs,10,False,False,False,Gas
3265186,f196f285122fba460dfcbd6c473296d479b1bd24,2018-12-27 18:15:00 UTC,auto,hold,693,765,695,GA,Powder Springs,10,False,False,False,Gas


In [169]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/dec", exist_ok=True)
dec_2018_ave.to_csv("data/day/GA/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Read in the December 2019 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
dec_2019 = pd.read_csv("../data_large/GA-day/2019-dec-day-GA.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
dec_2019.drop(dec_2019[out_of_range].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1eed1cfc16e396599fdf0bacb354cc1fd6d42862,2019-12-29 18:25:00 UTC,auto,hold,703,737,686,GA,Dallas,5,True,False,True,Electric
1,34d9ada5e2f25c61352c2b87f055c79006eebf8d,2019-12-25 15:05:00 UTC,heat,hold,698,706,706,GA,Griffin,49,False,False,False,Gas
2,88ea7087e0aa0961555b6ced060ec5415fafb704,2019-12-07 17:55:00 UTC,heat,hold,698,695,695,GA,Marietta,15,False,False,False,Gas
3,7b5c40c4608af710455255376b949ad9ab7c70a8,2019-12-02 13:25:00 UTC,auto,hold,700,753,651,GA,Flowery Branch,20,False,False,False,Gas
4,03672da094d061540f81928beb1a8fa71754a969,2019-12-09 19:00:00 UTC,auto,hold,685,675,625,GA,Stone Mountain,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3505773,429b3723032f7d0ce23d54d383d8fa03c834a13f,2019-12-04 14:10:00 UTC,auto,hold,715,765,695,GA,Alpharetta,5,False,False,False,Gas
3505774,7490570f773d338a1b543aade4efbb883ff230f1,2019-12-28 14:50:00 UTC,heat,hold,764,765,715,GA,Powder Springs,10,True,False,False,Gas
3505775,6bae2e8dfac105e129db09b21dedef7a57abd9a0,2019-12-09 18:40:00 UTC,auto,hold,670,765,675,GA,Marietta,0,False,False,False,Gas
3505776,6bae2e8dfac105e129db09b21dedef7a57abd9a0,2019-12-04 17:55:00 UTC,auto,hold,644,765,635,GA,Marietta,0,False,False,False,Gas


In [175]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/dec", exist_ok=True)
dec_2019_ave.to_csv("data/day/GA/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Read in the December 2020 "day" CSV for GA (generated from the BigQuery queries, Preprocess step 1)
dec_2020 = pd.read_csv("../data_large/GA-day/2020-dec-day-GA.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected temperature (cool or heat)
# is >= 850 or <= 600.
expected = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected >= 850) | (expected <= 600)).any(axis=1)
dec_2020.drop(dec_2020[out_of_range].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e538c75b7250fa8d7dfadcb8f2d9784923553ba2,2020-12-13 15:40:00 UTC,heat,auto,661,610,610,GA,Hoschton,15,False,False,False,Gas
2,4220b49b00ebe9a844feb592410666436aded0a9,2020-12-08 13:45:00 UTC,heat,auto,632,640,640,GA,Fortson,25,False,False,False,Gas
3,e58cc19703e010fd496d8a5517a1e0d0c11992b2,2020-12-08 12:35:00 UTC,heat,hold,693,697,697,GA,Dacula,10,True,False,True,Electric
4,dd9eb67300d3b04b66896aa2c8f1d0daa1aef3a3,2020-12-12 09:30:00 UTC,auto,hold,696,764,704,GA,Elberton,30,False,False,False,Gas
5,5a05c5fb52e0a8c9b5fee705d5d96eb70c21423c,2020-12-18 19:10:00 UTC,heat,auto,606,610,610,GA,Atlanta,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2948977,676efc2e93c3acc69f1e50532f8c51da80e558b5,2020-12-19 11:15:00 UTC,auto,auto,657,760,660,GA,Taylorsville,0,False,False,True,Electric
2948978,867e2776db50b08278d5c4ea7f89a5fe265bbecb,2020-12-28 15:15:00 UTC,auto,hold,707,760,710,GA,Grovetown,5,False,True,True,Electric
2948979,6d60f452dfb281ad5e5220749dd3593ea6724775,2020-12-13 13:40:00 UTC,auto,auto,703,760,700,GA,Woodstock,35,True,False,False,Gas
2948980,6c2774b85d606a6465b258e2659923f409220985,2020-12-18 14:35:00 UTC,auto,hold,672,760,680,GA,Senoia,20,False,False,False,Gas


In [181]:
# Tag every row with its year and month (both stored as strings);
# the aggregation below groups on these two columns.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so the averaged output
# is labelled as an aggregate.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame
# still holds text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# that cannot be averaged and make .mean() raise a TypeError on pandas >= 2.0.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file. The group keys live in the index, so index=True writes
# them out as the leading columns.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/GA/dec", exist_ok=True)
dec_2020_ave.to_csv("data/day/GA/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [185]:
# Collect the per-year average CSVs written for December.
# os.listdir returns entries in arbitrary order, so sort the names to keep
# the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/GA/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/GA/dec/" + file)
    frames.append(df)

# pd.concat raises on an empty list; keep the original empty-frame result
# when the folder holds no CSV files.
GA_dec = pd.concat(frames) if frames else pd.DataFrame()

GA_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,002928222f5a2b5cfeba53f3aca5d5f24f6c4fe8,dec,2017,heat,auto,Columbus,716.051579,715.066316,717.084211,40.0,False,False,False
1,002928222f5a2b5cfeba53f3aca5d5f24f6c4fe8,dec,2017,heat,hold,Columbus,716.964427,717.857708,717.857708,40.0,False,False,False
2,00474e9c5a4122916b24150ad706014b0c17eb90,dec,2017,heat,auto,Newnan,708.330508,711.027542,710.911017,10.0,False,False,False
3,00474e9c5a4122916b24150ad706014b0c17eb90,dec,2017,heat,hold,Newnan,707.172712,706.753886,706.559585,10.0,False,False,False
4,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,dec,2017,auto,hold,Duluth,740.048780,740.000000,703.658537,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
4391,ffcb0a4adc362407fafb10122c71be8ccb03178a,dec,2020,heat,hold,Lawrenceville,732.666667,745.479167,744.312500,20.0,False,False,False
4392,ffdee847f984151b669c018253c331e880297aa1,dec,2020,auto,hold,Lyons,665.714286,770.000000,677.000000,40.0,False,False,True
4393,fffe0f5f2d321824ebf3f44ea4dc49031251879a,dec,2020,cool,auto,Atlanta,724.684211,720.000000,720.000000,10.0,True,False,False
4394,fffe0f5f2d321824ebf3f44ea4dc49031251879a,dec,2020,heat,auto,Atlanta,683.320611,668.816794,662.206107,10.0,True,False,False


In [187]:
# Write the combined December frame; index=False leaves the row index out of the file.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day/GA", exist_ok=True)
GA_dec.to_csv("Scraper_Output/State_Month_Day/GA/GA_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the combined month CSVs written for GA.
# os.listdir returns entries in arbitrary order, so sort the names to keep
# the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/GA/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/GA/" + file)
    frames.append(df)

# pd.concat raises on an empty list; keep the original empty-frame result
# when the folder holds no CSV files.
GA_all = pd.concat(frames) if frames else pd.DataFrame()

GA_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00474e9c5a4122916b24150ad706014b0c17eb90,aug,2017,cool,auto,Newnan,748.895105,755.160839,688.379953,10.0,False,False,False
1,00474e9c5a4122916b24150ad706014b0c17eb90,aug,2017,cool,hold,Newnan,729.837896,723.516371,723.521202,10.0,False,False,False
2,0070efdb5ff09d96c8cc5aab956ed49ef5b838b1,aug,2017,auto,auto,Duluth,755.000000,746.000000,716.000000,0.0,False,False,False
3,007b945172cb726b360ffa1ab95f88d706fe99ac,aug,2017,cool,hold,Marietta,782.460674,786.101124,782.842697,20.0,False,False,False
4,00ab57c6dcdcfd4036692fd4a461a692798a8fde,aug,2017,auto,hold,Marietta,711.742268,702.835052,652.835052,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15825,ff5f0a70bfb5b807ae2c907a3077cf0abf8e27da,jun,2021,auto,hold,Woodstock,742.629023,753.586110,680.000000,25.0,False,False,False
15826,ff6c0f4ac2f05ac6dfb24841435f0280d9b4d7f1,jun,2021,auto,hold,Sandy Springs,721.126225,720.000000,670.000000,20.0,False,False,False
15827,ff7ae718457c9b0af5824fb517a5b42acac8a30a,jun,2021,auto,hold,Villa Rica,764.938547,760.000000,680.000000,0.0,False,False,False
15828,ffdee847f984151b669c018253c331e880297aa1,jun,2021,auto,hold,Lyons,751.886598,748.659794,660.000000,40.0,False,False,True


In [190]:
# Write the combined GA frame (all months); index=False leaves the row index out of the file.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day", exist_ok=True)
GA_all.to_csv("Scraper_Output/State_Month_Day/GA_all_day.csv", header=True, index=False)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values of every month/year frame.
# Frames are looked up by their notebook variable name (e.g. "jan_2017").
years_by_month = {
    "jan": range(2017, 2022),
    "feb": range(2017, 2022),
    "jun": range(2017, 2022),
    "jul": range(2017, 2022),
    "aug": range(2017, 2021),
    "dec": range(2017, 2021),
}

for month, years in years_by_month.items():
    for year in years:
        frame_name = f"{month}_{year}"
        print(f"Unique {frame_name}: {globals()[frame_name]['ProvinceState'].unique()}")

Unique jan_2017: ['GA']
Unique jan_2018: ['GA']
Unique jan_2019: ['GA']
Unique jan_2020: ['GA']
Unique jan_2021: ['GA']
Unique feb_2017: ['GA']
Unique feb_2018: ['GA']
Unique feb_2019: ['GA']
Unique feb_2020: ['GA']
Unique feb_2021: ['GA']
Unique jun_2017: ['GA']
Unique jun_2018: ['GA']
Unique jun_2019: ['GA']
Unique jun_2020: ['GA']
Unique jun_2021: ['GA']
Unique jul_2017: ['GA']
Unique jul_2018: ['GA']
Unique jul_2019: ['GA']
Unique jul_2020: ['GA']
Unique jul_2021: ['GA']
Unique aug_2017: ['GA']
Unique aug_2018: ['GA']
Unique aug_2019: ['GA']
Unique aug_2020: ['GA']
Unique dec_2017: ['GA']
Unique dec_2018: ['GA']
Unique dec_2019: ['GA']
Unique dec_2020: ['GA']
