# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQueary

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 4 years

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Read in month csv for state
jan_2017 = pd.read_csv("../data_large/TN-day/2017-jan-day-TN.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f2b57877fbd9c90c626d47b5b65f1733563fa284,2017-01-07 15:30:00 UTC,heat,auto,716,720,720,TN,Knoxville,5,False,False,False,Gas
1,502860376ff414373c0d8f340b97cc7c1708b889,2017-01-30 18:40:00 UTC,auto,auto,679,720,680,TN,Eagleville,5,False,False,True,Electric
2,13019ff4001aff2078ea9d7cfffd98047dfb17d3,2017-01-31 13:30:00 UTC,auto,auto,678,725,675,TN,Maryville,25,False,False,False,Gas
3,da018df787b98b3131e935cfe29bfbfd69a077bc,2017-01-06 19:05:00 UTC,heat,hold,649,650,650,TN,Columbia,0,False,False,True,Electric
4,17767d97cccacfd4b549914d05bc31d868a85191,2017-01-01 14:35:00 UTC,heat,auto,665,650,620,TN,Murfreesboro,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229906,eedcd8a23ae210facc7258cd53ca00396343f1e7,2017-01-29 15:35:00 UTC,auto,hold,685,740,690,TN,Nashville,15,False,False,False,Gas
229907,eedcd8a23ae210facc7258cd53ca00396343f1e7,2017-01-29 16:30:00 UTC,auto,hold,684,740,690,TN,Nashville,15,False,False,False,Gas
229908,eedcd8a23ae210facc7258cd53ca00396343f1e7,2017-01-02 19:25:00 UTC,auto,hold,682,730,680,TN,Nashville,15,False,False,False,Gas
229909,eedcd8a23ae210facc7258cd53ca00396343f1e7,2017-01-07 12:30:00 UTC,auto,hold,673,730,680,TN,Nashville,15,False,False,False,Gas


In [4]:
# Add year and month

jan_2017["Year"] = "2017"
jan_2017["Month"] = "Jan"

In [5]:
# Rename columns to label the aggregates

jan_2017 = jan_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [6]:
jan_2017_ave = jan_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01691a75c606f8718e28a6b747c4cb9675a9060b,Jan,2017,heat,auto,Collierville,691.250000,690.000000,690.000000,20.0,False,False,False
01691a75c606f8718e28a6b747c4cb9675a9060b,Jan,2017,heat,hold,Collierville,673.958565,675.885591,675.885591,20.0,False,False,False
03528fd0510ac074f319f4d196f87f0d2b19fc26,Jan,2017,cool,hold,Clarksville,695.650000,713.600000,713.600000,10.0,False,False,True
03528fd0510ac074f319f4d196f87f0d2b19fc26,Jan,2017,heat,auto,Clarksville,699.500000,708.000000,708.000000,10.0,False,False,True
03528fd0510ac074f319f4d196f87f0d2b19fc26,Jan,2017,heat,hold,Clarksville,695.244393,696.871616,696.849961,10.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
ff7e0f2e982b085bf01d460db7670249e73af621,Jan,2017,auto,auto,Knoxville,686.909091,745.454545,689.954545,46.0,False,False,True
ff7e0f2e982b085bf01d460db7670249e73af621,Jan,2017,auto,hold,Knoxville,667.833333,750.000000,670.000000,46.0,False,False,True
ff8eb8fa2d95dad875f0fea5d0685ea03ee73961,Jan,2017,heat,auto,Clarksville,686.410714,820.000000,640.000000,5.0,True,False,True
ff8eb8fa2d95dad875f0fea5d0685ea03ee73961,Jan,2017,heat,hold,Clarksville,695.000000,712.000000,700.000000,5.0,True,False,True


In [7]:
# Export CSV file

jan_2017_ave.to_csv("data/day/TN/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Read in month csv for state
jan_2018 = pd.read_csv("../data_large/TN-day/2018-jan-day-TN.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,def4d2f546181ca8a77634f8aabb880285adcb5f,2018-01-17 13:25:00 UTC,auto,auto,646,820,650,TN,Johnson City,0,False,False,False,Gas
1,c19adba907c5e47ca49242b3a3f1d16394ada583,2018-01-07 16:55:00 UTC,auto,hold,663,780,670,TN,memphis,0,False,False,False,Gas
2,66781b7faa9d3874eeaa1ebe9c960d08700578bc,2018-01-17 18:10:00 UTC,heat,auto,717,730,730,TN,Cookeville,25,True,False,True,Electric
3,c228f59c5c7001815da94b239c913889ee4f738b,2018-01-22 15:20:00 UTC,auto,auto,752,825,775,TN,CORDOVA,10,True,False,False,Gas
4,8c87ccb624725fed2f1490015f04593f70a96fd7,2018-01-17 15:55:00 UTC,heat,hold,634,650,630,TN,Dyersburg,27,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
781389,5f58cfda6e1406373cd9cdad2e96c80a4e4891a4,2018-01-27 16:35:00 UTC,auto,auto,717,740,690,TN,Thompson's Station,9,True,False,False,Gas
781390,5f58cfda6e1406373cd9cdad2e96c80a4e4891a4,2018-01-05 18:45:00 UTC,heat,hold,752,750,750,TN,Thompson's Station,9,True,False,False,Gas
781391,71a80c838c26d0403faca21e6d85025070c94f2c,2018-01-04 19:25:00 UTC,heat,hold,735,740,740,TN,Thompson's Station,0,True,False,True,Electric
781392,0def9c996ebdfa7e09e06e30b7a7e59362dc9d5d,2018-01-24 16:35:00 UTC,auto,auto,668,780,650,TN,Thompson's Station,7,False,False,False,Gas


In [10]:
# Add year and month

jan_2018["Year"] = "2018"
jan_2018["Month"] = "Jan"


In [11]:
# Rename columns to label the aggregates

jan_2018 = jan_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [12]:
jan_2018_ave = jan_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2018_ave

In [13]:
# Export CSV file

jan_2018_ave.to_csv("data/day/TN/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Read in month csv for state
jan_2019 = pd.read_csv("../data_large/TN-day/2019-jan-day-TN.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c1e577571f9bef6dfff066243ee72f5c3e1999a6,2019-01-23 16:35:00 UTC,auto,hold,714,768,718,TN,Mount Juliet,25,False,False,False,Gas
1,246822e717b54301dcdd664882192757e97c1e1c,2019-01-26 18:40:00 UTC,heat,auto,669,665,665,TN,Lebanon,20,False,False,False,Gas
2,850207f1c14124e4313b80300be35854f5b8e6fa,2019-01-06 18:10:00 UTC,auto,auto,652,695,645,TN,Nashville,20,False,False,False,Gas
3,d9847a826ef57516183197918e6177b1c5c9f351,2019-01-18 13:00:00 UTC,auto,hold,706,742,712,TN,Cleveland,20,True,False,True,Electric
4,46ca62bd098ce2b85367fad4fe7df4098341ebab,2019-01-14 12:00:00 UTC,heat,hold,694,685,685,TN,Friendsville,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1213897,02292b52e205c2f06336b3c700c83f767b08294d,2019-01-07 19:25:00 UTC,heat,auto,679,760,650,TN,Lakeland,25,False,False,False,Gas
1213898,e84bd77ebe96b87e6f2f2cc8fbf41f0cf5c29fda,2019-01-29 16:15:00 UTC,heat,auto,705,760,710,TN,Franklin,25,False,False,False,Gas
1213899,2fac988270329b094312571306ec44164ac3ea6c,2019-01-05 19:00:00 UTC,auto,hold,727,760,690,TN,Pigeon Forge,10,True,False,True,Electric
1213901,b51a531cdbae140dfd195358a9a9747f10f28476,2019-01-10 13:00:00 UTC,auto,hold,706,760,710,TN,Medina,15,False,False,False,Gas


In [16]:
# Add year and month

jan_2019["Year"] = "2019"
jan_2019["Month"] = "Jan"


In [17]:
# Rename columns to label the aggregates

jan_2019 = jan_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [18]:
jan_2019_ave = jan_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2019_ave

In [19]:
# Export CSV file

jan_2019_ave.to_csv("data/day/TN/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Read in month csv for state
jan_2020 = pd.read_csv("../data_large/TN-day/2020-jan-day-TN.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7918cf8b884ae264bc1159162210e3054a33e96d,2020-01-26 18:55:00 UTC,heat,auto,690,690,690,TN,Church Hill,6,True,False,True,Electric
1,9cdf192dc5c7da29e292ac285ef8ff4fb861f11e,2020-01-12 10:05:00 UTC,auto,hold,688,700,650,TN,Oak Ridge,28,False,False,False,Gas
2,5291f70e9f12f7d80b4624ec4add2fc6d61a6f62,2020-01-29 08:10:00 UTC,auto,hold,684,735,685,TN,Manchester,50,True,False,True,Electric
3,426f6dd48b9eb72d1182acf8e4346b1d44ac8b85,2020-01-15 16:45:00 UTC,auto,auto,700,730,680,TN,Bells,0,True,False,True,Electric
4,2590ff129e946052cfcf7c8b5680b6b79f8840ec,2020-01-17 14:15:00 UTC,heat,auto,688,690,690,TN,Tusculum,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1365779,94ba48679496aadf23fb387580fc8eebbd64f6b6,2020-01-22 19:10:00 UTC,heat,hold,729,730,730,TN,Thompsons Station,0,True,False,True,Electric
1365780,2da807f5029782a8e7677d02094423c52bd5b297,2020-01-19 14:15:00 UTC,heat,hold,700,700,700,TN,Thompsons Station,0,False,False,False,Gas
1365781,ac3329aa1b90433b515d08d6690d8a65280b34f6,2020-01-26 13:35:00 UTC,heat,hold,674,680,680,TN,Thompsons Station,5,False,False,False,Gas
1365782,04adebc254991a86381911de7c9d269f2c498bbf,2020-01-09 18:40:00 UTC,heat,auto,737,740,740,TN,Thompsons Station,9,False,False,False,Gas


In [22]:
# Add year and month

jan_2020["Year"] = "2020"
jan_2020["Month"] = "Jan"


In [23]:
# Rename columns to label the aggregates

jan_2020 = jan_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [24]:
jan_2020_ave = jan_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2020_ave

In [25]:
# Export CSV file

jan_2020_ave.to_csv("data/day/TN/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Read in month csv for state
jan_2021 = pd.read_csv("../data_large/TN-day/2021-jan-day-TN.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,27f8c51fba9ab1fd74970485469e0c1dbba604c4,2021-01-22 18:45:00 UTC,auto,hold,674,795,675,TN,Lebanon,20,False,False,True,Electric
1,1cc4d9f36303fa896a9f1a21424fbdc2a85d8a3b,2021-01-29 13:15:00 UTC,heat,hold,694,692,692,TN,Memphis,7,False,False,False,Gas
2,9c21845d6cd191d8c537828edf2b0283e96b57fe,2021-01-03 14:10:00 UTC,heat,hold,680,721,680,TN,Walland,9,True,False,True,Electric
3,91bb01318831303802537e86477e573aa035ea29,2021-01-21 12:40:00 UTC,heat,hold,637,644,640,TN,goodletsville,70,True,False,True,Electric
4,936a3bc532b36560be9ed48f06b014141207ed10,2021-01-23 13:35:00 UTC,heat,hold,687,696,696,TN,Chattanooga,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
866417,0d4a29d5fe247541f8b9a18082f8f0737d881ae6,2021-01-04 16:35:00 UTC,auto,hold,709,765,715,TN,Brentwood,15,False,False,False,Gas
866418,dd1b8d7541375555113093c9f998b4700b128173,2021-01-16 17:20:00 UTC,auto,hold,698,765,695,TN,Apison,7,False,False,True,Electric
866419,2fac988270329b094312571306ec44164ac3ea6c,2021-01-04 13:35:00 UTC,auto,hold,688,765,695,TN,Pigeon Forge,10,True,False,True,Electric
866420,dd1b8d7541375555113093c9f998b4700b128173,2021-01-20 12:00:00 UTC,auto,hold,688,765,695,TN,Apison,7,False,False,True,Electric


In [28]:
# Add year and month

jan_2021["Year"] = "2021"
jan_2021["Month"] = "Jan"


In [29]:
# Rename columns to label the aggregates

jan_2021 = jan_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [30]:
jan_2021_ave = jan_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2021_ave

In [31]:
# Export CSV file

jan_2021_ave.to_csv("data/day/TN/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/TN/jan/") if f.endswith(".csv")]

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_jan = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/TN/jan/" + file)
    TN_jan = pd.concat([TN_jan, df])
    
TN_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01691a75c606f8718e28a6b747c4cb9675a9060b,Jan,2017,heat,auto,Collierville,691.250000,690.000000,690.000000,20.0,False,False,False
1,01691a75c606f8718e28a6b747c4cb9675a9060b,Jan,2017,heat,hold,Collierville,673.958565,675.885591,675.885591,20.0,False,False,False
2,03528fd0510ac074f319f4d196f87f0d2b19fc26,Jan,2017,cool,hold,Clarksville,695.650000,713.600000,713.600000,10.0,False,False,True
3,03528fd0510ac074f319f4d196f87f0d2b19fc26,Jan,2017,heat,auto,Clarksville,699.500000,708.000000,708.000000,10.0,False,False,True
4,03528fd0510ac074f319f4d196f87f0d2b19fc26,Jan,2017,heat,hold,Clarksville,695.244393,696.871616,696.849961,10.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1086,ff7e0f2e982b085bf01d460db7670249e73af621,Jan,2021,auto,hold,Knoxville,681.005714,741.097143,669.485714,46.0,False,False,True
1087,ff7e608937812d31b26012c64a6fc58ef9e364a2,Jan,2021,auxHeatOnly,hold,Ooltewah,666.368421,699.526316,680.447368,35.0,True,False,False
1088,ff7e608937812d31b26012c64a6fc58ef9e364a2,Jan,2021,heat,hold,Ooltewah,671.469492,667.976246,667.965068,35.0,True,False,False
1089,ff8eb8fa2d95dad875f0fea5d0685ea03ee73961,Jan,2021,heat,hold,Clarksville,708.005751,710.494608,710.494608,5.0,True,False,True


In [34]:
TN_jan.to_csv("Scraper_Output/State_Month_Day/TN/TN_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Read in month csv for state
feb_2017 = pd.read_csv("../data_large/TN-day/2017-feb-day-TN.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0fdacb2a2c440b2016926e5ec1fb867dd892d7da,2017-02-09 12:40:00 UTC,heat,hold,699,710,710,TN,Nashville,0,True,False,True,Electric
1,4b01912bc630f0776b861ba26ea3a804c32da9ed,2017-02-07 18:20:00 UTC,heat,auto,660,650,650,TN,Memphis,65,False,False,False,Gas
2,e9922e820036e47433d5fe75e7cd8d20f25d35f7,2017-02-26 18:00:00 UTC,heat,auto,678,770,640,TN,Lakeland,5,False,False,False,Gas
4,f487cc1aaeaa7dfaba009d08297983d1851d779e,2017-02-10 12:35:00 UTC,heat,auto,723,730,730,TN,Mount Juliet,0,False,False,False,Gas
5,da018df787b98b3131e935cfe29bfbfd69a077bc,2017-02-11 16:05:00 UTC,heat,auto,659,650,650,TN,Columbia,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
224782,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-02-12 13:10:00 UTC,auto,auto,733,760,710,TN,Brentwood,0,False,False,False,Gas
224783,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-02-03 16:20:00 UTC,auto,auto,710,760,710,TN,Brentwood,0,False,False,False,Gas
224784,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-02-12 16:25:00 UTC,auto,auto,735,760,710,TN,Brentwood,0,False,False,False,Gas
224785,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-02-10 14:50:00 UTC,auto,auto,702,760,710,TN,Brentwood,0,False,False,False,Gas


In [37]:
# Add year and month

feb_2017["Year"] = "2017"
feb_2017["Month"] = "feb"

In [38]:
# Rename columns to label the aggregates

feb_2017 = feb_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [39]:
feb_2017_ave = feb_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2017_ave

In [40]:
# Export CSV file

feb_2017_ave.to_csv("data/day/TN/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Read in month csv for state
feb_2018 = pd.read_csv("../data_large/TN-day/2018-feb-day-TN.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4b8d17a30fc54e7cecdccf40e724811c61c6a88c,2018-02-12 12:20:00 UTC,heat,auto,706,740,700,TN,Powell,40,False,False,False,Gas
1,3db71cc46e7d1eabdca50464848b8d58e22ee864,2018-02-28 12:55:00 UTC,cool,auto,730,730,640,TN,,9,True,False,True,Electric
2,b51a531cdbae140dfd195358a9a9747f10f28476,2018-02-25 19:00:00 UTC,heat,auto,696,700,700,TN,Medina,15,False,False,False,Gas
3,8c87ccb624725fed2f1490015f04593f70a96fd7,2018-02-08 16:30:00 UTC,heat,auto,625,670,630,TN,Dyersburg,27,True,False,True,Electric
4,4aa27824d1217d7c2ae9f5b010d824413633aca5,2018-02-01 15:30:00 UTC,heat,auto,681,680,680,TN,Henderson,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
704901,0def9c996ebdfa7e09e06e30b7a7e59362dc9d5d,2018-02-07 13:10:00 UTC,auto,auto,683,725,675,TN,Thompson's Station,7,False,False,False,Gas
704902,5f58cfda6e1406373cd9cdad2e96c80a4e4891a4,2018-02-24 16:15:00 UTC,auto,auto,727,730,680,TN,Thompson's Station,9,True,False,False,Gas
704903,5f58cfda6e1406373cd9cdad2e96c80a4e4891a4,2018-02-23 13:55:00 UTC,auto,auto,731,730,680,TN,Thompson's Station,9,True,False,False,Gas
704904,0def9c996ebdfa7e09e06e30b7a7e59362dc9d5d,2018-02-13 15:50:00 UTC,auto,auto,678,740,670,TN,Thompson's Station,7,False,False,False,Gas


In [43]:
# Add year and month

feb_2018["Year"] = "2018"
feb_2018["Month"] = "feb"


In [44]:
# Rename columns to label the aggregates

feb_2018 = feb_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [45]:
feb_2018_ave = feb_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2018_ave

In [46]:
# Export CSV file

feb_2018_ave.to_csv("data/day/TN/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Read in month csv for state
feb_2019 = pd.read_csv("../data_large/TN-day/2019-feb-day-TN.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2590ff129e946052cfcf7c8b5680b6b79f8840ec,2019-02-27 12:30:00 UTC,auto,hold,649,695,645,TN,Tusculum,0,False,False,True,Electric
1,d069bdc987008e034917dda9bd07e58f14dc1e9b,2019-02-22 17:30:00 UTC,heat,hold,685,685,685,TN,Cookeville,0,True,False,True,Electric
2,4047f1bb37e9a9077c088ddd6608429f2061a907,2019-02-12 19:05:00 UTC,heat,auto,711,699,699,TN,Franklin,0,False,False,False,Gas
3,3a854daedc23c1525ff8b67daba816d0b223c12b,2019-02-20 11:55:00 UTC,heat,hold,607,714,664,TN,Johnson City,0,False,False,False,Gas
4,13516a1b38e9551ef0cea8b4136dd72c1d1c75b8,2019-02-14 12:30:00 UTC,auto,auto,700,830,700,TN,Greenback,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
807060,1d1bff331fe2109cd2666f828d2e854b97a5bb7f,2019-02-19 18:40:00 UTC,auto,auto,705,760,710,TN,Lebanon,17,False,False,False,Gas
807061,48eb7db8569c1cfa12f7b87c763cabc37eba51f4,2019-02-23 14:40:00 UTC,auto,auto,697,760,700,TN,Adams,5,False,False,False,Gas
807063,d5048f9f552dd4d90baff28c4bc1b1ba2ed78eb1,2019-02-28 17:05:00 UTC,auto,auto,735,760,700,TN,Adams,5,True,False,True,Electric
807064,e8c0bcfc2e289bc99befc9729f26e7a6b6d5cb57,2019-02-03 13:05:00 UTC,heat,auto,696,760,700,TN,Lewisburg,10,False,False,False,Gas


In [49]:
# Add year and month

feb_2019["Year"] = "2019"
feb_2019["Month"] = "feb"


In [50]:
# Rename columns to label the aggregates

feb_2019 = feb_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [51]:
feb_2019_ave = feb_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2019_ave

In [52]:
# Export CSV file

feb_2019_ave.to_csv("data/day/TN/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Read in month csv for state
feb_2020 = pd.read_csv("../data_large/TN-day/2020-feb-day-TN.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,48ff8161af06059b027bcbc41932234032dd2129,2020-02-21 12:50:00 UTC,heat,auto,681,702,702,TN,Dowelltown,10,False,False,True,Electric
1,0f5b783863e0b355601c004b1797f17ec457ccc0,2020-02-01 18:55:00 UTC,auto,hold,717,840,700,TN,Munford,0,False,False,False,Gas
2,6dbbc4006039f1f9d01220972496f9bfab053641,2020-02-26 18:45:00 UTC,auto,hold,726,777,727,TN,Knoxville,9,True,False,True,Electric
3,8cedab03e7f470ff7ed089e9205b2bbc7a748eee,2020-02-09 19:45:00 UTC,heat,hold,691,695,695,TN,Chattanooga,70,False,False,False,Gas
4,714e4a803b29f54af163a0441295c581c6dd6b46,2020-02-12 14:20:00 UTC,auto,hold,665,725,675,TN,Finger,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1220197,7884b571c921cc595dc711dc58b0422cdd4f01ed,2020-02-23 19:50:00 UTC,auto,auto,658,760,650,TN,Munford,0,False,False,False,Gas
1220198,874acac83d7b9c65ddb18b0d28fe869d4b0f975c,2020-02-20 16:50:00 UTC,auto,auto,679,760,680,TN,Nashville,39,False,False,True,Electric
1220199,552a89a9f7801890bd45428d454a21ae444a61bb,2020-02-02 15:35:00 UTC,auto,auto,669,760,670,TN,Atoka,5,False,False,False,Gas
1220200,953587daad2a93e09459114aa363717e4befde62,2020-02-03 16:35:00 UTC,heat,auto,680,760,650,TN,Franklin,5,False,False,False,Gas


In [55]:
# Add year and month

feb_2020["Year"] = "2020"
feb_2020["Month"] = "feb"


In [56]:
# Rename columns to label the aggregates

feb_2020 = feb_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [57]:
feb_2020_ave = feb_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2020_ave

In [58]:
# Export CSV file

feb_2020_ave.to_csv("data/day/TN/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Read in month csv for state
feb_2021 = pd.read_csv("../data_large/TN-day/2021-feb-day-TN.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,4179a91750a51474c92a7a842172453e17ad7451,2021-02-14 19:10:00 UTC,heat,hold,679,699,680,TN,Dandridge,19,False,False,True,Electric
2,714e4a803b29f54af163a0441295c581c6dd6b46,2021-02-13 14:15:00 UTC,auto,hold,686,820,690,TN,Finger,10,True,False,True,Electric
3,1cc4d9f36303fa896a9f1a21424fbdc2a85d8a3b,2021-02-04 15:35:00 UTC,heat,hold,692,692,692,TN,Memphis,7,False,False,False,Gas
4,1c98c15bf30e6cdf4ad106f579ac15e4be54065e,2021-02-28 16:30:00 UTC,auto,hold,715,737,677,TN,Oakland,17,False,False,False,Gas
5,5e8bd0a80a3a7b6c34aedd3d0343ba052cd34300,2021-02-01 14:35:00 UTC,auto,hold,661,729,659,TN,Atoka,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755334,b09bf4555b6bf79c66f7d8322907202ee4ebe693,2021-02-21 15:00:00 UTC,auto,hold,707,760,710,TN,Franklin,9,False,False,True,Electric
755335,9299b3b5fb92bdd0de1c2f3bfa7febd27e0ca0af,2021-02-19 17:30:00 UTC,auto,hold,697,760,700,TN,Soddy-Daisy,27,False,False,False,Gas
755336,9cdf192dc5c7da29e292ac285ef8ff4fb861f11e,2021-02-14 10:00:00 UTC,auto,hold,698,760,700,TN,Oak Ridge,28,False,False,False,Gas
755337,1518e869f2de2c6343692ae7e4018d2b80c72782,2021-02-18 16:00:00 UTC,auto,hold,704,760,710,TN,Sevierville,25,True,False,False,Gas


In [61]:
# Add year and month

feb_2021["Year"] = "2021"
feb_2021["Month"] = "feb"


In [62]:
# Rename columns to label the aggregates

feb_2021 = feb_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [63]:
feb_2021_ave = feb_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2021_ave

In [64]:
# Export CSV file

feb_2021_ave.to_csv("data/day/TN/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/TN/feb/") if f.endswith(".csv")]

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_feb = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/TN/feb/" + file)
    TN_feb = pd.concat([TN_feb, df])
    
TN_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01691a75c606f8718e28a6b747c4cb9675a9060b,feb,2017,heat,auto,Collierville,679.744991,651.648452,651.648452,20.0,False,False,False
1,01691a75c606f8718e28a6b747c4cb9675a9060b,feb,2017,heat,hold,Collierville,688.692875,672.993857,669.342752,20.0,False,False,False
2,03528fd0510ac074f319f4d196f87f0d2b19fc26,feb,2017,heat,auto,Clarksville,700.987124,697.742489,697.742489,10.0,False,False,True
3,03528fd0510ac074f319f4d196f87f0d2b19fc26,feb,2017,heat,hold,Clarksville,691.955598,692.967181,692.967181,10.0,False,False,True
4,0412796faa0aac6a304d59e260da884492969f38,feb,2017,cool,hold,Nashville,679.824645,685.497630,685.497630,5.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1101,ff0b01aa1f5f15ebcd7c7e02924e73e981f6cece,feb,2021,auto,hold,Bartlett,737.500000,799.666667,749.666667,40.0,True,False,False
1102,ff7e0f2e982b085bf01d460db7670249e73af621,feb,2021,auto,hold,Knoxville,680.165939,740.000000,668.248908,46.0,False,False,True
1103,ff7e608937812d31b26012c64a6fc58ef9e364a2,feb,2021,auxHeatOnly,hold,Ooltewah,669.194444,666.035948,666.035948,35.0,True,False,False
1104,ff8eb8fa2d95dad875f0fea5d0685ea03ee73961,feb,2021,heat,hold,Clarksville,717.487395,720.894358,720.894358,5.0,True,False,True


In [67]:
TN_feb.to_csv("Scraper_Output/State_Month_Day/TN/TN_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Read in month csv for state
jun_2017 = pd.read_csv("../data_large/TN-day/2017-jun-day-TN.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e27f9c0ffe8116f680d0d2962499526430c1a859,2017-06-12 15:05:00 UTC,cool,hold,736,730,700,TN,gallatin,5,False,False,False,Gas
1,638b82ce928dcedfb4c3f545be7a451c458baaa7,2017-06-17 18:10:00 UTC,auto,auto,730,695,645,TN,Nashville,27,False,False,False,Gas
2,8011d7f373ea95a7382c94277d2e47ff77a2342a,2017-06-11 14:30:00 UTC,cool,hold,768,768,768,TN,Memphis,5,False,False,False,Gas
3,fd04844af4037256c84ba1a2b4e939123afef200,2017-06-18 18:50:00 UTC,auto,hold,746,740,690,TN,Henderson,40,True,False,True,Electric
4,70853305c3d6ab12126c847c86540693d65d297f,2017-06-21 14:30:00 UTC,cool,auto,741,770,730,TN,Franklin,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427215,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-06-10 12:10:00 UTC,cool,hold,731,740,740,TN,Brentwood,0,False,False,False,Gas
427216,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-06-19 16:25:00 UTC,cool,hold,737,740,740,TN,Brentwood,0,False,False,False,Gas
427217,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-06-28 18:40:00 UTC,cool,hold,735,730,730,TN,Brentwood,0,False,False,False,Gas
427218,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-06-30 15:35:00 UTC,cool,hold,733,730,730,TN,Brentwood,0,False,False,False,Gas


In [70]:
# Add year and month

jun_2017["Year"] = "2017"
jun_2017["Month"] = "jun"

In [71]:
# Rename columns to label the aggregates

jun_2017 = jun_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [72]:
jun_2017_ave = jun_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2017_ave

In [73]:
# Export CSV file

jun_2017_ave.to_csv("data/day/TN/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Read in month csv for state
jun_2018 = pd.read_csv("../data_large/TN-day/2018-jun-day-TN.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,90d7fd4dd4dee6d4850f53fa9ceb45741c9306d3,2018-06-03 16:30:00 UTC,cool,hold,777,790,790,TN,Rutledge,17,True,False,True,Electric
1,e35180bc834428751301505665db143cfe3cf60b,2018-06-27 16:10:00 UTC,cool,hold,727,730,730,TN,South Pittsburg,0,False,False,True,Electric
2,4121232aef3d0de52b908017e69e1688d0a785e7,2018-06-02 12:50:00 UTC,auto,hold,724,725,675,TN,Silver Point,10,True,False,True,Electric
3,c15d00219b8e9b7a4cc6f49f03e990fe77fadf48,2018-06-30 11:55:00 UTC,auto,auto,732,730,670,TN,White House,0,False,False,False,Gas
4,827c9421c79ee7b4c2340a3bcaa1b171d1546ab8,2018-06-22 18:05:00 UTC,cool,auto,699,700,700,TN,Paris,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1029156,1f892ee0ffcb5d9188c3720954f6b32090e9ea66,2018-06-22 16:20:00 UTC,cool,hold,694,700,700,TN,Thompsons Station,5,False,False,False,Gas
1029158,57dc5bd36a0fd64db9272575fb091436c04898a1,2018-06-08 13:45:00 UTC,auto,hold,681,680,610,TN,Thompsons Station,0,False,False,False,Gas
1029159,359616012ba36d809082f3414b35c7be946b5b0b,2018-06-27 14:15:00 UTC,cool,hold,762,760,760,TN,Thompsons Station,0,True,False,True,Electric
1029160,0595a0cca77c13f5405c88dbdcf9edbc64df7169,2018-06-01 19:15:00 UTC,auto,auto,775,800,640,TN,Thompsons Station,5,False,False,False,Gas


In [76]:
# Add year and month

jun_2018["Year"] = "2018"
jun_2018["Month"] = "jun"

In [77]:
# Rename columns to label the aggregates

jun_2018 = jun_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [78]:
jun_2018_ave = jun_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2018_ave

In [79]:
# Export CSV file

jun_2018_ave.to_csv("data/day/TN/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Read in month csv for state
jun_2019 = pd.read_csv("../data_large/TN-day/2019-jun-day-TN.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5de4bbe9fe7b808f5b7e9ace9fcc1efbfa061c9d,2019-06-20 15:15:00 UTC,auto,hold,696,697,647,TN,Nashville,0,False,False,False,Gas
2,c0172d1fbbf63b7a1fec0b535dca45d2ddbb6ccf,2019-06-17 16:30:00 UTC,cool,hold,792,830,830,TN,Walland,0,True,False,True,Electric
3,06e44928e286c31032e99523e06f1e3ba9b3b502,2019-06-20 16:50:00 UTC,auto,hold,704,703,653,TN,East Memphis,70,True,False,False,Gas
5,c356096e2f21fab6de947841aed7ef28ae956ad2,2019-06-24 11:05:00 UTC,cool,auto,746,750,738,TN,Powell,10,False,False,False,Gas
6,91bb01318831303802537e86477e573aa035ea29,2019-06-16 13:00:00 UTC,cool,auto,759,780,721,TN,goodletsville,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1458926,953956eca7499ab0fccf4e504497f94dfd1bfc46,2019-06-14 14:40:00 UTC,cool,auto,716,760,760,TN,Knoxville,20,False,False,False,Gas
1458927,da4f72cc7f12a901d9dc7c9208f8ca8c35548edb,2019-06-18 19:25:00 UTC,cool,auto,766,760,760,TN,Clarksville,45,False,False,False,Gas
1458928,5816348333e57171e4f32144c8157dc0aefd5093,2019-06-23 18:45:00 UTC,cool,hold,735,760,760,TN,Watertown,20,False,False,True,Electric
1458929,6ac1923025cb364bcd24ea0e080bcbd4b5597618,2019-06-29 19:00:00 UTC,cool,auto,764,760,760,TN,Collierville,30,False,False,False,Gas


In [82]:
# Add year and month

jun_2019["Year"] = "2019"
jun_2019["Month"] = "jun"

In [83]:
# Rename columns to label the aggregates

jun_2019 = jun_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [84]:
jun_2019_ave = jun_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2019_ave

In [85]:
# Export CSV file

jun_2019_ave.to_csv("data/day/TN/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Read in month csv for state
jun_2020 = pd.read_csv("../data_large/TN-day/2020-jun-day-TN.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2e5e666d476e656d632e6897007a2b3f842ca884,2020-06-30 18:15:00 UTC,auto,hold,680,675,625,TN,Knoxville,10,False,False,False,Gas
1,45dac439214203f0151495ecd8c47ec0c52ff16a,2020-06-10 14:10:00 UTC,auto,hold,684,685,635,TN,Lakeland,0,False,False,False,Gas
2,8cedab03e7f470ff7ed089e9205b2bbc7a748eee,2020-06-27 15:40:00 UTC,cool,auto,708,707,707,TN,Chattanooga,70,False,False,False,Gas
3,442ea8e38f889fa647decea7f061cfe40086661b,2020-06-21 14:55:00 UTC,cool,hold,705,705,705,TN,Harrison,45,True,False,True,Electric
4,900b4a59ea0ea9f20b59b14a985c1606b44699d4,2020-06-03 19:05:00 UTC,cool,hold,752,750,743,TN,Lascassas,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1476071,48cd80e39e07e77a8fe18d850f94319e1585f527,2020-06-12 11:35:00 UTC,cool,hold,754,760,760,TN,Knoxville,90,True,False,False,Gas
1476072,3869334b151a2dcc575d18ea9923c0dda941cb54,2020-06-30 16:15:00 UTC,cool,hold,767,760,760,TN,Collierville,0,False,False,False,Gas
1476073,48cd80e39e07e77a8fe18d850f94319e1585f527,2020-06-20 17:25:00 UTC,cool,hold,760,760,760,TN,Knoxville,90,True,False,False,Gas
1476074,9de8a55999bb509cbb9c07c09645754f2e246eec,2020-06-12 19:05:00 UTC,cool,hold,764,760,760,TN,Nashville,18,True,False,False,Gas


In [88]:
# Add year and month

jun_2020["Year"] = "2020"
jun_2020["Month"] = "jun"

In [89]:
# Rename columns to label the aggregates

jun_2020 = jun_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [90]:
jun_2020_ave = jun_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2020_ave

In [91]:
# Export CSV file

jun_2020_ave.to_csv("data/day/TN/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Read in month csv for state
jun_2021 = pd.read_csv("../data_large/TN-day/2021-jun-day-TN.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,183d52cfccb62951c622ba6b8b16f1b1ea623744,2021-06-07 07:10:00 UTC,auto,hold,686,682,632,TN,Sevierville,10,False,False,True,Electric
1,74c697e9dc7ea3cfa5568a54d85e347cef188282,2021-06-13 19:40:00 UTC,cool,hold,745,749,749,TN,Apison,0,True,False,True,Electric
3,fe590a938128622a7ba7d9a9331332376f3ec8a7,2021-06-03 17:05:00 UTC,auto,hold,750,747,707,TN,Knoxville,8,False,False,False,Gas
4,74f337459b087bc526753c0d161d20bb0e1d3f4d,2021-06-13 16:20:00 UTC,cool,hold,741,748,724,TN,Rossville,0,False,False,False,Gas
5,10697dd045f3597b008b0dcb7acee3e8115aad89,2021-06-02 14:25:00 UTC,auto,hold,697,692,612,TN,Sevierville,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911994,497897b2395fb3cef980cdef4302f1febe221129,2021-06-20 14:55:00 UTC,cool,hold,759,760,760,TN,Chattanooga,0,False,False,False,Gas
911995,b17581a2d8c80f2a6f0c766c31b735f6e6a4ca20,2021-06-23 09:45:00 UTC,cool,hold,741,760,760,TN,Orlinda,39,True,False,False,Gas
911996,eeae4718d7b49e101cc82ee754a47aedfdd49a30,2021-06-04 16:10:00 UTC,cool,hold,738,760,760,TN,Nashville,80,False,False,True,Electric
911997,dbee960cb49bac915b94a475c01d47fb4afe9426,2021-06-11 14:35:00 UTC,cool,hold,764,760,760,TN,Nashville,47,False,False,False,Gas


In [94]:
# Add year and month

jun_2021["Year"] = "2021"
jun_2021["Month"] = "jun"

In [95]:
# Rename columns to label the aggregates

jun_2021 = jun_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [96]:
jun_2021_ave = jun_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2021_ave

In [97]:
# Export CSV file

jun_2021_ave.to_csv("data/day/TN/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/TN/jun/") if f.endswith(".csv")]

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_jun = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/TN/jun/" + file)
    TN_jun = pd.concat([TN_jun, df])
    
TN_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0113bf09dfae09204aefa3924186bc90a90264e0,jun,2017,cool,hold,Collierville,753.895349,756.046512,749.220930,30.0,False,False,False
1,01691a75c606f8718e28a6b747c4cb9675a9060b,jun,2017,cool,auto,Collierville,762.441176,768.571429,785.714286,20.0,False,False,False
2,01691a75c606f8718e28a6b747c4cb9675a9060b,jun,2017,cool,hold,Collierville,764.392770,778.750000,778.750000,20.0,False,False,False
3,0327e2f6416912c7320bedd41df16f269d02da62,jun,2017,auto,auto,Maryville,699.624106,700.001021,679.964249,27.0,True,False,True
4,0327e2f6416912c7320bedd41df16f269d02da62,jun,2017,auto,hold,Maryville,702.266667,700.000000,680.000000,27.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1046,fe590a938128622a7ba7d9a9331332376f3ec8a7,jun,2021,auto,hold,Knoxville,734.788121,733.413121,691.340426,8.0,False,False,False
1047,febb646f0b318176433f35c33efa01976c64d4df,jun,2021,auto,hold,Gatlinburg,706.589008,704.954600,643.037037,19.0,True,False,True
1048,ff0b01aa1f5f15ebcd7c7e02924e73e981f6cece,jun,2021,cool,hold,Bartlett,721.375000,690.000000,690.000000,40.0,True,False,False
1049,ff7e608937812d31b26012c64a6fc58ef9e364a2,jun,2021,cool,hold,Ooltewah,745.349280,750.300272,750.300272,35.0,True,False,False


In [100]:
TN_jun.to_csv("Scraper_Output/State_Month_Day/TN/TN_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Read in month csv for state
jul_2017 = pd.read_csv("../data_large/TN-day/2017-jul-day-TN.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8ed41841705611a0d554e6b59bb56dad7e298445,2017-07-22 14:10:00 UTC,cool,hold,771,740,740,TN,Franklin,20,False,False,False,Gas
1,660d942e2515ffa320427416d0ecf82187b574f0,2017-07-15 14:15:00 UTC,cool,auto,722,720,660,TN,Spring Hill,15,False,False,False,Gas
2,3ecb939296d9ac5a96455183c9f013a1eddd154d,2017-07-04 17:10:00 UTC,cool,auto,791,780,640,TN,Memphis,5,True,False,False,Gas
3,46c103c8fa403636c1da6e510a0dc9859ebe244f,2017-07-05 14:50:00 UTC,auto,hold,723,720,660,TN,Franklin,20,False,False,False,Gas
4,bb37622e2893d99ea0dc2f70eca323584eb98b1c,2017-07-29 16:35:00 UTC,cool,hold,762,760,760,TN,Lakeland,37,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
508832,f8e02d36f8f1bc7797898a09a0db818bf6c0a5d6,2017-07-29 19:20:00 UTC,cool,hold,714,710,710,TN,Nashville,0,False,False,False,Gas
508833,f8e02d36f8f1bc7797898a09a0db818bf6c0a5d6,2017-07-11 12:05:00 UTC,auto,hold,713,720,670,TN,Nashville,0,False,False,False,Gas
508834,f8e02d36f8f1bc7797898a09a0db818bf6c0a5d6,2017-07-11 14:45:00 UTC,auto,hold,712,720,670,TN,Nashville,0,False,False,False,Gas
508835,f8e02d36f8f1bc7797898a09a0db818bf6c0a5d6,2017-07-21 15:40:00 UTC,cool,hold,682,670,670,TN,Nashville,0,False,False,False,Gas


In [103]:
# Add year and month

jul_2017["Year"] = "2017"
jul_2017["Month"] = "jul"

In [104]:
# Rename columns to label the aggregates

jul_2017 = jul_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [105]:
jul_2017_ave = jul_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2017_ave

In [106]:
# Export CSV file

jul_2017_ave.to_csv("data/day/TN/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Read in month csv for state
jul_2018 = pd.read_csv("../data_large/TN-day/2018-jul-day-TN.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0def9c996ebdfa7e09e06e30b7a7e59362dc9d5d,2018-07-02 12:30:00 UTC,auto,hold,707,700,650,TN,Thompson's Station,7,False,False,False,Gas
1,f5eeadc778a843ea74aa690f2b2075dc758fa0a1,2018-07-12 08:00:00 UTC,cool,auto,700,708,658,TN,Soddy-Daisy,45,False,False,True,Electric
2,188e0510fe4fbbad85a417b74431402dee2664ea,2018-07-15 13:50:00 UTC,cool,auto,722,720,720,TN,Covington,45,True,False,True,Electric
3,f8bfadf83840e394b678803615c61d68b2ab9f8b,2018-07-14 19:10:00 UTC,auto,hold,743,740,680,TN,White House,40,False,False,False,Gas
4,9299b3b5fb92bdd0de1c2f3bfa7febd27e0ca0af,2018-07-26 12:15:00 UTC,auto,hold,723,720,650,TN,Soddy-Daisy,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1136327,359616012ba36d809082f3414b35c7be946b5b0b,2018-07-06 12:55:00 UTC,cool,hold,765,770,770,TN,Thompsons Station,0,True,False,True,Electric
1136328,67d40146cd4d925e7b7b6f614becb7a26b261816,2018-07-21 15:15:00 UTC,cool,auto,767,760,760,TN,Thompsons Station,0,False,False,True,Electric
1136329,359616012ba36d809082f3414b35c7be946b5b0b,2018-07-19 12:35:00 UTC,cool,hold,765,770,770,TN,Thompsons Station,0,True,False,True,Electric
1136330,1f892ee0ffcb5d9188c3720954f6b32090e9ea66,2018-07-21 16:55:00 UTC,cool,hold,684,680,680,TN,Thompsons Station,5,False,False,False,Gas


In [109]:
# Add year and month

jul_2018["Year"] = "2018"
jul_2018["Month"] = "jul"

In [110]:
# Rename columns to label the aggregates

jul_2018 = jul_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [111]:
jul_2018_ave = jul_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2018_ave

In [112]:
# Export CSV file

jul_2018_ave.to_csv("data/day/TN/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Read in month csv for state
jul_2019 = pd.read_csv("../data_large/TN-day/2019-jul-day-TN.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ca462020f4bfcf5fc5820c6d3a86b6af6786699d,2019-07-10 18:35:00 UTC,cool,auto,660,671,671,TN,Sweetwater,9,False,False,True,Electric
1,6dbbc4006039f1f9d01220972496f9bfab053641,2019-07-09 17:05:00 UTC,cool,hold,734,725,725,TN,Knoxville,9,True,False,True,Electric
2,d9847a826ef57516183197918e6177b1c5c9f351,2019-07-12 17:40:00 UTC,cool,hold,729,722,722,TN,Cleveland,20,True,False,True,Electric
3,91bb01318831303802537e86477e573aa035ea29,2019-07-02 14:15:00 UTC,cool,auto,783,780,721,TN,goodletsville,70,True,False,True,Electric
4,59a08cf6699235c607602e0545aa1784469de411,2019-07-30 07:10:00 UTC,auto,auto,713,721,671,TN,Johnson City,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1577224,af0ea0d49040c7e2202794b82079122aedc95906,2019-07-07 12:30:00 UTC,cool,hold,766,760,760,TN,Knoxville,70,False,False,False,Gas
1577225,48cd80e39e07e77a8fe18d850f94319e1585f527,2019-07-03 17:00:00 UTC,cool,hold,755,760,760,TN,Knoxville,90,True,False,False,Gas
1577226,48cd80e39e07e77a8fe18d850f94319e1585f527,2019-07-27 16:40:00 UTC,cool,hold,764,760,760,TN,Knoxville,90,True,False,False,Gas
1577227,b58e9cf5bcc4d7e27b88461a759f40855853a4d8,2019-07-08 16:35:00 UTC,cool,auto,763,760,760,TN,Memphis,10,False,False,False,Gas


In [115]:
# Add year and month

jul_2019["Year"] = "2019"
jul_2019["Month"] = "jul"

In [116]:
# Rename columns to label the aggregates

jul_2019 = jul_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [117]:
jul_2019_ave = jul_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2019_ave

In [118]:
# Export CSV file

jul_2019_ave.to_csv("data/day/TN/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Read in month csv for state
jul_2020 = pd.read_csv("../data_large/TN-day/2020-jul-day-TN.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,95c3e0ef5d2160d40bad180147a99fee346cb132,2020-07-14 08:50:00 UTC,cool,hold,712,776,776,TN,Nashville,0,False,False,False,Gas
1,6c22b29949c52e97406c32805b3d534f0c76f324,2020-07-16 09:55:00 UTC,cool,hold,681,685,685,TN,Knoxville,0,False,False,False,Gas
2,16a2fe256adef8dab9386d6c36ba73eea234b765,2020-07-17 18:10:00 UTC,cool,hold,742,739,739,TN,Nashville,10,False,False,False,Gas
3,204d8f2d5444cadfed08f34b0961701510cd46ff,2020-07-26 16:15:00 UTC,auto,hold,714,705,655,TN,Collierville,0,False,False,False,Gas
4,fc8934892241c0af10534da73ba93685423980e9,2020-07-12 14:05:00 UTC,auto,hold,732,735,685,TN,Farragut,19,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1565526,4686be39dfd50c114e9c9f7fa50d28643b0da71e,2020-07-26 16:45:00 UTC,cool,hold,767,760,760,TN,Nashville,10,False,False,False,Gas
1565527,ff7e608937812d31b26012c64a6fc58ef9e364a2,2020-07-02 14:35:00 UTC,cool,hold,763,760,760,TN,Ooltewah,35,True,False,False,Gas
1565528,226d3751d186c420edd54b6de8fdcc228e16767c,2020-07-19 15:15:00 UTC,cool,auto,754,760,760,TN,Knoxville,5,True,False,True,Electric
1565529,c76aca7b68a4f0014d462071353e949dd8137302,2020-07-06 16:30:00 UTC,cool,hold,761,760,760,TN,Collierville,0,False,False,False,Gas


In [121]:
# Add year and month

jul_2020["Year"] = "2020"
jul_2020["Month"] = "jul"

In [122]:
# Rename columns to label the aggregates

jul_2020 = jul_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [123]:
jul_2020_ave = jul_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2020_ave

In [124]:
# Export CSV file

jul_2020_ave.to_csv("data/day/TN/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Read in month csv for state
jul_2021 = pd.read_csv("../data_large/TN-day/2021-jul-day-TN.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,00acc682e6fcdd8bb3564978f6baaf6e2d720a41,2021-07-12 13:55:00 UTC,cool,hold,688,687,687,TN,Nashville,0,False,False,False,Gas
1,59ebe66e90373bda35c5eedfcbaae6a50ad623b7,2021-07-16 16:50:00 UTC,cool,hold,715,705,705,TN,Nashville,0,False,False,False,Gas
2,30e5efcbd0e53ba518c65c73775c8f28252edf49,2021-07-29 14:40:00 UTC,cool,hold,750,779,779,TN,Germantown,40,False,False,False,Gas
3,183d52cfccb62951c622ba6b8b16f1b1ea623744,2021-07-17 12:25:00 UTC,auto,hold,681,682,632,TN,Sevierville,10,False,False,True,Electric
4,8895049457daf73fec093267c5977237c437c5f2,2021-07-03 10:10:00 UTC,cool,hold,680,682,682,TN,Mount Juliet,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
907235,4bb9faaeadde3280526a5461144379633763a747,2021-07-05 17:05:00 UTC,cool,hold,684,760,760,TN,Franklin,45,True,False,True,Electric
907236,bb2a44a619851b70b696e20fe7c2551b7a9d8e4f,2021-07-16 09:05:00 UTC,cool,hold,764,760,760,TN,Knoxville,0,False,False,False,Gas
907237,5281280839bc290d2c3ce382ecac04e9b69927c7,2021-07-24 19:50:00 UTC,cool,hold,768,760,760,TN,collierville,30,False,False,False,Gas
907238,91d93cbfcfea15e49f87509b4791f80a43095fde,2021-07-26 13:00:00 UTC,cool,hold,764,760,760,TN,Germantown,39,False,False,False,Gas


In [127]:
# Add year and month

jul_2021["Year"] = "2021"
jul_2021["Month"] = "jul"

In [128]:
# Rename columns to label the aggregates

jul_2021 = jul_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [129]:
jul_2021_ave = jul_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2021_ave

In [130]:
# Export CSV file

jul_2021_ave.to_csv("data/day/TN/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/TN/jul/") if f.endswith(".csv")]

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_jul = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/TN/jul/" + file)
    TN_jul = pd.concat([TN_jul, df])
    
TN_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0113bf09dfae09204aefa3924186bc90a90264e0,jul,2017,cool,hold,Collierville,764.890041,765.636929,749.728216,30.0,False,False,False
1,01691a75c606f8718e28a6b747c4cb9675a9060b,jul,2017,cool,auto,Collierville,765.181373,766.632353,790.000000,20.0,False,False,False
2,01691a75c606f8718e28a6b747c4cb9675a9060b,jul,2017,cool,hold,Collierville,788.257647,789.461569,781.861569,20.0,False,False,False
3,01b244c6278bc022e1b13b3826143b733278d6bb,jul,2017,cool,auto,Knoxville,749.323529,748.676471,668.617647,15.0,False,False,False
4,031c3898c8f66050458f2072a6e88d6513734a5f,jul,2017,cool,hold,College Grove,709.882353,710.000000,710.000000,7.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
973,febb646f0b318176433f35c33efa01976c64d4df,jul,2021,cool,hold,Gatlinburg,699.705344,697.299237,697.299237,19.0,True,False,True
974,ff0b01aa1f5f15ebcd7c7e02924e73e981f6cece,jul,2021,cool,hold,Bartlett,714.166667,690.250000,689.916667,40.0,True,False,False
975,ff6343e836823967e029ca1e481745612e18206d,jul,2021,auto,hold,Mount Juliet,747.476190,759.761905,690.000000,5.0,False,False,False
976,ff7e608937812d31b26012c64a6fc58ef9e364a2,jul,2021,cool,hold,Ooltewah,733.412858,730.339742,730.339742,35.0,True,False,False


In [133]:
TN_jul.to_csv("Scraper_Output/State_Month_Day/TN/TN_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state
aug_2017 = pd.read_csv("../data_large/TN-day/2017-aug-day-TN.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fc6232649f0406a7a5ae2f67daaf5cce9096f536,2017-08-31 12:20:00 UTC,cool,hold,665,724,724,TN,Memphis,0,False,False,False,Gas
1,d8a8d2dc431e8f6b828b075e94839fed073d9bd2,2017-08-02 14:00:00 UTC,cool,hold,702,700,700,TN,Thompsons Station,6,False,False,False,Gas
2,13917a596d6f5d2597442926cb674a7f9adb4b2e,2017-08-20 19:55:00 UTC,cool,auto,664,660,660,TN,Knoxville,0,False,False,False,Gas
3,facd3d88f2a3ba9784c5af69124e7df696a7bdf8,2017-08-23 16:15:00 UTC,auto,hold,733,735,685,TN,Brentwood,0,False,False,False,Gas
4,7920654ea7c7cbc4c4be0fc7d49cfa9073594849,2017-08-14 12:45:00 UTC,cool,hold,765,770,770,TN,Mount Juliet,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
540727,f4b322166e3400c25b4b1c804b755f7500417c1a,2017-08-15 15:00:00 UTC,cool,hold,733,734,734,TN,Franklin,10,False,False,False,Gas
540728,f4b322166e3400c25b4b1c804b755f7500417c1a,2017-08-10 19:35:00 UTC,cool,hold,739,734,734,TN,Franklin,10,False,False,False,Gas
540729,f4b322166e3400c25b4b1c804b755f7500417c1a,2017-08-14 10:30:00 UTC,cool,hold,736,734,734,TN,Franklin,10,False,False,False,Gas
540730,f4b322166e3400c25b4b1c804b755f7500417c1a,2017-08-12 12:45:00 UTC,cool,hold,739,734,734,TN,Franklin,10,False,False,False,Gas


In [136]:
# Add year and month

aug_2017["Year"] = "2017"
aug_2017["Month"] = "aug"

In [137]:
# Rename columns to label the aggregates

aug_2017 = aug_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [138]:
aug_2017_ave = aug_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2017_ave

In [139]:
# Export CSV file

aug_2017_ave.to_csv("data/day/TN/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Read in month csv for state
aug_2018 = pd.read_csv("../data_large/TN-day/2018-aug-day-TN.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0dd74eeca1d87234da5ae1bcdc4569a7bed3b890,2018-08-09 09:45:00 UTC,cool,hold,774,775,775,TN,Knoxville,0,True,False,True,Electric
1,64debf3b7b825cea756a8914530c43a92fc88095,2018-08-04 14:25:00 UTC,auto,hold,722,759,669,TN,Maryville,76,False,False,True,Electric
2,17a13c03097ce1b43a2f92a7a98a16cc224b2d1d,2018-08-18 15:50:00 UTC,cool,hold,741,726,726,TN,Collierville,10,False,False,False,Gas
3,28ab679f40f640a851fdcfb96e78d3687b1645f9,2018-08-01 15:15:00 UTC,cool,hold,727,725,725,TN,Franklin,20,False,False,True,Electric
4,d683e15efe36d698881b3471ab6323ebd36fa483,2018-08-08 19:30:00 UTC,cool,hold,782,800,800,TN,Apison,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1151539,adeecfc5d6578060cb0c4de344284195c31a297e,2018-08-24 13:15:00 UTC,cool,hold,751,760,760,TN,Knoxville,28,False,False,True,Electric
1151540,a91bc0fc24cbfc0a543b6f423431ef71b64614d5,2018-08-08 18:20:00 UTC,cool,auto,762,760,760,TN,Nashville,10,False,False,False,Gas
1151541,935e2ec685e60aeeb790d8a3df6675c145482dd6,2018-08-04 19:40:00 UTC,cool,hold,749,760,760,TN,Knoxville,10,False,False,False,Gas
1151542,a271b5b307748358544cb48e6e62190b1152b4d3,2018-08-24 16:55:00 UTC,cool,hold,720,760,760,TN,Ooltewah,20,False,False,True,Electric


In [142]:
# Add year and month

aug_2018["Year"] = "2018"
aug_2018["Month"] = "aug"

In [143]:
# Rename columns to label the aggregates

aug_2018 = aug_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [144]:
aug_2018_ave = aug_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2018_ave

In [145]:
# Export CSV file

aug_2018_ave.to_csv("data/day/TN/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Read in month csv for state
aug_2019 = pd.read_csv("../data_large/TN-day/2019-aug-day-TN.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ae87a6c1531d9685114e65de0a0fbfa385812c75,2019-08-29 15:55:00 UTC,cool,auto,739,770,745,TN,Smyrna,16,False,False,True,Electric
1,d9abed9a26ae8e7feb86d884a0d82b8dddfa3d1a,2019-08-14 15:35:00 UTC,cool,hold,732,725,725,TN,Ooltewah,5,False,False,False,Gas
2,5de4bbe9fe7b808f5b7e9ace9fcc1efbfa061c9d,2019-08-15 14:05:00 UTC,auto,hold,690,687,617,TN,Nashville,0,False,False,False,Gas
3,abda53c73a73248e92ae887872774eb9c83082e7,2019-08-19 12:35:00 UTC,cool,hold,707,705,705,TN,Smyrna,5,False,False,True,Electric
4,b09f8f08aa5d1c712682571c01f1371ffa8d8498,2019-08-20 16:45:00 UTC,cool,hold,709,704,704,TN,Dandridge,9,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1571515,80f377decd4cd391da13ba9edfc8d1dc26f6a214,2019-08-31 19:15:00 UTC,cool,hold,759,760,760,TN,Lenoir City,9,False,False,False,Gas
1571516,e3640b28f3ec08df49cef3752fd292e05eadddbc,2019-08-18 11:55:00 UTC,cool,hold,757,760,760,TN,Maryville,70,False,False,False,Gas
1571517,af0ea0d49040c7e2202794b82079122aedc95906,2019-08-26 16:40:00 UTC,cool,hold,768,760,760,TN,Knoxville,70,False,False,False,Gas
1571518,d873bdb4086791f5313cc6ba7c28a87f5bb863b1,2019-08-21 16:00:00 UTC,cool,hold,763,760,760,TN,Bartlett,20,False,False,False,Gas


In [148]:
# Add year and month

aug_2019["Year"] = "2019"
aug_2019["Month"] = "aug"

In [149]:
# Rename columns to label the aggregates

aug_2019 = aug_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [150]:
aug_2019_ave = aug_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2019_ave

In [151]:
# Export CSV file

aug_2019_ave.to_csv("data/day/TN/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Read in month csv for state
aug_2020 = pd.read_csv("../data_large/TN-day/2020-aug-day-TN.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,025d0536636686b956df8dfbe0fe6c8a9ddcfa7b,2020-08-24 12:45:00 UTC,auto,hold,749,755,645,TN,Ooltewah,0,True,False,False,Gas
1,8d02b7226577f7153a220869c92f9ea21b56f1e3,2020-08-13 14:00:00 UTC,cool,hold,679,719,719,TN,Clarksville,0,True,False,True,Electric
4,b09f8f08aa5d1c712682571c01f1371ffa8d8498,2020-08-23 17:55:00 UTC,cool,hold,771,780,753,TN,Dandridge,9,False,False,True,Electric
6,6abe7b7681bd905cb47679cc24149483b25c545c,2020-08-17 18:30:00 UTC,cool,hold,681,675,675,TN,Jamestown,60,False,False,False,Gas
7,1972ac3eea6fa26b0d9ee999d33579b949a0fcd1,2020-08-22 14:25:00 UTC,cool,hold,643,653,653,TN,Smyrna,50,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1527734,2e2cb8bf7b3b1c8cb9dc2ec8de071c3246db4736,2020-08-25 16:40:00 UTC,cool,auto,765,760,760,TN,Lewisburg,10,False,False,True,Electric
1527735,04adebc254991a86381911de7c9d269f2c498bbf,2020-08-19 15:15:00 UTC,cool,auto,753,760,760,TN,Thompsons Station,9,False,False,False,Gas
1527736,aefdeb2a5eaa198d27816bbad4c0fae07a7579ab,2020-08-17 11:45:00 UTC,cool,auto,757,760,760,TN,Memphis,10,False,False,False,Gas
1527737,45683302029a1670f2ddac16a4f050b94ae12233,2020-08-11 13:45:00 UTC,cool,hold,755,760,760,TN,Lakeland,20,False,False,False,Gas


In [154]:
# Add year and month

aug_2020["Year"] = "2020"
aug_2020["Month"] = "aug"

In [155]:
# Rename columns to label the aggregates

aug_2020 = aug_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [156]:
aug_2020_ave = aug_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2020_ave

In [157]:
# Export CSV file

aug_2020_ave.to_csv("data/day/TN/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [158]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/TN/aug/") if f.endswith(".csv")]

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_aug = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/TN/aug/" + file)
    TN_aug = pd.concat([TN_aug, df])
    
TN_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0113bf09dfae09204aefa3924186bc90a90264e0,aug,2017,cool,auto,Collierville,752.340909,750.000000,713.409091,30.0,False,False,False
1,0113bf09dfae09204aefa3924186bc90a90264e0,aug,2017,cool,hold,Collierville,758.923588,757.408638,749.132890,30.0,False,False,False
2,01691a75c606f8718e28a6b747c4cb9675a9060b,aug,2017,cool,auto,Collierville,763.702532,769.981013,789.943038,20.0,False,False,False
3,01691a75c606f8718e28a6b747c4cb9675a9060b,aug,2017,cool,hold,Collierville,765.026371,771.344585,769.961674,20.0,False,False,False
4,01b244c6278bc022e1b13b3826143b733278d6bb,aug,2017,cool,auto,Knoxville,706.891892,701.675676,680.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1720,ff7e0f2e982b085bf01d460db7670249e73af621,aug,2020,cool,hold,Knoxville,727.071776,724.905109,724.708029,46.0,False,False,True
1721,ff7e608937812d31b26012c64a6fc58ef9e364a2,aug,2020,cool,hold,Ooltewah,757.639560,756.023443,756.023443,35.0,True,False,False
1722,ff8eb8fa2d95dad875f0fea5d0685ea03ee73961,aug,2020,cool,hold,Clarksville,709.295217,705.345581,705.345581,5.0,True,False,True
1723,ffed1fbb1ffd47bca87ef2ec871ff17d145285e5,aug,2020,auto,auto,Chattanooga,730.363636,720.030303,656.535354,81.0,False,False,True


In [160]:
TN_aug.to_csv("Scraper_Output/State_Month_Day/TN/TN_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state
dec_2017 = pd.read_csv("../data_large/TN-day/2017-dec-day-TN.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,827c9421c79ee7b4c2340a3bcaa1b171d1546ab8,2017-12-21 14:40:00 UTC,heat,auto,675,750,700,TN,Paris,0,False,False,False,Gas
1,827c9421c79ee7b4c2340a3bcaa1b171d1546ab8,2017-12-10 19:35:00 UTC,heat,hold,697,700,700,TN,Paris,0,False,False,False,Gas
2,426f6dd48b9eb72d1182acf8e4346b1d44ac8b85,2017-12-16 18:50:00 UTC,heat,hold,646,650,650,TN,Bells,0,True,False,True,Electric
4,e27f9c0ffe8116f680d0d2962499526430c1a859,2017-12-30 17:35:00 UTC,auto,auto,701,750,700,TN,gallatin,5,False,False,False,Gas
5,d8529981da1ca15fa2a446d7fff840a33a75c195,2017-12-18 13:05:00 UTC,auto,hold,675,780,680,TN,Johnson City,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
723124,71a80c838c26d0403faca21e6d85025070c94f2c,2017-12-08 17:55:00 UTC,auto,auto,756,760,700,TN,Thompson's Station,0,True,False,True,Electric
723125,71a80c838c26d0403faca21e6d85025070c94f2c,2017-12-27 19:50:00 UTC,auto,auto,746,780,650,TN,Thompson's Station,0,True,False,True,Electric
723126,0def9c996ebdfa7e09e06e30b7a7e59362dc9d5d,2017-12-15 16:35:00 UTC,auto,auto,658,780,650,TN,Thompson's Station,7,False,False,False,Gas
723127,5f58cfda6e1406373cd9cdad2e96c80a4e4891a4,2017-12-12 18:20:00 UTC,heat,auto,745,740,740,TN,Thompson's Station,9,True,False,False,Gas


In [163]:
# Add year and month

dec_2017["Year"] = "2017"
dec_2017["Month"] = "dec"

In [164]:
# Rename columns to label the aggregates

dec_2017 = dec_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [165]:
dec_2017_ave = dec_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2017_ave

In [166]:
# Export CSV file

dec_2017_ave.to_csv("data/day/TN/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Read in month csv for state
dec_2018 = pd.read_csv("../data_large/TN-day/2018-dec-day-TN.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dc6a32f5fd6e8822623115dd1ce18158f4504cae,2018-12-27 16:40:00 UTC,heat,hold,719,715,715,TN,Tullahoma,15,True,False,True,Electric
1,d57565d4481558a61562959273181d8d691ad6ab,2018-12-10 13:35:00 UTC,heat,hold,686,712,712,TN,Clarksville,0,True,False,True,Electric
2,fd28f09a398e7c02c0fb85af9cb4561a27ed52cb,2018-12-08 15:40:00 UTC,auto,hold,667,715,665,TN,Maryville,0,True,False,True,Electric
3,b2b020737f518038013e17d316ad05fcc61740f4,2018-12-11 18:30:00 UTC,heat,hold,732,719,719,TN,Franklin,0,True,False,False,Gas
4,50404f13d71e2a1c15ab72939345c1f7a257a4f3,2018-12-16 18:25:00 UTC,heat,hold,707,719,719,TN,Memphis,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1199077,27743a6ef727b889138746b71c6830e079ff67e1,2018-12-12 14:00:00 UTC,auto,hold,698,760,710,TN,Hartsville,30,False,False,True,Electric
1199078,ddeade01f560e8ab7479dbd859f15d410e8f4d4d,2018-12-06 14:10:00 UTC,auto,hold,690,760,690,TN,Mount Juliet,0,False,False,False,Gas
1199079,255b4cc3674e694557cb069c00ddd5e3f6636f4f,2018-12-11 14:00:00 UTC,heat,auto,700,760,700,TN,Maryville,5,False,False,False,Gas
1199080,0c77d72ad97b801de3a6d95a360b16b884a95d12,2018-12-07 18:00:00 UTC,auto,hold,663,760,710,TN,Bartlett,40,False,False,False,Gas


In [169]:
# Add year and month

dec_2018["Year"] = "2018"
dec_2018["Month"] = "dec"

In [170]:
# Rename columns to label the aggregates

dec_2018 = dec_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [171]:
dec_2018_ave = dec_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2018_ave

In [172]:
# Export CSV file

dec_2018_ave.to_csv("data/day/TN/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Read in month csv for state
dec_2019 = pd.read_csv("../data_large/TN-day/2019-dec-day-TN.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,148456e4a45defd8a615d4629e392ed442bfac3b,2019-12-15 14:40:00 UTC,heat,auto,696,714,700,TN,Maryville,25,True,False,False,Gas
1,b09f8f08aa5d1c712682571c01f1371ffa8d8498,2019-12-01 17:55:00 UTC,heat,hold,688,704,617,TN,Dandridge,9,False,False,True,Electric
2,dbac2b2d6f63eacb587b6144d2894e59054e7178,2019-12-29 13:50:00 UTC,auto,auto,707,840,710,TN,Murfreesboro,0,True,False,False,Gas
3,933ca47a66c79941be6d67c0efb190beb8d27e6c,2019-12-24 18:30:00 UTC,heat,hold,723,727,727,TN,Chattanooga,0,False,False,False,Gas
5,3aa4c87a3258ebf2755dab92776b944c2bbd52d7,2019-12-25 19:05:00 UTC,auto,hold,689,746,676,TN,Franklin,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335030,622816865f2d0112a6f72200f4c66a676c8593b3,2019-12-18 16:35:00 UTC,auto,auto,676,760,700,TN,Pikeville,0,True,False,True,Electric
1335031,fe56b83013d80e875dc61cf66063b05b7fa71ed5,2019-12-14 16:30:00 UTC,auto,hold,706,760,710,TN,Murfreesboro,0,True,False,True,Electric
1335032,b51a531cdbae140dfd195358a9a9747f10f28476,2019-12-01 19:10:00 UTC,auto,auto,705,760,710,TN,Medina,15,False,False,False,Gas
1335033,f50117f80fdc0954db30203dfb08aafa6cfa8e71,2019-12-16 10:40:00 UTC,auto,hold,697,760,700,TN,Columbia,5,False,False,True,Electric


In [175]:
# Add year and month

dec_2019["Year"] = "2019"
dec_2019["Month"] = "dec"

In [176]:
# Rename columns to label the aggregates

dec_2019 = dec_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [177]:
dec_2019_ave = dec_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2019_ave

In [178]:
# Export CSV file

dec_2019_ave.to_csv("data/day/TN/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Read in month csv for state
dec_2020 = pd.read_csv("../data_large/TN-day/2020-dec-day-TN.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,181d67242f8f657e2c49dc7583d6a96c6ee007ed,2020-12-01 19:05:00 UTC,heat,auto,675,739,680,TN,Collierville,10,False,False,False,Gas
1,b09f8f08aa5d1c712682571c01f1371ffa8d8498,2020-12-12 14:35:00 UTC,auto,hold,652,727,657,TN,Dandridge,9,False,False,True,Electric
4,5b7106362b019089975c9171b26df46474be4e3a,2020-12-13 18:10:00 UTC,heat,hold,655,676,658,TN,Nolensville,0,False,False,False,Gas
5,46ca62bd098ce2b85367fad4fe7df4098341ebab,2020-12-07 12:15:00 UTC,heat,hold,661,665,665,TN,Friendsville,30,True,False,True,Electric
6,4179a91750a51474c92a7a842172453e17ad7451,2020-12-20 11:30:00 UTC,auto,hold,713,765,715,TN,Dandridge,19,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1253697,0def9c996ebdfa7e09e06e30b7a7e59362dc9d5d,2020-12-16 17:30:00 UTC,auto,auto,710,760,710,TN,Thompson's Station,7,False,False,False,Gas
1253698,fe3fa4088ba1e307e64d0e7dc65c08fd1d3d9ee5,2020-12-25 15:05:00 UTC,heat,hold,742,760,760,TN,Germantown,20,False,False,False,Gas
1253699,81312a9e5baa3bed4115dc2cdfc17d49acd71f54,2020-12-15 15:10:00 UTC,auto,hold,673,760,680,TN,Hixson,0,True,False,False,Gas
1253700,4d1364e287f7bde199040162009972ddb198fdb3,2020-12-21 18:35:00 UTC,auto,auto,702,760,700,TN,Murfreesboro,10,False,False,False,Gas


In [181]:
# Add year and month

dec_2020["Year"] = "2020"
dec_2020["Month"] = "dec"

In [182]:
# Rename columns to label the aggregates

dec_2020 = dec_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [183]:
dec_2020_ave = dec_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2020_ave

In [184]:
# Export CSV file

dec_2020_ave.to_csv("data/day/TN/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [185]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/TN/dec/") if f.endswith(".csv")]

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_dec = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/TN/dec/" + file)
    TN_dec = pd.concat([TN_dec, df])
    
TN_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d762465f20f9432cfd998744ab841e82cb15bd,dec,2017,auto,hold,Cleveland,716.666667,770.000000,720.000000,20.0,False,False,False
1,0113bf09dfae09204aefa3924186bc90a90264e0,dec,2017,heat,hold,Collierville,675.375000,700.000000,650.000000,30.0,False,False,False
2,01691a75c606f8718e28a6b747c4cb9675a9060b,dec,2017,auto,auto,Collierville,675.286689,758.501706,670.102389,20.0,False,False,False
3,01691a75c606f8718e28a6b747c4cb9675a9060b,dec,2017,auto,hold,Collierville,682.745652,757.054348,686.358696,20.0,False,False,False
4,01691a75c606f8718e28a6b747c4cb9675a9060b,dec,2017,heat,auto,Collierville,642.033333,650.000000,630.000000,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1977,ff7e608937812d31b26012c64a6fc58ef9e364a2,dec,2020,heat,hold,Ooltewah,672.850463,669.882056,669.882056,35.0,True,False,False
1978,ff8eb8fa2d95dad875f0fea5d0685ea03ee73961,dec,2020,heat,hold,Clarksville,704.402819,706.339021,706.339021,5.0,True,False,True
1979,ffed1fbb1ffd47bca87ef2ec871ff17d145285e5,dec,2020,auto,auto,Chattanooga,641.500000,775.000000,670.000000,81.0,False,False,True
1980,ffed1fbb1ffd47bca87ef2ec871ff17d145285e5,dec,2020,heat,auto,Chattanooga,666.868512,742.352941,680.539792,81.0,False,False,True


In [187]:
TN_dec.to_csv("Scraper_Output/State_Month_Day/TN/TN_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [188]:
# Create variable for files in directory
files = [f for f in os.listdir("Scraper_Output/State_Month_Day/TN/") if f.endswith(".csv")]

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
TN_all = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/TN/" + file)
    TN_all = pd.concat([TN_all, df])
    
TN_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0113bf09dfae09204aefa3924186bc90a90264e0,aug,2017,cool,auto,Collierville,752.340909,750.000000,713.409091,30.0,False,False,False
1,0113bf09dfae09204aefa3924186bc90a90264e0,aug,2017,cool,hold,Collierville,758.923588,757.408638,749.132890,30.0,False,False,False
2,01691a75c606f8718e28a6b747c4cb9675a9060b,aug,2017,cool,auto,Collierville,763.702532,769.981013,789.943038,20.0,False,False,False
3,01691a75c606f8718e28a6b747c4cb9675a9060b,aug,2017,cool,hold,Collierville,765.026371,771.344585,769.961674,20.0,False,False,False
4,01b244c6278bc022e1b13b3826143b733278d6bb,aug,2017,cool,auto,Knoxville,706.891892,701.675676,680.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7106,fe590a938128622a7ba7d9a9331332376f3ec8a7,jun,2021,auto,hold,Knoxville,734.788121,733.413121,691.340426,8.0,False,False,False
7107,febb646f0b318176433f35c33efa01976c64d4df,jun,2021,auto,hold,Gatlinburg,706.589008,704.954600,643.037037,19.0,True,False,True
7108,ff0b01aa1f5f15ebcd7c7e02924e73e981f6cece,jun,2021,cool,hold,Bartlett,721.375000,690.000000,690.000000,40.0,True,False,False
7109,ff7e608937812d31b26012c64a6fc58ef9e364a2,jun,2021,cool,hold,Ooltewah,745.349280,750.300272,750.300272,35.0,True,False,False


In [190]:
TN_all.to_csv("Scraper_Output/State_Month_Day/TN_all_day.csv", header=True, index=False)

In [191]:
# Datacheck to mTNe sure state was selected correctly in BQ sql queries

print(f"Unique jan_2017: {jan_2017['ProvinceState'].unique()}")
print(f"Unique jan_2018: {jan_2018['ProvinceState'].unique()}")
print(f"Unique jan_2019: {jan_2019['ProvinceState'].unique()}")
print(f"Unique jan_2020: {jan_2020['ProvinceState'].unique()}")
print(f"Unique jan_2021: {jan_2021['ProvinceState'].unique()}")
print(f"Unique feb_2017: {feb_2017['ProvinceState'].unique()}")
print(f"Unique feb_2018: {feb_2018['ProvinceState'].unique()}")
print(f"Unique feb_2019: {feb_2019['ProvinceState'].unique()}")
print(f"Unique feb_2020: {feb_2020['ProvinceState'].unique()}")
print(f"Unique feb_2021: {feb_2021['ProvinceState'].unique()}")
print(f"Unique jun_2017: {jun_2017['ProvinceState'].unique()}")
print(f"Unique jun_2018: {jun_2018['ProvinceState'].unique()}")
print(f"Unique jun_2019: {jun_2019['ProvinceState'].unique()}")
print(f"Unique jun_2020: {jun_2020['ProvinceState'].unique()}")
print(f"Unique jun_2021: {jun_2021['ProvinceState'].unique()}")
print(f"Unique jul_2017: {jul_2017['ProvinceState'].unique()}")
print(f"Unique jul_2018: {jul_2018['ProvinceState'].unique()}")
print(f"Unique jul_2019: {jul_2019['ProvinceState'].unique()}")
print(f"Unique jul_2020: {jul_2020['ProvinceState'].unique()}")
print(f"Unique jul_2021: {jul_2021['ProvinceState'].unique()}")
print(f"Unique aug_2017: {aug_2017['ProvinceState'].unique()}")
print(f"Unique aug_2018: {aug_2018['ProvinceState'].unique()}")
print(f"Unique aug_2019: {aug_2019['ProvinceState'].unique()}")
print(f"Unique aug_2020: {aug_2020['ProvinceState'].unique()}")
print(f"Unique dec_2017: {dec_2017['ProvinceState'].unique()}")
print(f"Unique dec_2018: {dec_2018['ProvinceState'].unique()}")
print(f"Unique dec_2019: {dec_2019['ProvinceState'].unique()}")
print(f"Unique dec_2020: {dec_2020['ProvinceState'].unique()}")

Unique jan_2017: ['TN']
Unique jan_2018: ['TN']
Unique jan_2019: ['TN']
Unique jan_2020: ['TN']
Unique jan_2021: ['TN']
Unique feb_2017: ['TN']
Unique feb_2018: ['TN']
Unique feb_2019: ['TN']
Unique feb_2020: ['TN']
Unique feb_2021: ['TN']
Unique jun_2017: ['TN']
Unique jun_2018: ['TN']
Unique jun_2019: ['TN']
Unique jun_2020: ['TN']
Unique jun_2021: ['TN']
Unique jul_2017: ['TN']
Unique jul_2018: ['TN']
Unique jul_2019: ['TN']
Unique jul_2020: ['TN']
Unique jul_2021: ['TN']
Unique aug_2017: ['TN']
Unique aug_2018: ['TN']
Unique aug_2019: ['TN']
Unique aug_2020: ['TN']
Unique dec_2017: ['TN']
Unique dec_2018: ['TN']
Unique dec_2019: ['TN']
Unique dec_2020: ['TN']
