# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 source CSV for AR into a DataFrame.
jan_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2017-jan-day-AR.csv",
)

In [3]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jan_2017.drop(index=jan_2017[outlier_rows].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-01-13 12:30:00 UTC,auto,auto,683,743,692,AR,Texarkana,20,False,False,False,Gas
1,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-01-21 18:25:00 UTC,auto,hold,713,715,645,AR,Texarkana,20,False,False,False,Gas
2,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-01-12 11:35:00 UTC,auto,auto,673,675,625,AR,Texarkana,20,False,False,False,Gas
3,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-01-11 13:05:00 UTC,auto,auto,680,715,665,AR,Texarkana,20,False,False,False,Gas
4,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-01-21 17:50:00 UTC,auto,hold,705,715,645,AR,Texarkana,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137879,89b44c708a762cb01616ed8e55cc8fe660cffb43,2017-01-04 16:00:00 UTC,heat,hold,757,760,760,AR,Magnolia,0,False,False,False,Gas
137880,89b44c708a762cb01616ed8e55cc8fe660cffb43,2017-01-04 12:30:00 UTC,heat,hold,762,760,760,AR,Magnolia,0,False,False,False,Gas
137881,89b44c708a762cb01616ed8e55cc8fe660cffb43,2017-01-04 16:25:00 UTC,heat,hold,749,760,760,AR,Magnolia,0,False,False,False,Gas
137882,5233d9c1c5df89079125d71e93d1eb7217733b1d,2017-01-29 16:20:00 UTC,heat,hold,743,760,760,AR,Pine Bluff,0,False,False,True,Electric


In [4]:
# Tag every row with its source year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
jan_2017 = jan_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
07232c051ae191506a33f7b2cf57703c45edc81a,Jan,2017,auto,hold,Little Rock,667.268957,734.389501,632.620220,45.0,False,False,False
07232c051ae191506a33f7b2cf57703c45edc81a,Jan,2017,cool,hold,Little Rock,706.096491,722.989035,722.885965,45.0,False,False,False
07232c051ae191506a33f7b2cf57703c45edc81a,Jan,2017,heat,hold,Little Rock,655.813885,657.412112,650.269572,45.0,False,False,False
0919426c19f28902e7707b4be2e5c1ecdd2d2272,Jan,2017,auto,hold,Roland,663.243902,780.000000,662.243902,15.0,True,False,True
095ef9d837869bd8a6ebc999468cf2b0ed3667c3,Jan,2017,auto,auto,Little Rock,658.075314,729.974895,640.360879,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fd97c7fd6d2e6f08750babb9805babacae1c720c,Jan,2017,cool,auto,Little Rock,725.626253,762.975179,650.037709,75.0,True,False,False
fd97c7fd6d2e6f08750babb9805babacae1c720c,Jan,2017,cool,hold,Little Rock,729.516556,735.039735,732.470199,75.0,True,False,False
fff29f4a20cf21ebb64f5555af72670fb05a837b,Jan,2017,cool,auto,Marion,694.200000,690.485714,700.257143,0.0,False,False,False
fff29f4a20cf21ebb64f5555af72670fb05a837b,Jan,2017,heat,auto,Marion,692.707801,695.770922,689.178723,0.0,False,False,False


In [7]:
# Export the January 2017 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/AR/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the January 2018 source CSV for AR into a DataFrame.
jan_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2018-jan-day-AR.csv",
)

In [9]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jan_2018.drop(index=jan_2018[outlier_rows].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f73ed2afc36c17cff2ea447c5af49d5b12a26c1,2018-01-12 15:55:00 UTC,heat,auto,762,800,800,AR,Forrest City,40,True,False,True,Electric
2,712d73c04bad3f434d1405fd0e26859e508a8037,2018-01-19 13:10:00 UTC,heat,auto,672,749,671,AR,Bella Vista,15,True,False,True,Electric
3,3f73ed2afc36c17cff2ea447c5af49d5b12a26c1,2018-01-16 18:25:00 UTC,heat,auto,786,800,800,AR,Forrest City,40,True,False,True,Electric
4,09d3f117f9f00618d1e0a7f403505c4ee30080b6,2018-01-17 10:45:00 UTC,heat,auto,660,664,664,AR,Hamburg,30,False,False,False,Gas
5,3f73ed2afc36c17cff2ea447c5af49d5b12a26c1,2018-01-19 16:35:00 UTC,heat,auto,795,800,800,AR,Forrest City,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455735,ff13f3ba234036121e341eb476cfa5d2c775350d,2018-01-17 14:40:00 UTC,heat,auto,763,760,760,AR,Little Rock,40,False,False,False,Gas
455736,f4d6abcef06426574cc8b5a80ca5a30116fafa5b,2018-01-10 19:35:00 UTC,heat,hold,755,760,760,AR,Fayetteville,0,False,False,False,Gas
455737,f4d6abcef06426574cc8b5a80ca5a30116fafa5b,2018-01-10 17:15:00 UTC,heat,hold,758,760,760,AR,Fayetteville,0,False,False,False,Gas
455738,f4d6abcef06426574cc8b5a80ca5a30116fafa5b,2018-01-10 19:05:00 UTC,heat,hold,761,760,760,AR,Fayetteville,0,False,False,False,Gas


In [10]:
# Tag every row with its source year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
jan_2018 = jan_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the January 2018 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/AR/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the January 2019 source CSV for AR into a DataFrame.
jan_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2019-jan-day-AR.csv",
)

In [15]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jan_2019.drop(index=jan_2019[outlier_rows].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a578bb21941003e7a58a399375b20f274fab5233,2019-01-24 16:25:00 UTC,heat,hold,635,650,635,AR,Lamar,5,True,False,True,Electric
1,6992fa7f5152d432d58ea4a195774e2dec813d42,2019-01-12 19:20:00 UTC,heat,hold,678,656,656,AR,Paragould,0,False,False,True,Electric
2,a578bb21941003e7a58a399375b20f274fab5233,2019-01-27 18:35:00 UTC,heat,hold,639,650,635,AR,Lamar,5,True,False,True,Electric
3,a578bb21941003e7a58a399375b20f274fab5233,2019-01-13 15:55:00 UTC,heat,hold,635,650,635,AR,Lamar,5,True,False,True,Electric
4,b9b5b3781739364abe020b01a74a6964c9cbb396,2019-01-05 19:50:00 UTC,heat,hold,677,682,682,AR,Texarkana,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
688593,463b4360478952c25707ff80a91b64ae5b9be258,2019-01-04 13:20:00 UTC,heat,auto,757,760,760,AR,Hot Springs Village,0,True,False,True,Electric
688594,463b4360478952c25707ff80a91b64ae5b9be258,2019-01-04 16:25:00 UTC,heat,auto,762,760,760,AR,Hot Springs Village,0,True,False,True,Electric
688595,463b4360478952c25707ff80a91b64ae5b9be258,2019-01-04 16:50:00 UTC,heat,auto,770,760,760,AR,Hot Springs Village,0,True,False,True,Electric
688596,463b4360478952c25707ff80a91b64ae5b9be258,2019-01-04 13:35:00 UTC,heat,auto,761,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [16]:
# Tag every row with its source year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
jan_2019 = jan_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the January 2019 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/AR/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the January 2020 source CSV for AR into a DataFrame.
jan_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2020-jan-day-AR.csv",
)

In [21]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jan_2020.drop(index=jan_2020[outlier_rows].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a4a872961e54abc937085b49e51265949aad24b,2020-01-13 13:40:00 UTC,heat,hold,730,735,735,AR,pinebluff,0,False,False,False,Gas
1,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2020-01-07 12:55:00 UTC,heat,hold,714,719,719,AR,Paragould,9,True,False,True,Electric
2,2a4a872961e54abc937085b49e51265949aad24b,2020-01-24 16:45:00 UTC,heat,hold,741,745,745,AR,pinebluff,0,False,False,False,Gas
3,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2020-01-07 16:05:00 UTC,heat,hold,713,719,719,AR,Paragould,9,True,False,True,Electric
4,2a4a872961e54abc937085b49e51265949aad24b,2020-01-24 18:35:00 UTC,heat,hold,745,745,745,AR,pinebluff,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
635870,eca8b396993adbc49377e788e8109db435efc6f2,2020-01-14 14:40:00 UTC,auto,auto,761,810,760,AR,Little rock,20,False,False,False,Gas
635871,eca8b396993adbc49377e788e8109db435efc6f2,2020-01-01 13:30:00 UTC,auto,auto,749,810,760,AR,Little rock,20,False,False,False,Gas
635872,eca8b396993adbc49377e788e8109db435efc6f2,2020-01-01 13:20:00 UTC,auto,auto,734,810,760,AR,Little rock,20,False,False,False,Gas
635873,eca8b396993adbc49377e788e8109db435efc6f2,2020-01-01 14:00:00 UTC,auto,auto,750,810,760,AR,Little rock,20,False,False,False,Gas


In [22]:
# Tag every row with its source year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
jan_2020 = jan_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the January 2020 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/AR/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the January 2021 source CSV for AR into a DataFrame.
jan_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2021-jan-day-AR.csv",
)

In [27]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jan_2021.drop(index=jan_2021[outlier_rows].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a4a872961e54abc937085b49e51265949aad24b,2021-01-03 15:50:00 UTC,heat,hold,743,745,745,AR,pinebluff,0,False,False,False,Gas
1,2a4a872961e54abc937085b49e51265949aad24b,2021-01-03 19:25:00 UTC,heat,hold,748,745,745,AR,pinebluff,0,False,False,False,Gas
2,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2021-01-18 15:00:00 UTC,heat,hold,721,729,729,AR,Paragould,9,True,False,True,Electric
3,2a4a872961e54abc937085b49e51265949aad24b,2021-01-02 16:35:00 UTC,heat,hold,743,745,745,AR,pinebluff,0,False,False,False,Gas
4,2a4a872961e54abc937085b49e51265949aad24b,2021-01-03 14:05:00 UTC,heat,hold,741,745,745,AR,pinebluff,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
376868,920be5514ed3670fd3b8764d29b8396f611c91d3,2021-01-04 15:05:00 UTC,heat,hold,764,760,760,AR,Little Rock,0,False,False,False,Gas
376869,920be5514ed3670fd3b8764d29b8396f611c91d3,2021-01-04 16:00:00 UTC,heat,hold,754,760,760,AR,Little Rock,0,False,False,False,Gas
376870,15dd1cd0cea3a01f8309bd4c36d5d4a28de6f668,2021-01-15 15:00:00 UTC,heat,hold,764,760,760,AR,Little Rock,0,False,False,False,Gas
376871,f97611b3f1b459289d6fb21a617d90ab8e08a331,2021-01-02 16:30:00 UTC,heat,hold,744,760,760,AR,Little Rock,20,False,False,False,Gas


In [28]:
# Tag every row with its source year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
jan_2021 = jan_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the January 2021 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/AR/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV file names in the January folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/AR/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file, then concatenate once. Calling pd.concat inside the
# loop re-copies all previously read rows on each pass, and recent pandas
# versions warn when an empty DataFrame takes part in a concat.
yearly_frames = [pd.read_csv("data/day/AR/jan/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so keep the original result
# (an empty DataFrame) when no CSV files were found.
AR_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AR_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,07232c051ae191506a33f7b2cf57703c45edc81a,Jan,2017,auto,hold,Little Rock,667.268957,734.389501,632.620220,45.0,False,False,False
1,07232c051ae191506a33f7b2cf57703c45edc81a,Jan,2017,cool,hold,Little Rock,706.096491,722.989035,722.885965,45.0,False,False,False
2,07232c051ae191506a33f7b2cf57703c45edc81a,Jan,2017,heat,hold,Little Rock,655.813885,657.412112,650.269572,45.0,False,False,False
3,0919426c19f28902e7707b4be2e5c1ecdd2d2272,Jan,2017,auto,hold,Roland,663.243902,780.000000,662.243902,15.0,True,False,True
4,095ef9d837869bd8a6ebc999468cf2b0ed3667c3,Jan,2017,auto,auto,Little Rock,658.075314,729.974895,640.360879,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
459,fd97c7fd6d2e6f08750babb9805babacae1c720c,Jan,2021,auto,hold,Little Rock,719.527094,773.044335,722.502463,75.0,True,False,False
460,fdaa1ea391875e148146e9149f3ba56a69aaa10f,Jan,2021,heat,hold,Rogers,686.676039,691.028117,690.393643,5.0,False,False,True
461,fdb0a5f08eff61d50b5f015f77ffc79039cd2fd6,Jan,2021,heat,hold,Little Rock,657.986792,658.832704,658.832704,50.0,False,False,False
462,fe5438cc1ceceb7fb36b7eadefe51bd3d01a7da4,Jan,2021,auto,hold,Fayetteville,688.794311,740.000000,690.000000,7.0,False,False,False


In [34]:
# Write the combined January file; index=False leaves out the row index.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("Scraper_Output/State_Month_Day/AR", exist_ok=True)
AR_jan.to_csv("Scraper_Output/State_Month_Day/AR/AR_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 source CSV for AR into a DataFrame.
feb_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2017-feb-day-AR.csv",
)

In [36]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
feb_2017.drop(index=feb_2017[outlier_rows].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f73ed2afc36c17cff2ea447c5af49d5b12a26c1,2017-02-20 15:00:00 UTC,heat,auto,701,720,682,AR,Forrest City,40,True,False,True,Electric
1,3f73ed2afc36c17cff2ea447c5af49d5b12a26c1,2017-02-24 19:10:00 UTC,heat,auto,737,712,712,AR,Forrest City,40,True,False,True,Electric
2,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-02-07 11:30:00 UTC,auto,hold,693,695,645,AR,Texarkana,20,False,False,False,Gas
3,524c2b1513fde9bd4b78c791c44b0ec58b97b946,2017-02-19 13:35:00 UTC,heat,hold,682,712,712,AR,Charlotte,0,False,False,True,Electric
4,478499cec6167b852381722392a6e060fabe7320,2017-02-11 16:55:00 UTC,auto,hold,698,725,665,AR,Benton,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
137916,36f37582c9b78521db633ab4b5b6de6d0708f2b8,2017-02-23 11:35:00 UTC,cool,hold,705,760,760,AR,Little Rock,45,False,False,False,Gas
137917,36f37582c9b78521db633ab4b5b6de6d0708f2b8,2017-02-23 19:00:00 UTC,cool,hold,723,760,760,AR,Little Rock,45,False,False,False,Gas
137918,36f37582c9b78521db633ab4b5b6de6d0708f2b8,2017-02-23 09:50:00 UTC,cool,hold,709,760,760,AR,Little Rock,45,False,False,False,Gas
137919,36f37582c9b78521db633ab4b5b6de6d0708f2b8,2017-02-23 19:30:00 UTC,cool,hold,723,760,760,AR,Little Rock,45,False,False,False,Gas


In [37]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
feb_2017 = feb_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the February 2017 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/AR/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the February 2018 source CSV for AR into a DataFrame.
feb_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2018-feb-day-AR.csv",
)

In [42]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
feb_2018.drop(index=feb_2018[outlier_rows].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cb4a30a922ed3455ce77d17d7a43ad04b1bed3f5,2018-02-25 13:20:00 UTC,auto,hold,640,715,645,AR,Prattsville,0,False,False,False,Gas
1,cb4a30a922ed3455ce77d17d7a43ad04b1bed3f5,2018-02-23 16:05:00 UTC,auto,hold,659,715,645,AR,Prattsville,0,False,False,False,Gas
2,cb4a30a922ed3455ce77d17d7a43ad04b1bed3f5,2018-02-25 14:40:00 UTC,auto,hold,643,715,645,AR,Prattsville,0,False,False,False,Gas
3,5c3e8064391a203cfda8c167672969d413f59739,2018-02-18 17:50:00 UTC,heat,hold,689,684,684,AR,Van Buren,20,False,False,False,Gas
4,cb4a30a922ed3455ce77d17d7a43ad04b1bed3f5,2018-02-22 19:15:00 UTC,auto,hold,644,715,645,AR,Prattsville,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435805,7999dc5589b3f4c9b6a7600989e6d16fe40ff784,2018-02-17 15:20:00 UTC,heat,hold,752,760,760,AR,Little Rock,10,False,False,False,Gas
435806,1ae4e1e18ec80cff77129b3cc72bcab014984aa4,2018-02-17 18:40:00 UTC,heat,hold,758,760,760,AR,Little Rock,35,False,False,False,Gas
435807,54c5cdf8b3cb4b70df2ea87725ff91debefcf60c,2018-02-18 16:55:00 UTC,heat,hold,813,760,760,AR,Little Rock,29,True,False,False,Gas
435808,109700d1afdc56bab59a41c2c09eed9beac43e4f,2018-02-02 17:25:00 UTC,heat,auto,753,760,760,AR,Fayetteville,10,False,False,False,Gas


In [43]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
feb_2018 = feb_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the February 2018 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/AR/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the February 2019 source CSV for AR into a DataFrame.
feb_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2019-feb-day-AR.csv",
)

In [48]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
feb_2019.drop(index=feb_2019[outlier_rows].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a578bb21941003e7a58a399375b20f274fab5233,2019-02-05 13:00:00 UTC,heat,hold,668,650,635,AR,Lamar,5,True,False,True,Electric
1,a578bb21941003e7a58a399375b20f274fab5233,2019-02-22 14:55:00 UTC,heat,hold,636,650,635,AR,Lamar,5,True,False,True,Electric
2,a578bb21941003e7a58a399375b20f274fab5233,2019-02-22 19:40:00 UTC,heat,hold,637,650,635,AR,Lamar,5,True,False,True,Electric
3,791959a4f08b4468ccc706bf3df93b705b83da12,2019-02-17 12:45:00 UTC,auto,hold,685,732,682,AR,Monticello,15,False,False,True,Electric
5,a578bb21941003e7a58a399375b20f274fab5233,2019-02-01 13:35:00 UTC,heat,hold,634,650,635,AR,Lamar,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455887,ced8fbad97d20b77e4b0a46da1b452f8a69905df,2019-02-02 14:00:00 UTC,heat,hold,747,760,760,AR,Heber Springs,20,False,False,False,Gas
455888,ced8fbad97d20b77e4b0a46da1b452f8a69905df,2019-02-02 12:45:00 UTC,heat,hold,753,760,760,AR,Heber Springs,20,False,False,False,Gas
455889,ced8fbad97d20b77e4b0a46da1b452f8a69905df,2019-02-08 16:40:00 UTC,heat,auto,757,734,760,AR,Heber Springs,20,False,False,False,Gas
455890,ced8fbad97d20b77e4b0a46da1b452f8a69905df,2019-02-10 13:10:00 UTC,heat,auto,753,734,760,AR,Heber Springs,20,False,False,False,Gas


In [49]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
feb_2019 = feb_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the February 2019 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/AR/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the February 2020 source CSV for AR into a DataFrame.
feb_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2020-feb-day-AR.csv",
)

In [54]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
feb_2020.drop(index=feb_2020[outlier_rows].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e21f08b13c66041dc4909dd3205ecb07409265bb,2020-02-22 15:55:00 UTC,heat,hold,724,726,726,AR,Hampton,29,True,False,True,Electric
1,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2020-02-19 13:00:00 UTC,heat,hold,706,719,719,AR,Paragould,9,True,False,True,Electric
2,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2020-02-27 15:55:00 UTC,heat,hold,717,719,719,AR,Paragould,9,True,False,True,Electric
3,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2020-02-27 14:05:00 UTC,heat,hold,721,719,719,AR,Paragould,9,True,False,True,Electric
4,2407d45e869c794aa6868c5dc62960ca33d93cca,2020-02-10 15:30:00 UTC,heat,auto,632,753,616,AR,Booneville,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
567865,f97611b3f1b459289d6fb21a617d90ab8e08a331,2020-02-20 19:30:00 UTC,heat,auto,760,760,760,AR,Little Rock,20,False,False,False,Gas
567866,f97611b3f1b459289d6fb21a617d90ab8e08a331,2020-02-20 19:00:00 UTC,heat,auto,749,760,760,AR,Little Rock,20,False,False,False,Gas
567867,eca8b396993adbc49377e788e8109db435efc6f2,2020-02-21 14:40:00 UTC,auto,auto,701,810,760,AR,Little rock,20,False,False,False,Gas
567868,557fb13250046ef609e7f9ec7e6f26353082868d,2020-02-16 18:45:00 UTC,heat,hold,759,760,760,AR,Jacksonville,0,False,False,False,Gas


In [55]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
feb_2020 = feb_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the February 2020 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/AR/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the February 2021 source CSV for AR into a DataFrame.
feb_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/AR-day/2021-feb-day-AR.csv",
)

In [60]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
feb_2021.drop(index=feb_2021[outlier_rows].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,607db2c120782353da1539a008956301341d54c9,2021-02-14 19:15:00 UTC,heat,hold,688,697,697,AR,Hensley,10,True,False,True,Electric
1,2a4a872961e54abc937085b49e51265949aad24b,2021-02-07 13:10:00 UTC,heat,hold,739,745,745,AR,pinebluff,0,False,False,False,Gas
2,607db2c120782353da1539a008956301341d54c9,2021-02-21 13:10:00 UTC,heat,hold,643,662,647,AR,Hensley,10,True,False,True,Electric
3,2a4a872961e54abc937085b49e51265949aad24b,2021-02-13 15:10:00 UTC,heat,hold,747,755,755,AR,pinebluff,0,False,False,False,Gas
4,a578bb21941003e7a58a399375b20f274fab5233,2021-02-27 16:30:00 UTC,heat,hold,716,721,721,AR,Lamar,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
338227,920be5514ed3670fd3b8764d29b8396f611c91d3,2021-02-15 17:50:00 UTC,heat,hold,750,760,760,AR,Little Rock,0,False,False,False,Gas
338228,c525e899550a37b7e76978ada0e27448c86ee29c,2021-02-04 19:50:00 UTC,auto,hold,753,810,760,AR,Little Rock,30,False,False,False,Gas
338229,920be5514ed3670fd3b8764d29b8396f611c91d3,2021-02-06 18:45:00 UTC,heat,hold,754,760,760,AR,Little Rock,0,False,False,False,Gas
338230,54c5cdf8b3cb4b70df2ea87725ff91debefcf60c,2021-02-04 16:40:00 UTC,heat,hold,760,760,760,AR,Little Rock,29,True,False,False,Gas


In [61]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# carries labels that mark the values as averages.
feb_2021 = feb_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# automatically and raises TypeError when asked to average them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the February 2021 averages; index=True writes the group keys
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) as leading columns.
# Create the destination folder first: to_csv raises OSError when the
# directory is missing (e.g. on a fresh checkout).
os.makedirs("data/day/AR/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/AR/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV file names in the February folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/AR/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file, then concatenate once. Calling pd.concat inside the
# loop re-copies all previously read rows on each pass, and recent pandas
# versions warn when an empty DataFrame takes part in a concat.
yearly_frames = [pd.read_csv("data/day/AR/feb/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so keep the original result
# (an empty DataFrame) when no CSV files were found.
AR_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AR_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,07232c051ae191506a33f7b2cf57703c45edc81a,feb,2017,auto,hold,Little Rock,679.215900,744.928452,631.644770,45.0,False,False,False
1,07232c051ae191506a33f7b2cf57703c45edc81a,feb,2017,cool,hold,Little Rock,722.781333,734.677333,713.077333,45.0,False,False,False
2,07232c051ae191506a33f7b2cf57703c45edc81a,feb,2017,heat,hold,Little Rock,661.705736,652.812968,642.847880,45.0,False,False,False
3,08fe5124a59a024f93eccf6aaec19997eb295483,feb,2017,heat,auto,Greenland,641.060976,708.335366,646.634146,10.0,False,False,False
4,095ef9d837869bd8a6ebc999468cf2b0ed3667c3,feb,2017,auto,auto,Little Rock,647.611111,730.000000,640.000000,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
488,fdc946bc185d3a8916045b8ed6b3f8cd4823e87b,feb,2021,auto,hold,Fayetteville,671.258621,760.000000,650.000000,25.0,True,False,False
489,fe5438cc1ceceb7fb36b7eadefe51bd3d01a7da4,feb,2021,auto,hold,Fayetteville,688.196262,740.000000,690.000000,7.0,False,False,False
490,fe5438cc1ceceb7fb36b7eadefe51bd3d01a7da4,feb,2021,heat,hold,Fayetteville,683.973022,699.732014,683.528777,7.0,False,False,False
491,fff29f4a20cf21ebb64f5555af72670fb05a837b,feb,2021,cool,hold,Marion,728.500000,684.625000,684.625000,0.0,False,False,False


In [67]:
# Save the combined February table; the row index is not written
AR_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AR/AR_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the 2017 June "day" extract for AR into a DataFrame
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2017-jun-day-AR.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2017['TemperatureExpectedCool']
expected_heat = jun_2017['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jun_2017.drop(index=jun_2017.index[too_high | too_low], inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6992fa7f5152d432d58ea4a195774e2dec813d42,2017-06-25 15:10:00 UTC,cool,hold,734,745,745,AR,Paragould,0,False,False,True,Electric
1,b9b5b3781739364abe020b01a74a6964c9cbb396,2017-06-18 19:55:00 UTC,cool,auto,703,699,699,AR,Texarkana,20,False,False,False,Gas
2,08fe5124a59a024f93eccf6aaec19997eb295483,2017-06-06 16:00:00 UTC,cool,hold,743,765,706,AR,Greenland,10,False,False,False,Gas
3,cc66f26c734305a0adc0b5eac05a782c9d4a7028,2017-06-20 17:55:00 UTC,cool,auto,723,736,736,AR,Vilonia,0,False,False,False,Gas
4,6992fa7f5152d432d58ea4a195774e2dec813d42,2017-06-24 16:05:00 UTC,cool,hold,734,745,745,AR,Paragould,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250931,e5d55939ec63192c4be5d8af1d9f1e6c42ec53fb,2017-06-04 16:30:00 UTC,cool,hold,764,760,760,AR,Little Rock,25,False,False,True,Electric
250932,2fdae69166f873813b4fde33534755ea8d0f20fa,2017-06-02 19:25:00 UTC,cool,auto,752,750,760,AR,Little Rock,0,False,False,False,Gas
250933,c4bc0fd34599db54045a86c96c622b5f04b2109f,2017-06-21 14:05:00 UTC,cool,auto,737,750,760,AR,Little Rock,0,False,False,False,Gas
250934,9f80637908e2f872884b42519864c75e40f6a6c2,2017-06-04 13:55:00 UTC,cool,hold,757,760,760,AR,Russellville,25,False,False,False,Gas


In [70]:
# Label every row with the year and month of this extract

jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=ave_column_names)

In [72]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jun_2017_ave.to_csv(path_or_buf="data/day/AR/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the 2018 June "day" extract for AR into a DataFrame
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2018-jun-day-AR.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2018['TemperatureExpectedCool']
expected_heat = jun_2018['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jun_2018.drop(index=jun_2018.index[too_high | too_low], inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6774f0dcb4aaa1d734aa94eb8da43b8f92ed4445,2018-06-02 19:25:00 UTC,cool,hold,763,765,765,AR,Siloam Springs,50,False,False,False,Gas
1,c87ca13bc1b33ae084aa90c2e0892eefa204f640,2018-06-21 12:10:00 UTC,cool,hold,689,737,737,AR,Ft.Smith,0,False,False,True,Electric
2,6774f0dcb4aaa1d734aa94eb8da43b8f92ed4445,2018-06-03 13:50:00 UTC,cool,hold,740,775,775,AR,Siloam Springs,50,False,False,False,Gas
3,6774f0dcb4aaa1d734aa94eb8da43b8f92ed4445,2018-06-03 15:40:00 UTC,cool,hold,750,775,775,AR,Siloam Springs,50,False,False,False,Gas
4,86a26d663b46eac4b1c87290f199fe0145d2acee,2018-06-08 17:50:00 UTC,cool,hold,715,708,708,AR,Benton,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578298,632c230443406e647569fd1364bc2918944d6db7,2018-06-11 17:10:00 UTC,cool,hold,783,760,760,AR,Hot Springs Village,40,True,False,True,Electric
578299,632c230443406e647569fd1364bc2918944d6db7,2018-06-11 17:25:00 UTC,cool,hold,742,760,760,AR,Hot Springs Village,40,True,False,True,Electric
578300,632c230443406e647569fd1364bc2918944d6db7,2018-06-06 16:50:00 UTC,cool,hold,767,760,760,AR,Hot Springs Village,40,True,False,True,Electric
578301,632c230443406e647569fd1364bc2918944d6db7,2018-06-11 18:25:00 UTC,cool,hold,746,760,760,AR,Hot Springs Village,40,True,False,True,Electric


In [76]:
# Label every row with the year and month of this extract

jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=ave_column_names)

In [78]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jun_2018_ave.to_csv(path_or_buf="data/day/AR/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the 2019 June "day" extract for AR into a DataFrame
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2019-jun-day-AR.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2019['TemperatureExpectedCool']
expected_heat = jun_2019['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jun_2019.drop(index=jun_2019.index[too_high | too_low], inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6d47896946b989a7d1bcd1f4d7f321ded10c62f7,2019-06-10 18:20:00 UTC,auto,hold,709,705,625,AR,Quitman,29,True,False,True,Electric
1,a130befba07d0e269738f501ce66c2ee8760f534,2019-06-29 13:40:00 UTC,auto,hold,687,685,635,AR,West Memphis,15,True,False,False,Gas
2,6caa727ae08ff02a8140f5a8e663ab6b3a7a0af0,2019-06-01 19:15:00 UTC,cool,hold,746,735,735,AR,Siloam Springs,15,True,False,True,Electric
3,683227685eb44f9d231d511e8ae5255ecc1b71fb,2019-06-02 19:25:00 UTC,cool,hold,737,731,731,AR,Crawfordsville,10,True,False,False,Gas
4,795344cde944ff88c5e1e14b1cd57e552021e5d1,2019-06-08 12:35:00 UTC,auto,auto,716,713,663,AR,Benton,9,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
743077,463b4360478952c25707ff80a91b64ae5b9be258,2019-06-13 18:40:00 UTC,cool,hold,764,760,760,AR,Hot Springs Village,0,True,False,True,Electric
743078,463b4360478952c25707ff80a91b64ae5b9be258,2019-06-30 14:55:00 UTC,cool,hold,764,760,760,AR,Hot Springs Village,0,True,False,True,Electric
743079,463b4360478952c25707ff80a91b64ae5b9be258,2019-06-16 19:45:00 UTC,cool,hold,755,760,760,AR,Hot Springs Village,0,True,False,True,Electric
743080,463b4360478952c25707ff80a91b64ae5b9be258,2019-06-02 12:05:00 UTC,cool,hold,758,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [82]:
# Label every row with the year and month of this extract

jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=ave_column_names)

In [84]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jun_2019_ave.to_csv(path_or_buf="data/day/AR/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the 2020 June "day" extract for AR into a DataFrame
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2020-jun-day-AR.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2020['TemperatureExpectedCool']
expected_heat = jun_2020['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jun_2020.drop(index=jun_2020.index[too_high | too_low], inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cb4a30a922ed3455ce77d17d7a43ad04b1bed3f5,2020-06-19 13:05:00 UTC,cool,hold,742,745,745,AR,Prattsville,0,False,False,False,Gas
1,a578bb21941003e7a58a399375b20f274fab5233,2020-06-09 15:10:00 UTC,cool,hold,757,760,721,AR,Lamar,5,True,False,True,Electric
2,478499cec6167b852381722392a6e060fabe7320,2020-06-08 13:30:00 UTC,auto,hold,737,742,688,AR,Benton,20,False,False,False,Gas
3,3038fa2fe040155e1ba87e62936f7059a8b9ff2a,2020-06-09 17:30:00 UTC,cool,hold,699,665,665,AR,Hot Springs National,9,False,False,False,Gas
4,a578bb21941003e7a58a399375b20f274fab5233,2020-06-14 19:50:00 UTC,cool,hold,794,790,721,AR,Lamar,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
689621,463b4360478952c25707ff80a91b64ae5b9be258,2020-06-08 18:25:00 UTC,cool,hold,762,760,760,AR,Hot Springs Village,0,True,False,True,Electric
689622,463b4360478952c25707ff80a91b64ae5b9be258,2020-06-23 19:10:00 UTC,cool,hold,759,760,760,AR,Hot Springs Village,0,True,False,True,Electric
689623,463b4360478952c25707ff80a91b64ae5b9be258,2020-06-13 14:55:00 UTC,cool,hold,758,760,760,AR,Hot Springs Village,0,True,False,True,Electric
689624,463b4360478952c25707ff80a91b64ae5b9be258,2020-06-28 15:25:00 UTC,cool,hold,758,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [88]:
# Label every row with the year and month of this extract

jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=ave_column_names)

In [90]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jun_2020_ave.to_csv(path_or_buf="data/day/AR/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the 2021 June "day" extract for AR into a DataFrame
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2021-jun-day-AR.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2021['TemperatureExpectedCool']
expected_heat = jun_2021['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jun_2021.drop(index=jun_2021.index[too_high | too_low], inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a4a872961e54abc937085b49e51265949aad24b,2021-06-03 13:05:00 UTC,cool,hold,746,745,745,AR,pinebluff,0,False,False,False,Gas
2,607db2c120782353da1539a008956301341d54c9,2021-06-20 14:25:00 UTC,cool,hold,666,662,662,AR,Hensley,10,True,False,True,Electric
5,2a4a872961e54abc937085b49e51265949aad24b,2021-06-01 12:25:00 UTC,cool,hold,741,745,745,AR,pinebluff,0,False,False,False,Gas
8,607db2c120782353da1539a008956301341d54c9,2021-06-05 19:40:00 UTC,heat,hold,698,692,692,AR,Hensley,10,True,False,True,Electric
9,607db2c120782353da1539a008956301341d54c9,2021-06-20 15:00:00 UTC,cool,hold,661,662,662,AR,Hensley,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
367924,19fb18dfa8f57589feae60fa8cbc977094f05ae8,2021-06-03 18:25:00 UTC,cool,hold,687,770,760,AR,Fayetteville,0,False,False,False,Gas
367925,19fb18dfa8f57589feae60fa8cbc977094f05ae8,2021-06-02 19:00:00 UTC,cool,hold,674,770,760,AR,Fayetteville,0,False,False,False,Gas
367926,f6357bb0b900cd765b772509d33295a1b24934c6,2021-06-01 11:50:00 UTC,cool,hold,707,760,760,AR,Eureka Springs,20,True,False,True,Electric
367927,f6357bb0b900cd765b772509d33295a1b24934c6,2021-06-01 11:35:00 UTC,cool,hold,708,760,760,AR,Eureka Springs,20,True,False,True,Electric


In [94]:
# Label every row with the year and month of this extract

jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=ave_column_names)

In [96]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jun_2021_ave.to_csv(path_or_buf="data/day/AR/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Names of the per-year CSV exports in the June folder.
# Sorted because os.listdir returns entries in arbitrary order, which would
# make the row order of the combined file differ between runs/machines.
files = sorted(f for f in os.listdir("data/day/AR/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file, then stack them with ONE concat call: concatenating
# onto a growing frame inside the loop re-copies all accumulated rows each pass.
yearly_frames = [pd.read_csv("data/day/AR/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSVs were found
AR_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AR_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,07232c051ae191506a33f7b2cf57703c45edc81a,jun,2017,cool,hold,Little Rock,710.176034,714.977068,714.977068,45.0,False,False,False
1,08fe5124a59a024f93eccf6aaec19997eb295483,jun,2017,cool,auto,Greenland,756.731760,780.980687,738.465665,10.0,False,False,False
2,08fe5124a59a024f93eccf6aaec19997eb295483,jun,2017,cool,hold,Greenland,749.800792,764.513193,764.387863,10.0,False,False,False
3,09ebf722da318b8d0a6fa94acfd1e60747ae6338,jun,2017,auto,hold,North Little Rock,766.957895,769.778947,660.978947,55.0,False,False,False
4,0cc091f4f1fd464e50d9445540e3b069164966af,jun,2017,cool,auto,Searcy,783.207858,810.259823,757.671103,15.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
468,fd85724350332de6ad1e098cfebd71fcdfe3ca76,jun,2021,cool,hold,North Little Rock,726.670520,728.928534,728.928534,0.0,False,False,True
469,fd97c7fd6d2e6f08750babb9805babacae1c720c,jun,2021,auto,hold,Little Rock,718.945946,714.684685,664.684685,75.0,True,False,False
470,fdb0a5f08eff61d50b5f015f77ffc79039cd2fd6,jun,2021,cool,hold,Little Rock,723.988239,761.446212,761.446212,50.0,False,False,False
471,fff29f4a20cf21ebb64f5555af72670fb05a837b,jun,2021,cool,hold,Marion,715.003657,711.804075,711.804075,0.0,False,False,False


In [100]:
# Save the combined June table; the row index is not written
AR_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AR/AR_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the 2017 July "day" extract for AR into a DataFrame
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2017-jul-day-AR.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2017['TemperatureExpectedCool']
expected_heat = jul_2017['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jul_2017.drop(index=jul_2017.index[too_high | too_low], inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4108687cc37f85ffd083fab1f337ea55402fce76,2017-07-29 18:20:00 UTC,cool,hold,755,775,775,AR,Barling,5,False,False,True,Electric
1,9d1abe71cd1063dc1390986258bf0cfe0f6b9fe4,2017-07-23 14:45:00 UTC,cool,auto,738,749,729,AR,Mountain Home,20,True,False,True,Electric
2,cc66f26c734305a0adc0b5eac05a782c9d4a7028,2017-07-22 12:05:00 UTC,cool,auto,759,768,728,AR,Vilonia,0,False,False,False,Gas
3,cc66f26c734305a0adc0b5eac05a782c9d4a7028,2017-07-22 16:00:00 UTC,cool,auto,767,768,728,AR,Vilonia,0,False,False,False,Gas
4,524c2b1513fde9bd4b78c791c44b0ec58b97b946,2017-07-01 15:30:00 UTC,cool,hold,737,728,728,AR,Charlotte,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309860,ad3737faff41196992557ba0a4cb97b6ec117a98,2017-07-31 11:30:00 UTC,cool,auto,760,760,760,AR,Cave Springs,5,False,False,False,Gas
309861,ad3737faff41196992557ba0a4cb97b6ec117a98,2017-07-31 11:25:00 UTC,cool,auto,760,760,760,AR,Cave Springs,5,False,False,False,Gas
309862,ad3737faff41196992557ba0a4cb97b6ec117a98,2017-07-31 10:45:00 UTC,cool,auto,756,760,760,AR,Cave Springs,5,False,False,False,Gas
309863,a52ac345ee6b41d1e1ce0d1f7287d0df03f024bd,2017-07-27 19:50:00 UTC,cool,hold,759,760,760,AR,Fayetteville,25,False,False,False,Gas


In [103]:
# Label every row with the year and month of this extract

jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=ave_column_names)

In [105]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jul_2017_ave.to_csv(path_or_buf="data/day/AR/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the 2018 July "day" extract for AR into a DataFrame
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2018-jul-day-AR.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2018['TemperatureExpectedCool']
expected_heat = jul_2018['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jul_2018.drop(index=jul_2018.index[too_high | too_low], inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1c5aab96100b40853b1604207a5daa224e8105b2,2018-07-28 12:30:00 UTC,auto,hold,716,715,665,AR,Vilonia,0,False,False,False,Gas
1,86a26d663b46eac4b1c87290f199fe0145d2acee,2018-07-07 16:50:00 UTC,cool,hold,731,728,728,AR,Benton,10,False,False,False,Gas
2,86a26d663b46eac4b1c87290f199fe0145d2acee,2018-07-01 17:40:00 UTC,cool,hold,740,731,731,AR,Benton,10,False,False,False,Gas
3,1c5aab96100b40853b1604207a5daa224e8105b2,2018-07-29 12:10:00 UTC,auto,hold,711,715,665,AR,Vilonia,0,False,False,False,Gas
4,2b814dedffeacd2533028782d1e804318935212b,2018-07-03 15:30:00 UTC,cool,hold,772,754,754,AR,Murfreesboro,7,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
615765,632c230443406e647569fd1364bc2918944d6db7,2018-07-13 19:05:00 UTC,cool,hold,752,760,760,AR,Hot Springs Village,40,True,False,True,Electric
615766,632c230443406e647569fd1364bc2918944d6db7,2018-07-01 18:00:00 UTC,cool,hold,753,760,760,AR,Hot Springs Village,40,True,False,True,Electric
615767,632c230443406e647569fd1364bc2918944d6db7,2018-07-08 18:55:00 UTC,cool,hold,757,760,760,AR,Hot Springs Village,40,True,False,True,Electric
615768,632c230443406e647569fd1364bc2918944d6db7,2018-07-01 15:50:00 UTC,cool,hold,787,760,760,AR,Hot Springs Village,40,True,False,True,Electric


In [109]:
# Label every row with the year and month of this extract

jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=ave_column_names)

In [111]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jul_2018_ave.to_csv(path_or_buf="data/day/AR/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the 2019 July "day" extract for AR into a DataFrame
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2019-jul-day-AR.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2019['TemperatureExpectedCool']
expected_heat = jul_2019['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jul_2019.drop(index=jul_2019.index[too_high | too_low], inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2019-07-06 17:45:00 UTC,cool,hold,719,719,719,AR,Paragould,9,True,False,True,Electric
1,08fe5124a59a024f93eccf6aaec19997eb295483,2019-07-19 15:45:00 UTC,cool,hold,765,765,765,AR,Greenland,10,False,False,False,Gas
2,674704167e0598001e087bb7c8cda62015fdc9a9,2019-07-20 16:50:00 UTC,auto,hold,728,715,665,AR,Dardanelle,20,True,False,True,Electric
3,674704167e0598001e087bb7c8cda62015fdc9a9,2019-07-31 12:55:00 UTC,auto,hold,691,695,645,AR,Dardanelle,20,True,False,True,Electric
4,5a0aba1d3a7575ceeefa979cce887c29c6cfefca,2019-07-06 18:00:00 UTC,cool,hold,719,719,719,AR,Paragould,9,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
782341,463b4360478952c25707ff80a91b64ae5b9be258,2019-07-21 15:40:00 UTC,cool,hold,765,760,760,AR,Hot Springs Village,0,True,False,True,Electric
782342,463b4360478952c25707ff80a91b64ae5b9be258,2019-07-04 16:00:00 UTC,cool,hold,762,760,760,AR,Hot Springs Village,0,True,False,True,Electric
782343,463b4360478952c25707ff80a91b64ae5b9be258,2019-07-21 18:55:00 UTC,cool,hold,749,760,760,AR,Hot Springs Village,0,True,False,True,Electric
782344,463b4360478952c25707ff80a91b64ae5b9be258,2019-07-04 19:20:00 UTC,cool,hold,763,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [115]:
# Label every row with the year and month of this extract

jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=ave_column_names)

In [117]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jul_2019_ave.to_csv(path_or_buf="data/day/AR/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the 2020 July "day" extract for AR into a DataFrame
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2020-jul-day-AR.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2020['TemperatureExpectedCool']
expected_heat = jul_2020['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jul_2020.drop(index=jul_2020.index[too_high | too_low], inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,683227685eb44f9d231d511e8ae5255ecc1b71fb,2020-07-30 13:35:00 UTC,cool,hold,726,721,721,AR,Crawfordsville,10,True,False,False,Gas
1,78439f97301b4cd1ec182163726a6e9ef7a255bd,2020-07-21 18:15:00 UTC,cool,hold,749,745,745,AR,Elm Springs,0,True,False,False,Gas
2,a578bb21941003e7a58a399375b20f274fab5233,2020-07-31 15:25:00 UTC,cool,hold,764,760,721,AR,Lamar,5,True,False,True,Electric
3,a578bb21941003e7a58a399375b20f274fab5233,2020-07-15 17:10:00 UTC,cool,hold,764,760,721,AR,Lamar,5,True,False,True,Electric
4,78439f97301b4cd1ec182163726a6e9ef7a255bd,2020-07-27 17:55:00 UTC,cool,auto,737,748,748,AR,Elm Springs,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
729555,463b4360478952c25707ff80a91b64ae5b9be258,2020-07-12 18:55:00 UTC,cool,hold,765,760,760,AR,Hot Springs Village,0,True,False,True,Electric
729556,463b4360478952c25707ff80a91b64ae5b9be258,2020-07-23 18:25:00 UTC,cool,hold,766,760,760,AR,Hot Springs Village,0,True,False,True,Electric
729557,463b4360478952c25707ff80a91b64ae5b9be258,2020-07-15 13:00:00 UTC,cool,hold,750,760,760,AR,Hot Springs Village,0,True,False,True,Electric
729558,463b4360478952c25707ff80a91b64ae5b9be258,2020-07-17 17:40:00 UTC,cool,hold,760,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [121]:
# Label every row with the year and month of this extract

jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=ave_column_names)

In [123]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jul_2020_ave.to_csv(path_or_buf="data/day/AR/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the 2021 July "day" extract for AR into a DataFrame
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/AR-day/2021-jul-day-AR.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: drop every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2021['TemperatureExpectedCool']
expected_heat = jul_2021['TemperatureExpectedHeat']

too_high = (expected_cool >= 850) | (expected_heat >= 850)
too_low = (expected_cool <= 600) | (expected_heat <= 600)

# Single in-place drop; the surviving rows keep their original index labels
jul_2021.drop(index=jul_2021.index[too_high | too_low], inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,795344cde944ff88c5e1e14b1cd57e552021e5d1,2021-07-09 12:15:00 UTC,auto,hold,717,724,664,AR,Benton,9,True,False,True,Electric
2,795344cde944ff88c5e1e14b1cd57e552021e5d1,2021-07-11 15:25:00 UTC,auto,hold,737,734,664,AR,Benton,9,True,False,True,Electric
3,e09103f985544bff2a95f20fea74ce5cf79d39d3,2021-07-10 19:25:00 UTC,auto,hold,724,712,682,AR,Lonoke,0,True,False,False,Gas
5,795344cde944ff88c5e1e14b1cd57e552021e5d1,2021-07-25 15:50:00 UTC,auto,hold,725,724,664,AR,Benton,9,True,False,True,Electric
9,795344cde944ff88c5e1e14b1cd57e552021e5d1,2021-07-24 14:20:00 UTC,auto,hold,724,724,664,AR,Benton,9,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387310,6aad1830907c895deaf274c5902abb31e92705ac,2021-07-25 17:45:00 UTC,cool,hold,702,690,690,AR,El Dorado,40,False,False,False,Gas
387311,2c43dff3628d8f675fec4376562ad4a939e1aeb0,2021-07-10 14:55:00 UTC,cool,hold,766,690,690,AR,Jonesboro,20,False,False,False,Gas
387312,2c43dff3628d8f675fec4376562ad4a939e1aeb0,2021-07-10 15:00:00 UTC,cool,hold,768,690,690,AR,Jonesboro,20,False,False,False,Gas
387313,2c43dff3628d8f675fec4376562ad4a939e1aeb0,2021-07-11 13:55:00 UTC,cool,hold,693,690,690,AR,Jonesboro,20,False,False,False,Gas


In [127]:
# Label every row with the year and month of this extract

jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Mark the three temperature columns as averages ("_ave") ahead of the aggregation

ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=ave_column_names)

In [129]:
# Average every numeric/boolean column per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True states explicitly that the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) are left out; without it pandas >= 2.0 raises TypeError on them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export the aggregated frame to CSV (the group keys are written as index columns)

jul_2021_ave.to_csv(path_or_buf="data/day/AR/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Names of the per-year CSV exports in the July folder.
# Sorted because os.listdir returns entries in arbitrary order, which would
# make the row order of the combined file differ between runs/machines.
files = sorted(f for f in os.listdir("data/day/AR/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file, then stack them with ONE concat call: concatenating
# onto a growing frame inside the loop re-copies all accumulated rows each pass.
yearly_frames = [pd.read_csv("data/day/AR/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSVs were found
AR_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AR_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,07232c051ae191506a33f7b2cf57703c45edc81a,jul,2017,auto,hold,Little Rock,761.000000,730.000000,730.000000,45.0,False,False,False
1,07232c051ae191506a33f7b2cf57703c45edc81a,jul,2017,cool,auto,Little Rock,740.231481,737.185185,667.842593,45.0,False,False,False
2,07232c051ae191506a33f7b2cf57703c45edc81a,jul,2017,cool,hold,Little Rock,726.740296,724.797828,724.801987,45.0,False,False,False
3,08fe5124a59a024f93eccf6aaec19997eb295483,jul,2017,cool,auto,Greenland,760.540216,764.871549,754.213685,10.0,False,False,False
4,08fe5124a59a024f93eccf6aaec19997eb295483,jul,2017,cool,hold,Greenland,756.316441,756.521396,756.523649,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
418,fd85724350332de6ad1e098cfebd71fcdfe3ca76,jul,2021,cool,hold,North Little Rock,726.303526,728.515113,728.515113,0.0,False,False,True
419,fd97c7fd6d2e6f08750babb9805babacae1c720c,jul,2021,auto,hold,Little Rock,739.862637,734.725275,678.351648,75.0,True,False,False
420,fdb0a5f08eff61d50b5f015f77ffc79039cd2fd6,jul,2021,cool,hold,Little Rock,707.371951,721.883825,721.883825,50.0,False,False,False
421,fff29f4a20cf21ebb64f5555af72670fb05a837b,jul,2021,cool,hold,Marion,701.751427,697.109794,697.109794,0.0,False,False,False


In [133]:
# Save the combined July table; the index is left out because it only repeats
# each yearly file's own row numbers.
AR_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/AR/AR_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state (AR, August 2017)
aug_2017_csv = "../data_large/AR-day/2017-aug-day-AR.csv"
aug_2017 = pd.read_csv(aug_2017_csv)

In [135]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = aug_2017.index[
    (aug_2017['TemperatureExpectedCool'] >= 850)
    | (aug_2017['TemperatureExpectedHeat'] >= 850)
    | (aug_2017['TemperatureExpectedCool'] <= 600)
    | (aug_2017['TemperatureExpectedHeat'] <= 600)
]
aug_2017.drop(outlier_rows, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,524c2b1513fde9bd4b78c791c44b0ec58b97b946,2017-08-07 11:40:00 UTC,cool,auto,739,735,735,AR,Charlotte,0,False,False,True,Electric
1,1c5aab96100b40853b1604207a5daa224e8105b2,2017-08-02 19:40:00 UTC,cool,hold,727,735,735,AR,Vilonia,0,False,False,False,Gas
2,1c5aab96100b40853b1604207a5daa224e8105b2,2017-08-02 15:50:00 UTC,cool,hold,727,735,735,AR,Vilonia,0,False,False,False,Gas
3,1c5aab96100b40853b1604207a5daa224e8105b2,2017-08-15 18:15:00 UTC,cool,hold,748,745,745,AR,Vilonia,0,False,False,False,Gas
4,524c2b1513fde9bd4b78c791c44b0ec58b97b946,2017-08-09 19:10:00 UTC,cool,hold,746,745,745,AR,Charlotte,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311148,691317b36f4a8d50387a8194ee5667f78b0d71d5,2017-08-28 09:45:00 UTC,cool,hold,704,690,690,AR,El Dorado,5,False,False,False,Gas
311149,47343c5f98c8470ed00e978b22c5e329482a983f,2017-08-13 12:50:00 UTC,cool,hold,675,690,690,AR,Greenwood,15,False,False,False,Gas
311150,47343c5f98c8470ed00e978b22c5e329482a983f,2017-08-06 12:00:00 UTC,cool,hold,689,690,690,AR,Greenwood,15,False,False,False,Gas
311151,9e8a016de9625a94b3ab0a51ebb7f7992917aa85,2017-08-26 19:30:00 UTC,cool,hold,690,690,690,AR,Jonesboro,50,True,False,True,Electric


In [136]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_column_names)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
aug_2017_ave.to_csv(
    path_or_buf="data/day/AR/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Read in month csv for state (AR, August 2018)
aug_2018_csv = "../data_large/AR-day/2018-aug-day-AR.csv"
aug_2018 = pd.read_csv(aug_2018_csv)

In [141]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = aug_2018.index[
    (aug_2018['TemperatureExpectedCool'] >= 850)
    | (aug_2018['TemperatureExpectedHeat'] >= 850)
    | (aug_2018['TemperatureExpectedCool'] <= 600)
    | (aug_2018['TemperatureExpectedHeat'] <= 600)
]
aug_2018.drop(outlier_rows, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2b814dedffeacd2533028782d1e804318935212b,2018-08-17 12:45:00 UTC,cool,hold,748,759,696,AR,Murfreesboro,7,False,False,True,Electric
1,674704167e0598001e087bb7c8cda62015fdc9a9,2018-08-17 13:30:00 UTC,auto,hold,682,675,625,AR,Dardanelle,20,True,False,True,Electric
2,d9bebc19f52b6b318ec0f83b49a1e7bf4c800000,2018-08-24 10:55:00 UTC,cool,hold,760,721,714,AR,Sheridan,0,True,False,True,Electric
3,c59fb2e3bec33701b9032ee1df7d7d7a587c1d52,2018-08-11 15:25:00 UTC,cool,hold,740,735,735,AR,Judsonia,10,False,False,False,Gas
5,c59fb2e3bec33701b9032ee1df7d7d7a587c1d52,2018-08-09 14:30:00 UTC,cool,hold,744,745,745,AR,Judsonia,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637100,632c230443406e647569fd1364bc2918944d6db7,2018-08-23 19:55:00 UTC,cool,hold,766,760,760,AR,Hot Springs Village,40,True,False,True,Electric
637101,632c230443406e647569fd1364bc2918944d6db7,2018-08-23 19:15:00 UTC,cool,hold,763,760,760,AR,Hot Springs Village,40,True,False,True,Electric
637102,632c230443406e647569fd1364bc2918944d6db7,2018-08-30 15:10:00 UTC,cool,hold,742,760,760,AR,Hot Springs Village,40,True,False,True,Electric
637103,632c230443406e647569fd1364bc2918944d6db7,2018-08-30 15:50:00 UTC,cool,hold,745,760,760,AR,Hot Springs Village,40,True,False,True,Electric


In [142]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_column_names)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
aug_2018_ave.to_csv(
    path_or_buf="data/day/AR/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Read in month csv for state (AR, August 2019)
aug_2019_csv = "../data_large/AR-day/2019-aug-day-AR.csv"
aug_2019 = pd.read_csv(aug_2019_csv)

In [147]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = aug_2019.index[
    (aug_2019['TemperatureExpectedCool'] >= 850)
    | (aug_2019['TemperatureExpectedHeat'] >= 850)
    | (aug_2019['TemperatureExpectedCool'] <= 600)
    | (aug_2019['TemperatureExpectedHeat'] <= 600)
]
aug_2019.drop(outlier_rows, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,683227685eb44f9d231d511e8ae5255ecc1b71fb,2019-08-04 18:15:00 UTC,cool,hold,732,731,731,AR,Crawfordsville,10,True,False,False,Gas
1,683227685eb44f9d231d511e8ae5255ecc1b71fb,2019-08-03 16:35:00 UTC,cool,hold,741,731,731,AR,Crawfordsville,10,True,False,False,Gas
2,e26a65f4655c33eed4be7d7a9d3820bb02778b51,2019-08-24 19:15:00 UTC,auto,auto,731,730,671,AR,Mountain Home,39,True,False,False,Gas
3,e26a65f4655c33eed4be7d7a9d3820bb02778b51,2019-08-23 12:40:00 UTC,auto,auto,728,730,671,AR,Mountain Home,39,True,False,False,Gas
4,683227685eb44f9d231d511e8ae5255ecc1b71fb,2019-08-13 12:20:00 UTC,cool,hold,736,731,731,AR,Crawfordsville,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
758239,463b4360478952c25707ff80a91b64ae5b9be258,2019-08-01 16:45:00 UTC,cool,hold,754,760,760,AR,Hot Springs Village,0,True,False,True,Electric
758240,463b4360478952c25707ff80a91b64ae5b9be258,2019-08-31 17:25:00 UTC,cool,hold,764,760,760,AR,Hot Springs Village,0,True,False,True,Electric
758241,463b4360478952c25707ff80a91b64ae5b9be258,2019-08-12 14:05:00 UTC,cool,hold,758,760,760,AR,Hot Springs Village,0,True,False,True,Electric
758242,632c230443406e647569fd1364bc2918944d6db7,2019-08-24 17:20:00 UTC,cool,hold,764,760,760,AR,Hot Springs Village,40,True,False,True,Electric


In [148]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_column_names)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
aug_2019_ave.to_csv(
    path_or_buf="data/day/AR/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Read in month csv for state (AR, August 2020)
aug_2020_csv = "../data_large/AR-day/2020-aug-day-AR.csv"
aug_2020 = pd.read_csv(aug_2020_csv)

In [153]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = aug_2020.index[
    (aug_2020['TemperatureExpectedCool'] >= 850)
    | (aug_2020['TemperatureExpectedHeat'] >= 850)
    | (aug_2020['TemperatureExpectedCool'] <= 600)
    | (aug_2020['TemperatureExpectedHeat'] <= 600)
]
aug_2020.drop(outlier_rows, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a4a872961e54abc937085b49e51265949aad24b,2020-08-20 12:25:00 UTC,cool,hold,729,735,735,AR,pinebluff,0,False,False,False,Gas
1,607db2c120782353da1539a008956301341d54c9,2020-08-02 15:40:00 UTC,cool,hold,670,662,662,AR,Hensley,10,True,False,True,Electric
2,a578bb21941003e7a58a399375b20f274fab5233,2020-08-26 13:35:00 UTC,cool,hold,749,750,721,AR,Lamar,5,True,False,True,Electric
3,2a4a872961e54abc937085b49e51265949aad24b,2020-08-12 11:55:00 UTC,cool,hold,740,745,745,AR,pinebluff,0,False,False,False,Gas
4,2a4a872961e54abc937085b49e51265949aad24b,2020-08-26 18:45:00 UTC,cool,hold,734,735,735,AR,pinebluff,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
678917,463b4360478952c25707ff80a91b64ae5b9be258,2020-08-08 14:45:00 UTC,cool,hold,765,760,760,AR,Hot Springs Village,0,True,False,True,Electric
678918,632c230443406e647569fd1364bc2918944d6db7,2020-08-09 18:55:00 UTC,cool,auto,747,760,760,AR,Hot Springs Village,40,True,False,True,Electric
678919,632c230443406e647569fd1364bc2918944d6db7,2020-08-09 17:05:00 UTC,cool,auto,760,760,760,AR,Hot Springs Village,40,True,False,True,Electric
678920,463b4360478952c25707ff80a91b64ae5b9be258,2020-08-02 17:20:00 UTC,cool,hold,759,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [154]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_column_names)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
aug_2020_ave.to_csv(
    path_or_buf="data/day/AR/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from the month's folder
2. Export as combined CSV

In [158]:
# Create variable for files in directory
# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the combined CSV reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/AR/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file first and concatenate once at the end; calling
# pd.concat inside the loop re-copies all accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/AR/aug/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
AR_aug = pd.concat(frames) if frames else pd.DataFrame()

AR_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,07232c051ae191506a33f7b2cf57703c45edc81a,aug,2017,cool,hold,Little Rock,715.917359,714.611281,714.603848,45.0,False,False,False
1,08fe5124a59a024f93eccf6aaec19997eb295483,aug,2017,cool,auto,Greenland,760.813223,764.676033,741.320661,10.0,False,False,False
2,08fe5124a59a024f93eccf6aaec19997eb295483,aug,2017,cool,hold,Greenland,758.366102,768.148305,768.148305,10.0,False,False,False
3,09d3f117f9f00618d1e0a7f403505c4ee30080b6,aug,2017,cool,auto,Hamburg,755.395062,759.925926,640.222222,30.0,False,False,False
4,09d3f117f9f00618d1e0a7f403505c4ee30080b6,aug,2017,cool,hold,Hamburg,736.047619,730.000000,730.000000,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
774,fe5438cc1ceceb7fb36b7eadefe51bd3d01a7da4,aug,2020,auto,hold,Fayetteville,740.927665,740.671003,670.000000,7.0,False,False,False
775,ffb94d67ce32e994fff8bac731c4f5941c328895,aug,2020,cool,auto,Bryant,691.255269,690.889930,670.000000,37.0,False,False,False
776,ffb94d67ce32e994fff8bac731c4f5941c328895,aug,2020,cool,hold,Bryant,667.905784,662.297108,676.532183,37.0,False,False,False
777,fff29f4a20cf21ebb64f5555af72670fb05a837b,aug,2020,cool,auto,Marion,696.278008,695.315353,695.315353,0.0,False,False,False


In [160]:
# Save the combined August table; the index is left out because it only
# repeats each yearly file's own row numbers.
AR_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/AR/AR_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state (AR, December 2017)
dec_2017_csv = "../data_large/AR-day/2017-dec-day-AR.csv"
dec_2017 = pd.read_csv(dec_2017_csv)

In [162]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = dec_2017.index[
    (dec_2017['TemperatureExpectedCool'] >= 850)
    | (dec_2017['TemperatureExpectedHeat'] >= 850)
    | (dec_2017['TemperatureExpectedCool'] <= 600)
    | (dec_2017['TemperatureExpectedHeat'] <= 600)
]
dec_2017.drop(outlier_rows, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4108687cc37f85ffd083fab1f337ea55402fce76,2017-12-17 19:45:00 UTC,heat,hold,665,665,665,AR,Barling,5,False,False,True,Electric
2,d9bebc19f52b6b318ec0f83b49a1e7bf4c800000,2017-12-30 19:50:00 UTC,heat,hold,694,698,698,AR,Sheridan,0,True,False,True,Electric
3,1e1bd3dcf79f94bf7a02ee1dcd9edd8dc20d4d4d,2017-12-15 15:30:00 UTC,heat,hold,729,750,735,AR,Rockwell,17,False,False,False,Gas
4,d9bebc19f52b6b318ec0f83b49a1e7bf4c800000,2017-12-18 12:20:00 UTC,heat,hold,723,705,699,AR,Sheridan,0,True,False,True,Electric
5,524c2b1513fde9bd4b78c791c44b0ec58b97b946,2017-12-31 14:45:00 UTC,heat,hold,687,707,707,AR,Charlotte,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
442644,4233c8f6a64c867330bd7b32347e0d2fdaf481e5,2017-12-30 19:15:00 UTC,heat,auto,713,760,760,AR,Little Rock,35,False,False,False,Gas
442645,f97611b3f1b459289d6fb21a617d90ab8e08a331,2017-12-25 13:50:00 UTC,heat,auto,726,760,760,AR,Little Rock,20,False,False,False,Gas
442646,5d57734b93370b70b90a787a6ff51d626e266bad,2017-12-25 16:30:00 UTC,heat,hold,759,760,760,AR,Little Rock,10,False,False,False,Gas
442647,5d57734b93370b70b90a787a6ff51d626e266bad,2017-12-25 15:10:00 UTC,heat,hold,755,760,760,AR,Little Rock,10,False,False,False,Gas


In [163]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_column_names)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
dec_2017_ave.to_csv(
    path_or_buf="data/day/AR/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Read in month csv for state (AR, December 2018)
dec_2018_csv = "../data_large/AR-day/2018-dec-day-AR.csv"
dec_2018 = pd.read_csv(dec_2018_csv)

In [168]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = dec_2018.index[
    (dec_2018['TemperatureExpectedCool'] >= 850)
    | (dec_2018['TemperatureExpectedHeat'] >= 850)
    | (dec_2018['TemperatureExpectedCool'] <= 600)
    | (dec_2018['TemperatureExpectedHeat'] <= 600)
]
dec_2018.drop(outlier_rows, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e1bd3dcf79f94bf7a02ee1dcd9edd8dc20d4d4d,2018-12-13 19:30:00 UTC,heat,hold,704,720,704,AR,Rockwell,17,False,False,False,Gas
1,2b814dedffeacd2533028782d1e804318935212b,2018-12-27 07:15:00 UTC,heat,hold,701,700,655,AR,Murfreesboro,7,False,False,True,Electric
2,2b814dedffeacd2533028782d1e804318935212b,2018-12-01 16:25:00 UTC,heat,hold,735,755,755,AR,Murfreesboro,7,False,False,True,Electric
3,2b814dedffeacd2533028782d1e804318935212b,2018-12-14 18:25:00 UTC,heat,hold,661,700,665,AR,Murfreesboro,7,False,False,True,Electric
4,70cd3e71dcaaff3403ec35284f434674f580c2d8,2018-12-22 19:55:00 UTC,heat,hold,676,665,665,AR,Prairie Grove,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
670048,463b4360478952c25707ff80a91b64ae5b9be258,2018-12-01 14:05:00 UTC,cool,hold,761,760,760,AR,Hot Springs Village,0,True,False,True,Electric
670049,463b4360478952c25707ff80a91b64ae5b9be258,2018-12-14 18:20:00 UTC,heat,hold,765,760,760,AR,Hot Springs Village,0,True,False,True,Electric
670050,463b4360478952c25707ff80a91b64ae5b9be258,2018-12-14 13:20:00 UTC,heat,hold,768,760,760,AR,Hot Springs Village,0,True,False,True,Electric
670051,463b4360478952c25707ff80a91b64ae5b9be258,2018-12-09 12:30:00 UTC,heat,hold,752,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [169]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_column_names)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
dec_2018_ave.to_csv(
    path_or_buf="data/day/AR/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Read in month csv for state (AR, December 2019)
dec_2019_csv = "../data_large/AR-day/2019-dec-day-AR.csv"
dec_2019 = pd.read_csv(dec_2019_csv)

In [174]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = dec_2019.index[
    (dec_2019['TemperatureExpectedCool'] >= 850)
    | (dec_2019['TemperatureExpectedHeat'] >= 850)
    | (dec_2019['TemperatureExpectedCool'] <= 600)
    | (dec_2019['TemperatureExpectedHeat'] <= 600)
]
dec_2019.drop(outlier_rows, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,2a4a872961e54abc937085b49e51265949aad24b,2019-12-21 15:50:00 UTC,heat,hold,729,735,735,AR,pinebluff,0,False,False,False,Gas
2,a578bb21941003e7a58a399375b20f274fab5233,2019-12-08 15:10:00 UTC,heat,hold,726,721,721,AR,Lamar,5,True,False,True,Electric
4,f091cc18a2bc6c3755e27bd76fbb0f9c95646dfd,2019-12-13 16:35:00 UTC,heat,hold,729,735,735,AR,Van Buren,9,False,False,True,Electric
5,6992fa7f5152d432d58ea4a195774e2dec813d42,2019-12-14 15:00:00 UTC,heat,hold,662,666,666,AR,Paragould,0,False,False,True,Electric
6,3038fa2fe040155e1ba87e62936f7059a8b9ff2a,2019-12-11 17:45:00 UTC,heat,hold,746,745,745,AR,Hot Springs National,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
630202,9dd31c7e65297722f232494a34c89ee447a965e8,2019-12-24 17:30:00 UTC,heat,auto,754,760,760,AR,Russellville,25,False,False,False,Gas
630203,463b4360478952c25707ff80a91b64ae5b9be258,2019-12-27 13:45:00 UTC,cool,auto,730,760,760,AR,Hot Springs Village,0,True,False,True,Electric
630204,463b4360478952c25707ff80a91b64ae5b9be258,2019-12-27 19:15:00 UTC,cool,hold,728,760,760,AR,Hot Springs Village,0,True,False,True,Electric
630205,463b4360478952c25707ff80a91b64ae5b9be258,2019-12-27 14:00:00 UTC,cool,auto,726,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [175]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_column_names)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
dec_2019_ave.to_csv(
    path_or_buf="data/day/AR/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Read in month csv for state (AR, December 2020)
dec_2020_csv = "../data_large/AR-day/2020-dec-day-AR.csv"
dec_2020 = pd.read_csv(dec_2020_csv)

In [180]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is
# >= 850 or <= 600; the four checks are combined into a single in-place drop.
outlier_rows = dec_2020.index[
    (dec_2020['TemperatureExpectedCool'] >= 850)
    | (dec_2020['TemperatureExpectedHeat'] >= 850)
    | (dec_2020['TemperatureExpectedCool'] <= 600)
    | (dec_2020['TemperatureExpectedHeat'] <= 600)
]
dec_2020.drop(outlier_rows, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,b9b5b3781739364abe020b01a74a6964c9cbb396,2020-12-31 14:25:00 UTC,cool,hold,664,719,719,AR,Texarkana,20,False,False,False,Gas
2,e31826261f11604983b9b548383bbf0d5add40fa,2020-12-01 17:05:00 UTC,auto,hold,705,767,717,AR,Osceola,70,False,False,False,Gas
3,a578bb21941003e7a58a399375b20f274fab5233,2020-12-14 19:00:00 UTC,heat,hold,717,721,721,AR,Lamar,5,True,False,True,Electric
4,2a4a872961e54abc937085b49e51265949aad24b,2020-12-05 19:45:00 UTC,heat,hold,742,745,745,AR,pinebluff,0,False,False,False,Gas
5,a578bb21941003e7a58a399375b20f274fab5233,2020-12-08 12:45:00 UTC,heat,hold,716,721,721,AR,Lamar,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
528559,557fb13250046ef609e7f9ec7e6f26353082868d,2020-12-04 13:30:00 UTC,auto,auto,756,810,760,AR,Jacksonville,0,False,False,False,Gas
528560,463b4360478952c25707ff80a91b64ae5b9be258,2020-12-27 17:50:00 UTC,cool,hold,733,760,760,AR,Hot Springs Village,0,True,False,True,Electric
528561,463b4360478952c25707ff80a91b64ae5b9be258,2020-12-27 18:00:00 UTC,cool,hold,726,760,760,AR,Hot Springs Village,0,True,False,True,Electric
528562,463b4360478952c25707ff80a91b64ae5b9be258,2020-12-27 17:35:00 UTC,cool,hold,722,760,760,AR,Hot Springs Village,0,True,False,True,Electric


In [181]:
# Add year and month
# Both labels are stored as strings and later serve as groupby keys.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Rename columns to label the aggregates
# (the three temperature columns get an "_ave" suffix before they are averaged).
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_column_names)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the non-key text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Export CSV file
# The group keys live in the index, so the index is written as well.
dec_2020_ave.to_csv(
    path_or_buf="data/day/AR/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from the month's folder
2. Export as combined CSV

In [185]:
# Create variable for files in directory
# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the combined CSV reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/AR/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file first and concatenate once at the end; calling
# pd.concat inside the loop re-copies all accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/AR/dec/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
AR_dec = pd.concat(frames) if frames else pd.DataFrame()

AR_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0365faed9eda6da204a74c3bcde1563857427d95,dec,2017,auto,auto,Little Rock,643.928230,756.411483,640.976077,15.0,False,False,False
1,05f016d92aafe9576020078fc75573d087c9ab46,dec,2017,heat,hold,Little Rock,694.802301,696.131201,696.131201,90.0,False,False,False
2,07232c051ae191506a33f7b2cf57703c45edc81a,dec,2017,cool,hold,Little Rock,683.341615,715.115942,715.115942,45.0,False,False,False
3,07232c051ae191506a33f7b2cf57703c45edc81a,dec,2017,heat,hold,Little Rock,649.412402,650.792595,629.362131,45.0,False,False,False
4,08fe5124a59a024f93eccf6aaec19997eb295483,dec,2017,heat,auto,Greenland,653.566820,667.539171,662.806452,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
869,fdaa1ea391875e148146e9149f3ba56a69aaa10f,dec,2020,heat,hold,Rogers,695.084444,699.003333,698.247778,5.0,False,False,True
870,fdb0a5f08eff61d50b5f015f77ffc79039cd2fd6,dec,2020,heat,hold,Little Rock,640.744556,642.957034,642.938199,50.0,False,False,False
871,fe5438cc1ceceb7fb36b7eadefe51bd3d01a7da4,dec,2020,auto,hold,Fayetteville,683.642949,734.457780,684.457780,7.0,False,False,False
872,fff29f4a20cf21ebb64f5555af72670fb05a837b,dec,2020,heat,auto,Marion,703.399240,708.178707,708.178707,0.0,False,False,False


In [187]:
# Save the combined December table; the index is left out because it only
# repeats each yearly file's own row numbers.
AR_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/AR/AR_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Create variable for files in directory
# os.listdir() returns names in arbitrary order, so sort them to keep the row
# order of the combined CSV reproducible between runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/AR/") if f.endswith(".csv")
)

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every monthly file first and concatenate once at the end; calling
# pd.concat inside the loop re-copies all accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/AR/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
AR_all = pd.concat(frames) if frames else pd.DataFrame()

AR_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,07232c051ae191506a33f7b2cf57703c45edc81a,aug,2017,cool,hold,Little Rock,715.917359,714.611281,714.603848,45.0,False,False,False
1,08fe5124a59a024f93eccf6aaec19997eb295483,aug,2017,cool,auto,Greenland,760.813223,764.676033,741.320661,10.0,False,False,False
2,08fe5124a59a024f93eccf6aaec19997eb295483,aug,2017,cool,hold,Greenland,758.366102,768.148305,768.148305,10.0,False,False,False
3,09d3f117f9f00618d1e0a7f403505c4ee30080b6,aug,2017,cool,auto,Hamburg,755.395062,759.925926,640.222222,30.0,False,False,False
4,09d3f117f9f00618d1e0a7f403505c4ee30080b6,aug,2017,cool,hold,Hamburg,736.047619,730.000000,730.000000,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3311,fd85724350332de6ad1e098cfebd71fcdfe3ca76,jun,2021,cool,hold,North Little Rock,726.670520,728.928534,728.928534,0.0,False,False,True
3312,fd97c7fd6d2e6f08750babb9805babacae1c720c,jun,2021,auto,hold,Little Rock,718.945946,714.684685,664.684685,75.0,True,False,False
3313,fdb0a5f08eff61d50b5f015f77ffc79039cd2fd6,jun,2021,cool,hold,Little Rock,723.988239,761.446212,761.446212,50.0,False,False,False
3314,fff29f4a20cf21ebb64f5555af72670fb05a837b,jun,2021,cool,hold,Marion,715.003657,711.804075,711.804075,0.0,False,False,False


In [190]:
# Save the all-months table for the state; the index is left out because it
# only repeats each monthly file's own row numbers.
AR_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/AR_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in every raw monthly frame.
frames_to_check = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# One line per frame, in the same order and wording as before.
for label, frame in frames_to_check.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['AR']
Unique jan_2018: ['AR']
Unique jan_2019: ['AR']
Unique jan_2020: ['AR']
Unique jan_2021: ['AR']
Unique feb_2017: ['AR']
Unique feb_2018: ['AR']
Unique feb_2019: ['AR']
Unique feb_2020: ['AR']
Unique feb_2021: ['AR']
Unique jun_2017: ['AR']
Unique jun_2018: ['AR']
Unique jun_2019: ['AR']
Unique jun_2020: ['AR']
Unique jun_2021: ['AR']
Unique jul_2017: ['AR']
Unique jul_2018: ['AR']
Unique jul_2019: ['AR']
Unique jul_2020: ['AR']
Unique jul_2021: ['AR']
Unique aug_2017: ['AR']
Unique aug_2018: ['AR']
Unique aug_2019: ['AR']
Unique aug_2020: ['AR']
Unique dec_2017: ['AR']
Unique dec_2018: ['AR']
Unique dec_2019: ['AR']
Unique dec_2020: ['AR']
