# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January day-level CSV for AL.
jan_2017_csv = "../data_large/AL-day/2017-jan-day-AL.csv"
jan_2017 = pd.read_csv(filepath_or_buffer=jan_2017_csv)

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = jan_2017[setpoint_cols].ge(850).any(axis=1)
too_low = jan_2017[setpoint_cols].le(600).any(axis=1)
jan_2017.drop(index=jan_2017.index[too_high | too_low], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-01-08 19:00:00 UTC,auto,auto,692,780,700,AL,Valley,46,False,False,False,Gas
1,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-01-29 19:50:00 UTC,auto,hold,697,750,700,AL,Valley,46,False,False,False,Gas
2,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-01-29 18:05:00 UTC,auto,hold,695,750,700,AL,Valley,46,False,False,False,Gas
3,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-01-07 13:15:00 UTC,auto,hold,661,750,700,AL,Valley,46,False,False,False,Gas
4,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-01-07 16:20:00 UTC,auto,auto,673,745,695,AL,Valley,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131866,b67a82400c3d1d1be6247386517432d46b13a031,2017-01-19 16:30:00 UTC,heat,hold,707,710,710,AL,Sheffield,80,True,False,True,Electric
131867,b67a82400c3d1d1be6247386517432d46b13a031,2017-01-11 18:15:00 UTC,heat,hold,723,720,720,AL,Sheffield,80,True,False,True,Electric
131868,b67a82400c3d1d1be6247386517432d46b13a031,2017-01-07 17:40:00 UTC,heat,hold,728,730,730,AL,Sheffield,80,True,False,True,Electric
131869,b67a82400c3d1d1be6247386517432d46b13a031,2017-01-23 13:50:00 UTC,heat,hold,697,700,700,AL,Sheffield,80,True,False,True,Electric


In [4]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_column_names)

In [6]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2017_ave = jan_2017.groupby(group_keys).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00d06723ae919c589e74b53969c61ede3f74dbc2,Jan,2017,cool,hold,Smiths,699.117898,720.475142,720.475142,15.0,False,False,False
00d06723ae919c589e74b53969c61ede3f74dbc2,Jan,2017,heat,hold,Smiths,702.796465,686.219498,686.219498,15.0,False,False,False
018375b6043c8c22642b47291bea64e83fe67b21,Jan,2017,cool,auto,Orange Beach,686.750969,760.246124,704.984496,10.0,False,False,True
01d4384fb6e0bc8f9d2d347ac245408fed66e25a,Jan,2017,auxHeatOnly,auto,Prattville,597.666667,791.000000,651.666667,0.0,True,False,True
01d4384fb6e0bc8f9d2d347ac245408fed66e25a,Jan,2017,cool,auto,Prattville,694.136986,710.013699,740.000000,0.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
ef3d38cb05e41e1448c42744df464a9a1bb374f9,Jan,2017,heat,hold,Madison,682.892699,679.834071,679.834071,5.0,True,False,True
efc00cafde80ac7f441b9d2b35a262125dd3f590,Jan,2017,auto,auto,Birmingham,685.021689,718.424658,668.424658,0.0,False,False,False
f6eee5297c6935020c0967466a01f9cd097ab7dc,Jan,2017,heat,hold,Ardmore,700.906593,702.296703,702.098901,15.0,False,False,True
fb93bbdba0906d7cd5fad33c3070541e11d58368,Jan,2017,auto,hold,Harvest,660.624413,709.009390,654.741784,20.0,True,False,True


In [7]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
jan_2017_ave_csv = "data/day/AL/jan/jan_2017_ave.csv"
jan_2017_ave.to_csv(jan_2017_ave_csv, index=True, header=True)

### 2018 January Day

In [8]:
# Load the 2018 January day-level CSV for AL.
jan_2018_csv = "../data_large/AL-day/2018-jan-day-AL.csv"
jan_2018 = pd.read_csv(filepath_or_buffer=jan_2018_csv)

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = jan_2018[setpoint_cols].ge(850).any(axis=1)
too_low = jan_2018[setpoint_cols].le(600).any(axis=1)
jan_2018.drop(index=jan_2018.index[too_high | too_low], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2018-01-18 15:15:00 UTC,heat,hold,694,700,700,AL,Wilmer,37,True,False,True,Electric
1,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2018-01-23 12:50:00 UTC,heat,auto,648,680,680,AL,Wilmer,37,True,False,True,Electric
2,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2018-01-08 19:55:00 UTC,heat,hold,656,650,640,AL,Wilmer,37,True,False,True,Electric
3,df533fdf2927b98dd3b455f9b0af6fc182b43132,2018-01-27 14:55:00 UTC,auto,auto,693,740,690,AL,Bay Minette,87,False,False,True,Electric
4,5883fb4b878b6ed208915c856360bde9a0f58430,2018-01-14 19:40:00 UTC,auto,auto,675,780,680,AL,Rainbow City,77,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
356478,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-01-06 16:50:00 UTC,heat,auto,808,810,810,AL,Mobile,105,False,False,False,Gas
356479,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-01-25 12:30:00 UTC,heat,hold,717,730,730,AL,Mobile,105,False,False,False,Gas
356480,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-01-13 14:30:00 UTC,heat,auto,714,700,700,AL,Mobile,105,False,False,False,Gas
356481,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-01-25 15:25:00 UTC,heat,hold,710,730,730,AL,Mobile,105,False,False,False,Gas


In [10]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_column_names)

In [12]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2018_ave = jan_2018.groupby(group_keys).mean(numeric_only=True)

In [13]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
jan_2018_ave_csv = "data/day/AL/jan/jan_2018_ave.csv"
jan_2018_ave.to_csv(jan_2018_ave_csv, index=True, header=True)

### 2019 January Day

In [14]:
# Load the 2019 January day-level CSV for AL.
jan_2019_csv = "../data_large/AL-day/2019-jan-day-AL.csv"
jan_2019 = pd.read_csv(filepath_or_buffer=jan_2019_csv)

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = jan_2019[setpoint_cols].ge(850).any(axis=1)
too_low = jan_2019[setpoint_cols].le(600).any(axis=1)
jan_2019.drop(index=jan_2019.index[too_high | too_low], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-01-23 19:45:00 UTC,heat,hold,729,740,740,AL,Gadsden,95,True,False,False,Gas
1,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-01-24 18:05:00 UTC,heat,hold,734,740,740,AL,Gadsden,95,True,False,False,Gas
2,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-01-17 12:50:00 UTC,heat,hold,741,740,740,AL,Gadsden,95,True,False,False,Gas
3,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-01-24 16:20:00 UTC,heat,hold,727,740,740,AL,Gadsden,95,True,False,False,Gas
7,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-01-24 15:00:00 UTC,heat,hold,737,740,740,AL,Gadsden,95,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
656428,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-01-30 12:55:00 UTC,heat,auto,580,650,620,AL,Birmingham,105,False,False,False,Gas
656429,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-01-28 11:10:00 UTC,heat,auto,617,650,620,AL,Birmingham,105,False,False,False,Gas
656430,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-01-13 12:55:00 UTC,heat,hold,616,650,620,AL,Birmingham,105,False,False,False,Gas
656431,a5f094eae7267f0b871b977dc70679aa58dc3867,2019-01-26 18:50:00 UTC,heat,hold,706,700,700,AL,Birmingham,105,False,False,True,Electric


In [16]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_column_names)

In [18]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2019_ave = jan_2019.groupby(group_keys).mean(numeric_only=True)

In [19]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
jan_2019_ave_csv = "data/day/AL/jan/jan_2019_ave.csv"
jan_2019_ave.to_csv(jan_2019_ave_csv, index=True, header=True)

### 2020 January Day

In [20]:
# Load the 2020 January day-level CSV for AL.
jan_2020_csv = "../data_large/AL-day/2020-jan-day-AL.csv"
jan_2020 = pd.read_csv(filepath_or_buffer=jan_2020_csv)

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = jan_2020[setpoint_cols].ge(850).any(axis=1)
too_low = jan_2020[setpoint_cols].le(600).any(axis=1)
jan_2020.drop(index=jan_2020.index[too_high | too_low], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-01-11 19:00:00 UTC,cool,auto,700,700,700,AL,Empire,89,True,False,True,Electric
1,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-01-31 17:40:00 UTC,heat,hold,696,670,670,AL,Empire,89,True,False,True,Electric
2,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-01-04 16:45:00 UTC,auto,auto,734,790,740,AL,Valley,59,True,False,True,Electric
3,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-01-07 13:45:00 UTC,auxHeatOnly,hold,659,660,660,AL,Empire,89,True,False,True,Electric
4,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-01-16 14:15:00 UTC,heat,auto,708,690,690,AL,Empire,89,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
753959,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-01-09 15:35:00 UTC,heat,auto,678,680,680,AL,Birmingham,105,False,False,False,Gas
753960,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-01-03 11:35:00 UTC,heat,hold,658,660,660,AL,Birmingham,105,False,False,False,Gas
753961,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-01-15 16:45:00 UTC,heat,auto,683,680,680,AL,Birmingham,105,False,False,False,Gas
753962,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-01-30 16:40:00 UTC,heat,auto,674,680,680,AL,Birmingham,105,False,False,False,Gas


In [22]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_column_names)

In [24]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2020_ave = jan_2020.groupby(group_keys).mean(numeric_only=True)

In [25]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
jan_2020_ave_csv = "data/day/AL/jan/jan_2020_ave.csv"
jan_2020_ave.to_csv(jan_2020_ave_csv, index=True, header=True)

### 2021 January Day

In [26]:
# Load the 2021 January day-level CSV for AL.
jan_2021_csv = "../data_large/AL-day/2021-jan-day-AL.csv"
jan_2021 = pd.read_csv(filepath_or_buffer=jan_2021_csv)

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = jan_2021[setpoint_cols].ge(850).any(axis=1)
too_low = jan_2021[setpoint_cols].le(600).any(axis=1)
jan_2021.drop(index=jan_2021.index[too_high | too_low], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7fb92427ed808d17fbd291785e07542b93363ad7,2021-01-05 14:45:00 UTC,auto,hold,682,740,680,AL,Enterprise,57,False,False,False,Gas
1,5883fb4b878b6ed208915c856360bde9a0f58430,2021-01-23 16:55:00 UTC,heat,hold,684,668,668,AL,Rainbow City,77,True,False,True,Electric
2,7fb92427ed808d17fbd291785e07542b93363ad7,2021-01-09 13:00:00 UTC,auto,hold,656,710,660,AL,Enterprise,57,False,False,False,Gas
3,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2021-01-24 19:45:00 UTC,auto,hold,716,770,720,AL,Valley,59,True,False,True,Electric
4,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2021-01-07 13:50:00 UTC,heat,hold,728,740,740,AL,Gadsden,95,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
474623,a5f094eae7267f0b871b977dc70679aa58dc3867,2021-01-21 10:15:00 UTC,auto,hold,713,745,715,AL,Birmingham,105,False,False,True,Electric
474624,a5f094eae7267f0b871b977dc70679aa58dc3867,2021-01-21 12:45:00 UTC,auto,hold,710,745,715,AL,Birmingham,105,False,False,True,Electric
474625,a5f094eae7267f0b871b977dc70679aa58dc3867,2021-01-19 19:00:00 UTC,auto,hold,713,820,720,AL,Birmingham,105,False,False,True,Electric
474626,a5f094eae7267f0b871b977dc70679aa58dc3867,2021-01-21 15:30:00 UTC,auto,hold,714,745,715,AL,Birmingham,105,False,False,True,Electric


In [28]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_column_names)

In [30]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2021_ave = jan_2021.groupby(group_keys).mean(numeric_only=True)

In [31]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
jan_2021_ave_csv = "data/day/AL/jan/jan_2021_ave.csv"
jan_2021_ave.to_csv(jan_2021_ave_csv, index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV file names in the January output directory.
# os.listdir returns names in arbitrary order, so they are sorted to keep the
# row order of the combined frame reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/AL/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once. Growing the result from an empty
# DataFrame inside the loop re-copies the accumulated rows on every pass and can
# trigger pandas' deprecation warning about empty entries in concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/AL/jan/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
AL_jan = pd.concat(frames) if frames else pd.DataFrame()

AL_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d06723ae919c589e74b53969c61ede3f74dbc2,Jan,2017,cool,hold,Smiths,699.117898,720.475142,720.475142,15.0,False,False,False
1,00d06723ae919c589e74b53969c61ede3f74dbc2,Jan,2017,heat,hold,Smiths,702.796465,686.219498,686.219498,15.0,False,False,False
2,018375b6043c8c22642b47291bea64e83fe67b21,Jan,2017,cool,auto,Orange Beach,686.750969,760.246124,704.984496,10.0,False,False,True
3,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,Jan,2017,auxHeatOnly,auto,Prattville,597.666667,791.000000,651.666667,0.0,True,False,True
4,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,Jan,2017,cool,auto,Prattville,694.136986,710.013699,740.000000,0.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
680,fc125ee5f6f9bcfb29f66c5e987b8c9dae5188fd,Jan,2021,heat,hold,Phenix City,695.634750,697.693722,697.693722,37.0,True,False,True
681,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,Jan,2021,auto,hold,Gulf Shores,683.587826,747.127826,667.764348,20.0,True,False,True
682,ff5aa8f5b0f3c532a276504417b65e822c593794,Jan,2021,heat,hold,Hoover,699.293103,700.275862,700.275862,0.0,False,False,False
683,ff61e720fb3fdb1d8ff971081c9ed7701c13da99,Jan,2021,auto,hold,Semmes,651.522026,728.193833,653.583700,10.0,True,False,True


In [34]:
# Write the combined January frame; the row index is left out of the file.
AL_jan_csv = "Scraper_Output/State_Month_Day/AL/AL_jan.csv"
AL_jan.to_csv(AL_jan_csv, index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February day-level CSV for AL.
feb_2017_csv = "../data_large/AL-day/2017-feb-day-AL.csv"
feb_2017 = pd.read_csv(filepath_or_buffer=feb_2017_csv)

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = feb_2017[setpoint_cols].ge(850).any(axis=1)
too_low = feb_2017[setpoint_cols].le(600).any(axis=1)
feb_2017.drop(index=feb_2017.index[too_high | too_low], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-02-04 19:20:00 UTC,auto,auto,691,745,695,AL,Valley,46,False,False,False,Gas
1,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-02-04 19:45:00 UTC,auto,auto,690,745,695,AL,Valley,46,False,False,False,Gas
2,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-02-26 19:35:00 UTC,auto,hold,696,750,700,AL,Valley,46,False,False,False,Gas
3,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-02-26 19:40:00 UTC,auto,hold,696,750,700,AL,Valley,46,False,False,False,Gas
4,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2017-02-04 18:45:00 UTC,auto,auto,690,745,695,AL,Valley,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117513,b67a82400c3d1d1be6247386517432d46b13a031,2017-02-25 18:05:00 UTC,heat,hold,706,710,710,AL,Sheffield,80,True,False,True,Electric
117514,b67a82400c3d1d1be6247386517432d46b13a031,2017-02-04 15:45:00 UTC,heat,hold,699,700,700,AL,Sheffield,80,True,False,True,Electric
117515,b67a82400c3d1d1be6247386517432d46b13a031,2017-02-13 13:15:00 UTC,heat,hold,695,700,700,AL,Sheffield,80,True,False,True,Electric
117516,b67a82400c3d1d1be6247386517432d46b13a031,2017-02-25 19:00:00 UTC,heat,hold,714,710,710,AL,Sheffield,80,True,False,True,Electric


In [37]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
# NOTE(review): the January cells label the month "Jan" while this label is
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_column_names)

In [39]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2017_ave = feb_2017.groupby(group_keys).mean(numeric_only=True)

In [40]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
feb_2017_ave_csv = "data/day/AL/feb/feb_2017_ave.csv"
feb_2017_ave.to_csv(feb_2017_ave_csv, index=True, header=True)

### 2018 February Day

In [41]:
# Load the 2018 February day-level CSV for AL.
feb_2018_csv = "../data_large/AL-day/2018-feb-day-AL.csv"
feb_2018 = pd.read_csv(filepath_or_buffer=feb_2018_csv)

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = feb_2018[setpoint_cols].ge(850).any(axis=1)
too_low = feb_2018[setpoint_cols].le(600).any(axis=1)
feb_2018.drop(index=feb_2018.index[too_high | too_low], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2018-02-20 15:10:00 UTC,cool,hold,711,730,730,AL,Wilmer,37,True,False,True,Electric
1,df533fdf2927b98dd3b455f9b0af6fc182b43132,2018-02-25 14:00:00 UTC,auto,auto,709,740,690,AL,Bay Minette,87,False,False,True,Electric
2,7fb92427ed808d17fbd291785e07542b93363ad7,2018-02-17 13:30:00 UTC,auto,auto,700,740,690,AL,Enterprise,57,False,False,False,Gas
3,df533fdf2927b98dd3b455f9b0af6fc182b43132,2018-02-04 15:45:00 UTC,auto,auto,719,740,720,AL,Bay Minette,87,False,False,True,Electric
4,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2018-02-02 17:40:00 UTC,heat,hold,688,690,690,AL,Wilmer,37,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324575,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-02-27 14:15:00 UTC,heat,auto,728,730,730,AL,Mobile,105,False,False,False,Gas
324576,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-02-21 16:25:00 UTC,cool,auto,726,750,670,AL,Mobile,105,False,False,False,Gas
324577,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-02-06 12:45:00 UTC,heat,hold,693,720,720,AL,Mobile,105,False,False,False,Gas
324578,114200f01dfeb4838134fad31c015fc500c7b7f5,2018-02-01 12:55:00 UTC,heat,auto,780,780,780,AL,Mobile,105,False,False,False,Gas


In [43]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
# NOTE(review): the January cells label the month "Jan" while this label is
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_column_names)

In [45]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2018_ave = feb_2018.groupby(group_keys).mean(numeric_only=True)

In [46]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
feb_2018_ave_csv = "data/day/AL/feb/feb_2018_ave.csv"
feb_2018_ave.to_csv(feb_2018_ave_csv, index=True, header=True)

### 2019 February Day

In [47]:
# Load the 2019 February day-level CSV for AL.
feb_2019_csv = "../data_large/AL-day/2019-feb-day-AL.csv"
feb_2019 = pd.read_csv(filepath_or_buffer=feb_2019_csv)

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = feb_2019[setpoint_cols].ge(850).any(axis=1)
too_low = feb_2019[setpoint_cols].le(600).any(axis=1)
feb_2019.drop(index=feb_2019.index[too_high | too_low], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-02-24 16:15:00 UTC,heat,hold,727,740,740,AL,Gadsden,95,True,False,False,Gas
1,5883fb4b878b6ed208915c856360bde9a0f58430,2019-02-23 18:20:00 UTC,cool,auto,695,720,676,AL,Rainbow City,77,True,False,True,Electric
2,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-02-26 13:05:00 UTC,heat,hold,741,740,740,AL,Gadsden,95,True,False,False,Gas
3,df533fdf2927b98dd3b455f9b0af6fc182b43132,2019-02-23 18:35:00 UTC,auto,auto,716,720,670,AL,Bay Minette,87,False,False,True,Electric
4,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-02-24 15:40:00 UTC,heat,hold,730,740,740,AL,Gadsden,95,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
436330,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-02-02 12:55:00 UTC,heat,auto,627,650,630,AL,Birmingham,105,False,False,False,Gas
436331,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-02-28 19:10:00 UTC,heat,hold,694,680,680,AL,Birmingham,105,False,False,False,Gas
436332,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-02-22 14:00:00 UTC,heat,auto,679,680,680,AL,Birmingham,105,False,False,False,Gas
436333,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-02-21 11:25:00 UTC,heat,auto,658,690,690,AL,Birmingham,105,False,False,False,Gas


In [49]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
# NOTE(review): the January cells label the month "Jan" while this label is
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_column_names)

In [51]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2019_ave = feb_2019.groupby(group_keys).mean(numeric_only=True)

In [52]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
feb_2019_ave_csv = "data/day/AL/feb/feb_2019_ave.csv"
feb_2019_ave.to_csv(feb_2019_ave_csv, index=True, header=True)

### 2020 February Day

In [53]:
# Load the 2020 February day-level CSV for AL.
feb_2020_csv = "../data_large/AL-day/2020-feb-day-AL.csv"
feb_2020 = pd.read_csv(filepath_or_buffer=feb_2020_csv)

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = feb_2020[setpoint_cols].ge(850).any(axis=1)
too_low = feb_2020[setpoint_cols].le(600).any(axis=1)
feb_2020.drop(index=feb_2020.index[too_high | too_low], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-02-24 14:10:00 UTC,heat,hold,680,680,680,AL,Empire,89,True,False,True,Electric
1,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-02-08 14:10:00 UTC,auto,hold,717,785,735,AL,Valley,59,True,False,True,Electric
2,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-02-21 19:20:00 UTC,heat,auto,675,680,680,AL,Empire,89,True,False,True,Electric
3,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2020-02-26 14:55:00 UTC,heat,hold,672,670,670,AL,Empire,89,True,False,True,Electric
4,7fb92427ed808d17fbd291785e07542b93363ad7,2020-02-15 19:10:00 UTC,auto,hold,691,750,700,AL,Enterprise,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
675106,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-02-19 14:15:00 UTC,heat,auto,681,680,680,AL,Birmingham,105,False,False,False,Gas
675107,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-02-22 18:05:00 UTC,heat,auto,698,700,700,AL,Birmingham,105,False,False,False,Gas
675108,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-02-20 18:05:00 UTC,heat,auto,675,680,680,AL,Birmingham,105,False,False,False,Gas
675109,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-02-11 19:05:00 UTC,heat,auto,680,680,680,AL,Birmingham,105,False,False,False,Gas


In [55]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
# NOTE(review): the January cells label the month "Jan" while this label is
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_column_names)

In [57]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2020_ave = feb_2020.groupby(group_keys).mean(numeric_only=True)

In [58]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
feb_2020_ave_csv = "data/day/AL/feb/feb_2020_ave.csv"
feb_2020_ave.to_csv(feb_2020_ave_csv, index=True, header=True)

### 2021 February Day

In [59]:
# Load the 2021 February day-level CSV for AL.
feb_2021_csv = "../data_large/AL-day/2021-feb-day-AL.csv"
feb_2021 = pd.read_csv(filepath_or_buffer=feb_2021_csv)

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either setpoint column is >= 850 or <= 600.
# Missing setpoints compare False on both tests, so those rows are kept.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
too_high = feb_2021[setpoint_cols].ge(850).any(axis=1)
too_low = feb_2021[setpoint_cols].le(600).any(axis=1)
feb_2021.drop(index=feb_2021.index[too_high | too_low], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7fb92427ed808d17fbd291785e07542b93363ad7,2021-02-14 17:25:00 UTC,auto,hold,732,790,740,AL,Enterprise,57,False,False,False,Gas
1,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2021-02-24 18:20:00 UTC,heat,hold,733,740,740,AL,Gadsden,95,True,False,False,Gas
2,7fb92427ed808d17fbd291785e07542b93363ad7,2021-02-01 18:35:00 UTC,auto,hold,710,770,720,AL,Enterprise,57,False,False,False,Gas
3,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2021-02-16 16:55:00 UTC,heat,hold,739,740,740,AL,Gadsden,95,True,False,False,Gas
4,5883fb4b878b6ed208915c856360bde9a0f58430,2021-02-13 17:50:00 UTC,auto,hold,693,738,688,AL,Rainbow City,77,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420104,5e73b04c525bef54e3086593aef9519ac7b12f7e,2021-02-16 16:45:00 UTC,auto,hold,704,770,720,AL,Birmingham,90,False,False,True,Electric
420105,5e73b04c525bef54e3086593aef9519ac7b12f7e,2021-02-19 13:00:00 UTC,auto,hold,701,770,710,AL,Birmingham,90,False,False,True,Electric
420106,2ea53d131abef5df9ca5c063c91d483d4c19f7e8,2021-02-25 18:05:00 UTC,auto,hold,702,760,680,AL,Birmingham,90,False,False,True,Electric
420107,2ea53d131abef5df9ca5c063c91d483d4c19f7e8,2021-02-27 15:35:00 UTC,auto,hold,719,760,680,AL,Birmingham,90,False,False,True,Electric


In [61]:
# Add year and month
# Constant label columns; both are used as group keys when the frame is aggregated.
# NOTE(review): the January cells label the month "Jan" while this label is
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Rename columns to label the aggregates
# The "_ave" suffix marks the three temperature columns that are averaged per group.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_column_names)

In [63]:
# Average the numeric columns for each combination of the group keys.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError
# instead of silently dropping them.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2021_ave = feb_2021.groupby(group_keys).mean(numeric_only=True)

In [64]:
# Export CSV file
# The group keys live in the index, so the index is written along with the header row.
feb_2021_ave_csv = "data/day/AL/feb/feb_2021_ave.csv"
feb_2021_ave.to_csv(feb_2021_ave_csv, index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV file names in the February output directory.
# os.listdir returns names in arbitrary order, so they are sorted to keep the
# row order of the combined frame reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/AL/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once. Growing the result from an empty
# DataFrame inside the loop re-copies the accumulated rows on every pass and can
# trigger pandas' deprecation warning about empty entries in concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/AL/feb/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
AL_feb = pd.concat(frames) if frames else pd.DataFrame()

AL_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d06723ae919c589e74b53969c61ede3f74dbc2,feb,2017,cool,hold,Smiths,701.200617,719.623971,719.621914,15.0,False,False,False
1,00d06723ae919c589e74b53969c61ede3f74dbc2,feb,2017,heat,hold,Smiths,698.658991,685.865132,682.632675,15.0,False,False,False
2,00ecc16fbe65c54970eeda02b8342d929ae097b8,feb,2017,heat,auto,Madison,631.714286,720.000000,640.000000,5.0,False,False,True
3,018375b6043c8c22642b47291bea64e83fe67b21,feb,2017,cool,auto,Orange Beach,700.501532,771.174668,663.227783,10.0,False,False,True
4,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,feb,2017,cool,auto,Prattville,726.250000,700.000000,700.000000,0.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
729,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,feb,2021,auto,hold,Gulf Shores,662.358277,743.551020,664.353741,20.0,True,False,True
730,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,feb,2021,auxHeatOnly,hold,Gulf Shores,672.000000,740.000000,740.000000,20.0,True,False,True
731,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,feb,2021,heat,hold,Gulf Shores,663.593407,748.351648,670.813187,20.0,True,False,True
732,ff61e720fb3fdb1d8ff971081c9ed7701c13da99,feb,2021,auto,hold,Semmes,668.170792,733.054455,672.571782,10.0,True,False,True


In [67]:
# Write the combined month file; index=False drops the per-file row numbers.
# Create the target folder first: to_csv fails if the directory is missing.
os.makedirs("Scraper_Output/State_Month_Day/AL", exist_ok=True)
AL_feb.to_csv("Scraper_Output/State_Month_Day/AL/AL_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Read in the raw month CSV for the state (file name: 2017, jun, day, AL).
jun_2017 = pd.read_csv("../data_large/AL-day/2017-jun-day-AL.csv")

# Uncomment to preview the loaded frame:
# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jun_2017 = jun_2017.loc[~is_outlier].copy()

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-06-18 15:55:00 UTC,auto,auto,701,690,650,AL,Bay Minette,87,False,False,True,Electric
2,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-06-24 19:25:00 UTC,auto,auto,737,740,690,AL,Bay Minette,87,False,False,True,Electric
3,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-06-18 13:10:00 UTC,auto,auto,685,690,650,AL,Bay Minette,87,False,False,True,Electric
5,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-06-24 14:55:00 UTC,auto,auto,741,740,690,AL,Bay Minette,87,False,False,True,Electric
7,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-06-24 19:15:00 UTC,auto,auto,743,740,690,AL,Bay Minette,87,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
229852,88e44629592067dbdbad4423f521b743ab63c5ff,2017-06-15 18:35:00 UTC,cool,hold,733,730,730,AL,Birmingham,90,False,False,False,Gas
229853,88e44629592067dbdbad4423f521b743ab63c5ff,2017-06-22 17:30:00 UTC,cool,hold,750,750,750,AL,Birmingham,90,False,False,False,Gas
229854,88e44629592067dbdbad4423f521b743ab63c5ff,2017-06-17 16:10:00 UTC,cool,auto,742,820,640,AL,Birmingham,90,False,False,False,Gas
229855,88e44629592067dbdbad4423f521b743ab63c5ff,2017-06-17 14:30:00 UTC,cool,auto,739,820,640,AL,Birmingham,90,False,False,False,Gas


In [70]:
# Tag every row with its year and month so the labels are available as group keys.

jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=aggregate_labels)

In [72]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jun_2017_ave = (
    jun_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [73]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jun", exist_ok=True)
jun_2017_ave.to_csv("data/day/AL/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Read in the raw month CSV for the state (file name: 2018, jun, day, AL).
jun_2018 = pd.read_csv("../data_large/AL-day/2018-jun-day-AL.csv")

# Uncomment to preview the loaded frame:
# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jun_2018 = jun_2018.loc[~is_outlier].copy()

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5883fb4b878b6ed208915c856360bde9a0f58430,2018-06-18 11:30:00 UTC,cool,auto,695,710,610,AL,Rainbow City,77,True,False,True,Electric
1,5883fb4b878b6ed208915c856360bde9a0f58430,2018-06-27 12:55:00 UTC,cool,auto,696,710,610,AL,Rainbow City,77,True,False,True,Electric
2,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2018-06-07 18:30:00 UTC,cool,hold,737,730,730,AL,Wilmer,37,True,False,True,Electric
5,5883fb4b878b6ed208915c856360bde9a0f58430,2018-06-17 19:35:00 UTC,cool,auto,693,710,610,AL,Rainbow City,77,True,False,True,Electric
7,5883fb4b878b6ed208915c856360bde9a0f58430,2018-06-16 16:05:00 UTC,cool,auto,696,710,610,AL,Rainbow City,77,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498391,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-06-02 13:05:00 UTC,auto,hold,720,715,675,AL,Birmingham,105,False,False,True,Electric
498392,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-06-02 14:40:00 UTC,auto,hold,724,715,675,AL,Birmingham,105,False,False,True,Electric
498393,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-06-24 18:35:00 UTC,auto,hold,734,735,695,AL,Birmingham,105,False,False,True,Electric
498394,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-06-02 15:25:00 UTC,auto,hold,717,715,675,AL,Birmingham,105,False,False,True,Electric


In [76]:
# Tag every row with its year and month so the labels are available as group keys.

jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=aggregate_labels)

In [78]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jun_2018_ave = (
    jun_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [79]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jun", exist_ok=True)
jun_2018_ave.to_csv("data/day/AL/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Read in the raw month CSV for the state (file name: 2019, jun, day, AL).
jun_2019 = pd.read_csv("../data_large/AL-day/2019-jun-day-AL.csv")

# Uncomment to preview the loaded frame:
# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jun_2019 = jun_2019.loc[~is_outlier].copy()

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a15ed7b02c52431b7744e9339282a1e98aaa266f,2019-06-26 10:50:00 UTC,cool,auto,705,710,710,AL,Pinson,28,True,False,False,Gas
1,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2019-06-15 19:05:00 UTC,auto,hold,741,730,680,AL,Gadsden,95,True,False,False,Gas
3,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2019-06-29 19:25:00 UTC,cool,hold,725,722,722,AL,Valley,59,True,False,True,Electric
4,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2019-06-30 12:50:00 UTC,cool,hold,717,722,722,AL,Valley,59,True,False,True,Electric
5,7fb92427ed808d17fbd291785e07542b93363ad7,2019-06-30 12:30:00 UTC,cool,hold,706,740,740,AL,Enterprise,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
756331,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-06-04 10:50:00 UTC,cool,hold,720,720,720,AL,Birmingham,105,False,False,False,Gas
756332,a5f094eae7267f0b871b977dc70679aa58dc3867,2019-06-08 12:00:00 UTC,auto,auto,729,725,695,AL,Birmingham,105,False,False,True,Electric
756333,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-06-01 15:50:00 UTC,cool,hold,735,740,740,AL,Birmingham,105,False,False,False,Gas
756334,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-06-27 19:00:00 UTC,cool,auto,725,720,720,AL,Birmingham,105,False,False,False,Gas


In [82]:
# Tag every row with its year and month so the labels are available as group keys.

jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=aggregate_labels)

In [84]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jun_2019_ave = (
    jun_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [85]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jun", exist_ok=True)
jun_2019_ave.to_csv("data/day/AL/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Read in the raw month CSV for the state (file name: 2020, jun, day, AL).
jun_2020 = pd.read_csv("../data_large/AL-day/2020-jun-day-AL.csv")

# Uncomment to preview the loaded frame:
# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jun_2020 = jun_2020.loc[~is_outlier].copy()

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2020-06-05 16:10:00 UTC,auto,hold,736,730,650,AL,Anderson,108,False,False,False,Gas
1,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2020-06-20 18:30:00 UTC,auto,hold,757,760,680,AL,Anderson,108,False,False,False,Gas
2,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-06-20 13:00:00 UTC,cool,hold,717,724,724,AL,Valley,59,True,False,True,Electric
3,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2020-06-20 18:25:00 UTC,auto,hold,757,760,680,AL,Anderson,108,False,False,False,Gas
4,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-06-14 16:00:00 UTC,cool,hold,727,724,724,AL,Valley,59,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
809671,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-06-09 13:00:00 UTC,cool,auto,720,720,720,AL,Birmingham,105,False,False,False,Gas
809672,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-06-27 18:05:00 UTC,auto,auto,722,725,695,AL,Birmingham,105,False,False,True,Electric
809673,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-06-19 12:00:00 UTC,auto,auto,719,725,695,AL,Birmingham,105,False,False,True,Electric
809674,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-06-28 18:20:00 UTC,auto,auto,739,786,659,AL,Birmingham,105,False,False,True,Electric


In [88]:
# Tag every row with its year and month so the labels are available as group keys.

jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=aggregate_labels)

In [90]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jun_2020_ave = (
    jun_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [91]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jun", exist_ok=True)
jun_2020_ave.to_csv("data/day/AL/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Read in the raw month CSV for the state (file name: 2021, jun, day, AL).
jun_2021 = pd.read_csv("../data_large/AL-day/2021-jun-day-AL.csv")

# Uncomment to preview the loaded frame:
# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jun_2021 = jun_2021.loc[~is_outlier].copy()

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2021-06-03 16:05:00 UTC,auto,hold,704,710,640,AL,Anderson,108,False,False,False,Gas
1,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2021-06-03 18:20:00 UTC,auto,hold,712,710,640,AL,Anderson,108,False,False,False,Gas
2,7fb92427ed808d17fbd291785e07542b93363ad7,2021-06-20 18:55:00 UTC,auto,hold,692,690,630,AL,Enterprise,57,False,False,False,Gas
4,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2021-06-27 16:25:00 UTC,auto,hold,717,717,667,AL,Valley,59,True,False,True,Electric
5,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2021-06-03 15:45:00 UTC,auto,hold,699,710,640,AL,Anderson,108,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
493936,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-06-07 19:25:00 UTC,auto,hold,726,735,665,AL,Birmingham,105,False,False,False,Gas
493937,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-06-16 12:45:00 UTC,auto,hold,723,735,665,AL,Birmingham,105,False,False,False,Gas
493938,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-06-06 18:50:00 UTC,auto,hold,735,735,665,AL,Birmingham,105,False,False,False,Gas
493939,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-06-07 16:30:00 UTC,auto,hold,719,735,665,AL,Birmingham,105,False,False,False,Gas


In [94]:
# Tag every row with its year and month so the labels are available as group keys.

jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=aggregate_labels)

In [96]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jun_2021_ave = (
    jun_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [97]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jun", exist_ok=True)
jun_2021_ave.to_csv("data/day/AL/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export them as one combined CSV for the state

In [98]:
# List the per-year average CSVs for this month.  os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/AL/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once: growing the frame
# inside the loop re-copies all previously read rows on each pass.
yearly_frames = [pd.read_csv("data/day/AL/jun/" + file) for file in files]

# pd.concat([]) raises, so fall back to an empty frame when no CSVs were found.
AL_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AL_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d06723ae919c589e74b53969c61ede3f74dbc2,jun,2017,cool,hold,Smiths,732.251436,734.266306,734.266306,15.0,False,False,False
1,018375b6043c8c22642b47291bea64e83fe67b21,jun,2017,cool,auto,Orange Beach,762.200000,779.233333,662.888889,10.0,False,False,True
2,018375b6043c8c22642b47291bea64e83fe67b21,jun,2017,cool,hold,Orange Beach,742.900000,774.200000,773.533333,10.0,False,False,True
3,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,jun,2017,cool,auto,Prattville,741.811152,740.838662,754.802230,0.0,True,False,True
4,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,jun,2017,cool,hold,Prattville,747.277355,747.006437,747.073727,0.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
572,fc125ee5f6f9bcfb29f66c5e987b8c9dae5188fd,jun,2021,auto,hold,Phenix City,718.624294,718.158192,661.864407,37.0,True,False,True
573,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,jun,2021,auto,hold,Gulf Shores,732.672598,731.756228,650.000000,20.0,True,False,True
574,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,jun,2021,cool,hold,Gulf Shores,736.178623,732.840154,714.931632,20.0,True,False,True
575,ff5aa8f5b0f3c532a276504417b65e822c593794,jun,2021,cool,hold,Hoover,742.678571,740.428571,740.214286,0.0,False,False,False


In [100]:
# Write the combined month file; index=False drops the per-file row numbers.
# Create the target folder first: to_csv fails if the directory is missing.
os.makedirs("Scraper_Output/State_Month_Day/AL", exist_ok=True)
AL_jun.to_csv("Scraper_Output/State_Month_Day/AL/AL_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Read in the raw month CSV for the state (file name: 2017, jul, day, AL).
jul_2017 = pd.read_csv("../data_large/AL-day/2017-jul-day-AL.csv")

# Uncomment to preview the loaded frame:
# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jul_2017 = jul_2017.loc[~is_outlier].copy()

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-07-23 15:30:00 UTC,auto,auto,737,740,690,AL,Bay Minette,87,False,False,True,Electric
1,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-07-29 17:30:00 UTC,auto,auto,734,740,690,AL,Bay Minette,87,False,False,True,Electric
2,7fb92427ed808d17fbd291785e07542b93363ad7,2017-07-27 19:05:00 UTC,cool,hold,704,670,670,AL,Enterprise,57,False,False,False,Gas
3,7fb92427ed808d17fbd291785e07542b93363ad7,2017-07-25 16:55:00 UTC,cool,auto,690,690,680,AL,Enterprise,57,False,False,False,Gas
6,7fb92427ed808d17fbd291785e07542b93363ad7,2017-07-26 16:15:00 UTC,cool,hold,701,700,700,AL,Enterprise,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258292,88e44629592067dbdbad4423f521b743ab63c5ff,2017-07-30 17:50:00 UTC,cool,hold,724,720,720,AL,Birmingham,90,False,False,False,Gas
258293,88e44629592067dbdbad4423f521b743ab63c5ff,2017-07-16 15:05:00 UTC,cool,auto,772,820,640,AL,Birmingham,90,False,False,False,Gas
258294,88e44629592067dbdbad4423f521b743ab63c5ff,2017-07-26 12:20:00 UTC,cool,hold,719,720,720,AL,Birmingham,90,False,False,False,Gas
258295,88e44629592067dbdbad4423f521b743ab63c5ff,2017-07-25 17:35:00 UTC,cool,hold,721,720,720,AL,Birmingham,90,False,False,False,Gas


In [103]:
# Tag every row with its year and month so the labels are available as group keys.

jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=aggregate_labels)

In [105]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jul_2017_ave = (
    jul_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [106]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jul", exist_ok=True)
jul_2017_ave.to_csv("data/day/AL/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Read in the raw month CSV for the state (file name: 2018, jul, day, AL).
jul_2018 = pd.read_csv("../data_large/AL-day/2018-jul-day-AL.csv")

# Uncomment to preview the loaded frame:
# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jul_2018 = jul_2018.loc[~is_outlier].copy()

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
5,a15ed7b02c52431b7744e9339282a1e98aaa266f,2018-07-14 17:00:00 UTC,auto,auto,757,750,650,AL,Pinson,28,True,False,False,Gas
6,5883fb4b878b6ed208915c856360bde9a0f58430,2018-07-30 11:05:00 UTC,cool,auto,695,700,630,AL,Rainbow City,77,True,False,True,Electric
8,5883fb4b878b6ed208915c856360bde9a0f58430,2018-07-27 12:35:00 UTC,cool,auto,690,700,630,AL,Rainbow City,77,True,False,True,Electric
9,5883fb4b878b6ed208915c856360bde9a0f58430,2018-07-08 18:25:00 UTC,cool,auto,696,710,610,AL,Rainbow City,77,True,False,True,Electric
16,5883fb4b878b6ed208915c856360bde9a0f58430,2018-07-23 11:10:00 UTC,cool,auto,695,700,630,AL,Rainbow City,77,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539348,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-07-07 18:05:00 UTC,auto,hold,757,755,725,AL,Birmingham,105,False,False,True,Electric
539349,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-07-14 16:50:00 UTC,auto,hold,755,755,725,AL,Birmingham,105,False,False,True,Electric
539350,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-07-18 12:50:00 UTC,auto,hold,720,715,675,AL,Birmingham,105,False,False,True,Electric
539351,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2018-07-31 17:30:00 UTC,cool,hold,751,750,750,AL,Birmingham,105,False,False,False,Gas


In [109]:
# Tag every row with its year and month so the labels are available as group keys.

jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=aggregate_labels)

In [111]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jul_2018_ave = (
    jul_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [112]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jul", exist_ok=True)
jul_2018_ave.to_csv("data/day/AL/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Read in the raw month CSV for the state (file name: 2019, jul, day, AL).
jul_2019 = pd.read_csv("../data_large/AL-day/2019-jul-day-AL.csv")

# Uncomment to preview the loaded frame:
# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jul_2019 = jul_2019.loc[~is_outlier].copy()

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-07-15 14:15:00 UTC,cool,hold,671,680,680,AL,Empire,89,True,False,True,Electric
1,5883fb4b878b6ed208915c856360bde9a0f58430,2019-07-12 11:25:00 UTC,cool,hold,715,720,720,AL,Rainbow City,77,True,False,True,Electric
2,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-07-17 13:45:00 UTC,cool,hold,663,670,670,AL,Empire,89,True,False,True,Electric
3,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-07-30 16:00:00 UTC,cool,auto,666,670,670,AL,Empire,89,True,False,True,Electric
4,a15ed7b02c52431b7744e9339282a1e98aaa266f,2019-07-22 11:15:00 UTC,cool,auto,735,730,730,AL,Pinson,28,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
812435,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-07-10 18:25:00 UTC,cool,hold,736,730,730,AL,Birmingham,105,False,False,False,Gas
812436,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-07-17 14:50:00 UTC,cool,auto,722,720,720,AL,Birmingham,105,False,False,False,Gas
812437,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-07-22 16:25:00 UTC,cool,auto,746,740,740,AL,Birmingham,105,False,False,False,Gas
812438,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-07-07 19:45:00 UTC,cool,hold,700,700,700,AL,Birmingham,105,False,False,False,Gas


In [115]:
# Tag every row with its year and month so the labels are available as group keys.

jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=aggregate_labels)

In [117]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jul_2019_ave = (
    jul_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [118]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jul", exist_ok=True)
jul_2019_ave.to_csv("data/day/AL/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Read in the raw month CSV for the state (file name: 2020, jul, day, AL).
jul_2020 = pd.read_csv("../data_large/AL-day/2020-jul-day-AL.csv")

# Uncomment to preview the loaded frame:
# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jul_2020 = jul_2020.loc[~is_outlier].copy()

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2020-07-30 10:55:00 UTC,cool,auto,739,740,740,AL,Valley,46,False,False,False,Gas
1,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-07-12 11:00:00 UTC,cool,auto,698,700,700,AL,Valley,59,True,False,True,Electric
2,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2020-07-18 13:40:00 UTC,auto,auto,743,740,680,AL,Anderson,108,False,False,False,Gas
3,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2020-07-27 10:00:00 UTC,cool,auto,741,740,740,AL,Valley,46,False,False,False,Gas
4,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-07-30 19:05:00 UTC,cool,auto,705,700,700,AL,Valley,59,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
833759,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-07-11 12:10:00 UTC,auto,auto,720,725,695,AL,Birmingham,105,False,False,True,Electric
833760,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-07-05 13:05:00 UTC,auto,auto,723,725,695,AL,Birmingham,105,False,False,True,Electric
833761,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-07-25 15:25:00 UTC,auto,auto,718,725,695,AL,Birmingham,105,False,False,True,Electric
833762,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-07-18 16:10:00 UTC,auto,auto,727,725,695,AL,Birmingham,105,False,False,True,Electric


In [121]:
# Tag every row with its year and month so the labels are available as group keys.

jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=aggregate_labels)

In [123]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jul_2020_ave = (
    jul_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [124]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jul", exist_ok=True)
jul_2020_ave.to_csv("data/day/AL/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Read in the raw month CSV for the state (file name: 2021, jul, day, AL).
jul_2021 = pd.read_csv("../data_large/AL-day/2021-jul-day-AL.csv")

# Uncomment to preview the loaded frame:
# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)

# One boolean filter replaces four in-place drops; rows with missing setpoints
# are kept, and .copy() makes the result safe for later column assignment.
jul_2021 = jul_2021.loc[~is_outlier].copy()

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7fb92427ed808d17fbd291785e07542b93363ad7,2021-07-16 17:35:00 UTC,auto,hold,693,690,640,AL,Enterprise,57,False,False,False,Gas
2,5883fb4b878b6ed208915c856360bde9a0f58430,2021-07-18 17:05:00 UTC,cool,hold,674,668,668,AL,Rainbow City,77,True,False,True,Electric
3,7fb92427ed808d17fbd291785e07542b93363ad7,2021-07-16 16:15:00 UTC,auto,hold,696,690,640,AL,Enterprise,57,False,False,False,Gas
4,5883fb4b878b6ed208915c856360bde9a0f58430,2021-07-18 14:40:00 UTC,cool,hold,676,668,668,AL,Rainbow City,77,True,False,True,Electric
5,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2021-07-03 16:15:00 UTC,auto,hold,716,717,667,AL,Valley,59,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489072,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-07-13 16:25:00 UTC,auto,hold,701,725,665,AL,Birmingham,105,False,False,False,Gas
489073,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-07-29 13:40:00 UTC,auto,hold,728,725,665,AL,Birmingham,105,False,False,False,Gas
489074,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-07-13 18:20:00 UTC,auto,hold,709,725,665,AL,Birmingham,105,False,False,False,Gas
489075,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2021-07-28 13:45:00 UTC,auto,hold,710,725,665,AL,Birmingham,105,False,False,False,Gas


In [127]:
# Tag every row with its year and month so the labels are available as group keys.

jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Label the three temperature columns as averages ahead of the group-by mean.

aggregate_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=aggregate_labels)

In [129]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.  numeric_only=True skips the text columns
# (date_time, ProvinceState, ...) that pandas >= 2.0 refuses to average.
jul_2021_ave = (
    jul_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [130]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# Create the target folder first: to_csv fails if the directory is missing.

os.makedirs("data/day/AL/jul", exist_ok=True)
jul_2021_ave.to_csv("data/day/AL/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export them as one combined CSV for the state

In [131]:
# List the per-year average CSVs for this month.  os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/AL/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once: growing the frame
# inside the loop re-copies all previously read rows on each pass.
yearly_frames = [pd.read_csv("data/day/AL/jul/" + file) for file in files]

# pd.concat([]) raises, so fall back to an empty frame when no CSVs were found.
AL_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AL_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,018375b6043c8c22642b47291bea64e83fe67b21,jul,2017,cool,hold,Orange Beach,768.458333,775.000000,775.000000,10.0,False,False,True
1,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,jul,2017,cool,auto,Prattville,727.403349,720.980213,732.806697,0.0,True,False,True
2,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,jul,2017,cool,hold,Prattville,744.507502,740.688438,740.683142,0.0,True,False,True
3,01ec1e5fd26dfeb9bfc4d7e12b404f49c745aaee,jul,2017,cool,auto,Birmingham,750.559676,751.374916,735.757249,0.0,False,False,True
4,01ec1e5fd26dfeb9bfc4d7e12b404f49c745aaee,jul,2017,cool,hold,Birmingham,763.243197,765.767857,765.232993,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
535,fa78b408d37220ad1495e00950a5e905410541e1,jul,2021,cool,hold,Daphne,726.668712,691.411043,691.411043,50.0,False,False,False
536,fb93bbdba0906d7cd5fad33c3070541e11d58368,jul,2021,auto,hold,Harvest,744.625000,740.000000,671.000000,20.0,True,False,True
537,fc125ee5f6f9bcfb29f66c5e987b8c9dae5188fd,jul,2021,auto,hold,Phenix City,704.718421,704.607895,615.778947,37.0,True,False,True
538,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,jul,2021,cool,hold,Gulf Shores,738.533391,733.621284,733.511417,20.0,True,False,True


In [133]:
# Save the combined July averages without the row index.
AL_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AL/AL_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 extract from the AL-day folder.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2017-aug-day-AL.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
aug_2017.drop(index=aug_2017[out_of_range].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-08-20 13:50:00 UTC,auto,auto,678,685,665,AL,Bay Minette,87,False,False,True,Electric
1,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-08-19 19:25:00 UTC,auto,auto,699,685,665,AL,Bay Minette,87,False,False,True,Electric
2,df533fdf2927b98dd3b455f9b0af6fc182b43132,2017-08-20 14:45:00 UTC,auto,auto,693,685,665,AL,Bay Minette,87,False,False,True,Electric
3,7fb92427ed808d17fbd291785e07542b93363ad7,2017-08-07 14:15:00 UTC,cool,auto,708,710,670,AL,Enterprise,57,False,False,False,Gas
4,7fb92427ed808d17fbd291785e07542b93363ad7,2017-08-22 18:30:00 UTC,cool,auto,713,710,680,AL,Enterprise,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282180,614ccb2672d51fb6db60926a51c45d67f49ac5ad,2017-08-27 18:45:00 UTC,heat,auto,684,680,680,AL,Eau Claire,105,False,False,False,Gas
282181,614ccb2672d51fb6db60926a51c45d67f49ac5ad,2017-08-08 15:00:00 UTC,cool,auto,647,644,640,AL,Eau Claire,105,False,False,False,Gas
282182,614ccb2672d51fb6db60926a51c45d67f49ac5ad,2017-08-30 18:25:00 UTC,cool,hold,664,649,649,AL,Eau Claire,105,False,False,False,Gas
282183,614ccb2672d51fb6db60926a51c45d67f49ac5ad,2017-08-06 17:00:00 UTC,cool,auto,682,680,680,AL,Eau Claire,105,False,False,False,Gas


In [136]:
# Stamp every reading with its source period; both labels are stored as strings.
aug_2017["Year"], aug_2017["Month"] = "2017", "aug"

In [137]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
aug_2017 = aug_2017.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [138]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the August 2017 averages; the group keys sit in the index, so it is written too.
aug_2017_ave.to_csv(path_or_buf="data/day/AL/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the August 2018 extract from the AL-day folder.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2018-aug-day-AL.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
aug_2018.drop(index=aug_2018[out_of_range].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,df533fdf2927b98dd3b455f9b0af6fc182b43132,2018-08-25 14:15:00 UTC,auto,auto,700,720,670,AL,Bay Minette,87,False,False,True,Electric
3,5883fb4b878b6ed208915c856360bde9a0f58430,2018-08-18 12:30:00 UTC,cool,auto,676,690,620,AL,Rainbow City,77,True,False,True,Electric
5,7fb92427ed808d17fbd291785e07542b93363ad7,2018-08-23 15:05:00 UTC,cool,auto,694,770,620,AL,Enterprise,57,False,False,False,Gas
7,5883fb4b878b6ed208915c856360bde9a0f58430,2018-08-08 12:45:00 UTC,cool,auto,682,700,630,AL,Rainbow City,77,True,False,True,Electric
18,5883fb4b878b6ed208915c856360bde9a0f58430,2018-08-23 12:50:00 UTC,cool,auto,682,700,620,AL,Rainbow City,77,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523735,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-08-14 12:25:00 UTC,auto,hold,715,715,675,AL,Birmingham,105,False,False,True,Electric
523736,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-08-05 19:00:00 UTC,auto,hold,713,715,675,AL,Birmingham,105,False,False,True,Electric
523737,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-08-04 17:15:00 UTC,auto,hold,710,715,675,AL,Birmingham,105,False,False,True,Electric
523738,a5f094eae7267f0b871b977dc70679aa58dc3867,2018-08-04 12:35:00 UTC,auto,hold,718,715,675,AL,Birmingham,105,False,False,True,Electric


In [142]:
# Stamp every reading with its source period; both labels are stored as strings.
aug_2018["Year"], aug_2018["Month"] = "2018", "aug"

In [143]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
aug_2018 = aug_2018.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [144]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the August 2018 averages; the group keys sit in the index, so it is written too.
aug_2018_ave.to_csv(path_or_buf="data/day/AL/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the August 2019 extract from the AL-day folder.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2019-aug-day-AL.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
aug_2019.drop(index=aug_2019[out_of_range].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-08-29 17:20:00 UTC,cool,hold,698,710,710,AL,Empire,89,True,False,True,Electric
1,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2019-08-10 16:45:00 UTC,cool,hold,721,722,722,AL,Valley,59,True,False,True,Electric
2,5883fb4b878b6ed208915c856360bde9a0f58430,2019-08-12 12:40:00 UTC,cool,hold,713,700,700,AL,Rainbow City,77,True,False,True,Electric
3,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-08-12 15:10:00 UTC,cool,hold,667,670,670,AL,Empire,89,True,False,True,Electric
4,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2019-08-10 17:50:00 UTC,cool,hold,721,722,722,AL,Valley,59,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
820458,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-08-08 19:30:00 UTC,cool,hold,756,750,750,AL,Birmingham,105,False,False,False,Gas
820459,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-08-22 11:45:00 UTC,cool,auto,761,760,760,AL,Birmingham,105,False,False,False,Gas
820460,a5f094eae7267f0b871b977dc70679aa58dc3867,2019-08-03 12:15:00 UTC,auto,hold,721,720,690,AL,Birmingham,105,False,False,True,Electric
820461,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-08-14 12:05:00 UTC,cool,auto,729,720,720,AL,Birmingham,105,False,False,False,Gas


In [148]:
# Stamp every reading with its source period; both labels are stored as strings.
aug_2019["Year"], aug_2019["Month"] = "2019", "aug"

In [149]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
aug_2019 = aug_2019.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [150]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the August 2019 averages; the group keys sit in the index, so it is written too.
aug_2019_ave.to_csv(path_or_buf="data/day/AL/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the August 2020 extract from the AL-day folder.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2020-aug-day-AL.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
aug_2020.drop(index=aug_2020[out_of_range].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2020-08-09 12:15:00 UTC,cool,auto,741,740,740,AL,Valley,46,False,False,False,Gas
2,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-08-03 19:05:00 UTC,cool,auto,701,700,700,AL,Valley,59,True,False,True,Electric
3,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-08-08 15:40:00 UTC,cool,auto,699,700,700,AL,Valley,59,True,False,True,Electric
4,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2020-08-01 18:00:00 UTC,cool,auto,746,740,740,AL,Valley,46,False,False,False,Gas
6,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2020-08-06 19:25:00 UTC,cool,auto,745,740,740,AL,Valley,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
800453,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-08-29 14:10:00 UTC,auto,hold,746,755,725,AL,Birmingham,105,False,False,True,Electric
800454,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-08-02 17:45:00 UTC,auto,auto,762,820,640,AL,Birmingham,105,False,False,True,Electric
800455,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-08-08 14:10:00 UTC,auto,auto,721,725,695,AL,Birmingham,105,False,False,True,Electric
800456,a5f094eae7267f0b871b977dc70679aa58dc3867,2020-08-08 13:55:00 UTC,auto,auto,722,725,695,AL,Birmingham,105,False,False,True,Electric


In [154]:
# Stamp every reading with its source period; both labels are stored as strings.
aug_2020["Year"], aug_2020["Month"] = "2020", "aug"

In [155]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
aug_2020 = aug_2020.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [156]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the August 2020 averages; the group keys sit in the index, so it is written too.
aug_2020_ave.to_csv(path_or_buf="data/day/AL/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly average files from the month's folder for the state
2. Export them as one combined CSV

In [158]:
# Collect the names of the CSV files in the August folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row order of the
# combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/AL/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing a frame inside the loop re-copies
# all earlier rows on each pass, and seeding it with an empty DataFrame relies on concat
# behaviour that newer pandas versions deprecate.
yearly_frames = [pd.read_csv("data/day/AL/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files exist.
AL_aug = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AL_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00bf49f4054401422e49a040a2ca12b50029d5f3,aug,2017,cool,auto,Montgomery,749.015730,748.725843,706.975843,10.0,False,False,False
1,00bf49f4054401422e49a040a2ca12b50029d5f3,aug,2017,cool,hold,Montgomery,749.374535,748.016729,748.016729,10.0,False,False,False
2,018375b6043c8c22642b47291bea64e83fe67b21,aug,2017,cool,hold,Orange Beach,772.080000,775.000000,775.000000,10.0,False,False,True
3,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,aug,2017,cool,auto,Prattville,742.739516,741.437097,748.859677,0.0,True,False,True
4,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,aug,2017,cool,hold,Prattville,740.294213,737.855598,737.766901,0.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
976,ff5aa8f5b0f3c532a276504417b65e822c593794,aug,2020,cool,auto,Hoover,748.990196,743.529412,708.235294,0.0,False,False,False
977,ff5aa8f5b0f3c532a276504417b65e822c593794,aug,2020,cool,hold,Hoover,744.921569,740.000000,740.000000,0.0,False,False,False
978,ff618f67053f5ffec45ef9ab2d8f895fd570051b,aug,2020,cool,auto,Madison,715.823529,721.647059,717.529412,5.0,True,False,True
979,ff61e720fb3fdb1d8ff971081c9ed7701c13da99,aug,2020,auto,auto,Semmes,727.019489,735.930332,640.567204,10.0,True,False,True


In [160]:
# Save the combined August averages without the row index.
AL_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AL/AL_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 extract from the AL-day folder.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2017-dec-day-AL.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
dec_2017.drop(index=dec_2017[out_of_range].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2017-12-27 15:10:00 UTC,heat,hold,687,690,690,AL,Wilmer,37,True,False,True,Electric
1,7fb92427ed808d17fbd291785e07542b93363ad7,2017-12-26 13:25:00 UTC,auto,hold,694,750,700,AL,Enterprise,57,False,False,False,Gas
2,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2017-12-27 16:10:00 UTC,heat,hold,696,700,700,AL,Wilmer,37,True,False,True,Electric
3,93545042fcd888a4e58b4f58f1c27d4ef52d82d9,2017-12-31 16:40:00 UTC,heat,hold,698,700,700,AL,Wilmer,37,True,False,True,Electric
4,7fb92427ed808d17fbd291785e07542b93363ad7,2017-12-20 13:45:00 UTC,auto,auto,683,740,610,AL,Enterprise,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
297811,114200f01dfeb4838134fad31c015fc500c7b7f5,2017-12-02 17:20:00 UTC,heat,auto,707,710,710,AL,Mobile,105,False,False,False,Gas
297812,114200f01dfeb4838134fad31c015fc500c7b7f5,2017-12-28 16:00:00 UTC,heat,auto,753,750,750,AL,Mobile,105,False,False,False,Gas
297813,114200f01dfeb4838134fad31c015fc500c7b7f5,2017-12-29 15:30:00 UTC,heat,hold,717,720,720,AL,Mobile,105,False,False,False,Gas
297814,114200f01dfeb4838134fad31c015fc500c7b7f5,2017-12-28 18:50:00 UTC,heat,auto,735,750,750,AL,Mobile,105,False,False,False,Gas


In [163]:
# Stamp every reading with its source period; both labels are stored as strings.
dec_2017["Year"], dec_2017["Month"] = "2017", "dec"

In [164]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
dec_2017 = dec_2017.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [165]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the December 2017 averages; the group keys sit in the index, so it is written too.
dec_2017_ave.to_csv(path_or_buf="data/day/AL/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the December 2018 extract from the AL-day folder.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2018-dec-day-AL.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
dec_2018.drop(index=dec_2018[out_of_range].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2018-12-06 13:15:00 UTC,auto,hold,685,750,690,AL,Anderson,108,False,False,False,Gas
1,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2018-12-06 19:45:00 UTC,auto,hold,686,750,690,AL,Anderson,108,False,False,False,Gas
2,7fb92427ed808d17fbd291785e07542b93363ad7,2018-12-08 16:00:00 UTC,auto,hold,708,770,720,AL,Enterprise,57,False,False,False,Gas
3,77e16be6a64333b26276885e5a36a2d23ea5bf1f,2018-12-03 17:00:00 UTC,auto,hold,691,750,690,AL,Anderson,108,False,False,False,Gas
4,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2018-12-06 17:05:00 UTC,heat,hold,707,720,720,AL,Gadsden,95,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
616523,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2018-12-20 11:05:00 UTC,heat,auto,689,690,690,AL,Birmingham,105,False,False,False,Gas
616524,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2018-12-14 19:50:00 UTC,heat,hold,718,720,720,AL,Birmingham,105,False,False,False,Gas
616525,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2018-12-14 15:05:00 UTC,heat,hold,687,670,670,AL,Birmingham,105,False,False,False,Gas
616526,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2018-12-14 12:50:00 UTC,heat,auto,685,690,690,AL,Birmingham,105,False,False,False,Gas


In [169]:
# Stamp every reading with its source period; both labels are stored as strings.
dec_2018["Year"], dec_2018["Month"] = "2018", "dec"

In [170]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
dec_2018 = dec_2018.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [171]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the December 2018 averages; the group keys sit in the index, so it is written too.
dec_2018_ave.to_csv(path_or_buf="data/day/AL/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the December 2019 extract from the AL-day folder.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2019-dec-day-AL.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
dec_2019.drop(index=dec_2019[out_of_range].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7fb92427ed808d17fbd291785e07542b93363ad7,2019-12-05 15:40:00 UTC,auto,hold,674,730,680,AL,Enterprise,57,False,False,False,Gas
1,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-12-01 19:55:00 UTC,heat,auto,708,690,690,AL,Empire,89,True,False,True,Electric
2,6b3a5cfb6ca076b7dc6abe5b5f143e1b26f70238,2019-12-28 13:20:00 UTC,auto,hold,724,775,725,AL,Valley,46,False,False,False,Gas
3,73aa5c1d5caa1d88c5287a0beb255fff3f0be442,2019-12-17 19:35:00 UTC,heat,auto,697,700,700,AL,Empire,89,True,False,True,Electric
4,7fb92427ed808d17fbd291785e07542b93363ad7,2019-12-16 16:20:00 UTC,auto,hold,727,760,710,AL,Enterprise,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
727864,a5f094eae7267f0b871b977dc70679aa58dc3867,2019-12-09 12:30:00 UTC,auto,auto,725,755,725,AL,Birmingham,105,False,False,True,Electric
727865,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2019-12-04 18:00:00 UTC,heat,auto,689,690,690,AL,Birmingham,105,False,False,False,Gas
727866,a5f094eae7267f0b871b977dc70679aa58dc3867,2019-12-08 18:00:00 UTC,auto,auto,727,755,725,AL,Birmingham,105,False,False,True,Electric
727867,a5f094eae7267f0b871b977dc70679aa58dc3867,2019-12-08 18:30:00 UTC,auto,auto,729,755,725,AL,Birmingham,105,False,False,True,Electric


In [175]:
# Stamp every reading with its source period; both labels are stored as strings.
dec_2019["Year"], dec_2019["Month"] = "2019", "dec"

In [176]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
dec_2019 = dec_2019.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [177]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the December 2019 averages; the group keys sit in the index, so it is written too.
dec_2019_ave.to_csv(path_or_buf="data/day/AL/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the December 2020 extract from the AL-day folder.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/AL-day/2020-dec-day-AL.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a reading is discarded when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = (setpoints >= 850).any(axis=1) | (setpoints <= 600).any(axis=1)

# Drop in place so the frame keeps its original row labels for the surviving readings.
dec_2020.drop(index=dec_2020[out_of_range].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c4d725867f15e9e044b5c90799ee7c84bdf18a26,2020-12-10 15:10:00 UTC,heat,auto,702,720,720,AL,Gadsden,95,True,False,False,Gas
1,7fb92427ed808d17fbd291785e07542b93363ad7,2020-12-16 18:00:00 UTC,auto,hold,683,740,680,AL,Enterprise,57,False,False,False,Gas
2,7fb92427ed808d17fbd291785e07542b93363ad7,2020-12-14 16:05:00 UTC,auto,hold,679,730,680,AL,Enterprise,57,False,False,False,Gas
3,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-12-13 15:00:00 UTC,auto,hold,714,770,720,AL,Valley,59,True,False,True,Electric
4,2342a2f3a78079c1fab6ddccccf83fc4729a1df1,2020-12-26 16:50:00 UTC,auto,hold,711,770,720,AL,Valley,59,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
673392,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-12-05 17:40:00 UTC,auto,hold,684,760,690,AL,Birmingham,105,False,False,False,Gas
673393,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-12-04 16:20:00 UTC,auto,auto,692,740,690,AL,Birmingham,105,False,False,False,Gas
673394,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-12-12 15:55:00 UTC,auto,auto,697,750,700,AL,Birmingham,105,False,False,False,Gas
673395,00d0dbc1e15120418e25fa195bb69bbaa749cda7,2020-12-12 15:35:00 UTC,auto,auto,698,750,700,AL,Birmingham,105,False,False,False,Gas


In [181]:
# Stamp every reading with its source period; both labels are stored as strings.
dec_2020["Year"], dec_2020["Month"] = "2020", "dec"

In [182]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled
# as averages.
dec_2020 = dec_2020.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [183]:
# Average the readings per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True restricts the mean to numeric and boolean columns: the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type), and
# pandas >= 2.0 raises a TypeError on those instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the December 2020 averages; the group keys sit in the index, so it is written too.
dec_2020_ave.to_csv(path_or_buf="data/day/AL/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly average files from the month's folder for the state
2. Export them as one combined CSV

In [185]:
# Collect the names of the CSV files in the December folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row order of the
# combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/AL/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing a frame inside the loop re-copies
# all earlier rows on each pass, and seeding it with an empty DataFrame relies on concat
# behaviour that newer pandas versions deprecate.
yearly_frames = [pd.read_csv("data/day/AL/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files exist.
AL_dec = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AL_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00bf49f4054401422e49a040a2ca12b50029d5f3,dec,2017,auto,auto,Montgomery,683.134281,751.774092,653.873618,10.0,False,False,False
1,00bf49f4054401422e49a040a2ca12b50029d5f3,dec,2017,auto,hold,Montgomery,682.397683,754.864865,682.243243,10.0,False,False,False
2,00d06723ae919c589e74b53969c61ede3f74dbc2,dec,2017,auto,auto,Smiths,677.442786,725.000000,674.905473,15.0,False,False,False
3,00d06723ae919c589e74b53969c61ede3f74dbc2,dec,2017,auto,hold,Smiths,671.893720,727.666667,672.855072,15.0,False,False,False
4,00ecc16fbe65c54970eeda02b8342d929ae097b8,dec,2017,heat,hold,Madison,677.531250,650.000000,630.000000,5.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1185,ff618f67053f5ffec45ef9ab2d8f895fd570051b,dec,2020,heat,hold,Madison,723.036145,730.265060,730.265060,5.0,True,False,True
1186,ff61e720fb3fdb1d8ff971081c9ed7701c13da99,dec,2020,auto,auto,Semmes,640.234160,729.172176,620.199725,10.0,True,False,True
1187,ff61e720fb3fdb1d8ff971081c9ed7701c13da99,dec,2020,auto,hold,Semmes,647.840237,730.692308,647.017751,10.0,True,False,True
1188,ff61e720fb3fdb1d8ff971081c9ed7701c13da99,dec,2020,heat,hold,Semmes,632.750000,665.750000,665.750000,10.0,True,False,True


In [187]:
# Save the combined December averages without the row index.
AL_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AL/AL_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month files from the state's folder
2. Export them as one combined CSV for the state

In [188]:
# Collect the names of the CSV files in the state's output folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row order of the
# combined file reproducible from run to run.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/AL/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing a frame inside the loop re-copies
# all earlier rows on each pass, and seeding it with an empty DataFrame relies on concat
# behaviour that newer pandas versions deprecate.
monthly_frames = [pd.read_csv("Scraper_Output/State_Month_Day/AL/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files exist.
AL_all = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

AL_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00bf49f4054401422e49a040a2ca12b50029d5f3,aug,2017,cool,auto,Montgomery,749.015730,748.725843,706.975843,10.0,False,False,False
1,00bf49f4054401422e49a040a2ca12b50029d5f3,aug,2017,cool,hold,Montgomery,749.374535,748.016729,748.016729,10.0,False,False,False
2,018375b6043c8c22642b47291bea64e83fe67b21,aug,2017,cool,hold,Orange Beach,772.080000,775.000000,775.000000,10.0,False,False,True
3,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,aug,2017,cool,auto,Prattville,742.739516,741.437097,748.859677,0.0,True,False,True
4,01d4384fb6e0bc8f9d2d347ac245408fed66e25a,aug,2017,cool,hold,Prattville,740.294213,737.855598,737.766901,0.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3839,fc125ee5f6f9bcfb29f66c5e987b8c9dae5188fd,jun,2021,auto,hold,Phenix City,718.624294,718.158192,661.864407,37.0,True,False,True
3840,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,jun,2021,auto,hold,Gulf Shores,732.672598,731.756228,650.000000,20.0,True,False,True
3841,fc8ca68e06db1e45cd1879fb6aa4cac4308d78b6,jun,2021,cool,hold,Gulf Shores,736.178623,732.840154,714.931632,20.0,True,False,True
3842,ff5aa8f5b0f3c532a276504417b65e822c593794,jun,2021,cool,hold,Hoover,742.678571,740.428571,740.214286,0.0,False,False,False


In [190]:
# Save the state-wide combined averages without the row index.
AL_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AL_all_day.csv", index=False, header=True)

In [191]:
# Data check: confirm the BQ SQL queries selected the intended state by printing the
# distinct ProvinceState values found in each monthly frame listed below.
frames_to_check = [
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019), ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019), ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019), ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019), ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019), ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019), ("dec_2020", dec_2020),
]

# One line per frame, in the same order as the list above.
for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['AL']
Unique jan_2018: ['AL']
Unique jan_2019: ['AL']
Unique jan_2020: ['AL']
Unique jan_2021: ['AL']
Unique feb_2017: ['AL']
Unique feb_2018: ['AL']
Unique feb_2019: ['AL']
Unique feb_2020: ['AL']
Unique feb_2021: ['AL']
Unique jun_2017: ['AL']
Unique jun_2018: ['AL']
Unique jun_2019: ['AL']
Unique jun_2020: ['AL']
Unique jun_2021: ['AL']
Unique jul_2017: ['AL']
Unique jul_2018: ['AL']
Unique jul_2019: ['AL']
Unique jul_2020: ['AL']
Unique jul_2021: ['AL']
Unique aug_2017: ['AL']
Unique aug_2018: ['AL']
Unique aug_2019: ['AL']
Unique aug_2020: ['AL']
Unique dec_2017: ['AL']
Unique dec_2018: ['AL']
Unique dec_2019: ['AL']
Unique dec_2020: ['AL']
