# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 "day" extract for UT from its CSV file.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2017-jan-day-UT.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
jan_2017.drop(
    index=jan_2017[
        (jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,29e525e43480bf5544eb3ce5bf4fcae6be87210e,2017-01-14 17:15:00 UTC,heat,hold,668,655,655,UT,La Verkin,15,False,False,False,Gas
1,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2017-01-13 16:00:00 UTC,heat,hold,713,699,699,UT,Alpine,15,False,False,False,Gas
2,27a9407854139beb45882990c8dda7c983a269b6,2017-01-19 18:50:00 UTC,heat,hold,652,704,668,UT,Hooper,10,False,False,False,Gas
3,29e525e43480bf5544eb3ce5bf4fcae6be87210e,2017-01-13 17:00:00 UTC,heat,hold,650,655,655,UT,La Verkin,15,False,False,False,Gas
4,29e525e43480bf5544eb3ce5bf4fcae6be87210e,2017-01-14 16:55:00 UTC,heat,hold,653,655,655,UT,La Verkin,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
268094,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-01-08 19:05:00 UTC,heat,auto,701,740,740,UT,West Valley City,45,False,False,False,Gas
268095,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-01-16 19:10:00 UTC,heat,auto,699,740,740,UT,West Valley City,45,False,False,False,Gas
268096,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-01-12 19:25:00 UTC,heat,auto,690,740,740,UT,West Valley City,45,False,False,False,Gas
268097,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-01-13 18:40:00 UTC,heat,hold,737,740,740,UT,West Valley City,45,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
jan_2017 = jan_2017.assign(**{"Year": "2017", "Month": "Jan"})

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0026c5e19df4da540be6b77d831fde80082d7ad0,Jan,2017,heat,auto,Holladay,622.000000,656.500000,655.000000,20.0,False,False,False
0026c5e19df4da540be6b77d831fde80082d7ad0,Jan,2017,heat,hold,Holladay,673.892308,683.692308,683.261538,20.0,False,False,False
0042e2ecdbaeaf75adc185886abc85a3a85a9872,Jan,2017,heat,auto,Ivins,667.357143,745.714286,664.285714,15.0,False,False,False
0042e2ecdbaeaf75adc185886abc85a3a85a9872,Jan,2017,heat,hold,Ivins,689.475000,680.500000,680.500000,15.0,False,False,False
0096dc81438b6d126f8026d657a5ca1d511c2b36,Jan,2017,heat,auto,Pleasant View,618.250000,650.000000,610.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fdab924fe78d734b377493f1c499870b658c060d,Jan,2017,heat,hold,Sandy,707.718563,713.982036,713.982036,40.0,False,False,False
fdcb61ba556c8cb233e1e7c9772b41295713e9a8,Jan,2017,heat,auto,Saratoga Springs,700.821212,699.804545,699.631818,5.0,False,False,False
fdcb61ba556c8cb233e1e7c9772b41295713e9a8,Jan,2017,heat,hold,Saratoga Springs,702.727273,701.068182,699.772727,5.0,False,False,False
ff2c4ccc666ca6cacacd5fa84a3a4ba659e1ff87,Jan,2017,heat,hold,Syracuse,677.291667,720.000000,680.000000,5.0,False,False,False


In [7]:
# Write the January 2017 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
jan_2017_ave.to_csv(
    path_or_buf="data/day/UT/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the January 2018 "day" extract for UT from its CSV file.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2018-jan-day-UT.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
jan_2018.drop(
    index=jan_2018[
        (jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9d377e2a176dfe7292b9ca0b5e145dadc364e4a9,2018-01-16 19:35:00 UTC,heat,hold,673,675,675,UT,Wellsville,0,False,False,False,Gas
1,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2018-01-04 14:40:00 UTC,heat,hold,659,665,665,UT,Midway,10,False,False,False,Gas
2,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2018-01-03 14:10:00 UTC,heat,hold,656,655,655,UT,Midway,10,False,False,False,Gas
3,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2018-01-08 16:40:00 UTC,heat,hold,666,665,665,UT,Midway,10,False,False,False,Gas
5,afc12a7d91651a4c8b7e0a2e75ac1635daae0cec,2018-01-30 14:00:00 UTC,auto,auto,683,840,670,UT,Riverton,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668647,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-02 14:00:00 UTC,heat,hold,755,760,760,UT,West Valley City,47,False,False,False,Gas
668648,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-05 19:00:00 UTC,heat,hold,755,760,760,UT,West Valley City,47,False,False,False,Gas
668649,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-02 13:35:00 UTC,heat,hold,753,760,760,UT,West Valley City,47,False,False,False,Gas
668650,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-04 15:00:00 UTC,heat,hold,754,760,760,UT,West Valley City,47,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
jan_2018 = jan_2018.assign(**{"Year": "2018", "Month": "Jan"})


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Write the January 2018 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
jan_2018_ave.to_csv(
    path_or_buf="data/day/UT/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the January 2019 "day" extract for UT from its CSV file.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2019-jan-day-UT.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
jan_2019.drop(
    index=jan_2019[
        (jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,268023e4dc64e561f6560bb1efe2745ece4c34dc,2019-01-23 18:55:00 UTC,heat,hold,679,685,685,UT,Kearns,5,False,False,False,Gas
1,268023e4dc64e561f6560bb1efe2745ece4c34dc,2019-01-05 17:00:00 UTC,heat,hold,683,685,685,UT,Kearns,5,False,False,False,Gas
2,268023e4dc64e561f6560bb1efe2745ece4c34dc,2019-01-04 17:40:00 UTC,heat,hold,674,685,685,UT,Kearns,5,False,False,False,Gas
3,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2019-01-18 19:10:00 UTC,auto,hold,715,785,715,UT,Saly Lake City,98,False,False,False,Gas
4,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2019-01-10 17:55:00 UTC,auto,hold,707,775,705,UT,Saly Lake City,98,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
975171,931fd9e18c0d107b457f7a51a4f319a330c1ebc7,2019-01-13 16:45:00 UTC,auto,hold,690,750,690,UT,West Valley City,0,True,False,False,Gas
975172,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2019-01-05 16:05:00 UTC,heat,hold,744,750,750,UT,West Valley City,47,False,False,False,Gas
975173,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2019-01-05 14:00:00 UTC,heat,hold,749,750,750,UT,West Valley City,47,False,False,False,Gas
975174,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2019-01-05 19:20:00 UTC,heat,hold,749,750,750,UT,West Valley City,47,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
jan_2019 = jan_2019.assign(**{"Year": "2019", "Month": "Jan"})


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Write the January 2019 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
jan_2019_ave.to_csv(
    path_or_buf="data/day/UT/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the January 2020 "day" extract for UT from its CSV file.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2020-jan-day-UT.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
jan_2020.drop(
    index=jan_2020[
        (jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ec2ad09b9a8540f5ed55ce57cc7851f2d9de6449,2020-01-10 14:35:00 UTC,heat,hold,615,630,630,UT,Santa Clara,50,True,False,True,Electric
1,29e525e43480bf5544eb3ce5bf4fcae6be87210e,2020-01-07 18:10:00 UTC,heat,hold,659,665,665,UT,La Verkin,15,False,False,False,Gas
2,13f96270c8a71a77436c5ae8bcac2ac905794107,2020-01-13 17:35:00 UTC,heat,hold,672,675,675,UT,Cottonwood Heights,67,False,False,False,Gas
3,e0445420710056055b12af721f6a8e45f0c36618,2020-01-13 16:55:00 UTC,heat,hold,726,721,721,UT,Grantsville,19,True,False,False,Gas
4,ce835130a9cb8ae686c924be8270e9cb200d509c,2020-01-11 19:55:00 UTC,heat,auto,683,776,690,UT,West Bountiful,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1073723,cba3981976f0792baccac515ba746b2549f962cc,2020-01-22 14:05:00 UTC,heat,auto,753,760,760,UT,West Valley City,20,False,False,False,Gas
1073724,cba3981976f0792baccac515ba746b2549f962cc,2020-01-22 17:55:00 UTC,heat,auto,755,760,760,UT,West Valley City,20,False,False,False,Gas
1073725,cba3981976f0792baccac515ba746b2549f962cc,2020-01-19 09:15:00 UTC,heat,auto,733,760,760,UT,West Valley City,20,False,False,False,Gas
1073726,cba3981976f0792baccac515ba746b2549f962cc,2020-01-23 16:05:00 UTC,heat,auto,756,760,760,UT,West Valley City,20,False,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
jan_2020 = jan_2020.assign(**{"Year": "2020", "Month": "Jan"})


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Write the January 2020 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
jan_2020_ave.to_csv(
    path_or_buf="data/day/UT/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the January 2021 "day" extract for UT from its CSV file.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2021-jan-day-UT.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
jan_2021.drop(
    index=jan_2021[
        (jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5a3e8aa55387a2ea2397b099743a690311c0399f,2021-01-19 18:55:00 UTC,auto,hold,714,776,726,UT,La Verkin,110,False,False,False,Gas
1,ec2ad09b9a8540f5ed55ce57cc7851f2d9de6449,2021-01-31 19:35:00 UTC,heat,hold,627,640,640,UT,Santa Clara,50,True,False,True,Electric
2,16e8929f4eba6265d0946c76e03e8999657510d0,2021-01-15 18:00:00 UTC,auto,hold,736,796,740,UT,Grantsville,5,True,False,False,Gas
3,534f45b0865ae8820e8a521e0509fc2632cecc5e,2021-01-27 15:35:00 UTC,auto,hold,729,830,730,UT,West Point,47,False,False,False,Gas
4,b212daec36ba699239d2a0c3d97d2c37bd9aa144,2021-01-15 18:55:00 UTC,heat,hold,673,675,675,UT,So. Weber,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696700,6c85cd0cec22678e99c1152bc76223e815afc134,2021-01-19 17:45:00 UTC,auto,hold,724,760,700,UT,West Valley City,50,False,False,False,Gas
696701,6c85cd0cec22678e99c1152bc76223e815afc134,2021-01-07 17:05:00 UTC,auto,hold,698,760,700,UT,West Valley City,50,False,False,False,Gas
696702,6c85cd0cec22678e99c1152bc76223e815afc134,2021-01-10 15:40:00 UTC,auto,hold,700,760,700,UT,West Valley City,50,False,False,False,Gas
696703,6c85cd0cec22678e99c1152bc76223e815afc134,2021-01-15 18:30:00 UTC,auto,hold,699,760,700,UT,West Valley City,50,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
jan_2021 = jan_2021.assign(**{"Year": "2021", "Month": "Jan"})


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Write the January 2021 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
jan_2021_ave.to_csv(
    path_or_buf="data/day/UT/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year January CSV exports.
# os.listdir returns names in arbitrary order, so they are sorted to keep
# the row order of the combined frame reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/UT/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once, instead of growing
# a frame inside the loop (which re-copies all earlier rows on each pass
# and starts from an empty frame that recent pandas warns about).
UT_jan_parts = [pd.read_csv("data/day/UT/jan/" + file) for file in files]

# pd.concat raises ValueError on an empty list; fall back to an empty
# frame so a directory without CSV files yields the same result as before.
UT_jan = pd.concat(UT_jan_parts) if UT_jan_parts else pd.DataFrame()

UT_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0026c5e19df4da540be6b77d831fde80082d7ad0,Jan,2017,heat,auto,Holladay,622.000000,656.500000,655.000000,20.0,False,False,False
1,0026c5e19df4da540be6b77d831fde80082d7ad0,Jan,2017,heat,hold,Holladay,673.892308,683.692308,683.261538,20.0,False,False,False
2,0042e2ecdbaeaf75adc185886abc85a3a85a9872,Jan,2017,heat,auto,Ivins,667.357143,745.714286,664.285714,15.0,False,False,False
3,0042e2ecdbaeaf75adc185886abc85a3a85a9872,Jan,2017,heat,hold,Ivins,689.475000,680.500000,680.500000,15.0,False,False,False
4,0096dc81438b6d126f8026d657a5ca1d511c2b36,Jan,2017,heat,auto,Pleasant View,618.250000,650.000000,610.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,fec1821487dc4c9c19e7be11cfaff24c77cf6d92,Jan,2021,heat,hold,Erda,704.608924,707.514436,707.517060,0.0,False,False,False
912,ff2d8a8e279a90b663e33e9449508bd30a5051b9,Jan,2021,heat,hold,Salt Lake City,679.976231,683.548387,683.546689,19.0,False,False,False
913,ff7f44785055c96a70bf67cc9551be64508c7af2,Jan,2021,heat,hold,Pleasant Grove,684.200000,700.600000,699.600000,25.0,False,False,False
914,ffbb3562acaa38803129f81718ee2d88b423dd35,Jan,2021,heat,hold,Holladay,697.578947,700.000000,700.000000,30.0,True,False,False


In [34]:
# Write the combined January frame; the row labels repeat per source
# file after concatenation, so the index is left out of the CSV.
UT_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/UT/UT_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 "day" extract for UT from its CSV file.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2017-feb-day-UT.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
feb_2017.drop(
    index=feb_2017[
        (feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2017-02-06 18:40:00 UTC,heat,hold,700,699,699,UT,Alpine,15,False,False,False,Gas
1,79c898b994869218dcc42c57b97b77e5663b9009,2017-02-11 19:40:00 UTC,heat,hold,679,675,675,UT,Smithfield,5,False,False,False,Gas
2,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2017-02-09 19:10:00 UTC,heat,hold,721,709,709,UT,Alpine,15,False,False,False,Gas
3,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2017-02-09 17:50:00 UTC,heat,hold,718,709,709,UT,Alpine,15,False,False,False,Gas
4,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2017-02-06 18:10:00 UTC,heat,hold,697,699,699,UT,Alpine,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
241150,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-02-06 18:50:00 UTC,heat,auto,702,700,700,UT,West Valley City,45,False,False,False,Gas
241151,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-02-06 18:10:00 UTC,heat,auto,708,700,700,UT,West Valley City,45,False,False,False,Gas
241152,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-02-25 19:30:00 UTC,heat,auto,730,740,740,UT,West Valley City,45,False,False,False,Gas
241153,0fc8e5fdd377dcde2062888694f2e930a3743218,2017-02-28 16:05:00 UTC,heat,hold,703,740,740,UT,West Valley City,45,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
# NOTE(review): the month label is lowercase "feb" here while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(**{"Year": "2017", "Month": "feb"})

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Write the February 2017 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
feb_2017_ave.to_csv(
    path_or_buf="data/day/UT/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the February 2018 "day" extract for UT from its CSV file.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2018-feb-day-UT.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
feb_2018.drop(
    index=feb_2018[
        (feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2018-02-19 16:55:00 UTC,heat,hold,658,665,665,UT,Alpine,15,False,False,False,Gas
1,52d43033afe3705b70f9d8a94f4d4b132fe872a7,2018-02-09 19:00:00 UTC,heat,auto,648,776,653,UT,Naples,10,False,False,False,Gas
2,275926498f2cce85712eeae63309e58c0b54398e,2018-02-14 19:40:00 UTC,auto,hold,703,775,715,UT,Woods Cross,20,False,False,False,Gas
3,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2018-02-17 17:45:00 UTC,heat,hold,670,665,665,UT,Alpine,15,False,False,False,Gas
4,ed0a7cfdc7c58b6d1769e8f6d1c18c749567105c,2018-02-08 15:00:00 UTC,heat,hold,714,685,685,UT,herriman,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
628276,a332219bccd5ad2f5c121762337b99c3ecfbcd73,2018-02-02 17:30:00 UTC,auto,hold,704,755,705,UT,West Valley City,20,False,False,False,Gas
628277,a332219bccd5ad2f5c121762337b99c3ecfbcd73,2018-02-02 19:50:00 UTC,auto,hold,701,755,705,UT,West Valley City,20,False,False,False,Gas
628278,a332219bccd5ad2f5c121762337b99c3ecfbcd73,2018-02-02 19:55:00 UTC,auto,hold,701,755,705,UT,West Valley City,20,False,False,False,Gas
628279,a332219bccd5ad2f5c121762337b99c3ecfbcd73,2018-02-26 19:55:00 UTC,auto,hold,711,760,710,UT,West Valley City,20,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
# NOTE(review): the month label is lowercase "feb" here while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(**{"Year": "2018", "Month": "feb"})


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Write the February 2018 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
feb_2018_ave.to_csv(
    path_or_buf="data/day/UT/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the February 2019 "day" extract for UT from its CSV file.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2019-feb-day-UT.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
feb_2019.drop(
    index=feb_2019[
        (feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bf773d886879c481cf074d2f44522a3e2b89beca,2019-02-03 15:05:00 UTC,heat,hold,638,675,675,UT,Millcreek,10,False,False,False,Gas
1,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2019-02-03 15:30:00 UTC,auto,hold,714,775,715,UT,Saly Lake City,98,False,False,False,Gas
2,268023e4dc64e561f6560bb1efe2745ece4c34dc,2019-02-17 16:35:00 UTC,heat,hold,680,685,685,UT,Kearns,5,False,False,False,Gas
3,ddda4dd226a2f8667ed8190bebf4e39dca45db76,2019-02-03 17:20:00 UTC,heat,hold,660,665,665,UT,Providence,100,False,False,False,Gas
4,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2019-02-26 18:45:00 UTC,auto,hold,712,775,715,UT,Saly Lake City,98,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
663695,dec7b003400f4fa8f6fa15be7c9e38540a9f2b11,2019-02-04 18:35:00 UTC,heat,hold,748,750,750,UT,West Valley City,0,False,False,False,Gas
663696,dec7b003400f4fa8f6fa15be7c9e38540a9f2b11,2019-02-18 14:15:00 UTC,heat,auto,741,750,750,UT,West Valley City,0,False,False,False,Gas
663697,dec7b003400f4fa8f6fa15be7c9e38540a9f2b11,2019-02-11 18:45:00 UTC,heat,auto,705,750,700,UT,West Valley City,0,False,False,False,Gas
663698,931fd9e18c0d107b457f7a51a4f319a330c1ebc7,2019-02-26 16:25:00 UTC,auto,auto,683,760,680,UT,West Valley City,0,True,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
# NOTE(review): the month label is lowercase "feb" here while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(**{"Year": "2019", "Month": "feb"})


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Write the February 2019 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
feb_2019_ave.to_csv(
    path_or_buf="data/day/UT/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the February 2020 "day" extract for UT from its CSV file.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2020-feb-day-UT.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
feb_2020.drop(
    index=feb_2020[
        (feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2020-02-27 19:40:00 UTC,auto,hold,712,775,715,UT,Saly Lake City,98,False,False,False,Gas
1,29e525e43480bf5544eb3ce5bf4fcae6be87210e,2020-02-09 18:15:00 UTC,heat,hold,687,685,685,UT,La Verkin,15,False,False,False,Gas
2,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2020-02-20 18:40:00 UTC,auto,hold,713,775,715,UT,Saly Lake City,98,False,False,False,Gas
3,13f96270c8a71a77436c5ae8bcac2ac905794107,2020-02-12 17:35:00 UTC,heat,hold,675,675,675,UT,Cottonwood Heights,67,False,False,False,Gas
4,d716b2134b26c2a102f6bbd4094aeb611916505f,2020-02-03 19:55:00 UTC,heat,hold,680,756,663,UT,Mapleton,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947275,cba3981976f0792baccac515ba746b2549f962cc,2020-02-28 10:15:00 UTC,heat,auto,744,760,760,UT,West Valley City,20,False,False,False,Gas
947276,cba3981976f0792baccac515ba746b2549f962cc,2020-02-28 19:25:00 UTC,heat,auto,790,760,760,UT,West Valley City,20,False,False,False,Gas
947277,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-02-28 14:00:00 UTC,auto,hold,710,765,715,UT,West Valley City,35,False,False,False,Gas
947278,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-02-28 15:35:00 UTC,auto,hold,710,765,715,UT,West Valley City,35,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
# NOTE(review): the month label is lowercase "feb" here while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(**{"Year": "2020", "Month": "feb"})


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Write the February 2020 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
feb_2020_ave.to_csv(
    path_or_buf="data/day/UT/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the February 2021 "day" extract for UT from its CSV file.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2021-feb-day-UT.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600; both bounds apply to both columns.
feb_2021.drop(
    index=feb_2021[
        (feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,534f45b0865ae8820e8a521e0509fc2632cecc5e,2021-02-17 19:05:00 UTC,auto,hold,735,830,730,UT,West Point,47,False,False,False,Gas
1,e21951537ff82bfad70f3d35c780ce026d300e06,2021-02-12 09:25:00 UTC,heat,hold,737,739,739,UT,santaquin,0,False,False,False,Gas
2,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2021-02-23 15:10:00 UTC,auto,hold,710,785,715,UT,Saly Lake City,98,False,False,False,Gas
3,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2021-02-06 15:45:00 UTC,auto,hold,712,785,715,UT,Saly Lake City,98,False,False,False,Gas
4,81c5cb65ebb4e22cec34430880bc482d76001e17,2021-02-05 09:10:00 UTC,auto,hold,730,785,735,UT,kearns,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593981,6c85cd0cec22678e99c1152bc76223e815afc134,2021-02-03 15:15:00 UTC,auto,hold,703,760,700,UT,West Valley City,50,False,False,False,Gas
593982,6c85cd0cec22678e99c1152bc76223e815afc134,2021-02-26 13:55:00 UTC,auto,hold,696,760,700,UT,West Valley City,50,False,False,False,Gas
593983,6c85cd0cec22678e99c1152bc76223e815afc134,2021-02-20 14:45:00 UTC,auto,hold,693,760,700,UT,West Valley City,50,False,False,False,Gas
593984,6c85cd0cec22678e99c1152bc76223e815afc134,2021-02-09 14:50:00 UTC,auto,hold,704,760,700,UT,West Valley City,50,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings);
# these columns are used as group keys when the averages are computed.
# NOTE(review): the month label is lowercase "feb" here while the January
# cells use "Jan" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(**{"Year": "2021", "Month": "feb"})


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Mean of every numeric/boolean column per
# Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is passed explicitly: the frame still holds string
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Write the February 2021 averages to CSV. The group keys live in the
# index, so the index is written alongside the value columns.
feb_2021_ave.to_csv(
    path_or_buf="data/day/UT/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year February CSV exports.
# os.listdir returns names in arbitrary order, so they are sorted to keep
# the row order of the combined frame reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/UT/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once, instead of growing
# a frame inside the loop (which re-copies all earlier rows on each pass
# and starts from an empty frame that recent pandas warns about).
UT_feb_parts = [pd.read_csv("data/day/UT/feb/" + file) for file in files]

# pd.concat raises ValueError on an empty list; fall back to an empty
# frame so a directory without CSV files yields the same result as before.
UT_feb = pd.concat(UT_feb_parts) if UT_feb_parts else pd.DataFrame()

UT_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0026c5e19df4da540be6b77d831fde80082d7ad0,feb,2017,heat,hold,Holladay,680.400000,680.000000,680.000000,20.0,False,False,False
1,0042e2ecdbaeaf75adc185886abc85a3a85a9872,feb,2017,heat,hold,Ivins,692.581340,685.222488,685.222488,15.0,False,False,False
2,0096dc81438b6d126f8026d657a5ca1d511c2b36,feb,2017,heat,auto,Pleasant View,672.154221,670.000000,670.000000,0.0,False,False,False
3,0096dc81438b6d126f8026d657a5ca1d511c2b36,feb,2017,heat,hold,Pleasant View,669.088419,673.877152,673.848200,0.0,False,False,False
4,00c5ee12719f2bf4360b90a0f70e1b0580488c22,feb,2017,heat,auto,Salt Lake City,696.943503,714.847458,698.135593,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
861,fec1821487dc4c9c19e7be11cfaff24c77cf6d92,feb,2021,heat,hold,Erda,716.073529,714.088235,714.088235,0.0,False,False,False
862,ff2d8a8e279a90b663e33e9449508bd30a5051b9,feb,2021,heat,hold,Salt Lake City,698.296131,700.662946,700.662946,19.0,False,False,False
863,ff6dea167280fc625ee09281a0562a61ba351bb7,feb,2021,auto,hold,American Fork,689.678571,758.571429,692.857143,20.0,False,False,False
864,ffbb3562acaa38803129f81718ee2d88b423dd35,feb,2021,heat,hold,Holladay,691.250000,700.000000,700.000000,30.0,True,False,False


In [67]:
# Write the combined February table; the row index is not exported.
UT_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/UT/UT_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" extract for Utah (UT).
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2017-jun-day-UT.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2017.drop(index=jun_2017[is_outlier].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8ee4969626be51dcfe058b7f6e1b12bebf8c3c95,2017-06-18 15:40:00 UTC,auto,auto,677,675,625,UT,Tremonton,0,True,False,False,Gas
1,acac17074a9dffd1ebd6a723bc992b51e072c884,2017-06-13 13:25:00 UTC,cool,hold,740,785,785,UT,Ivins,0,False,False,False,Gas
7,afc12a7d91651a4c8b7e0a2e75ac1635daae0cec,2017-06-06 14:10:00 UTC,auto,auto,738,752,630,UT,Riverton,20,False,False,False,Gas
8,519926b4d4ba52692cea50495fcbda8619bea81d,2017-06-26 13:20:00 UTC,cool,hold,683,731,695,UT,Midway,6,False,False,False,Gas
10,3bd8bf5aa5e09e7cae00249b5320ed9a17e2fbd6,2017-06-19 08:05:00 UTC,cool,hold,672,675,625,UT,Payson,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
436939,5a2416e160892c9cfa17deb069e1d4bea8af8c0d,2017-06-13 16:10:00 UTC,cool,hold,697,750,750,UT,West Valley City,5,False,False,False,Gas
436940,5a2416e160892c9cfa17deb069e1d4bea8af8c0d,2017-06-17 18:35:00 UTC,cool,auto,744,750,750,UT,West Valley City,5,False,False,False,Gas
436941,5a2416e160892c9cfa17deb069e1d4bea8af8c0d,2017-06-13 16:05:00 UTC,cool,hold,694,750,750,UT,West Valley City,5,False,False,False,Gas
436942,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-06-18 15:50:00 UTC,cool,auto,749,750,650,UT,West Valley City,67,False,False,False,Gas


In [70]:
# Label every row with its year and month (both stored as strings).
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Save the June 2017 averages; index=True writes the group keys as columns.
jun_2017_ave.to_csv(
    path_or_buf="data/day/UT/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the June 2018 "day" extract for Utah (UT).
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2018-jun-day-UT.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2018.drop(index=jun_2018[is_outlier].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,afc12a7d91651a4c8b7e0a2e75ac1635daae0cec,2018-06-07 13:00:00 UTC,auto,hold,760,775,635,UT,Riverton,20,False,False,False,Gas
1,ebee6054672eb7b1ce09f6e3488f87faef253384,2018-06-30 19:45:00 UTC,cool,hold,752,795,790,UT,Magna,77,False,False,False,Gas
2,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2018-06-21 14:10:00 UTC,auto,hold,742,749,675,UT,Saly Lake City,98,False,False,False,Gas
3,0d6d7871cb14e6d150484b4e12a9bfc925d04247,2018-06-30 19:20:00 UTC,auto,auto,793,840,680,UT,Cottonwood Heights,38,False,False,False,Gas
4,afc12a7d91651a4c8b7e0a2e75ac1635daae0cec,2018-06-08 19:50:00 UTC,auto,hold,762,785,635,UT,Riverton,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
822551,cba3981976f0792baccac515ba746b2549f962cc,2018-06-14 15:05:00 UTC,auto,auto,759,760,700,UT,West Valley City,20,False,False,False,Gas
822552,cba3981976f0792baccac515ba746b2549f962cc,2018-06-13 18:55:00 UTC,auto,auto,761,760,700,UT,West Valley City,20,False,False,False,Gas
822553,cba3981976f0792baccac515ba746b2549f962cc,2018-06-08 15:20:00 UTC,auto,auto,746,760,700,UT,West Valley City,20,False,False,False,Gas
822554,cba3981976f0792baccac515ba746b2549f962cc,2018-06-01 17:45:00 UTC,auto,auto,719,760,690,UT,West Valley City,20,False,False,False,Gas


In [76]:
# Label every row with its year and month (both stored as strings).
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Save the June 2018 averages; index=True writes the group keys as columns.
jun_2018_ave.to_csv(
    path_or_buf="data/day/UT/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the June 2019 "day" extract for Utah (UT).
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2019-jun-day-UT.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2019.drop(index=jun_2019[is_outlier].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9d7449fa40c5a025e6633e5f56d8d6f174ee7dc8,2019-06-29 19:40:00 UTC,cool,hold,765,763,763,UT,Ivins,0,False,False,False,Gas
1,a971c081cfe8f04bc9fede526a9d6c91e59ea1f2,2019-06-18 13:40:00 UTC,auto,hold,729,840,730,UT,Vernal,48,False,False,False,Gas
2,69d0cae126649beab973d5f22c53307fe7c12e26,2019-06-27 13:30:00 UTC,cool,hold,655,758,755,UT,nibley,25,True,False,False,Gas
3,a971c081cfe8f04bc9fede526a9d6c91e59ea1f2,2019-06-02 19:10:00 UTC,auto,hold,732,840,730,UT,Vernal,48,False,False,False,Gas
4,a971c081cfe8f04bc9fede526a9d6c91e59ea1f2,2019-06-29 19:05:00 UTC,auto,hold,748,840,730,UT,Vernal,48,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1067306,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2019-06-18 17:15:00 UTC,auto,hold,746,760,650,UT,West Valley City,49,True,False,False,Gas
1067307,74355ea6b21215a892c712f60eba2694f9205839,2019-06-25 18:30:00 UTC,cool,hold,758,760,760,UT,West Valley City,19,True,False,False,Gas
1067308,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2019-06-30 15:25:00 UTC,cool,auto,761,760,760,UT,West Valley City,49,True,False,False,Gas
1067309,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2019-06-18 17:20:00 UTC,auto,hold,746,760,650,UT,West Valley City,49,True,False,False,Gas


In [82]:
# Label every row with its year and month (both stored as strings).
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Save the June 2019 averages; index=True writes the group keys as columns.
jun_2019_ave.to_csv(
    path_or_buf="data/day/UT/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the June 2020 "day" extract for Utah (UT).
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2020-jun-day-UT.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2020.drop(index=jun_2020[is_outlier].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,fec1821487dc4c9c19e7be11cfaff24c77cf6d92,2020-06-03 14:45:00 UTC,cool,auto,720,830,690,UT,Erda,0,False,False,False,Gas
3,51a356d4567bca45aa1f827f824ff17024538768,2020-06-04 13:50:00 UTC,auto,hold,722,751,620,UT,Nibley,0,True,False,False,Gas
4,80aeef2742b4d82270058ddc7107099934ac62d1,2020-06-09 17:25:00 UTC,heat,hold,719,758,670,UT,Eden,5,False,False,False,Gas
5,fc077a47bf3384cb17fa778e777c5dfd1a521583,2020-06-20 16:05:00 UTC,cool,hold,696,702,702,UT,Cottonwood Heights,20,False,False,False,Gas
6,fc077a47bf3384cb17fa778e777c5dfd1a521583,2020-06-24 16:15:00 UTC,cool,hold,702,702,702,UT,Cottonwood Heights,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1100211,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-06-26 18:05:00 UTC,auto,hold,770,765,685,UT,West Valley City,35,False,False,False,Gas
1100212,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-06-22 14:55:00 UTC,auto,hold,748,765,685,UT,West Valley City,35,False,False,False,Gas
1100213,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-06-26 13:25:00 UTC,auto,hold,753,765,685,UT,West Valley City,35,False,False,False,Gas
1100214,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-06-22 14:45:00 UTC,auto,hold,748,765,685,UT,West Valley City,35,False,False,False,Gas


In [88]:
# Label every row with its year and month (both stored as strings).
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Save the June 2020 averages; index=True writes the group keys as columns.
jun_2020_ave.to_csv(
    path_or_buf="data/day/UT/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the June 2021 "day" extract for Utah (UT).
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2021-jun-day-UT.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2021.drop(index=jun_2021[is_outlier].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,51a356d4567bca45aa1f827f824ff17024538768,2021-06-15 14:55:00 UTC,auto,hold,706,701,631,UT,Nibley,0,True,False,False,Gas
1,81c5cb65ebb4e22cec34430880bc482d76001e17,2021-06-28 15:20:00 UTC,auto,hold,773,785,695,UT,kearns,0,True,False,False,Gas
2,567484348d693ef6562960faf3b298bc6cf8fe97,2021-06-19 19:35:00 UTC,cool,hold,766,775,750,UT,Ivins,0,False,False,False,Gas
3,268023e4dc64e561f6560bb1efe2745ece4c34dc,2021-06-09 13:15:00 UTC,cool,hold,731,727,727,UT,Kearns,5,False,False,False,Gas
4,268023e4dc64e561f6560bb1efe2745ece4c34dc,2021-06-14 17:25:00 UTC,cool,hold,711,707,707,UT,Kearns,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
766377,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2021-06-25 14:10:00 UTC,cool,hold,749,765,765,UT,West Valley City,49,True,False,False,Gas
766378,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2021-06-25 12:10:00 UTC,cool,hold,761,765,765,UT,West Valley City,49,True,False,False,Gas
766379,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2021-06-25 14:05:00 UTC,cool,hold,749,765,765,UT,West Valley City,49,True,False,False,Gas
766380,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2021-06-25 07:35:00 UTC,cool,hold,759,765,765,UT,West Valley City,49,True,False,False,Gas


In [94]:
# Label every row with its year and month (both stored as strings).
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Save the June 2021 averages; index=True writes the group keys as columns.
jun_2021_ave.to_csv(
    path_or_buf="data/day/UT/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Collect the per-year average CSVs for this month. os.listdir() returns entries
# in arbitrary order, so sort them to keep the combined file's row order stable.
files = sorted(f for f in os.listdir("data/day/UT/jun/") if f.endswith(".csv"))

# files

In [99]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once. Growing the frame inside the
# loop re-copies all earlier rows on each pass, and concatenating onto an empty
# DataFrame is deprecated in recent pandas.
UT_jun_frames = [pd.read_csv("data/day/UT/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame (the
# original result) when the folder holds no CSV files.
UT_jun = pd.concat(UT_jun_frames) if UT_jun_frames else pd.DataFrame()

UT_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0042e2ecdbaeaf75adc185886abc85a3a85a9872,jun,2017,cool,auto,Ivins,754.736111,747.673611,662.458333,15.0,False,False,False
1,0042e2ecdbaeaf75adc185886abc85a3a85a9872,jun,2017,cool,hold,Ivins,728.207955,736.888636,736.681818,15.0,False,False,False
2,0096dc81438b6d126f8026d657a5ca1d511c2b36,jun,2017,heat,auto,Pleasant View,677.247495,650.000000,650.000000,0.0,False,False,False
3,00c5ee12719f2bf4360b90a0f70e1b0580488c22,jun,2017,auto,auto,Salt Lake City,751.307692,750.000000,690.000000,45.0,False,False,False
4,00c5ee12719f2bf4360b90a0f70e1b0580488c22,jun,2017,auto,hold,Salt Lake City,749.124542,751.840049,690.427350,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
986,ff2c4ccc666ca6cacacd5fa84a3a4ba659e1ff87,jun,2021,cool,hold,Syracuse,755.600000,750.666667,750.666667,5.0,False,False,False
987,ff6dea167280fc625ee09281a0562a61ba351bb7,jun,2021,auto,hold,American Fork,710.710470,728.081197,669.628205,20.0,False,False,False
988,ff7f44785055c96a70bf67cc9551be64508c7af2,jun,2021,auto,hold,Pleasant Grove,713.561983,710.681818,659.710744,25.0,False,False,False
989,ffbb3562acaa38803129f81718ee2d88b423dd35,jun,2021,cool,hold,Holladay,725.727835,727.209622,727.121649,30.0,True,False,False


In [100]:
# Write the combined June table; the row index is not exported.
UT_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/UT/UT_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" extract for Utah (UT).
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2017-jul-day-UT.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2017.drop(index=jul_2017[is_outlier].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,69d0cae126649beab973d5f22c53307fe7c12e26,2017-07-28 14:45:00 UTC,cool,auto,823,830,700,UT,nibley,25,True,False,False,Gas
2,1231c7c050a3e3ea560dff12ffe325f720fb48be,2017-07-01 18:40:00 UTC,cool,hold,767,762,790,UT,Brigham City,60,False,False,False,Gas
3,8ee4969626be51dcfe058b7f6e1b12bebf8c3c95,2017-07-15 16:40:00 UTC,cool,hold,715,685,685,UT,Tremonton,0,True,False,False,Gas
6,8ee4969626be51dcfe058b7f6e1b12bebf8c3c95,2017-07-15 15:00:00 UTC,cool,hold,690,685,685,UT,Tremonton,0,True,False,False,Gas
7,23a0af5c4a236d72380b0d2e9fb58a7851209505,2017-07-09 18:45:00 UTC,cool,hold,732,757,730,UT,Rockville,37,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
504220,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-07-31 19:00:00 UTC,cool,auto,752,750,650,UT,West Valley City,67,False,False,False,Gas
504221,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-07-31 19:45:00 UTC,cool,auto,750,750,650,UT,West Valley City,67,False,False,False,Gas
504222,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-07-06 16:00:00 UTC,cool,auto,744,750,650,UT,West Valley City,67,False,False,False,Gas
504223,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-07-17 14:05:00 UTC,cool,auto,751,750,650,UT,West Valley City,67,False,False,False,Gas


In [103]:
# Label every row with its year and month (both stored as strings).
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Save the July 2017 averages; index=True writes the group keys as columns.
jul_2017_ave.to_csv(
    path_or_buf="data/day/UT/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the July 2018 "day" extract for Utah (UT).
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2018-jul-day-UT.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2018.drop(index=jul_2018[is_outlier].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5d53600c05bb81f14c20781dfe877ecead90504f,2018-07-07 19:05:00 UTC,cool,hold,756,759,732,UT,Fruit Heights,0,False,False,False,Gas
1,418c3741927f6dc56509368742842e0faf9c4dd1,2018-07-18 17:20:00 UTC,cool,auto,798,830,790,UT,Lindon,20,True,False,False,Gas
2,dc0f6c633c2b0eb1defe31f4b45468d2d78df4a8,2018-07-19 16:00:00 UTC,cool,auto,744,748,748,UT,Midway,10,False,False,False,Gas
3,69d0cae126649beab973d5f22c53307fe7c12e26,2018-07-07 19:05:00 UTC,cool,auto,810,838,790,UT,nibley,25,True,False,False,Gas
5,29e525e43480bf5544eb3ce5bf4fcae6be87210e,2018-07-20 18:05:00 UTC,cool,hold,748,759,759,UT,La Verkin,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
974766,7d8e797def6a41f466529b60d1e5a02995988161,2018-07-22 15:45:00 UTC,cool,hold,761,760,760,UT,West Valley City,50,False,False,False,Gas
974767,7d8e797def6a41f466529b60d1e5a02995988161,2018-07-21 17:30:00 UTC,cool,hold,761,760,760,UT,West Valley City,50,False,False,False,Gas
974768,7d8e797def6a41f466529b60d1e5a02995988161,2018-07-20 19:45:00 UTC,cool,hold,763,760,760,UT,West Valley City,50,False,False,False,Gas
974769,74355ea6b21215a892c712f60eba2694f9205839,2018-07-04 13:05:00 UTC,cool,auto,758,760,760,UT,West Valley City,19,True,False,False,Gas


In [109]:
# Label every row with its year and month (both stored as strings).
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Save the July 2018 averages; index=True writes the group keys as columns.
jul_2018_ave.to_csv(
    path_or_buf="data/day/UT/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the July 2019 "day" extract for Utah (UT).
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2019-jul-day-UT.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2019.drop(index=jul_2019[is_outlier].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9d2a5e058cef7d4825a7c001916db709296aa44f,2019-07-14 17:25:00 UTC,auto,hold,842,840,790,UT,La Verkin,0,False,False,False,Gas
1,e4f403e1aff0c63159b9ba9419b6c01671951309,2019-07-01 16:35:00 UTC,cool,hold,747,761,761,UT,Enoch,30,True,False,False,Gas
2,39eca45212bb9b42e74db486bb8253f87a90f610,2019-07-29 13:25:00 UTC,cool,auto,797,840,620,UT,Tremonton,60,True,False,False,Gas
3,a971c081cfe8f04bc9fede526a9d6c91e59ea1f2,2019-07-15 18:15:00 UTC,auto,hold,781,840,730,UT,Vernal,48,False,False,False,Gas
5,9d2a5e058cef7d4825a7c001916db709296aa44f,2019-07-12 18:40:00 UTC,auto,hold,833,840,790,UT,La Verkin,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1232883,cba3981976f0792baccac515ba746b2549f962cc,2019-07-28 16:45:00 UTC,auto,auto,759,760,710,UT,West Valley City,20,False,False,False,Gas
1232884,cba3981976f0792baccac515ba746b2549f962cc,2019-07-26 18:00:00 UTC,auto,auto,763,760,710,UT,West Valley City,20,False,False,False,Gas
1232885,7d8e797def6a41f466529b60d1e5a02995988161,2019-07-30 18:30:00 UTC,auto,hold,765,760,670,UT,West Valley City,50,False,False,False,Gas
1232886,7d8e797def6a41f466529b60d1e5a02995988161,2019-07-31 13:10:00 UTC,auto,hold,765,760,670,UT,West Valley City,50,False,False,False,Gas


In [115]:
# Label every row with its year and month (both stored as strings).
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Save the July 2019 averages; index=True writes the group keys as columns.
jul_2019_ave.to_csv(
    path_or_buf="data/day/UT/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the July 2020 "day" extract for Utah (UT).
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2020-jul-day-UT.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2020.drop(index=jul_2020[is_outlier].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,73fc0d574a3e575ffd74ef4283acf27ab8833ebe,2020-07-01 14:40:00 UTC,auto,hold,702,759,673,UT,Richmond,0,False,False,False,Gas
1,ddfc3c00799dc2857baeaac8e6cfed95b983760b,2020-07-26 15:30:00 UTC,auto,hold,701,702,652,UT,Heber City,0,False,False,False,Gas
2,fc077a47bf3384cb17fa778e777c5dfd1a521583,2020-07-13 12:35:00 UTC,cool,hold,723,722,722,UT,Cottonwood Heights,20,False,False,False,Gas
3,81c5cb65ebb4e22cec34430880bc482d76001e17,2020-07-24 17:00:00 UTC,auto,hold,752,815,715,UT,kearns,0,True,False,False,Gas
4,ac50d6054ed289e38cfc639ba4a8120120c9274a,2020-07-23 19:55:00 UTC,cool,hold,784,775,750,UT,Ivins,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1206320,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-07-13 18:30:00 UTC,auto,hold,769,765,685,UT,West Valley City,35,False,False,False,Gas
1206321,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-07-17 14:30:00 UTC,auto,hold,766,765,685,UT,West Valley City,35,False,False,False,Gas
1206322,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-07-12 17:10:00 UTC,auto,hold,766,765,685,UT,West Valley City,35,False,False,False,Gas
1206323,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-07-09 19:35:00 UTC,auto,hold,769,765,685,UT,West Valley City,35,False,False,False,Gas


In [121]:
# Label every row with its year and month (both stored as strings).
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Save the July 2020 averages; index=True writes the group keys as columns.
jul_2020_ave.to_csv(
    path_or_buf="data/day/UT/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the July 2021 "day" extract for Utah (UT).
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2021-jul-day-UT.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2021.drop(index=jul_2021[is_outlier].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8ee4969626be51dcfe058b7f6e1b12bebf8c3c95,2021-07-26 19:40:00 UTC,cool,hold,761,744,744,UT,Tremonton,0,True,False,False,Gas
1,14addb5523cfa94984b43b571a931ad04266f4ba,2021-07-19 13:20:00 UTC,cool,hold,816,830,790,UT,Lindon,20,True,False,False,Gas
3,275926498f2cce85712eeae63309e58c0b54398e,2021-07-13 19:50:00 UTC,cool,hold,739,737,737,UT,Woods Cross,20,False,False,False,Gas
4,51a356d4567bca45aa1f827f824ff17024538768,2021-07-03 16:00:00 UTC,auto,hold,762,771,630,UT,Nibley,0,True,False,False,Gas
5,14addb5523cfa94984b43b571a931ad04266f4ba,2021-07-21 11:15:00 UTC,cool,hold,803,840,790,UT,Lindon,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
735006,41c068e50c35a7ebe3ea0cc9ebaef48930f8592f,2021-07-05 15:30:00 UTC,cool,hold,767,760,760,UT,West Valley City,49,True,False,False,Gas
735007,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2021-07-12 13:55:00 UTC,cool,hold,763,760,760,UT,West Valley City,35,False,False,False,Gas
735008,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2021-07-29 13:05:00 UTC,cool,hold,755,760,760,UT,West Valley City,35,False,False,False,Gas
735009,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2021-07-12 17:00:00 UTC,cool,hold,766,760,760,UT,West Valley City,35,False,False,False,Gas


In [127]:
# Label every row with its year and month (both stored as strings).
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave": after the groupby-mean
# that follows they hold averages rather than raw readings.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is needed on pandas >= 2.0,
# where mean() raises TypeError on the remaining text columns (date_time,
# ProvinceState, ...) instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Save the July 2021 averages; index=True writes the group keys as columns.
jul_2021_ave.to_csv(
    path_or_buf="data/day/UT/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Collect the per-year average CSVs for this month. os.listdir() returns entries
# in arbitrary order, so sort them to keep the combined file's row order stable.
files = sorted(f for f in os.listdir("data/day/UT/jul/") if f.endswith(".csv"))

# files

In [132]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once. Growing the frame inside the
# loop re-copies all earlier rows on each pass, and concatenating onto an empty
# DataFrame is deprecated in recent pandas.
UT_jul_frames = [pd.read_csv("data/day/UT/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame (the
# original result) when the folder holds no CSV files.
UT_jul = pd.concat(UT_jul_frames) if UT_jul_frames else pd.DataFrame()

UT_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0042e2ecdbaeaf75adc185886abc85a3a85a9872,jul,2017,cool,auto,Ivins,708.000000,722.578947,660.000000,15.0,False,False,False
1,0042e2ecdbaeaf75adc185886abc85a3a85a9872,jul,2017,cool,hold,Ivins,709.053571,723.428571,723.428571,15.0,False,False,False
2,006819101bcb86ccbd55cc0558edf9c18560ca34,jul,2017,cool,auto,Centerville,711.896907,713.041237,689.597938,46.0,False,False,False
3,0096dc81438b6d126f8026d657a5ca1d511c2b36,jul,2017,heat,auto,Pleasant View,698.993185,650.000000,650.000000,0.0,False,False,False
4,00c5ee12719f2bf4360b90a0f70e1b0580488c22,jul,2017,auto,hold,Salt Lake City,753.790898,749.899139,689.880689,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
893,ff2d8a8e279a90b663e33e9449508bd30a5051b9,jul,2021,cool,hold,Salt Lake City,737.212677,732.988324,732.988324,19.0,False,False,False
894,ff38ee1d696db10ec167f097e142d37a592622c5,jul,2021,cool,hold,Layton,741.403226,730.000000,730.000000,15.0,False,False,False
895,ff7f44785055c96a70bf67cc9551be64508c7af2,jul,2021,auto,hold,Pleasant Grove,712.428571,709.309524,659.309524,25.0,False,False,False
896,ffbb3562acaa38803129f81718ee2d88b423dd35,jul,2021,cool,hold,Holladay,736.728625,737.568242,735.815720,30.0,True,False,False


In [133]:
# Save the combined July frame without its row index
UT_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/UT/UT_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the raw August 2017 "day" extract for Utah (UT)
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2017-aug-day-UT.csv")

In [135]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
aug_2017.drop(
    index=aug_2017.loc[
        (aug_2017['TemperatureExpectedCool'] >= 850)
        | (aug_2017['TemperatureExpectedHeat'] >= 850)
        | (aug_2017['TemperatureExpectedCool'] <= 600)
        | (aug_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,877712037a7a7bf239b758d0c1f7f34986ab05e1,2017-08-31 18:35:00 UTC,auto,hold,763,785,735,UT,Hooper,10,False,False,False,Gas
2,39eca45212bb9b42e74db486bb8253f87a90f610,2017-08-06 18:15:00 UTC,cool,auto,722,746,680,UT,Tremonton,60,True,False,False,Gas
3,8ee4969626be51dcfe058b7f6e1b12bebf8c3c95,2017-08-11 08:25:00 UTC,cool,hold,680,675,675,UT,Tremonton,0,True,False,False,Gas
4,877712037a7a7bf239b758d0c1f7f34986ab05e1,2017-08-31 18:40:00 UTC,auto,hold,763,785,735,UT,Hooper,10,False,False,False,Gas
5,e9935cfb2e6c5cfa202d2ae7650b13d232e87e52,2017-08-17 12:15:00 UTC,cool,auto,717,748,661,UT,West Point,37,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518676,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-08-26 17:20:00 UTC,cool,auto,755,760,650,UT,West Valley City,67,False,False,False,Gas
518677,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-08-31 17:05:00 UTC,cool,auto,755,760,650,UT,West Valley City,67,False,False,False,Gas
518678,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-08-26 17:05:00 UTC,cool,auto,752,760,650,UT,West Valley City,67,False,False,False,Gas
518679,e96d2d3c5c768e37c9ca110cb5d7787596782d73,2017-08-30 17:50:00 UTC,cool,auto,761,760,650,UT,West Valley City,67,False,False,False,Gas


In [136]:
# Tag every row with its year and month (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Write the August 2017 averages to CSV; the group keys live in the index, so it is exported too
aug_2017_ave.to_csv(path_or_buf="data/day/UT/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the raw August 2018 "day" extract for Utah (UT)
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2018-aug-day-UT.csv")

In [141]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
aug_2018.drop(
    index=aug_2018.loc[
        (aug_2018['TemperatureExpectedCool'] >= 850)
        | (aug_2018['TemperatureExpectedHeat'] >= 850)
        | (aug_2018['TemperatureExpectedCool'] <= 600)
        | (aug_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,418c3741927f6dc56509368742842e0faf9c4dd1,2018-08-04 17:50:00 UTC,cool,auto,790,830,790,UT,Lindon,20,True,False,False,Gas
1,7ce3cce358f1c97a44ca03072d855068bd97b818,2018-08-09 14:00:00 UTC,auto,hold,763,830,700,UT,Cedar Hills,10,False,False,False,Gas
2,018f26dd7111dd572d7fefd31be35bc35fd77e56,2018-08-22 16:05:00 UTC,auto,hold,688,687,637,UT,Santaquin,0,False,False,False,Gas
3,018f26dd7111dd572d7fefd31be35bc35fd77e56,2018-08-18 15:30:00 UTC,auto,hold,692,687,637,UT,Santaquin,0,False,False,False,Gas
4,2fd7baa4916ddba47de0536d5afa870f2fe10bd0,2018-08-08 12:30:00 UTC,auto,hold,809,830,670,UT,Cedar Hills,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
927159,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2018-08-20 17:10:00 UTC,auto,hold,753,760,660,UT,West Valley City,35,False,False,False,Gas
927160,7d8e797def6a41f466529b60d1e5a02995988161,2018-08-20 19:20:00 UTC,cool,hold,759,760,760,UT,West Valley City,50,False,False,False,Gas
927161,5a2416e160892c9cfa17deb069e1d4bea8af8c0d,2018-08-10 17:55:00 UTC,cool,hold,754,760,760,UT,West Valley City,5,False,False,False,Gas
927162,7d8e797def6a41f466529b60d1e5a02995988161,2018-08-25 15:05:00 UTC,cool,hold,758,760,760,UT,West Valley City,50,False,False,False,Gas


In [142]:
# Tag every row with its year and month (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Write the August 2018 averages to CSV; the group keys live in the index, so it is exported too
aug_2018_ave.to_csv(path_or_buf="data/day/UT/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the raw August 2019 "day" extract for Utah (UT)
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2019-aug-day-UT.csv")

In [147]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
aug_2019.drop(
    index=aug_2019.loc[
        (aug_2019['TemperatureExpectedCool'] >= 850)
        | (aug_2019['TemperatureExpectedHeat'] >= 850)
        | (aug_2019['TemperatureExpectedCool'] <= 600)
        | (aug_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5c18c01dcb0690435cb9af095941d5915445b90b,2019-08-21 10:30:00 UTC,cool,hold,720,739,739,UT,Smithfield,0,False,False,False,Gas
1,52d43033afe3705b70f9d8a94f4d4b132fe872a7,2019-08-12 19:20:00 UTC,cool,auto,752,752,671,UT,Naples,10,False,False,False,Gas
2,47990085aa75e927941779fadb8b6a989f355321,2019-08-05 18:00:00 UTC,cool,hold,788,731,728,UT,Lindon,20,False,False,False,Gas
3,afc12a7d91651a4c8b7e0a2e75ac1635daae0cec,2019-08-11 18:50:00 UTC,cool,hold,772,840,750,UT,Riverton,20,False,False,False,Gas
4,5c18c01dcb0690435cb9af095941d5915445b90b,2019-08-21 08:45:00 UTC,cool,hold,722,739,739,UT,Smithfield,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1180844,05b4f8f6771ed758763ae974a4d249c4a96c8d9f,2019-08-01 19:00:00 UTC,auto,hold,759,765,715,UT,West Valley City,40,False,False,False,Gas
1180845,05b4f8f6771ed758763ae974a4d249c4a96c8d9f,2019-08-01 17:05:00 UTC,auto,hold,741,765,715,UT,West Valley City,40,False,False,False,Gas
1180846,05b4f8f6771ed758763ae974a4d249c4a96c8d9f,2019-08-01 14:00:00 UTC,auto,hold,729,765,715,UT,West Valley City,40,False,False,False,Gas
1180847,74355ea6b21215a892c712f60eba2694f9205839,2019-08-02 16:05:00 UTC,cool,hold,746,765,765,UT,West Valley City,19,True,False,False,Gas


In [148]:
# Tag every row with its year and month (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Write the August 2019 averages to CSV; the group keys live in the index, so it is exported too
aug_2019_ave.to_csv(path_or_buf="data/day/UT/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the raw August 2020 "day" extract for Utah (UT)
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2020-aug-day-UT.csv")

In [153]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
aug_2020.drop(
    index=aug_2020.loc[
        (aug_2020['TemperatureExpectedCool'] >= 850)
        | (aug_2020['TemperatureExpectedHeat'] >= 850)
        | (aug_2020['TemperatureExpectedCool'] <= 600)
        | (aug_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9414fbe30b0cbb6a05245c200ea91ae60f3edbb0,2020-08-14 13:45:00 UTC,cool,hold,706,731,710,UT,Nibley,45,False,False,False,Gas
1,fc077a47bf3384cb17fa778e777c5dfd1a521583,2020-08-13 14:05:00 UTC,cool,hold,715,718,718,UT,Cottonwood Heights,20,False,False,False,Gas
2,ddda4dd226a2f8667ed8190bebf4e39dca45db76,2020-08-01 19:55:00 UTC,cool,hold,698,685,685,UT,Providence,100,False,False,False,Gas
3,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2020-08-01 18:10:00 UTC,auto,hold,748,738,678,UT,Saly Lake City,98,False,False,False,Gas
4,feddd081ba67a2e7e78a409ea14934af40939690,2020-08-03 12:20:00 UTC,cool,auto,718,722,672,UT,Stansbury Park,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1214031,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-08-30 14:30:00 UTC,auto,hold,764,765,695,UT,West Valley City,35,False,False,False,Gas
1214032,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-08-02 13:15:00 UTC,auto,hold,744,765,685,UT,West Valley City,35,False,False,False,Gas
1214033,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-08-23 14:55:00 UTC,auto,hold,763,765,695,UT,West Valley City,35,False,False,False,Gas
1214034,73b224ba10d9c3f59d6ed8f7adbf08b07a4d5221,2020-08-29 13:00:00 UTC,auto,hold,752,765,695,UT,West Valley City,35,False,False,False,Gas


In [154]:
# Tag every row with its year and month (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Write the August 2020 averages to CSV; the group keys live in the index, so it is exported too
aug_2020_ave.to_csv(path_or_buf="data/day/UT/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [158]:
# Collect the CSV files in the August folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined output reproducible across machines.
files = sorted(f for f in os.listdir("data/day/UT/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies the accumulated frame on every iteration.
aug_frames = [pd.read_csv("data/day/UT/aug/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when no CSV files were found
UT_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

UT_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0042e2ecdbaeaf75adc185886abc85a3a85a9872,aug,2017,cool,auto,Ivins,732.348315,738.550562,697.078652,15.0,False,False,False
1,0042e2ecdbaeaf75adc185886abc85a3a85a9872,aug,2017,cool,hold,Ivins,748.037594,766.462406,761.515038,15.0,False,False,False
2,006819101bcb86ccbd55cc0558edf9c18560ca34,aug,2017,cool,auto,Centerville,706.393939,700.000000,690.000000,46.0,False,False,False
3,0096dc81438b6d126f8026d657a5ca1d511c2b36,aug,2017,heat,auto,Pleasant View,704.858065,650.000000,650.000000,0.0,False,False,False
4,00c5ee12719f2bf4360b90a0f70e1b0580488c22,aug,2017,auto,hold,Salt Lake City,751.187103,752.139873,690.000000,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1630,ff6dea167280fc625ee09281a0562a61ba351bb7,aug,2020,auto,hold,American Fork,714.481669,720.250390,627.535491,20.0,False,False,False
1631,ff7f44785055c96a70bf67cc9551be64508c7af2,aug,2020,cool,auto,Pleasant Grove,715.784946,712.645161,712.129032,25.0,False,False,False
1632,ff7f44785055c96a70bf67cc9551be64508c7af2,aug,2020,cool,hold,Pleasant Grove,707.494681,709.601064,693.521277,25.0,False,False,False
1633,ffbb3562acaa38803129f81718ee2d88b423dd35,aug,2020,cool,hold,Holladay,727.228389,734.929273,734.094303,30.0,True,False,False


In [160]:
# Save the combined August frame without its row index
UT_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/UT/UT_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw December 2017 "day" extract for Utah (UT)
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2017-dec-day-UT.csv")

In [162]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
dec_2017.drop(
    index=dec_2017.loc[
        (dec_2017['TemperatureExpectedCool'] >= 850)
        | (dec_2017['TemperatureExpectedHeat'] >= 850)
        | (dec_2017['TemperatureExpectedCool'] <= 600)
        | (dec_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e9935cfb2e6c5cfa202d2ae7650b13d232e87e52,2017-12-15 13:30:00 UTC,heat,auto,719,784,704,UT,West Point,37,True,False,False,Gas
1,9d377e2a176dfe7292b9ca0b5e145dadc364e4a9,2017-12-03 14:40:00 UTC,auto,hold,722,794,726,UT,Wellsville,0,False,False,False,Gas
2,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2017-12-11 15:55:00 UTC,heat,hold,660,665,665,UT,Midway,10,False,False,False,Gas
3,58376b7a2dac72c00ae157b9d71e28cf73eed0b9,2017-12-28 10:00:00 UTC,heat,hold,651,655,655,UT,Alpine,15,False,False,False,Gas
4,47990085aa75e927941779fadb8b6a989f355321,2017-12-09 19:00:00 UTC,heat,hold,689,737,692,UT,Lindon,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657867,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2017-12-15 19:55:00 UTC,heat,hold,757,760,760,UT,West Valley City,47,False,False,False,Gas
657868,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2017-12-15 18:00:00 UTC,heat,hold,755,760,760,UT,West Valley City,47,False,False,False,Gas
657869,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2017-12-13 13:45:00 UTC,heat,hold,758,760,760,UT,West Valley City,47,False,False,False,Gas
657870,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2017-12-08 15:15:00 UTC,heat,hold,759,760,760,UT,West Valley City,47,False,False,False,Gas


In [163]:
# Tag every row with its year and month (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Write the December 2017 averages to CSV; the group keys live in the index, so it is exported too
dec_2017_ave.to_csv(path_or_buf="data/day/UT/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the raw December 2018 "day" extract for Utah (UT)
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2018-dec-day-UT.csv")

In [168]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
dec_2018.drop(
    index=dec_2018.loc[
        (dec_2018['TemperatureExpectedCool'] >= 850)
        | (dec_2018['TemperatureExpectedHeat'] >= 850)
        | (dec_2018['TemperatureExpectedCool'] <= 600)
        | (dec_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2018-12-02 17:25:00 UTC,auto,hold,703,775,705,UT,Saly Lake City,98,False,False,False,Gas
1,99d974dae432d5e3772ad08909d1a1ff737c4167,2018-12-04 18:50:00 UTC,auto,auto,709,773,700,UT,Smithfield,7,False,False,False,Gas
2,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2018-12-09 14:55:00 UTC,auto,hold,715,775,715,UT,Saly Lake City,98,False,False,False,Gas
3,c84c7f712ccaab200229fe27274668ac2742f7cd,2018-12-05 15:00:00 UTC,heat,hold,633,635,635,UT,riverton,0,False,False,False,Gas
4,9d377e2a176dfe7292b9ca0b5e145dadc364e4a9,2018-12-22 15:10:00 UTC,heat,hold,650,658,658,UT,Wellsville,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
983614,a332219bccd5ad2f5c121762337b99c3ecfbcd73,2018-12-03 19:25:00 UTC,auto,auto,736,760,710,UT,West Valley City,20,False,False,False,Gas
983615,05b4f8f6771ed758763ae974a4d249c4a96c8d9f,2018-12-24 19:05:00 UTC,auto,hold,707,760,710,UT,West Valley City,40,False,False,False,Gas
983616,05b4f8f6771ed758763ae974a4d249c4a96c8d9f,2018-12-24 17:15:00 UTC,auto,hold,711,760,710,UT,West Valley City,40,False,False,False,Gas
983617,a332219bccd5ad2f5c121762337b99c3ecfbcd73,2018-12-03 16:55:00 UTC,auto,auto,730,760,710,UT,West Valley City,20,False,False,False,Gas


In [169]:
# Tag every row with its year and month (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Write the December 2018 averages to CSV; the group keys live in the index, so it is exported too
dec_2018_ave.to_csv(path_or_buf="data/day/UT/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the raw December 2019 "day" extract for Utah (UT)
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2019-dec-day-UT.csv")

In [174]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
dec_2019.drop(
    index=dec_2019.loc[
        (dec_2019['TemperatureExpectedCool'] >= 850)
        | (dec_2019['TemperatureExpectedHeat'] >= 850)
        | (dec_2019['TemperatureExpectedCool'] <= 600)
        | (dec_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,13f96270c8a71a77436c5ae8bcac2ac905794107,2019-12-08 18:35:00 UTC,heat,hold,685,685,685,UT,Cottonwood Heights,67,False,False,False,Gas
1,13f96270c8a71a77436c5ae8bcac2ac905794107,2019-12-24 16:50:00 UTC,heat,hold,676,724,652,UT,Cottonwood Heights,67,False,False,False,Gas
2,b212daec36ba699239d2a0c3d97d2c37bd9aa144,2019-12-12 15:05:00 UTC,heat,hold,696,687,681,UT,So. Weber,30,False,False,False,Gas
3,5c18c01dcb0690435cb9af095941d5915445b90b,2019-12-22 12:00:00 UTC,heat,hold,702,703,703,UT,Smithfield,0,False,False,False,Gas
4,b2bc0ef455c77b981e64964182a4b37658ad0cac,2019-12-13 18:50:00 UTC,heat,hold,681,685,685,UT,SOUTH JORDAN,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1131518,dec7b003400f4fa8f6fa15be7c9e38540a9f2b11,2019-12-11 18:40:00 UTC,heat,auto,751,760,760,UT,West Valley City,0,False,False,False,Gas
1131519,cba3981976f0792baccac515ba746b2549f962cc,2019-12-31 17:20:00 UTC,heat,auto,758,760,760,UT,West Valley City,20,False,False,False,Gas
1131520,dec7b003400f4fa8f6fa15be7c9e38540a9f2b11,2019-12-04 13:45:00 UTC,heat,auto,717,760,750,UT,West Valley City,0,False,False,False,Gas
1131521,cba3981976f0792baccac515ba746b2549f962cc,2019-12-29 17:50:00 UTC,heat,auto,756,760,760,UT,West Valley City,20,False,False,False,Gas


In [175]:
# Tag every row with its year and month (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Write the December 2019 averages to CSV; the group keys live in the index, so it is exported too
dec_2019_ave.to_csv(path_or_buf="data/day/UT/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the raw December 2020 "day" extract for Utah (UT)
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/UT-day/2020-dec-day-UT.csv")

In [180]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling or heating setpoint is >= 850 or <= 600.
dec_2020.drop(
    index=dec_2020.loc[
        (dec_2020['TemperatureExpectedCool'] >= 850)
        | (dec_2020['TemperatureExpectedHeat'] >= 850)
        | (dec_2020['TemperatureExpectedCool'] <= 600)
        | (dec_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,13f96270c8a71a77436c5ae8bcac2ac905794107,2020-12-13 18:55:00 UTC,heat,hold,688,712,673,UT,Cottonwood Heights,67,False,False,False,Gas
1,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2020-12-31 14:50:00 UTC,auto,hold,712,785,715,UT,Saly Lake City,98,False,False,False,Gas
2,fc077a47bf3384cb17fa778e777c5dfd1a521583,2020-12-16 13:20:00 UTC,heat,hold,674,675,675,UT,Cottonwood Heights,20,False,False,False,Gas
3,0c366fdd6136dc80cd546ba1fe8e920111c93c2a,2020-12-29 14:25:00 UTC,auto,hold,712,785,715,UT,Saly Lake City,98,False,False,False,Gas
4,e9935cfb2e6c5cfa202d2ae7650b13d232e87e52,2020-12-01 13:25:00 UTC,heat,auto,678,712,680,UT,West Point,37,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
935154,6c85cd0cec22678e99c1152bc76223e815afc134,2020-12-10 13:40:00 UTC,auto,hold,695,760,700,UT,West Valley City,50,False,False,False,Gas
935155,6c85cd0cec22678e99c1152bc76223e815afc134,2020-12-09 18:35:00 UTC,auto,hold,700,760,700,UT,West Valley City,50,False,False,False,Gas
935156,6c85cd0cec22678e99c1152bc76223e815afc134,2020-12-12 18:40:00 UTC,auto,hold,686,760,700,UT,West Valley City,50,False,False,False,Gas
935157,ea6337f1729acb64d6e1d7f8f66641751fbabd93,2020-12-03 16:55:00 UTC,heat,hold,740,760,760,UT,West Valley City,30,True,False,False,Gas


In [181]:
# Tag every row with its year and month (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the temperature columns with "_ave": they hold per-group means once
# the frame is aggregated.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric (int/float/bool) columns per thermostat, month, year,
# HVAC mode, calendar event and city.
# numeric_only=True is explicit because the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) cannot be averaged: pandas >= 2.0
# raises a TypeError on them instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Write the December 2020 averages to CSV; the group keys live in the index, so it is exported too
dec_2020_ave.to_csv(path_or_buf="data/day/UT/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [185]:
# Collect the CSV files in the December folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined output reproducible across machines.
files = sorted(f for f in os.listdir("data/day/UT/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies the accumulated frame on every iteration.
dec_frames = [pd.read_csv("data/day/UT/dec/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when no CSV files were found
UT_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

UT_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0026c5e19df4da540be6b77d831fde80082d7ad0,dec,2017,heat,hold,Holladay,736.400000,733.200000,731.600000,20.0,False,False,False
1,0042e2ecdbaeaf75adc185886abc85a3a85a9872,dec,2017,heat,auto,Ivins,686.100000,650.000000,620.000000,15.0,False,False,False
2,005c537d772a7a0638fad7a834048796466b3e57,dec,2017,heat,auto,Highland,625.214286,820.000000,630.000000,0.0,True,False,False
3,005c537d772a7a0638fad7a834048796466b3e57,dec,2017,heat,hold,Highland,666.017730,664.123404,664.123404,0.0,True,False,False
4,006819101bcb86ccbd55cc0558edf9c18560ca34,dec,2017,heat,hold,Centerville,685.272727,670.054545,670.018182,46.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1672,ff7f44785055c96a70bf67cc9551be64508c7af2,dec,2020,heat,auto,Pleasant Grove,675.555556,690.000000,690.000000,25.0,False,False,False
1673,ff7f44785055c96a70bf67cc9551be64508c7af2,dec,2020,heat,hold,Pleasant Grove,685.183673,690.775510,689.795918,25.0,False,False,False
1674,ffbb3562acaa38803129f81718ee2d88b423dd35,dec,2020,heat,hold,Holladay,694.113208,700.226415,699.943396,30.0,True,False,False
1675,ffdd78b5691bf13d8f8ae8a4e9fed0b0c113d915,dec,2020,heat,auto,Bountiful,715.039813,720.002342,718.086651,0.0,False,False,False


In [187]:
# Save the combined December frame without its row index
UT_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/UT/UT_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the combined monthly CSV files for the state.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined output reproducible across machines.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/UT/") if f.endswith(".csv"))

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies the accumulated frame on every iteration.
state_frames = [pd.read_csv("Scraper_Output/State_Month_Day/UT/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when no CSV files were found
UT_all = pd.concat(state_frames) if state_frames else pd.DataFrame()

UT_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0042e2ecdbaeaf75adc185886abc85a3a85a9872,aug,2017,cool,auto,Ivins,732.348315,738.550562,697.078652,15.0,False,False,False
1,0042e2ecdbaeaf75adc185886abc85a3a85a9872,aug,2017,cool,hold,Ivins,748.037594,766.462406,761.515038,15.0,False,False,False
2,006819101bcb86ccbd55cc0558edf9c18560ca34,aug,2017,cool,auto,Centerville,706.393939,700.000000,690.000000,46.0,False,False,False
3,0096dc81438b6d126f8026d657a5ca1d511c2b36,aug,2017,heat,auto,Pleasant View,704.858065,650.000000,650.000000,0.0,False,False,False
4,00c5ee12719f2bf4360b90a0f70e1b0580488c22,aug,2017,auto,hold,Salt Lake City,751.187103,752.139873,690.000000,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7432,ff2c4ccc666ca6cacacd5fa84a3a4ba659e1ff87,jun,2021,cool,hold,Syracuse,755.600000,750.666667,750.666667,5.0,False,False,False
7433,ff6dea167280fc625ee09281a0562a61ba351bb7,jun,2021,auto,hold,American Fork,710.710470,728.081197,669.628205,20.0,False,False,False
7434,ff7f44785055c96a70bf67cc9551be64508c7af2,jun,2021,auto,hold,Pleasant Grove,713.561983,710.681818,659.710744,25.0,False,False,False
7435,ffbb3562acaa38803129f81718ee2d88b423dd35,jun,2021,cool,hold,Holladay,725.727835,727.209622,727.121649,30.0,True,False,False


In [190]:
# Save the all-months frame for the state without its row index
UT_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/UT_all_day.csv", index=False, header=True)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery SQL
# queries: print the distinct ProvinceState values of every monthly frame.
frames_by_name = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# One line per frame, in the same order as the mapping above
for frame_name, frame in frames_by_name.items():
    print(f"Unique {frame_name}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['UT']
Unique jan_2018: ['UT']
Unique jan_2019: ['UT']
Unique jan_2020: ['UT']
Unique jan_2021: ['UT']
Unique feb_2017: ['UT']
Unique feb_2018: ['UT']
Unique feb_2019: ['UT']
Unique feb_2020: ['UT']
Unique feb_2021: ['UT']
Unique jun_2017: ['UT']
Unique jun_2018: ['UT']
Unique jun_2019: ['UT']
Unique jun_2020: ['UT']
Unique jun_2021: ['UT']
Unique jul_2017: ['UT']
Unique jul_2018: ['UT']
Unique jul_2019: ['UT']
Unique jul_2020: ['UT']
Unique jul_2021: ['UT']
Unique aug_2017: ['UT']
Unique aug_2018: ['UT']
Unique aug_2019: ['UT']
Unique aug_2020: ['UT']
Unique dec_2017: ['UT']
Unique dec_2018: ['UT']
Unique dec_2019: ['UT']
Unique dec_2020: ['UT']
