# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 "day" extract for WA into a DataFrame.
jan_2017 = pd.read_csv(
    "../data_large/WA-day/2017-jan-day-WA.csv"
)

In [3]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = jan_2017['TemperatureExpectedCool']
heat_setpoint = jan_2017['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
jan_2017.drop(jan_2017.index[too_high | too_low], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,7e2d078e7ea21ca5d30abcd231a36f6a908ea5e5,2017-01-30 14:50:00 UTC,auto,hold,675,759,709,WA,Ridgefield,15,False,False,False,Gas
3,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-01-22 18:00:00 UTC,auto,auto,684,708,687,WA,Duvall,5,False,False,False,Gas
4,7e2d078e7ea21ca5d30abcd231a36f6a908ea5e5,2017-01-14 15:15:00 UTC,auto,hold,699,755,705,WA,Ridgefield,15,False,False,False,Gas
5,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-01-18 17:25:00 UTC,auto,auto,684,762,689,WA,Duvall,5,False,False,False,Gas
7,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-01-14 19:50:00 UTC,auto,hold,717,766,716,WA,Duvall,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
171709,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-01-03 17:25:00 UTC,auto,auto,710,770,710,WA,Spokane Valley,40,False,False,False,Gas
171710,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-01-29 17:05:00 UTC,auto,auto,711,770,710,WA,Spokane Valley,40,False,False,False,Gas
171711,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-01-21 18:45:00 UTC,auto,auto,712,770,710,WA,Spokane Valley,40,False,False,False,Gas
171712,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-01-30 17:40:00 UTC,auto,hold,702,770,710,WA,Spokane Valley,40,False,False,False,Gas


In [4]:
# Tag every reading with its year and month label (both stored as text).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_labels)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2017_ave = jan_2017.groupby(group_keys).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
019a1d7273650d66544a79b146b515ddfa3f0531,Jan,2017,heat,hold,Vancouver,689.850000,657.500000,640.000000,5.0,False,False,True
02890276e16188aa7cdbc5cf916e47185c35da9a,Jan,2017,heat,auto,Lynnwood,668.666667,780.000000,699.333333,5.0,False,False,False
02890276e16188aa7cdbc5cf916e47185c35da9a,Jan,2017,heat,hold,Lynnwood,730.694444,726.875000,726.555556,5.0,False,False,False
02f68c89b6dfd5538aec1991aad5ebc824d840e2,Jan,2017,heat,auto,Seattle,696.535714,779.000000,698.000000,65.0,False,False,False
042f2f6da4906ec405634547472d598154b6e3fc,Jan,2017,heat,auto,West Richland,635.772727,681.454545,635.454545,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
fcbec171f9a8c2fdd7813f1510f88d9ffb8b1789,Jan,2017,heat,auto,Camas,727.700000,788.000000,686.000000,5.0,False,False,False
fe8591238eb60be06f35ae8dded14551efdfad35,Jan,2017,heat,hold,Maple Valley,670.403315,683.674033,677.359116,20.0,False,False,False
feb039fb97c44493a5351770a1e1ae13d38959a5,Jan,2017,heat,auto,Bellingham,678.333333,687.000000,678.166667,95.0,False,False,False
feb039fb97c44493a5351770a1e1ae13d38959a5,Jan,2017,heat,hold,Bellingham,683.795349,706.502326,704.293023,95.0,False,False,False


In [7]:
# Export the January 2017 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/jan/jan_2017_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
jan_2017_ave.to_csv(ave_path, header=True, index=True)

### 2018 January Day

In [8]:
# Load the January 2018 "day" extract for WA into a DataFrame.
jan_2018 = pd.read_csv(
    "../data_large/WA-day/2018-jan-day-WA.csv"
)

In [9]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = jan_2018['TemperatureExpectedCool']
heat_setpoint = jan_2018['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
jan_2018.drop(jan_2018.index[too_high | too_low], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,32315bad8ac30472277fbfad311ff425ed4cdd5f,2018-01-19 17:50:00 UTC,heat,auto,629,630,630,WA,Rochester,45,True,False,True,Electric
1,160ebcafca6c48f4aef05e37f83334afe9f2282e,2018-01-16 15:55:00 UTC,auto,hold,708,775,705,WA,Fife,10,False,False,False,Gas
2,6b867254a7d2404c7d24b47135f9f6a41ff150c1,2018-01-19 16:15:00 UTC,auto,hold,664,795,665,WA,Kelso,20,False,False,True,Electric
3,32315bad8ac30472277fbfad311ff425ed4cdd5f,2018-01-23 15:55:00 UTC,heat,auto,629,610,610,WA,Rochester,45,True,False,True,Electric
4,e0765853b51bfa4aed139c70f21bd2943d453261,2018-01-21 18:35:00 UTC,heat,hold,693,691,691,WA,Lopez,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
386718,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-01-17 17:20:00 UTC,auto,auto,710,760,710,WA,Spokane Valley,40,False,False,False,Gas
386719,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-01-17 14:35:00 UTC,heat,auto,675,760,680,WA,Spokane Valley,7,False,False,False,Gas
386720,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-01-14 16:20:00 UTC,heat,auto,696,760,700,WA,Spokane Valley,7,False,False,False,Gas
386721,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-01-17 14:40:00 UTC,heat,auto,680,760,680,WA,Spokane Valley,7,False,False,False,Gas


In [10]:
# Tag every reading with its year and month label (both stored as text).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_labels)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2018_ave = jan_2018.groupby(group_keys).mean(numeric_only=True)

In [13]:
# Export the January 2018 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/jan/jan_2018_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
jan_2018_ave.to_csv(ave_path, header=True, index=True)

### 2019 January Day

In [14]:
# Load the January 2019 "day" extract for WA into a DataFrame.
jan_2019 = pd.read_csv(
    "../data_large/WA-day/2019-jan-day-WA.csv"
)

In [15]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = jan_2019['TemperatureExpectedCool']
heat_setpoint = jan_2019['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
jan_2019.drop(jan_2019.index[too_high | too_low], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e05b24bec10368a01918f5049a0393b831c0ba5,2019-01-02 18:40:00 UTC,heat,hold,688,679,660,WA,duvall,30,False,False,False,Gas
1,1e05b24bec10368a01918f5049a0393b831c0ba5,2019-01-21 17:15:00 UTC,heat,auto,667,679,670,WA,duvall,30,False,False,False,Gas
3,a7c1d752109a7cf21b5a387be531f6a3d749bc1d,2019-01-09 18:00:00 UTC,auto,auto,688,742,702,WA,Longview,0,True,False,True,Electric
4,0f3f32423e7c4bcd62bc6b6dfe86adefa0e358d7,2019-01-14 19:50:00 UTC,heat,hold,674,679,679,WA,Union,5,False,False,True,Electric
5,6f6c16cbfe77d7ebf9d680277566dee999e0d4d5,2019-01-10 17:35:00 UTC,auto,hold,714,766,716,WA,Selah,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
551482,0c934c0fae5270491f44ada0ec919994dc79c6c0,2019-01-01 17:30:00 UTC,auto,auto,702,760,710,WA,Spokane Valley,40,False,False,False,Gas
551483,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2019-01-09 13:45:00 UTC,auto,auto,687,760,650,WA,Spokane Valley,7,False,False,False,Gas
551484,0c934c0fae5270491f44ada0ec919994dc79c6c0,2019-01-01 16:30:00 UTC,auto,auto,704,760,710,WA,Spokane Valley,40,False,False,False,Gas
551485,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2019-01-20 15:25:00 UTC,auto,auto,645,760,650,WA,Spokane Valley,7,False,False,False,Gas


In [16]:
# Tag every reading with its year and month label (both stored as text).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_labels)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2019_ave = jan_2019.groupby(group_keys).mean(numeric_only=True)

In [19]:
# Export the January 2019 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/jan/jan_2019_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
jan_2019_ave.to_csv(ave_path, header=True, index=True)

### 2020 January Day

In [20]:
# Load the January 2020 "day" extract for WA into a DataFrame.
jan_2020 = pd.read_csv(
    "../data_large/WA-day/2020-jan-day-WA.csv"
)

In [21]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = jan_2020['TemperatureExpectedCool']
heat_setpoint = jan_2020['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
jan_2020.drop(jan_2020.index[too_high | too_low], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,17f8b089fac2e973d182b4ef01ddbff574b63268,2020-01-18 18:00:00 UTC,auto,hold,691,765,695,WA,Prosser,9,False,False,True,Electric
2,17f8b089fac2e973d182b4ef01ddbff574b63268,2020-01-12 19:35:00 UTC,auto,hold,691,765,695,WA,Prosser,9,False,False,True,Electric
3,6ecccbddce03e906727d6849755f10e270854a61,2020-01-04 14:40:00 UTC,auto,auto,658,703,653,WA,Kalama,17,False,False,False,Gas
4,4bcba90384732de8b738ef0de7e175e9b502a9be,2020-01-30 18:30:00 UTC,heat,auto,718,712,712,WA,BONNEY LAKE,15,True,False,False,Gas
5,063c5ed87f4d7bc69993dc1784c120b55768c41d,2020-01-16 18:35:00 UTC,heat,hold,684,709,709,WA,Leavenworth,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
636755,c0522086d24320d88a6de0cfc363c999e6a7d61e,2020-01-05 16:00:00 UTC,heat,auto,740,745,745,WA,Spokane Valley,5,False,False,False,Gas
636756,b7ca95b07ce36a6a071763cd5db4038726c39688,2020-01-28 17:50:00 UTC,heat,auto,740,750,750,WA,Spokane Valley,39,False,False,False,Gas
636757,b7ca95b07ce36a6a071763cd5db4038726c39688,2020-01-28 19:55:00 UTC,heat,auto,755,750,750,WA,Spokane Valley,39,False,False,False,Gas
636758,c0522086d24320d88a6de0cfc363c999e6a7d61e,2020-01-12 18:25:00 UTC,heat,auto,750,750,750,WA,Spokane Valley,5,False,False,False,Gas


In [22]:
# Tag every reading with its year and month label (both stored as text).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_labels)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2020_ave = jan_2020.groupby(group_keys).mean(numeric_only=True)

In [25]:
# Export the January 2020 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/jan/jan_2020_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
jan_2020_ave.to_csv(ave_path, header=True, index=True)

### 2021 January Day

In [26]:
# Load the January 2021 "day" extract for WA into a DataFrame.
jan_2021 = pd.read_csv(
    "../data_large/WA-day/2021-jan-day-WA.csv"
)

In [27]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = jan_2021['TemperatureExpectedCool']
heat_setpoint = jan_2021['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
jan_2021.drop(jan_2021.index[too_high | too_low], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c1c8018f3fb989a05665329b4dd2049f7268ffe4,2021-01-27 19:05:00 UTC,heat,hold,637,640,640,WA,Tumwater,85,False,False,True,Electric
2,c1c8018f3fb989a05665329b4dd2049f7268ffe4,2021-01-27 16:00:00 UTC,heat,hold,630,640,640,WA,Tumwater,85,False,False,True,Electric
4,93949aace64e11360a850909aceeafd6d6f37900,2021-01-28 17:15:00 UTC,heat,hold,673,681,673,WA,Washougal,15,False,False,False,Gas
5,93949aace64e11360a850909aceeafd6d6f37900,2021-01-20 16:20:00 UTC,heat,hold,696,681,660,WA,Washougal,15,False,False,False,Gas
7,d087372e33a36ab54464198f8a69d5d7f799efae,2021-01-23 18:10:00 UTC,heat,hold,670,681,681,WA,,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
344347,138efe444f77e07c2ee37b86d8e12df20b7ab064,2021-01-02 12:00:00 UTC,auto,hold,720,740,720,WA,Spokane Valley,5,False,False,True,Electric
344348,138efe444f77e07c2ee37b86d8e12df20b7ab064,2021-01-07 11:45:00 UTC,auto,hold,714,740,720,WA,Spokane Valley,5,False,False,True,Electric
344349,138efe444f77e07c2ee37b86d8e12df20b7ab064,2021-01-04 16:05:00 UTC,auto,hold,723,740,720,WA,Spokane Valley,5,False,False,True,Electric
344350,138efe444f77e07c2ee37b86d8e12df20b7ab064,2021-01-09 14:00:00 UTC,auto,hold,719,740,720,WA,Spokane Valley,5,False,False,True,Electric


In [28]:
# Tag every reading with its year and month label (both stored as text).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_labels)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2021_ave = jan_2021.groupby(group_keys).mean(numeric_only=True)

In [31]:
# Export the January 2021 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/jan/jan_2021_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
jan_2021_ave.to_csv(ave_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect every CSV in the January averages folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined frame reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WA/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file, then concatenate once instead of re-copying a growing
# frame on every iteration. Each file keeps its own 0..n row index, as before.
yearly_frames = [pd.read_csv("data/day/WA/jan/" + file) for file in files]

# pd.concat raises on an empty list; fall back to an empty frame in that case.
WA_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

WA_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019a1d7273650d66544a79b146b515ddfa3f0531,Jan,2017,heat,hold,Vancouver,689.850000,657.500000,640.000000,5.0,False,False,True
1,02890276e16188aa7cdbc5cf916e47185c35da9a,Jan,2017,heat,auto,Lynnwood,668.666667,780.000000,699.333333,5.0,False,False,False
2,02890276e16188aa7cdbc5cf916e47185c35da9a,Jan,2017,heat,hold,Lynnwood,730.694444,726.875000,726.555556,5.0,False,False,False
3,02f68c89b6dfd5538aec1991aad5ebc824d840e2,Jan,2017,heat,auto,Seattle,696.535714,779.000000,698.000000,65.0,False,False,False
4,042f2f6da4906ec405634547472d598154b6e3fc,Jan,2017,heat,auto,West Richland,635.772727,681.454545,635.454545,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
790,fcb7d0ea0f57fb7544aadbc0e31620d53e44db8a,Jan,2021,heat,hold,Colbert,696.126729,698.192325,698.192325,20.0,False,False,False
791,fe8591238eb60be06f35ae8dded14551efdfad35,Jan,2021,heat,hold,Maple Valley,671.148936,700.468085,698.340426,20.0,False,False,False
792,feb039fb97c44493a5351770a1e1ae13d38959a5,Jan,2021,heat,hold,Bellingham,700.571429,674.214286,665.785714,95.0,False,False,False
793,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,Jan,2021,heat,hold,Yakima,697.588123,700.593870,700.593870,25.0,False,False,False


In [34]:
# Write the combined January frame. The row index is only a per-file counter,
# so it is left out of the export (index=False).
combined_path = Path("Scraper_Output/State_Month_Day/WA/WA_jan.csv")
# to_csv does not create missing folders; make sure the target exists first.
combined_path.parent.mkdir(parents=True, exist_ok=True)
WA_jan.to_csv(combined_path, header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 "day" extract for WA into a DataFrame.
feb_2017 = pd.read_csv(
    "../data_large/WA-day/2017-feb-day-WA.csv"
)

In [36]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = feb_2017['TemperatureExpectedCool']
heat_setpoint = feb_2017['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
feb_2017.drop(feb_2017.index[too_high | too_low], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-02-15 18:20:00 UTC,auto,auto,720,743,721,WA,Duvall,5,False,False,False,Gas
1,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-02-08 15:35:00 UTC,auto,auto,725,769,747,WA,Duvall,5,False,False,False,Gas
4,345bed8dd935c19735e0e472929079f1fc632104,2017-02-19 19:20:00 UTC,auto,hold,704,755,705,WA,Freeland,25,True,False,True,Electric
5,856bf6c249de8e8e76d12d9656f857eef51e202d,2017-02-18 19:35:00 UTC,heat,auto,667,708,678,WA,Kingston,0,True,False,True,Electric
6,5bb725bcc8dd210e0c3500ecea7fdbbc57a86403,2017-02-27 13:10:00 UTC,heat,hold,669,671,671,WA,issaquah,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
150650,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-02-12 18:40:00 UTC,auto,auto,705,770,710,WA,Spokane Valley,40,False,False,False,Gas
150651,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-02-24 16:20:00 UTC,auto,auto,704,770,710,WA,Spokane Valley,40,False,False,False,Gas
150652,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-02-12 18:30:00 UTC,auto,auto,706,770,710,WA,Spokane Valley,40,False,False,False,Gas
150653,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-02-19 16:35:00 UTC,auto,auto,692,770,710,WA,Spokane Valley,40,False,False,False,Gas


In [37]:
# Tag every reading with its year and month label (both stored as text).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream code expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_labels)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2017_ave = feb_2017.groupby(group_keys).mean(numeric_only=True)

In [40]:
# Export the February 2017 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/feb/feb_2017_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
feb_2017_ave.to_csv(ave_path, header=True, index=True)

### 2018 February Day

In [41]:
# Load the February 2018 "day" extract for WA into a DataFrame.
feb_2018 = pd.read_csv(
    "../data_large/WA-day/2018-feb-day-WA.csv"
)

In [42]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = feb_2018['TemperatureExpectedCool']
heat_setpoint = feb_2018['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
feb_2018.drop(feb_2018.index[too_high | too_low], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0bd80b780d9ddea1ab114b066ceebba8e12dceb8,2018-02-21 18:25:00 UTC,heat,auto,663,678,678,WA,Lake Tapps,10,False,False,False,Gas
1,e1b8814d8f007bb73d4773ce4018483dac35c75f,2018-02-25 17:40:00 UTC,heat,hold,700,698,698,WA,Wenatchee,47,True,False,True,Electric
2,32315bad8ac30472277fbfad311ff425ed4cdd5f,2018-02-06 19:00:00 UTC,heat,auto,678,630,630,WA,Rochester,45,True,False,True,Electric
3,32315bad8ac30472277fbfad311ff425ed4cdd5f,2018-02-07 16:45:00 UTC,heat,auto,649,630,630,WA,Rochester,45,True,False,True,Electric
4,32315bad8ac30472277fbfad311ff425ed4cdd5f,2018-02-12 17:45:00 UTC,heat,auto,651,630,630,WA,Rochester,45,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365589,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-02-10 16:30:00 UTC,auto,auto,707,760,710,WA,Spokane Valley,40,False,False,False,Gas
365590,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-02-06 14:55:00 UTC,heat,auto,682,760,680,WA,Spokane Valley,7,False,False,False,Gas
365591,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-02-03 18:35:00 UTC,auto,auto,714,760,710,WA,Spokane Valley,40,False,False,False,Gas
365592,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-02-17 16:50:00 UTC,heat,auto,677,760,680,WA,Spokane Valley,7,False,False,False,Gas


In [43]:
# Tag every reading with its year and month label (both stored as text).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream code expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_labels)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2018_ave = feb_2018.groupby(group_keys).mean(numeric_only=True)

In [46]:
# Export the February 2018 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/feb/feb_2018_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
feb_2018_ave.to_csv(ave_path, header=True, index=True)

### 2019 February Day

In [47]:
# Load the February 2019 "day" extract for WA into a DataFrame.
feb_2019 = pd.read_csv(
    "../data_large/WA-day/2019-feb-day-WA.csv"
)

In [48]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = feb_2019['TemperatureExpectedCool']
heat_setpoint = feb_2019['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
feb_2019.drop(feb_2019.index[too_high | too_low], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,87a69da576ca110a3e99c79e4f5aabbd789f1b72,2019-02-23 19:35:00 UTC,heat,auto,680,669,650,WA,Maple Valley,0,True,False,False,Gas
3,5e142f843711dbcd89bb4f5f476ac1633e6815d1,2019-02-22 15:35:00 UTC,auto,auto,687,755,690,WA,Lynden,27,False,False,False,Gas
4,7ae6f5212bf741042179ad1496e96ffbaa667e57,2019-02-26 16:30:00 UTC,auto,auto,661,717,670,WA,Suquamish,17,False,False,True,Electric
5,f76a11e9855d445c2d2e1d0995444da32a92ac77,2019-02-03 17:35:00 UTC,auxHeatOnly,hold,703,702,702,WA,East Wenatchee,15,False,False,True,Electric
6,607b4876e05e51451d60f12907138e04c99faed0,2019-02-06 16:45:00 UTC,heat,auto,565,779,671,WA,Shoreline,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384649,0c934c0fae5270491f44ada0ec919994dc79c6c0,2019-02-24 17:05:00 UTC,auto,auto,695,760,710,WA,Spokane Valley,40,False,False,False,Gas
384650,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2019-02-23 19:20:00 UTC,auto,auto,650,760,650,WA,Spokane Valley,7,False,False,False,Gas
384651,0c934c0fae5270491f44ada0ec919994dc79c6c0,2019-02-16 16:25:00 UTC,auto,auto,706,760,710,WA,Spokane Valley,40,False,False,False,Gas
384652,0c934c0fae5270491f44ada0ec919994dc79c6c0,2019-02-11 16:40:00 UTC,auto,auto,699,760,710,WA,Spokane Valley,40,False,False,False,Gas


In [49]:
# Tag every reading with its year and month label (both stored as text).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream code expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_labels)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2019_ave = feb_2019.groupby(group_keys).mean(numeric_only=True)

In [52]:
# Export the February 2019 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/feb/feb_2019_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
feb_2019_ave.to_csv(ave_path, header=True, index=True)

### 2020 February Day

In [53]:
# Load the February 2020 "day" extract for WA into a DataFrame.
feb_2020 = pd.read_csv(
    "../data_large/WA-day/2020-feb-day-WA.csv"
)

In [54]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = feb_2020['TemperatureExpectedCool']
heat_setpoint = feb_2020['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
feb_2020.drop(feb_2020.index[too_high | too_low], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5bb725bcc8dd210e0c3500ecea7fdbbc57a86403,2020-02-07 13:55:00 UTC,heat,hold,670,671,671,WA,issaquah,25,False,False,False,Gas
1,d1ec16fcba08010e495c86a3f835553b6af805c8,2020-02-14 15:45:00 UTC,heat,auto,624,687,632,WA,Shoreline,5,False,False,False,Gas
2,17f8b089fac2e973d182b4ef01ddbff574b63268,2020-02-05 13:00:00 UTC,auto,hold,691,765,695,WA,Prosser,9,False,False,True,Electric
3,a7c1d752109a7cf21b5a387be531f6a3d749bc1d,2020-02-11 18:55:00 UTC,auto,hold,704,757,707,WA,Longview,0,True,False,True,Electric
4,c184709ee30fbd739d5d1af3d9f4e95336e33f75,2020-02-13 15:05:00 UTC,auto,hold,668,723,673,WA,Rainier,50,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
554429,2db9bb8fd78474acbb163d049dc01c8887b2994e,2020-02-23 07:15:00 UTC,heat,hold,739,740,740,WA,Spokane Valley,50,True,False,False,Gas
554430,2db9bb8fd78474acbb163d049dc01c8887b2994e,2020-02-15 16:20:00 UTC,heat,hold,738,740,740,WA,Spokane Valley,50,True,False,False,Gas
554431,2db9bb8fd78474acbb163d049dc01c8887b2994e,2020-02-09 15:50:00 UTC,heat,hold,748,750,750,WA,Spokane Valley,50,True,False,False,Gas
554432,2db9bb8fd78474acbb163d049dc01c8887b2994e,2020-02-09 15:25:00 UTC,heat,hold,746,750,750,WA,Spokane Valley,50,True,False,False,Gas


In [55]:
# Tag every reading with its year and month label (both stored as text).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream code expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_labels)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2020_ave = feb_2020.groupby(group_keys).mean(numeric_only=True)

In [58]:
# Export the February 2020 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/feb/feb_2020_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
feb_2020_ave.to_csv(ave_path, header=True, index=True)

### 2021 February Day

In [59]:
# Load the February 2021 "day" extract for WA into a DataFrame.
feb_2021 = pd.read_csv(
    "../data_large/WA-day/2021-feb-day-WA.csv"
)

In [60]:
# Discard predetermined outliers before aggregating: a row is removed when
# EITHER expected set-point (cool or heat) is >= 850 or <= 600.
cool_setpoint = feb_2021['TemperatureExpectedCool']
heat_setpoint = feb_2021['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# One in-place drop covering every flagged row label.
feb_2021.drop(feb_2021.index[too_high | too_low], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1324facead2e9c0d0dc8961bd772bbde65007078,2021-02-08 16:05:00 UTC,heat,hold,663,669,669,WA,Ellensburg,40,False,False,False,Gas
1,93949aace64e11360a850909aceeafd6d6f37900,2021-02-11 13:40:00 UTC,heat,hold,689,693,693,WA,Washougal,15,False,False,False,Gas
2,d087372e33a36ab54464198f8a69d5d7f799efae,2021-02-03 16:20:00 UTC,heat,hold,689,698,698,WA,,0,True,False,True,Electric
3,665aab2d94b6161ef9c4ff9f973d7453c83f3946,2021-02-21 16:40:00 UTC,auto,hold,710,775,705,WA,Sumner,0,False,False,False,Gas
4,93949aace64e11360a850909aceeafd6d6f37900,2021-02-09 17:15:00 UTC,heat,hold,689,693,693,WA,Washougal,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322549,9fd474dd68222973e5214b8f5f3f394633562a5e,2021-02-05 19:30:00 UTC,heat,hold,706,710,710,WA,Spokane Valley,10,False,False,False,Gas
322550,9fd474dd68222973e5214b8f5f3f394633562a5e,2021-02-01 17:00:00 UTC,heat,hold,709,710,710,WA,Spokane Valley,10,False,False,False,Gas
322551,9fd474dd68222973e5214b8f5f3f394633562a5e,2021-02-09 15:05:00 UTC,heat,hold,708,710,710,WA,Spokane Valley,10,False,False,False,Gas
322552,9fd474dd68222973e5214b8f5f3f394633562a5e,2021-02-07 17:15:00 UTC,heat,hold,716,710,710,WA,Spokane Valley,10,False,False,False,Gas


In [61]:
# Tag every reading with its year and month label (both stored as text).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream code expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Relabel the three temperature columns with an "_ave" suffix ahead of the
# groupby/mean step, so the aggregated output carries descriptive names.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_labels)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is spelled out because pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises a TypeError when asked to average them.
# NOTE(review): groupby's default dropna=True skips rows whose City is
# missing — confirm that excluding those readings is intended.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2021_ave = feb_2021.groupby(group_keys).mean(numeric_only=True)

In [64]:
# Export the February 2021 averages. The groupby keys form the index, so the
# index is written as well (index=True).
ave_path = Path("data/day/WA/feb/feb_2021_ave.csv")
# to_csv does not create missing folders; make sure the target exists first.
ave_path.parent.mkdir(parents=True, exist_ok=True)
feb_2021_ave.to_csv(ave_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect every CSV in the February averages folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined frame reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WA/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file, then concatenate once instead of re-copying a growing
# frame on every iteration. Each file keeps its own 0..n row index, as before.
yearly_frames = [pd.read_csv("data/day/WA/feb/" + file) for file in files]

# pd.concat raises on an empty list; fall back to an empty frame in that case.
WA_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

WA_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019a1d7273650d66544a79b146b515ddfa3f0531,feb,2017,heat,hold,Vancouver,697.875000,655.000000,655.000000,5.0,False,False,True
1,02890276e16188aa7cdbc5cf916e47185c35da9a,feb,2017,heat,auto,Lynnwood,732.891429,780.000000,747.481429,5.0,False,False,False
2,02890276e16188aa7cdbc5cf916e47185c35da9a,feb,2017,heat,hold,Lynnwood,725.197740,782.028249,750.853107,5.0,False,False,False
3,042f2f6da4906ec405634547472d598154b6e3fc,feb,2017,heat,auto,West Richland,668.858044,683.873817,666.561514,25.0,False,False,True
4,042f2f6da4906ec405634547472d598154b6e3fc,feb,2017,heat,hold,West Richland,644.500000,680.000000,680.000000,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
773,fcb7d0ea0f57fb7544aadbc0e31620d53e44db8a,feb,2021,heat,hold,Colbert,696.304348,699.130435,699.081285,20.0,False,False,False
774,fe8591238eb60be06f35ae8dded14551efdfad35,feb,2021,heat,hold,Maple Valley,690.552632,700.000000,700.000000,20.0,False,False,False
775,feb039fb97c44493a5351770a1e1ae13d38959a5,feb,2021,heat,hold,Bellingham,706.000000,700.625000,699.541667,95.0,False,False,False
776,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,feb,2021,heat,hold,Yakima,695.624579,699.202020,699.202020,25.0,False,False,False


In [67]:
# Save the combined February table; index=False leaves the row index out of the file.
WA_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WA/WA_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 WA day CSV.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2017-jun-day-WA.csv")

In [69]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2017.drop(index=jun_2017.index[out_of_range], inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-06-21 18:05:00 UTC,auto,auto,697,717,697,WA,Duvall,5,False,False,False,Gas
1,77ef9e32b7da9b2c1bc0e90666fe7dc8f0eac3f6,2017-06-14 13:20:00 UTC,auto,auto,655,685,635,WA,Sequim,15,False,False,True,Electric
3,2cb6c68d39b98d3f2869f13f76687e5d7b6fa2cc,2017-06-24 15:10:00 UTC,cool,auto,706,810,722,WA,pullman,10,False,False,False,Gas
4,5e142f843711dbcd89bb4f5f476ac1633e6815d1,2017-06-22 18:00:00 UTC,auto,hold,716,755,705,WA,Lynden,27,False,False,False,Gas
5,4a7cb170f10ebad94b27de5f60114c70ffcc5f4c,2017-06-21 13:50:00 UTC,auto,auto,754,810,610,WA,Manson,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223751,b49e68560dd35b0404964a0ee57127d0d27ef396,2017-06-06 14:10:00 UTC,cool,auto,714,750,670,WA,Spokane Valley,20,False,False,False,Gas
223752,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-06-20 16:55:00 UTC,auto,auto,753,750,700,WA,Spokane Valley,40,False,False,False,Gas
223753,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-06-20 16:20:00 UTC,auto,auto,752,750,700,WA,Spokane Valley,40,False,False,False,Gas
223754,b49e68560dd35b0404964a0ee57127d0d27ef396,2017-06-06 16:20:00 UTC,cool,auto,719,750,670,WA,Spokane Valley,20,False,False,False,Gas


In [70]:
# Label every row with its source year and month (both stored as strings).
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=ave_labels)

In [72]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Save the June 2017 averages; index=True writes the group keys held in the index.
jun_2017_ave.to_csv(
    path_or_buf="data/day/WA/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the June 2018 WA day CSV.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2018-jun-day-WA.csv")

In [75]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2018.drop(index=jun_2018.index[out_of_range], inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a7c1d752109a7cf21b5a387be531f6a3d749bc1d,2018-06-05 16:30:00 UTC,cool,hold,694,685,685,WA,Longview,0,True,False,True,Electric
1,14f13b7870a2feafcd280f83960e081e40f12f68,2018-06-30 19:40:00 UTC,auto,hold,685,755,685,WA,Mountlake Terrace,30,False,False,False,Gas
2,a7c1d752109a7cf21b5a387be531f6a3d749bc1d,2018-06-18 19:05:00 UTC,cool,hold,680,675,675,WA,Longview,0,True,False,True,Electric
3,3ef8629833e2ab64c631cd2dd696e7bf6801d118,2018-06-23 19:25:00 UTC,auto,hold,730,765,695,WA,edmonds,5,True,False,False,Gas
4,d74a7bff7ea9ce1b6b961560a106f920f5fc4254,2018-06-13 15:30:00 UTC,cool,hold,693,699,699,WA,Lacey,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
379321,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-06-02 15:25:00 UTC,auto,auto,714,760,710,WA,Spokane Valley,40,False,False,False,Gas
379322,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2018-06-29 16:55:00 UTC,cool,hold,737,760,760,WA,Spokane Valley,47,False,False,False,Gas
379323,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2018-06-25 15:20:00 UTC,cool,hold,734,760,760,WA,Spokane Valley,47,False,False,False,Gas
379324,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-06-10 15:25:00 UTC,auto,auto,713,760,710,WA,Spokane Valley,40,False,False,False,Gas


In [76]:
# Label every row with its source year and month (both stored as strings).
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=ave_labels)

In [78]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Save the June 2018 averages; index=True writes the group keys held in the index.
jun_2018_ave.to_csv(
    path_or_buf="data/day/WA/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the June 2019 WA day CSV.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2019-jun-day-WA.csv")

In [81]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2019.drop(index=jun_2019.index[out_of_range], inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6ecccbddce03e906727d6849755f10e270854a61,2019-06-21 17:55:00 UTC,cool,auto,669,685,635,WA,Kalama,17,False,False,False,Gas
1,6d7bc3daad173947535fc68f4321ed5fbcd8f65d,2019-06-13 14:55:00 UTC,auto,hold,680,685,625,WA,Fife,0,True,False,False,Gas
2,59944f0a9aea4e4c5f6fa6c031c2ade3127a62d7,2019-06-23 16:30:00 UTC,auto,hold,718,830,700,WA,Spokane valley,35,False,False,False,Gas
3,31fe11ab0b12531660a2e2e7882f5b691c93eff5,2019-06-19 11:05:00 UTC,heat,auto,650,754,640,WA,Yelm,0,False,False,False,Gas
5,665aab2d94b6161ef9c4ff9f973d7453c83f3946,2019-06-15 15:00:00 UTC,auto,hold,708,743,693,WA,Sumner,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
577122,11c690e515870873de6c58b49aa9bbbd473be566,2019-06-19 17:50:00 UTC,heat,hold,690,670,670,WA,Seattle,79,True,False,False,Gas
577123,a3b5c1ff280d59051d495065da69a20817dffd49,2019-06-26 18:10:00 UTC,heat,hold,686,670,670,WA,Seattle,110,False,False,False,Gas
577124,a3b5c1ff280d59051d495065da69a20817dffd49,2019-06-28 14:40:00 UTC,heat,hold,705,670,670,WA,Seattle,110,False,False,False,Gas
577125,a3b5c1ff280d59051d495065da69a20817dffd49,2019-06-28 15:55:00 UTC,heat,hold,703,670,670,WA,Seattle,110,False,False,False,Gas


In [82]:
# Label every row with its source year and month (both stored as strings).
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=ave_labels)

In [84]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Save the June 2019 averages; index=True writes the group keys held in the index.
jun_2019_ave.to_csv(
    path_or_buf="data/day/WA/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the June 2020 WA day CSV.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2020-jun-day-WA.csv")

In [87]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2020.drop(index=jun_2020.index[out_of_range], inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6b0b743715b567761bebdff3bed54e65568bff4e,2020-06-25 19:50:00 UTC,auto,hold,712,712,652,WA,North Lynnwood,20,True,False,False,Gas
1,607b4876e05e51451d60f12907138e04c99faed0,2020-06-28 16:00:00 UTC,heat,hold,694,689,689,WA,Shoreline,57,False,False,False,Gas
2,6f6c16cbfe77d7ebf9d680277566dee999e0d4d5,2020-06-30 16:25:00 UTC,auto,hold,719,716,666,WA,Selah,5,False,False,False,Gas
3,5e142f843711dbcd89bb4f5f476ac1633e6815d1,2020-06-08 18:15:00 UTC,auto,auto,710,755,690,WA,Lynden,27,False,False,False,Gas
4,50fb39f63dbf408f1f79357a2ae7660742cc2f01,2020-06-19 18:45:00 UTC,auto,hold,733,733,683,WA,Klahanie,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
558965,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-06-26 19:15:00 UTC,cool,hold,769,760,760,WA,Spokane Valley,47,False,False,False,Gas
558966,b7ca95b07ce36a6a071763cd5db4038726c39688,2020-06-12 15:55:00 UTC,auto,auto,737,760,650,WA,Spokane Valley,39,False,False,False,Gas
558967,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-06-25 19:05:00 UTC,cool,auto,753,760,760,WA,Spokane Valley,47,False,False,False,Gas
558968,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-06-05 15:25:00 UTC,cool,hold,687,760,760,WA,Spokane Valley,47,False,False,False,Gas


In [88]:
# Label every row with its source year and month (both stored as strings).
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=ave_labels)

In [90]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Save the June 2020 averages; index=True writes the group keys held in the index.
jun_2020_ave.to_csv(
    path_or_buf="data/day/WA/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the June 2021 WA day CSV.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2021-jun-day-WA.csv")

In [93]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2021.drop(index=jun_2021.index[out_of_range], inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2aa503669d32eeb4feb50b35883503ba279ce313,2021-06-29 17:00:00 UTC,auto,hold,756,766,696,WA,Greenacres,10,False,False,False,Gas
1,607b4876e05e51451d60f12907138e04c99faed0,2021-06-15 15:50:00 UTC,heat,hold,696,698,698,WA,Shoreline,57,False,False,False,Gas
2,c184709ee30fbd739d5d1af3d9f4e95336e33f75,2021-06-13 16:30:00 UTC,auto,hold,716,723,643,WA,Rainier,50,False,False,True,Electric
3,d087372e33a36ab54464198f8a69d5d7f799efae,2021-06-28 14:45:00 UTC,cool,hold,653,651,651,WA,,0,True,False,True,Electric
4,2968d640576fa151a8a4bae2c9a445efd1cd3de1,2021-06-22 16:15:00 UTC,auto,hold,694,697,647,WA,Buckley,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
357933,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-06-25 17:15:00 UTC,auto,hold,738,760,710,WA,Spokane Valley,47,False,False,False,Gas
357934,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-06-23 18:25:00 UTC,auto,hold,738,760,710,WA,Spokane Valley,47,False,False,False,Gas
357935,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-06-26 16:25:00 UTC,auto,hold,742,760,710,WA,Spokane Valley,47,False,False,False,Gas
357936,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-06-23 15:10:00 UTC,auto,hold,731,760,710,WA,Spokane Valley,47,False,False,False,Gas


In [94]:
# Label every row with its source year and month (both stored as strings).
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=ave_labels)

In [96]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Save the June 2021 averages; index=True writes the group keys held in the index.
jun_2021_ave.to_csv(
    path_or_buf="data/day/WA/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# List the CSV files in the June folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined table reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WA/jun/") if f.endswith(".csv"))

In [99]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file once and concatenate in a single call; growing the frame
# with pd.concat inside a loop re-copies the accumulated rows on every pass and
# relies on concatenating onto an empty DataFrame, which newer pandas deprecates.
frames = [pd.read_csv("data/day/WA/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files exist.
WA_jun = pd.concat(frames) if frames else pd.DataFrame()

WA_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,02f68c89b6dfd5538aec1991aad5ebc824d840e2,jun,2017,heat,auto,Seattle,682.755245,779.000000,680.000000,65.0,False,False,False
1,035f0c14a560e0656ad645cfa397bafc8e0b8bce,jun,2017,heat,hold,sammamish,693.392560,672.712382,660.719045,5.0,True,False,False
2,038f6de02ac57361e4c383448667f66eff4e81f9,jun,2017,cool,auto,Issaquah,696.750000,710.000000,613.583333,17.0,False,False,True
3,042f2f6da4906ec405634547472d598154b6e3fc,jun,2017,cool,auto,West Richland,739.262791,760.976744,651.989535,25.0,False,False,True
4,04449f62ff931a61cbb48eadd2757093f5c9d3af,jun,2017,auto,auto,Richland,723.469428,730.000000,650.000000,70.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
930,fe8591238eb60be06f35ae8dded14551efdfad35,jun,2021,heat,hold,Maple Valley,670.000000,677.400000,670.400000,20.0,False,False,False
931,feb039fb97c44493a5351770a1e1ae13d38959a5,jun,2021,heat,hold,Bellingham,695.207792,650.363636,640.047619,95.0,False,False,False
932,fef8934c83e6b29016ef5b68074fc2d5a4da07f0,jun,2021,cool,hold,Kennewick,733.960382,736.644901,736.597946,0.0,True,False,True
933,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,jun,2021,cool,hold,Yakima,713.561385,719.325103,719.325103,25.0,False,False,False


In [100]:
# Save the combined June table; index=False leaves the row index out of the file.
WA_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WA/WA_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 WA day CSV.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2017-jul-day-WA.csv")

In [102]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2017.drop(index=jul_2017.index[out_of_range], inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,00fb48d1cbd52525aa51333669e22d5eae06f262,2017-07-07 18:15:00 UTC,cool,hold,730,795,790,WA,Greenacres,50,False,False,False,Gas
3,5e142f843711dbcd89bb4f5f476ac1633e6815d1,2017-07-11 19:10:00 UTC,auto,hold,706,743,693,WA,Lynden,27,False,False,False,Gas
4,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-07-21 19:10:00 UTC,auto,auto,701,717,696,WA,Duvall,5,False,False,False,Gas
6,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-07-06 19:25:00 UTC,auto,auto,709,717,697,WA,Duvall,5,False,False,False,Gas
7,5e142f843711dbcd89bb4f5f476ac1633e6815d1,2017-07-19 17:45:00 UTC,auto,auto,724,706,656,WA,Lynden,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
244521,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-07-17 16:00:00 UTC,auto,auto,744,760,660,WA,Spokane Valley,40,False,False,False,Gas
244522,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-07-25 14:15:00 UTC,auto,auto,716,760,660,WA,Spokane Valley,40,False,False,False,Gas
244523,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-07-20 15:45:00 UTC,auto,auto,732,760,660,WA,Spokane Valley,40,False,False,False,Gas
244524,c0522086d24320d88a6de0cfc363c999e6a7d61e,2017-07-31 17:50:00 UTC,auto,auto,759,760,620,WA,Spokane Valley,5,False,False,False,Gas


In [103]:
# Label every row with its source year and month (both stored as strings).
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=ave_labels)

In [105]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Save the July 2017 averages; index=True writes the group keys held in the index.
jul_2017_ave.to_csv(
    path_or_buf="data/day/WA/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the July 2018 WA day CSV.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2018-jul-day-WA.csv")

In [108]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2018.drop(index=jul_2018.index[out_of_range], inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,06069495ddfa64fe44855441e844467e819dc742,2018-07-17 19:10:00 UTC,auto,hold,730,732,670,WA,Maple Valley,15,True,False,False,Gas
1,d74a7bff7ea9ce1b6b961560a106f920f5fc4254,2018-07-14 15:25:00 UTC,cool,hold,690,709,709,WA,Lacey,0,False,False,True,Electric
2,a8f78becfdeafcd15e740373b6f71fe8a7e8b60d,2018-07-23 13:05:00 UTC,auto,auto,813,840,680,WA,Toppenish,17,True,False,True,Electric
3,2cb6c68d39b98d3f2869f13f76687e5d7b6fa2cc,2018-07-29 15:55:00 UTC,cool,auto,756,810,722,WA,pullman,10,False,False,False,Gas
4,a8f78becfdeafcd15e740373b6f71fe8a7e8b60d,2018-07-18 14:25:00 UTC,auto,auto,801,840,680,WA,Toppenish,17,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415607,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2018-07-31 15:10:00 UTC,cool,hold,746,760,760,WA,Spokane Valley,47,False,False,False,Gas
415608,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-07-11 15:00:00 UTC,auto,auto,725,760,680,WA,Spokane Valley,40,False,False,False,Gas
415609,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-07-03 13:35:00 UTC,cool,auto,716,760,650,WA,Spokane Valley,7,False,False,False,Gas
415610,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-07-15 19:45:00 UTC,cool,auto,736,760,650,WA,Spokane Valley,7,False,False,False,Gas


In [109]:
# Label every row with its source year and month (both stored as strings).
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=ave_labels)

In [111]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Save the July 2018 averages; index=True writes the group keys held in the index.
jul_2018_ave.to_csv(
    path_or_buf="data/day/WA/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the July 2019 WA day CSV.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2019-jul-day-WA.csv")

In [114]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2019.drop(index=jul_2019.index[out_of_range], inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,806efb827c0eae0f5beea97ee97d02aba96920cd,2019-07-02 19:10:00 UTC,auto,hold,705,713,663,WA,Longview,10,True,False,True,Electric
1,a507c5ae8fc5894d8b75b7cc0f4cdc38ee1acf0a,2019-07-08 18:35:00 UTC,auto,hold,730,753,703,WA,Steilacoom,50,False,False,False,Gas
2,4a7cb170f10ebad94b27de5f60114c70ffcc5f4c,2019-07-15 09:55:00 UTC,cool,hold,726,742,742,WA,Manson,0,True,False,True,Electric
5,a507c5ae8fc5894d8b75b7cc0f4cdc38ee1acf0a,2019-07-27 15:20:00 UTC,auto,hold,752,703,623,WA,Steilacoom,50,False,False,False,Gas
6,6d7bc3daad173947535fc68f4321ed5fbcd8f65d,2019-07-16 17:20:00 UTC,auto,hold,689,685,625,WA,Fife,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
593774,9fd474dd68222973e5214b8f5f3f394633562a5e,2019-07-03 19:30:00 UTC,auto,auto,746,760,700,WA,Spokane Valley,10,False,False,False,Gas
593775,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2019-07-23 15:15:00 UTC,cool,hold,716,760,760,WA,Spokane Valley,47,False,False,False,Gas
593776,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2019-07-23 13:40:00 UTC,cool,hold,708,760,760,WA,Spokane Valley,47,False,False,False,Gas
593777,b7ca95b07ce36a6a071763cd5db4038726c39688,2019-07-22 19:10:00 UTC,cool,auto,740,760,760,WA,Spokane Valley,39,False,False,False,Gas


In [115]:
# Label every row with its source year and month (both stored as strings).
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=ave_labels)

In [117]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Save the July 2019 averages; index=True writes the group keys held in the index.
jul_2019_ave.to_csv(
    path_or_buf="data/day/WA/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the July 2020 WA day CSV.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2020-jul-day-WA.csv")

In [120]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2020.drop(index=jul_2020.index[out_of_range], inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a56e1e8f35a7803368e77beea18a9f35da2de241,2020-07-30 15:05:00 UTC,cool,hold,777,795,790,WA,Leavenworth,20,True,False,True,Electric
1,607b4876e05e51451d60f12907138e04c99faed0,2020-07-05 15:35:00 UTC,heat,hold,687,689,689,WA,Shoreline,57,False,False,False,Gas
2,6b867254a7d2404c7d24b47135f9f6a41ff150c1,2020-07-27 15:20:00 UTC,auto,auto,680,686,630,WA,Kelso,20,False,False,True,Electric
3,4a7cb170f10ebad94b27de5f60114c70ffcc5f4c,2020-07-23 15:20:00 UTC,cool,hold,754,765,765,WA,Manson,0,True,False,True,Electric
4,25d90ac98d87776e64bb6d04d82909f97dd150ea,2020-07-27 13:35:00 UTC,cool,hold,723,774,774,WA,Moses Lake,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
572536,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-07-07 16:20:00 UTC,cool,auto,716,760,760,WA,Spokane Valley,47,False,False,False,Gas
572537,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-07-08 15:45:00 UTC,cool,auto,707,760,760,WA,Spokane Valley,47,False,False,False,Gas
572538,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-07-08 14:00:00 UTC,cool,auto,700,760,760,WA,Spokane Valley,47,False,False,False,Gas
572539,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-07-08 17:15:00 UTC,cool,auto,709,760,760,WA,Spokane Valley,47,False,False,False,Gas


In [121]:
# Label every row with its source year and month (both stored as strings).
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=ave_labels)

In [123]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Save the July 2020 averages; index=True writes the group keys held in the index.
jul_2020_ave.to_csv(
    path_or_buf="data/day/WA/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the July 2021 WA day CSV.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/WA-day/2021-jul-day-WA.csv")

In [126]:
# Drop rows where either expected temperature (cool or heat) is >= 850 or <= 600.
# Rows with a missing value fail both comparisons and are therefore kept.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2021.drop(index=jul_2021.index[out_of_range], inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7727cb56d25bb11ff9adbd0e2f0528f5f6767aed,2021-07-05 18:25:00 UTC,cool,hold,721,733,733,WA,Maple Valley,0,True,False,False,Gas
3,360d78e4c7af7940b12c7d6c1f46b528ff50582f,2021-07-17 19:45:00 UTC,auto,hold,767,815,765,WA,Duvall,18,False,False,False,Gas
4,856bf6c249de8e8e76d12d9656f857eef51e202d,2021-07-04 16:40:00 UTC,cool,hold,712,757,757,WA,Kingston,0,True,False,True,Electric
5,7727cb56d25bb11ff9adbd0e2f0528f5f6767aed,2021-07-03 19:00:00 UTC,cool,hold,733,733,733,WA,Maple Valley,0,True,False,False,Gas
6,6f6c16cbfe77d7ebf9d680277566dee999e0d4d5,2021-07-14 15:30:00 UTC,auto,hold,727,727,677,WA,Selah,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289991,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-07-23 19:45:00 UTC,cool,hold,739,750,750,WA,Spokane Valley,47,False,False,False,Gas
290001,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-07-14 14:10:00 UTC,cool,hold,709,760,760,WA,Spokane Valley,47,False,False,False,Gas
290002,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-07-14 15:50:00 UTC,cool,hold,715,760,760,WA,Spokane Valley,47,False,False,False,Gas
290003,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2021-07-14 16:25:00 UTC,cool,hold,723,760,760,WA,Spokane Valley,47,False,False,False,Gas


In [127]:
# Label every row with its source year and month (both stored as strings).
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Mark the three temperature columns as averages ahead of the aggregation step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=ave_labels)

In [129]:
# Average the numeric columns per Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True leaves non-key text columns (e.g. date_time) out of the mean;
# without it pandas >= 2.0 raises TypeError on them instead of silently dropping them.
# NOTE(review): groupby skips rows whose key is NaN by default, so readings with a
# missing City are not aggregated - confirm that is intended.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Save the July 2021 averages; index=True writes the group keys held in the index.
jul_2021_ave.to_csv(
    path_or_buf="data/day/WA/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# List the CSV files in the July folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined table reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WA/jul/") if f.endswith(".csv"))

In [132]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file once and concatenate in a single call; growing the frame
# with pd.concat inside a loop re-copies the accumulated rows on every pass and
# relies on concatenating onto an empty DataFrame, which newer pandas deprecates.
frames = [pd.read_csv("data/day/WA/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files exist.
WA_jul = pd.concat(frames) if frames else pd.DataFrame()

WA_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00fb48d1cbd52525aa51333669e22d5eae06f262,jul,2017,cool,hold,Greenacres,727.366972,792.183486,790.000000,50.0,False,False,False
1,02bcc0b8a0b37f0204b0d53afbe490b4f856b2bb,jul,2017,cool,hold,Richland,720.714286,720.000000,720.000000,70.0,True,False,True
2,038f6de02ac57361e4c383448667f66eff4e81f9,jul,2017,cool,hold,Issaquah,700.208333,700.416667,698.333333,17.0,False,False,True
3,042f2f6da4906ec405634547472d598154b6e3fc,jul,2017,cool,auto,West Richland,738.677596,739.677596,650.000000,25.0,False,False,True
4,04449f62ff931a61cbb48eadd2757093f5c9d3af,jul,2017,auto,hold,Richland,746.846154,749.249417,650.000000,70.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
560,fca85463bc93d6be856df0c32a4bce76a09d27b6,jul,2021,heat,hold,Bothell,695.333333,698.000000,700.666667,25.0,False,False,False
561,fcb7d0ea0f57fb7544aadbc0e31620d53e44db8a,jul,2021,auto,hold,Colbert,724.085470,731.346154,660.711538,20.0,False,False,False
562,fef8934c83e6b29016ef5b68074fc2d5a4da07f0,jul,2021,cool,hold,Kennewick,750.236480,757.266728,757.266728,0.0,True,False,True
563,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,jul,2021,cool,hold,Yakima,719.565780,716.414634,716.414634,25.0,False,False,False


In [133]:
# Write the combined July frame without its (repeating, per-file) index.
WA_jul.to_csv("Scraper_Output/State_Month_Day/WA/WA_jul.csv", index=False)

---

## August

### 2017 August Day

In [134]:
# Load the raw August 2017 export for WA.
aug_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2017-aug-day-WA.csv",
)

In [135]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2017.drop(
    index=aug_2017.index[
        (aug_2017['TemperatureExpectedCool'] >= 850)
        | (aug_2017['TemperatureExpectedHeat'] >= 850)
        | (aug_2017['TemperatureExpectedCool'] <= 600)
        | (aug_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,78abf2ea6bdcf569be6e0675588ad3d5b1aa8cde,2017-08-13 18:30:00 UTC,auto,hold,732,764,694,WA,Graham,10,False,False,True,Electric
3,19f5ae392e4335e5b28f5e2762003325c04dd3e9,2017-08-19 13:45:00 UTC,auto,auto,684,685,635,WA,Colbert,0,False,False,False,Gas
4,5bb725bcc8dd210e0c3500ecea7fdbbc57a86403,2017-08-06 17:25:00 UTC,heat,hold,735,779,644,WA,issaquah,25,False,False,False,Gas
6,5bb725bcc8dd210e0c3500ecea7fdbbc57a86403,2017-08-06 14:55:00 UTC,heat,hold,720,779,644,WA,issaquah,25,False,False,False,Gas
7,36318fcb544174abebcffdbd2e45415c9ac165fc,2017-08-18 19:10:00 UTC,auto,auto,706,717,697,WA,Duvall,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253296,c0522086d24320d88a6de0cfc363c999e6a7d61e,2017-08-18 15:45:00 UTC,cool,auto,725,760,620,WA,Spokane Valley,5,False,False,False,Gas
253297,c0522086d24320d88a6de0cfc363c999e6a7d61e,2017-08-20 18:35:00 UTC,auto,auto,723,760,620,WA,Spokane Valley,5,False,False,False,Gas
253298,c0522086d24320d88a6de0cfc363c999e6a7d61e,2017-08-09 15:10:00 UTC,auto,auto,732,760,620,WA,Spokane Valley,5,False,False,False,Gas
253299,c0522086d24320d88a6de0cfc363c999e6a7d61e,2017-08-19 14:35:00 UTC,cool,auto,731,760,620,WA,Spokane Valley,5,False,False,False,Gas


In [136]:
# Stamp every reading with its year and month; both are used as grouping keys below.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Save the August 2017 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
aug_2017_ave.to_csv("data/day/WA/aug/aug_2017_ave.csv")

### 2018 August Day

In [140]:
# Load the raw August 2018 export for WA.
aug_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2018-aug-day-WA.csv",
)

In [141]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2018.drop(
    index=aug_2018.index[
        (aug_2018['TemperatureExpectedCool'] >= 850)
        | (aug_2018['TemperatureExpectedHeat'] >= 850)
        | (aug_2018['TemperatureExpectedCool'] <= 600)
        | (aug_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,135fe8e4c9cced46d18754ccce1efee55f08667a,2018-08-20 16:55:00 UTC,auto,auto,719,755,715,WA,Shelton,20,True,False,True,Electric
4,135fe8e4c9cced46d18754ccce1efee55f08667a,2018-08-25 16:15:00 UTC,auto,auto,701,748,708,WA,Shelton,20,True,False,True,Electric
5,6f905dab669493b777218abec1f282384fba57f7,2018-08-25 16:55:00 UTC,auto,hold,650,840,650,WA,Buckley,20,False,False,False,Gas
6,1389d87081f3336bea6e1f3f37a8d0fa465773d7,2018-08-15 17:20:00 UTC,auto,hold,670,696,646,WA,Kelso,0,True,False,True,Electric
9,135fe8e4c9cced46d18754ccce1efee55f08667a,2018-08-31 15:30:00 UTC,auto,auto,709,738,718,WA,Shelton,20,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
434135,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-08-05 15:30:00 UTC,cool,auto,748,760,650,WA,Spokane Valley,7,False,False,False,Gas
434136,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-08-18 15:00:00 UTC,cool,auto,750,760,650,WA,Spokane Valley,7,False,False,False,Gas
434137,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-08-13 13:40:00 UTC,cool,auto,737,760,650,WA,Spokane Valley,7,False,False,False,Gas
434138,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-08-29 12:55:00 UTC,cool,auto,693,760,650,WA,Spokane Valley,7,False,False,False,Gas


In [142]:
# Stamp every reading with its year and month; both are used as grouping keys below.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Save the August 2018 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
aug_2018_ave.to_csv("data/day/WA/aug/aug_2018_ave.csv")

### 2019 August Day

In [146]:
# Load the raw August 2019 export for WA.
aug_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2019-aug-day-WA.csv",
)

In [147]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2019.drop(
    index=aug_2019.index[
        (aug_2019['TemperatureExpectedCool'] >= 850)
        | (aug_2019['TemperatureExpectedHeat'] >= 850)
        | (aug_2019['TemperatureExpectedCool'] <= 600)
        | (aug_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2234a1cc57e3f3272badb2d68217b38881bd0af7,2019-08-08 19:30:00 UTC,heat,hold,730,733,733,WA,Fircrest,40,False,False,False,Gas
3,c184709ee30fbd739d5d1af3d9f4e95336e33f75,2019-08-31 13:30:00 UTC,auto,hold,695,698,618,WA,Rainier,50,False,False,True,Electric
5,36318fcb544174abebcffdbd2e45415c9ac165fc,2019-08-15 19:15:00 UTC,auto,hold,699,698,678,WA,Duvall,5,False,False,False,Gas
6,411c8de7ca48740e170056d9f091ee69d2ab5456,2019-08-29 14:25:00 UTC,cool,hold,678,675,675,WA,University Place,59,True,False,False,Gas
7,d05718c39dcc73e214e29957ade540484e37aedb,2019-08-11 18:00:00 UTC,auto,hold,697,732,702,WA,Shelton,15,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
580672,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2019-08-09 15:25:00 UTC,cool,auto,732,760,760,WA,Spokane Valley,47,False,False,False,Gas
580673,9fd474dd68222973e5214b8f5f3f394633562a5e,2019-08-10 19:35:00 UTC,auto,hold,764,760,690,WA,Spokane Valley,10,False,False,False,Gas
580674,0c934c0fae5270491f44ada0ec919994dc79c6c0,2019-08-15 16:50:00 UTC,auto,auto,733,760,700,WA,Spokane Valley,40,False,False,False,Gas
580675,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2019-08-14 18:55:00 UTC,cool,hold,742,760,760,WA,Spokane Valley,47,False,False,False,Gas


In [148]:
# Stamp every reading with its year and month; both are used as grouping keys below.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Save the August 2019 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
aug_2019_ave.to_csv("data/day/WA/aug/aug_2019_ave.csv")

### 2020 August Day

In [152]:
# Load the raw August 2020 export for WA.
aug_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2020-aug-day-WA.csv",
)

In [153]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2020.drop(
    index=aug_2020.index[
        (aug_2020['TemperatureExpectedCool'] >= 850)
        | (aug_2020['TemperatureExpectedHeat'] >= 850)
        | (aug_2020['TemperatureExpectedCool'] <= 600)
        | (aug_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,607b4876e05e51451d60f12907138e04c99faed0,2020-08-15 16:05:00 UTC,heat,hold,707,689,689,WA,Shoreline,57,False,False,False,Gas
2,a56e1e8f35a7803368e77beea18a9f35da2de241,2020-08-04 17:50:00 UTC,cool,hold,769,795,790,WA,Leavenworth,20,True,False,True,Electric
3,2bbc85849c4334c7efad2fea32c5dbee4c2fee3d,2020-08-30 17:15:00 UTC,cool,hold,707,658,646,WA,Dayton,57,False,False,False,Gas
4,c184709ee30fbd739d5d1af3d9f4e95336e33f75,2020-08-19 12:45:00 UTC,auto,hold,706,704,604,WA,Rainier,50,False,False,True,Electric
5,160ebcafca6c48f4aef05e37f83334afe9f2282e,2020-08-29 18:15:00 UTC,auto,hold,715,719,669,WA,Fife,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
557183,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-08-12 17:20:00 UTC,cool,auto,711,760,760,WA,Spokane Valley,47,False,False,False,Gas
557184,0c934c0fae5270491f44ada0ec919994dc79c6c0,2020-08-26 16:45:00 UTC,auto,auto,707,760,690,WA,Spokane Valley,40,False,False,False,Gas
557185,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-08-12 15:35:00 UTC,cool,auto,711,760,760,WA,Spokane Valley,47,False,False,False,Gas
557186,b5420ee9899999d0bf392fdf370f9683f8cf9ba9,2020-08-12 17:25:00 UTC,cool,auto,712,760,760,WA,Spokane Valley,47,False,False,False,Gas


In [154]:
# Stamp every reading with its year and month; both are used as grouping keys below.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Save the August 2020 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
aug_2020_ave.to_csv("data/day/WA/aug/aug_2020_ave.csv")

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder
2. Export them as one combined CSV

In [158]:
# List the per-year CSV exports for August. os.listdir returns names in arbitrary
# order, so sort them to keep the combined file's row order reproducible.
files = sorted(f for f in os.listdir("data/day/WA/aug/") if f.endswith(".csv"))

In [159]:
# Read every per-year file and stack them with one concat; concatenating inside a
# loop re-copies all previously accumulated rows on each pass.
# pd.concat rejects an empty list, so fall back to an empty frame when no files exist.
WA_aug = (
    pd.concat([pd.read_csv("data/day/WA/aug/" + name) for name in files])
    if files
    else pd.DataFrame()
)

WA_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00fb48d1cbd52525aa51333669e22d5eae06f262,aug,2017,auto,hold,Greenacres,693.000000,657.000000,602.000000,50.0,False,False,False
1,019a1d7273650d66544a79b146b515ddfa3f0531,aug,2017,cool,hold,Vancouver,701.909091,715.727273,715.727273,5.0,False,False,True
2,02bcc0b8a0b37f0204b0d53afbe490b4f856b2bb,aug,2017,cool,auto,Richland,728.562500,720.000000,650.000000,70.0,True,False,True
3,038f6de02ac57361e4c383448667f66eff4e81f9,aug,2017,cool,auto,Issaquah,720.500000,690.000000,690.000000,17.0,False,False,True
4,038f6de02ac57361e4c383448667f66eff4e81f9,aug,2017,cool,hold,Issaquah,693.470588,687.294118,687.294118,17.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1087,fcb7d0ea0f57fb7544aadbc0e31620d53e44db8a,aug,2020,auto,hold,Colbert,728.868313,735.566667,650.000000,20.0,False,False,False
1088,fef8934c83e6b29016ef5b68074fc2d5a4da07f0,aug,2020,cool,hold,Kennewick,724.901985,730.000000,730.000000,0.0,True,False,True
1089,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,aug,2020,auto,hold,Yakima,740.007218,763.828740,687.930446,25.0,False,False,False
1090,ff9d09ab72e971e34656b99dd7c9149bf12f7abe,aug,2020,auto,auto,Issaquah,762.409091,763.636364,710.000000,10.0,False,False,False


In [160]:
# Write the combined August frame without its (repeating, per-file) index.
WA_aug.to_csv("Scraper_Output/State_Month_Day/WA/WA_aug.csv", index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw December 2017 export for WA.
dec_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2017-dec-day-WA.csv",
)

In [162]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2017.drop(
    index=dec_2017.index[
        (dec_2017['TemperatureExpectedCool'] >= 850)
        | (dec_2017['TemperatureExpectedHeat'] >= 850)
        | (dec_2017['TemperatureExpectedCool'] <= 600)
        | (dec_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2968d640576fa151a8a4bae2c9a445efd1cd3de1,2017-12-24 07:10:00 UTC,auto,hold,704,755,705,WA,Buckley,5,False,False,False,Gas
1,bf856ad3b32ba9c8fb96f74d4dc0d3dadf5518ee,2017-12-21 07:20:00 UTC,heat,hold,714,719,719,WA,Fircrest,40,False,False,False,Gas
2,2234a1cc57e3f3272badb2d68217b38881bd0af7,2017-12-19 13:35:00 UTC,heat,hold,731,731,731,WA,Fircrest,40,False,False,False,Gas
3,a8f78becfdeafcd15e740373b6f71fe8a7e8b60d,2017-12-16 19:20:00 UTC,auto,auto,687,840,640,WA,Toppenish,17,True,False,True,Electric
4,2234a1cc57e3f3272badb2d68217b38881bd0af7,2017-12-19 19:05:00 UTC,heat,hold,730,731,731,WA,Fircrest,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
391847,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2017-12-22 14:35:00 UTC,heat,auto,662,760,660,WA,Spokane Valley,7,False,False,False,Gas
391848,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-12-24 15:35:00 UTC,auto,auto,694,760,710,WA,Spokane Valley,40,False,False,False,Gas
391849,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2017-12-08 14:35:00 UTC,heat,auto,681,760,680,WA,Spokane Valley,7,False,False,False,Gas
391850,0c934c0fae5270491f44ada0ec919994dc79c6c0,2017-12-24 16:45:00 UTC,auto,auto,712,760,710,WA,Spokane Valley,40,False,False,False,Gas


In [163]:
# Stamp every reading with its year and month; both are used as grouping keys below.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Save the December 2017 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
dec_2017_ave.to_csv("data/day/WA/dec/dec_2017_ave.csv")

### 2018 December Day

In [167]:
# Load the raw December 2018 export for WA.
dec_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2018-dec-day-WA.csv",
)

In [168]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2018.drop(
    index=dec_2018.index[
        (dec_2018['TemperatureExpectedCool'] >= 850)
        | (dec_2018['TemperatureExpectedHeat'] >= 850)
        | (dec_2018['TemperatureExpectedCool'] <= 600)
        | (dec_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2234a1cc57e3f3272badb2d68217b38881bd0af7,2018-12-30 07:10:00 UTC,heat,hold,740,742,742,WA,Fircrest,40,False,False,False,Gas
1,59d38005a8042f63f116c018cde42bdcd5ad9334,2018-12-19 07:15:00 UTC,heat,hold,754,756,756,WA,Sumner,0,False,False,False,Gas
2,982d73694a28f9128af91f76cc248509be1e7a06,2018-12-18 14:30:00 UTC,heat,auto,673,707,670,WA,Moxee,5,False,False,False,Gas
3,31fe11ab0b12531660a2e2e7882f5b691c93eff5,2018-12-31 18:50:00 UTC,heat,auto,655,801,720,WA,Yelm,0,False,False,False,Gas
4,6f6c16cbfe77d7ebf9d680277566dee999e0d4d5,2018-12-12 19:25:00 UTC,auto,hold,713,766,716,WA,Selah,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
574926,ab49fc85312897420c3f1f9eba9e0a303f3dc559,2018-12-22 16:00:00 UTC,auto,auto,652,760,650,WA,Spokane Valley,7,False,False,False,Gas
574927,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-12-04 17:55:00 UTC,auto,auto,705,760,710,WA,Spokane Valley,40,False,False,False,Gas
574928,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-12-08 17:40:00 UTC,auto,auto,699,760,710,WA,Spokane Valley,40,False,False,False,Gas
574929,0c934c0fae5270491f44ada0ec919994dc79c6c0,2018-12-27 17:25:00 UTC,auto,auto,700,760,710,WA,Spokane Valley,40,False,False,False,Gas


In [169]:
# Stamp every reading with its year and month; both are used as grouping keys below.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Save the December 2018 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
dec_2018_ave.to_csv("data/day/WA/dec/dec_2018_ave.csv")

### 2019 December Day

In [173]:
# Load the raw December 2019 export for WA.
dec_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2019-dec-day-WA.csv",
)

In [174]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2019.drop(
    index=dec_2019.index[
        (dec_2019['TemperatureExpectedCool'] >= 850)
        | (dec_2019['TemperatureExpectedHeat'] >= 850)
        | (dec_2019['TemperatureExpectedCool'] <= 600)
        | (dec_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0bd80b780d9ddea1ab114b066ceebba8e12dceb8,2019-12-06 19:40:00 UTC,heat,hold,714,717,717,WA,Lake Tapps,10,False,False,False,Gas
1,d087372e33a36ab54464198f8a69d5d7f799efae,2019-12-10 17:30:00 UTC,heat,hold,689,658,658,WA,,0,True,False,True,Electric
2,c184709ee30fbd739d5d1af3d9f4e95336e33f75,2019-12-29 16:15:00 UTC,auto,hold,705,763,713,WA,Rainier,50,False,False,True,Electric
4,a84d9f4b2c7693647f53816e6a407dc8bcbcada9,2019-12-08 18:40:00 UTC,heat,hold,699,698,698,WA,Mill Creek,0,False,False,False,Gas
5,5e142f843711dbcd89bb4f5f476ac1633e6815d1,2019-12-05 18:00:00 UTC,auto,auto,716,755,690,WA,Lynden,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
623355,2db9bb8fd78474acbb163d049dc01c8887b2994e,2019-12-31 17:40:00 UTC,heat,hold,701,740,740,WA,Spokane Valley,50,True,False,False,Gas
623356,5f8e08c092689466ca938f8e22aecde60063d84b,2019-12-03 15:10:00 UTC,heat,auto,687,750,650,WA,Spokane Valley,40,True,False,False,Gas
623357,5f8e08c092689466ca938f8e22aecde60063d84b,2019-12-30 07:25:00 UTC,heat,auto,684,750,677,WA,Spokane Valley,40,True,False,False,Gas
623358,5f8e08c092689466ca938f8e22aecde60063d84b,2019-12-28 17:45:00 UTC,heat,auto,674,750,660,WA,Spokane Valley,40,True,False,False,Gas


In [175]:
# Stamp every reading with its year and month; both are used as grouping keys below.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, and the preview of
# dec_2019 shows a reading with an empty City, so such readings are excluded from
# the averages -- confirm that is intended.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Save the December 2019 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
dec_2019_ave.to_csv("data/day/WA/dec/dec_2019_ave.csv")

### 2020 December Day

In [179]:
# Load the raw December 2020 export for WA.
dec_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/WA-day/2020-dec-day-WA.csv",
)

In [180]:
# Remove predetermined outliers before aggregating: drop, in one pass, every reading
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2020.drop(
    index=dec_2020.index[
        (dec_2020['TemperatureExpectedCool'] >= 850)
        | (dec_2020['TemperatureExpectedHeat'] >= 850)
        | (dec_2020['TemperatureExpectedCool'] <= 600)
        | (dec_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,93949aace64e11360a850909aceeafd6d6f37900,2020-12-26 17:25:00 UTC,heat,hold,637,693,693,WA,Washougal,15,False,False,False,Gas
1,fcb7d0ea0f57fb7544aadbc0e31620d53e44db8a,2020-12-05 17:45:00 UTC,heat,hold,701,702,702,WA,Colbert,20,False,False,False,Gas
3,93949aace64e11360a850909aceeafd6d6f37900,2020-12-31 14:15:00 UTC,heat,hold,657,681,660,WA,Washougal,15,False,False,False,Gas
4,9f93be13a3d66b0f019c730e48b8974cd8703bf9,2020-12-11 16:00:00 UTC,auto,hold,696,747,697,WA,Centralia,90,False,False,False,Gas
6,982d73694a28f9128af91f76cc248509be1e7a06,2020-12-03 14:40:00 UTC,heat,auto,673,727,680,WA,Moxee,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
548343,c0522086d24320d88a6de0cfc363c999e6a7d61e,2020-12-01 12:25:00 UTC,heat,auto,745,750,750,WA,Spokane Valley,5,False,False,False,Gas
548344,c0522086d24320d88a6de0cfc363c999e6a7d61e,2020-12-01 08:05:00 UTC,heat,auto,751,750,750,WA,Spokane Valley,5,False,False,False,Gas
548345,c0522086d24320d88a6de0cfc363c999e6a7d61e,2020-12-01 07:30:00 UTC,heat,auto,746,750,750,WA,Spokane Valley,5,False,False,False,Gas
548346,c0522086d24320d88a6de0cfc363c999e6a7d61e,2020-12-01 11:10:00 UTC,heat,auto,745,750,750,WA,Spokane Valley,5,False,False,False,Gas


In [181]:
# Stamp every reading with its year and month; both are used as grouping keys below.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
# NOTE(review): groupby drops rows whose key is NaN by default, so readings with a
# missing City would be excluded here -- confirm that is intended.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Save the December 2020 averages. The group keys live in the index, which to_csv
# writes (together with the header) by default.
dec_2020_ave.to_csv("data/day/WA/dec/dec_2020_ave.csv")

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder
2. Export them as one combined CSV

In [185]:
# List the per-year CSV exports for December. os.listdir returns names in arbitrary
# order, so sort them to keep the combined file's row order reproducible.
files = sorted(f for f in os.listdir("data/day/WA/dec/") if f.endswith(".csv"))

In [186]:
# Read every per-year file and stack them with one concat; concatenating inside a
# loop re-copies all previously accumulated rows on each pass.
# pd.concat rejects an empty list, so fall back to an empty frame when no files exist.
WA_dec = (
    pd.concat([pd.read_csv("data/day/WA/dec/" + name) for name in files])
    if files
    else pd.DataFrame()
)

WA_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019a1d7273650d66544a79b146b515ddfa3f0531,dec,2017,heat,hold,Vancouver,684.434783,721.043478,719.695652,5.0,False,False,True
1,0232fcb7383720194f81e9ed746d72364c2b60b6,dec,2017,auto,hold,Seattle,684.000000,771.000000,690.000000,10.0,False,False,False
2,0232fcb7383720194f81e9ed746d72364c2b60b6,dec,2017,cool,auto,Seattle,711.000000,682.000000,682.000000,10.0,False,False,False
3,0232fcb7383720194f81e9ed746d72364c2b60b6,dec,2017,cool,hold,Seattle,698.750000,691.625000,691.625000,10.0,False,False,False
4,0232fcb7383720194f81e9ed746d72364c2b60b6,dec,2017,heat,auto,Seattle,689.500000,701.366667,698.933333,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1338,fe7fec41cb28daf1bda70c75e42106ddf1526a4c,dec,2020,heat,auto,Issaquah,682.721053,771.197368,683.157895,57.0,False,False,False
1339,fe8591238eb60be06f35ae8dded14551efdfad35,dec,2020,heat,hold,Maple Valley,661.305556,700.791667,698.159722,20.0,False,False,False
1340,feb039fb97c44493a5351770a1e1ae13d38959a5,dec,2020,heat,hold,Bellingham,616.142857,732.142857,725.285714,95.0,False,False,False
1341,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,dec,2020,heat,hold,Yakima,695.232819,699.045348,699.045348,25.0,False,False,False


In [187]:
# Write the combined December frame without its (repeating, per-file) index.
WA_dec.to_csv("Scraper_Output/State_Month_Day/WA/WA_dec.csv", index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files for the state
2. Export them as one combined CSV

In [188]:
# List the combined monthly CSVs for WA. os.listdir returns names in arbitrary
# order, so sort them to keep the combined file's row order reproducible.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/WA/") if f.endswith(".csv")
)

In [189]:
# Read every monthly file and stack them with one concat; concatenating inside a
# loop re-copies all previously accumulated rows on each pass.
# pd.concat rejects an empty list, so fall back to an empty frame when no files exist.
WA_all = (
    pd.concat(
        [pd.read_csv("Scraper_Output/State_Month_Day/WA/" + name) for name in files]
    )
    if files
    else pd.DataFrame()
)

WA_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00fb48d1cbd52525aa51333669e22d5eae06f262,aug,2017,auto,hold,Greenacres,693.000000,657.000000,602.000000,50.0,False,False,False
1,019a1d7273650d66544a79b146b515ddfa3f0531,aug,2017,cool,hold,Vancouver,701.909091,715.727273,715.727273,5.0,False,False,True
2,02bcc0b8a0b37f0204b0d53afbe490b4f856b2bb,aug,2017,cool,auto,Richland,728.562500,720.000000,650.000000,70.0,True,False,True
3,038f6de02ac57361e4c383448667f66eff4e81f9,aug,2017,cool,auto,Issaquah,720.500000,690.000000,690.000000,17.0,False,False,True
4,038f6de02ac57361e4c383448667f66eff4e81f9,aug,2017,cool,hold,Issaquah,693.470588,687.294118,687.294118,17.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5276,fe8591238eb60be06f35ae8dded14551efdfad35,jun,2021,heat,hold,Maple Valley,670.000000,677.400000,670.400000,20.0,False,False,False
5277,feb039fb97c44493a5351770a1e1ae13d38959a5,jun,2021,heat,hold,Bellingham,695.207792,650.363636,640.047619,95.0,False,False,False
5278,fef8934c83e6b29016ef5b68074fc2d5a4da07f0,jun,2021,cool,hold,Kennewick,733.960382,736.644901,736.597946,0.0,True,False,True
5279,ff1c1e3cdbfca9a5a91b1255072b7cd642f7e278,jun,2021,cool,hold,Yakima,713.561385,719.325103,719.325103,25.0,False,False,False


In [190]:
# Write the all-months WA frame without its (repeating, per-file) index.
WA_all.to_csv("Scraper_Output/State_Month_Day/WA_all_day.csv", index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
for label, frame in (
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
):
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['WA']
Unique jan_2018: ['WA']
Unique jan_2019: ['WA']
Unique jan_2020: ['WA']
Unique jan_2021: ['WA']
Unique feb_2017: ['WA']
Unique feb_2018: ['WA']
Unique feb_2019: ['WA']
Unique feb_2020: ['WA']
Unique feb_2021: ['WA']
Unique jun_2017: ['WA']
Unique jun_2018: ['WA']
Unique jun_2019: ['WA']
Unique jun_2020: ['WA']
Unique jun_2021: ['WA']
Unique jul_2017: ['WA']
Unique jul_2018: ['WA']
Unique jul_2019: ['WA']
Unique jul_2020: ['WA']
Unique jul_2021: ['WA']
Unique aug_2017: ['WA']
Unique aug_2018: ['WA']
Unique aug_2019: ['WA']
Unique aug_2020: ['WA']
Unique dec_2017: ['WA']
Unique dec_2018: ['WA']
Unique dec_2019: ['WA']
Unique dec_2020: ['WA']
