# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 day readings for Oregon.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2017-jan-day-OR.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (jan_2017[setpoint_cols] >= 850) | (jan_2017[setpoint_cols] <= 600)
jan_2017.drop(index=jan_2017[out_of_range.any(axis=1)].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-01-30 15:40:00 UTC,heat,hold,695,689,689,OR,corvallis,5,False,False,False,Gas
1,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-01-28 19:35:00 UTC,heat,hold,688,689,689,OR,corvallis,5,False,False,False,Gas
2,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-01-25 14:50:00 UTC,heat,hold,685,689,689,OR,corvallis,5,False,False,False,Gas
3,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-01-07 18:30:00 UTC,heat,hold,696,698,698,OR,corvallis,5,False,False,False,Gas
4,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-01-12 17:50:00 UTC,heat,auto,704,698,698,OR,corvallis,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115544,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-01-07 16:30:00 UTC,heat,hold,706,700,700,OR,forest grove,0,False,False,False,Gas
115545,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-01-03 16:20:00 UTC,heat,hold,712,710,710,OR,forest grove,0,False,False,False,Gas
115546,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-01-22 18:30:00 UTC,heat,hold,705,710,710,OR,forest grove,0,False,False,False,Gas
115547,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-01-22 19:05:00 UTC,heat,hold,704,710,710,OR,forest grove,0,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
03caad454a66062fc56b9128d6de2ca5ae4b7996,Jan,2017,heat,hold,Portland,669.295455,670.000000,670.000000,5.0,False,False,False
05923e8afce99ca323843d899da2f0c48ba57cec,Jan,2017,heat,hold,Happy Valley,658.655303,655.276515,645.196970,10.0,False,False,False
0624dc8fec23932f314f9e0f97a6c7fb059446b2,Jan,2017,auto,auto,Portland,719.402655,786.942478,725.176991,0.0,False,False,False
0624dc8fec23932f314f9e0f97a6c7fb059446b2,Jan,2017,auto,hold,Portland,724.582598,796.373310,732.052910,0.0,False,False,False
0d0cf9141a32e41f86fe6fd7539c44bdfb535d74,Jan,2017,auxHeatOnly,hold,Coos Bay,669.458333,650.000000,640.000000,75.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,Jan,2017,heat,auto,Roseburg,669.812500,780.000000,700.000000,20.0,False,False,True
fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,Jan,2017,heat,hold,Roseburg,684.888889,683.361111,679.777778,20.0,False,False,True
fe1baa9f35e9f8ec94a0cd7e1e93b43435a7ee8a,Jan,2017,heat,hold,Albany,684.099237,688.633588,687.961832,120.0,False,False,False
ff17c244fe42179fd802221bc9b6c177b3ad7d3e,Jan,2017,heat,auto,Beaverton,659.113689,819.415313,620.459397,5.0,False,False,False


In [7]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
jan_2017_csv = Path("data/day/OR/jan/jan_2017_ave.csv")
jan_2017_csv.parent.mkdir(parents=True, exist_ok=True)
jan_2017_ave.to_csv(jan_2017_csv, header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 day readings for Oregon.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2018-jan-day-OR.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (jan_2018[setpoint_cols] >= 850) | (jan_2018[setpoint_cols] <= 600)
jan_2018.drop(index=jan_2018[out_of_range.any(axis=1)].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,53a8afb9c2869da7d84a186b864ea3c571c2646c,2018-01-16 18:00:00 UTC,heat,auto,694,719,699,OR,Sheridan,10,False,False,True,Electric
6,78941a9bfdcd014723b1a9ddf3734e555ecea5ab,2018-01-20 17:10:00 UTC,heat,hold,657,655,655,OR,The Dalles,55,True,False,False,Gas
11,78941a9bfdcd014723b1a9ddf3734e555ecea5ab,2018-01-20 14:35:00 UTC,heat,hold,653,655,655,OR,The Dalles,55,True,False,False,Gas
12,78941a9bfdcd014723b1a9ddf3734e555ecea5ab,2018-01-20 14:55:00 UTC,heat,hold,653,655,655,OR,The Dalles,55,True,False,False,Gas
16,53a8afb9c2869da7d84a186b864ea3c571c2646c,2018-01-15 19:00:00 UTC,heat,auto,697,719,699,OR,Sheridan,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226620,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-01-28 17:50:00 UTC,heat,hold,707,700,700,OR,Klamath Falls,29,True,False,False,Gas
226621,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-01-23 19:50:00 UTC,heat,hold,696,700,700,OR,Klamath Falls,29,True,False,False,Gas
226622,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-01-18 18:15:00 UTC,heat,hold,701,700,700,OR,Klamath Falls,29,True,False,False,Gas
226623,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-01-23 17:50:00 UTC,heat,hold,689,700,700,OR,Klamath Falls,29,True,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
jan_2018_csv = Path("data/day/OR/jan/jan_2018_ave.csv")
jan_2018_csv.parent.mkdir(parents=True, exist_ok=True)
jan_2018_ave.to_csv(jan_2018_csv, header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 day readings for Oregon.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2019-jan-day-OR.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (jan_2019[setpoint_cols] >= 850) | (jan_2019[setpoint_cols] <= 600)
jan_2019.drop(index=jan_2019[out_of_range.any(axis=1)].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c112fdef3cc591bfc328e7b5f040668e08165ca,2019-01-11 17:55:00 UTC,heat,auto,756,761,761,OR,North Plains,0,True,False,False,Gas
1,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2019-01-09 15:50:00 UTC,auto,hold,699,752,702,OR,Roseburg,90,False,False,False,Gas
3,d0fd4191a6b97277a0e5ad601efac94c0b9b3bb6,2019-01-03 17:00:00 UTC,auto,hold,648,840,650,OR,Elkton,40,True,False,True,Electric
7,0f6950236a3fc5e05032d841d5e3dd47e144ef02,2019-01-06 17:25:00 UTC,heat,auto,605,779,608,OR,PORTLAND,7,False,False,True,Electric
8,d667da3526d79b5b1165e06da5ea6013ccfb3f13,2019-01-08 19:40:00 UTC,heat,auto,700,775,730,OR,Saint Helens,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
341786,69bce0cead73bc1724ae942dad09e3393da90e97,2019-01-20 18:20:00 UTC,heat,hold,679,725,685,OR,Klamath Falls,40,False,False,False,Gas
341787,69bce0cead73bc1724ae942dad09e3393da90e97,2019-01-20 17:50:00 UTC,heat,hold,687,725,685,OR,Klamath Falls,40,False,False,False,Gas
341788,69bce0cead73bc1724ae942dad09e3393da90e97,2019-01-26 18:00:00 UTC,heat,hold,686,725,685,OR,Klamath Falls,40,False,False,False,Gas
341789,69bce0cead73bc1724ae942dad09e3393da90e97,2019-01-26 18:05:00 UTC,heat,hold,683,725,685,OR,Klamath Falls,40,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
jan_2019_csv = Path("data/day/OR/jan/jan_2019_ave.csv")
jan_2019_csv.parent.mkdir(parents=True, exist_ok=True)
jan_2019_ave.to_csv(jan_2019_csv, header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 day readings for Oregon.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2020-jan-day-OR.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (jan_2020[setpoint_cols] >= 850) | (jan_2020[setpoint_cols] <= 600)
jan_2020.drop(index=jan_2020[out_of_range.any(axis=1)].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,9baeb67c105aa1a3213d6a3c4636f821257c2295,2020-01-10 15:20:00 UTC,auto,hold,662,785,675,OR,Irrigon,20,False,False,False,Gas
2,713f15c7203a2fc2488c73994d47d2077a399cec,2020-01-13 15:55:00 UTC,heat,hold,708,716,716,OR,Cornelius,0,True,False,False,Gas
3,9baeb67c105aa1a3213d6a3c4636f821257c2295,2020-01-06 14:15:00 UTC,auto,hold,700,775,715,OR,Irrigon,20,False,False,False,Gas
4,e214b1ed3bedd6d0a347f642ce888341b6bf025c,2020-01-11 15:25:00 UTC,heat,hold,716,722,722,OR,North Plains,0,False,False,False,Gas
5,53a8afb9c2869da7d84a186b864ea3c571c2646c,2020-01-08 15:35:00 UTC,heat,hold,679,685,685,OR,Sheridan,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
417444,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-01-12 19:20:00 UTC,heat,hold,714,720,720,OR,Klamath Falls,29,True,False,False,Gas
417445,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-01-07 07:00:00 UTC,heat,hold,727,730,730,OR,Klamath Falls,29,True,False,False,Gas
417446,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-01-07 07:10:00 UTC,heat,hold,728,730,730,OR,Klamath Falls,29,True,False,False,Gas
417447,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-01-07 07:05:00 UTC,heat,hold,730,730,730,OR,Klamath Falls,29,True,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
jan_2020_csv = Path("data/day/OR/jan/jan_2020_ave.csv")
jan_2020_csv.parent.mkdir(parents=True, exist_ok=True)
jan_2020_ave.to_csv(jan_2020_csv, header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 day readings for Oregon.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2021-jan-day-OR.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (jan_2021[setpoint_cols] >= 850) | (jan_2021[setpoint_cols] <= 600)
jan_2021.drop(index=jan_2021[out_of_range.any(axis=1)].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,93a6efa7b51384760480622702a81ca561056d45,2021-01-23 18:50:00 UTC,auto,hold,706,753,710,OR,Central Point,9,False,False,False,Gas
1,114702ca6a8da3f42947dd25a6d56a0e60c41541,2021-01-11 19:50:00 UTC,heat,hold,668,671,671,OR,Central point,55,False,False,False,Gas
2,93a6efa7b51384760480622702a81ca561056d45,2021-01-23 17:10:00 UTC,auto,hold,706,753,710,OR,Central Point,9,False,False,False,Gas
3,32dfd347f85241965e760bd0d5bbc8255dabc80b,2021-01-25 19:15:00 UTC,heat,hold,687,685,685,OR,Shedd,27,False,False,False,Gas
4,0d0cf9141a32e41f86fe6fd7539c44bdfb535d74,2021-01-08 17:20:00 UTC,heat,hold,695,691,691,OR,Coos Bay,75,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259157,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-01-13 07:10:00 UTC,heat,hold,729,730,730,OR,Klamath Falls,29,True,False,False,Gas
259158,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-01-01 07:15:00 UTC,heat,hold,723,730,730,OR,Klamath Falls,29,True,False,False,Gas
259159,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-01-19 07:05:00 UTC,heat,hold,723,730,730,OR,Klamath Falls,29,True,False,False,Gas
259160,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-01-12 07:00:00 UTC,heat,hold,722,730,730,OR,Klamath Falls,29,True,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
jan_2021_csv = Path("data/day/OR/jan/jan_2021_ave.csv")
jan_2021_csv.parent.mkdir(parents=True, exist_ok=True)
jan_2021_ave.to_csv(jan_2021_csv, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the CSV files in the January output folder.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined frame reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/OR/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once: growing a frame inside
# the loop re-copies all previously accumulated rows on each pass.
yearly_frames = [pd.read_csv("data/day/OR/jan/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame
# (the result the loop-based version produced when no files were found).
OR_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

OR_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,03caad454a66062fc56b9128d6de2ca5ae4b7996,Jan,2017,heat,hold,Portland,669.295455,670.000000,670.000000,5.0,False,False,False
1,05923e8afce99ca323843d899da2f0c48ba57cec,Jan,2017,heat,hold,Happy Valley,658.655303,655.276515,645.196970,10.0,False,False,False
2,0624dc8fec23932f314f9e0f97a6c7fb059446b2,Jan,2017,auto,auto,Portland,719.402655,786.942478,725.176991,0.0,False,False,False
3,0624dc8fec23932f314f9e0f97a6c7fb059446b2,Jan,2017,auto,hold,Portland,724.582598,796.373310,732.052910,0.0,False,False,False
4,0d0cf9141a32e41f86fe6fd7539c44bdfb535d74,Jan,2017,auxHeatOnly,hold,Coos Bay,669.458333,650.000000,640.000000,75.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
506,fd66b47e4d45c1c2ce2da22c440620fb85bc3d90,Jan,2021,heat,hold,Salem,672.972603,673.150685,673.150685,0.0,True,False,True
507,fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,Jan,2021,heat,hold,Roseburg,686.418960,684.351682,683.761468,20.0,False,False,True
508,fe918247d6bdbd347613dd68f15c85893bb512da,Jan,2021,heat,hold,White City,714.785714,721.714286,718.857143,57.0,False,False,False
509,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,Jan,2021,heat,hold,Beaverton,697.237288,702.796610,702.508475,5.0,False,False,False


In [34]:
# Write the combined January frame without its (per-file, repeating) index.
# to_csv does not create missing folders, so make sure the target folder exists first.
OR_jan_csv = Path("Scraper_Output/State_Month_Day/OR/OR_jan.csv")
OR_jan_csv.parent.mkdir(parents=True, exist_ok=True)
OR_jan.to_csv(OR_jan_csv, header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 day readings for Oregon.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2017-feb-day-OR.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (feb_2017[setpoint_cols] >= 850) | (feb_2017[setpoint_cols] <= 600)
feb_2017.drop(index=feb_2017[out_of_range.any(axis=1)].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-02-05 18:05:00 UTC,heat,hold,693,689,689,OR,corvallis,5,False,False,False,Gas
1,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-02-16 14:40:00 UTC,heat,auto,692,689,689,OR,corvallis,5,False,False,False,Gas
2,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-02-21 17:25:00 UTC,heat,auto,689,689,689,OR,corvallis,5,False,False,False,Gas
3,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-02-23 15:55:00 UTC,heat,auto,693,689,689,OR,corvallis,5,False,False,False,Gas
4,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-02-07 16:45:00 UTC,heat,hold,689,689,689,OR,corvallis,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95492,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-02-17 17:10:00 UTC,heat,hold,689,700,700,OR,forest grove,0,False,False,False,Gas
95493,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-02-03 14:30:00 UTC,heat,hold,696,700,700,OR,forest grove,0,False,False,False,Gas
95494,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-02-04 15:25:00 UTC,heat,hold,705,700,700,OR,forest grove,0,False,False,False,Gas
95495,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-02-04 16:15:00 UTC,heat,hold,700,700,700,OR,forest grove,0,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
feb_2017_csv = Path("data/day/OR/feb/feb_2017_ave.csv")
feb_2017_csv.parent.mkdir(parents=True, exist_ok=True)
feb_2017_ave.to_csv(feb_2017_csv, header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 day readings for Oregon.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2018-feb-day-OR.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (feb_2018[setpoint_cols] >= 850) | (feb_2018[setpoint_cols] <= 600)
feb_2018.drop(index=feb_2018[out_of_range.any(axis=1)].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,78941a9bfdcd014723b1a9ddf3734e555ecea5ab,2018-02-09 15:05:00 UTC,heat,hold,660,640,640,OR,The Dalles,55,True,False,False,Gas
1,78941a9bfdcd014723b1a9ddf3734e555ecea5ab,2018-02-09 17:45:00 UTC,heat,hold,654,640,640,OR,The Dalles,55,True,False,False,Gas
6,78941a9bfdcd014723b1a9ddf3734e555ecea5ab,2018-02-11 16:05:00 UTC,heat,hold,640,640,640,OR,The Dalles,55,True,False,False,Gas
8,c4640a78e89b6b196e0f0b8b91cbe978b502f0d7,2018-02-14 19:30:00 UTC,auto,auto,661,701,651,OR,Eagle Point,16,False,False,False,Gas
9,cd71171c504caf17e5e4786c93311b9ff08bdf66,2018-02-27 17:05:00 UTC,heat,auto,715,754,711,OR,Scappoose,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208375,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-02-18 07:20:00 UTC,heat,hold,694,700,700,OR,Klamath Falls,29,True,False,False,Gas
208376,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-02-17 07:10:00 UTC,heat,hold,697,700,700,OR,Klamath Falls,29,True,False,False,Gas
208377,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-02-18 07:15:00 UTC,heat,hold,696,700,700,OR,Klamath Falls,29,True,False,False,Gas
208378,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-02-18 07:00:00 UTC,heat,hold,700,700,700,OR,Klamath Falls,29,True,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
feb_2018_csv = Path("data/day/OR/feb/feb_2018_ave.csv")
feb_2018_csv.parent.mkdir(parents=True, exist_ok=True)
feb_2018_ave.to_csv(feb_2018_csv, header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 day readings for Oregon.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2019-feb-day-OR.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (feb_2019[setpoint_cols] >= 850) | (feb_2019[setpoint_cols] <= 600)
feb_2019.drop(index=feb_2019[out_of_range.any(axis=1)].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,1c927546884d866d0498c730035be2e4464967b2,2019-02-26 19:25:00 UTC,heat,hold,651,651,651,OR,Lincoln City,0,False,False,False,Gas
2,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2019-02-01 18:35:00 UTC,auto,hold,707,762,712,OR,Roseburg,90,False,False,False,Gas
3,d667da3526d79b5b1165e06da5ea6013ccfb3f13,2019-02-20 19:40:00 UTC,heat,auto,740,775,750,OR,Saint Helens,5,False,False,False,Gas
5,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2019-02-24 18:05:00 UTC,auto,hold,699,752,702,OR,Roseburg,90,False,False,False,Gas
6,1c927546884d866d0498c730035be2e4464967b2,2019-02-22 15:10:00 UTC,heat,hold,647,651,651,OR,Lincoln City,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250395,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-02-16 19:35:00 UTC,heat,hold,709,710,710,OR,Klamath Falls,29,True,False,False,Gas
250396,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-02-17 07:15:00 UTC,heat,hold,716,720,720,OR,Klamath Falls,29,True,False,False,Gas
250397,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-02-17 07:05:00 UTC,heat,hold,718,720,720,OR,Klamath Falls,29,True,False,False,Gas
250398,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-02-10 16:50:00 UTC,heat,hold,704,720,720,OR,Klamath Falls,29,True,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
feb_2019_csv = Path("data/day/OR/feb/feb_2019_ave.csv")
feb_2019_csv.parent.mkdir(parents=True, exist_ok=True)
feb_2019_ave.to_csv(feb_2019_csv, header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 day readings for Oregon.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2020-feb-day-OR.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (feb_2020[setpoint_cols] >= 850) | (feb_2020[setpoint_cols] <= 600)
feb_2020.drop(index=feb_2020[out_of_range.any(axis=1)].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fbd0793fdbaad09b9358ba988d5d27c2a4ea9c0b,2020-02-13 15:50:00 UTC,auto,hold,644,824,662,OR,Newberg,60,True,False,True,Electric
1,9baeb67c105aa1a3213d6a3c4636f821257c2295,2020-02-28 17:40:00 UTC,auto,hold,721,775,725,OR,Irrigon,20,False,False,False,Gas
2,53a8afb9c2869da7d84a186b864ea3c571c2646c,2020-02-17 10:05:00 UTC,heat,hold,701,699,699,OR,Sheridan,10,False,False,True,Electric
3,0f6950236a3fc5e05032d841d5e3dd47e144ef02,2020-02-01 16:30:00 UTC,heat,auto,648,781,653,OR,PORTLAND,7,False,False,True,Electric
5,53a8afb9c2869da7d84a186b864ea3c571c2646c,2020-02-19 09:10:00 UTC,heat,hold,693,699,699,OR,Sheridan,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
361640,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-02-08 07:00:00 UTC,heat,hold,720,720,720,OR,Klamath Falls,29,True,False,False,Gas
361641,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-02-14 07:25:00 UTC,heat,hold,714,725,716,OR,Klamath Falls,29,True,False,False,Gas
361642,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-02-28 07:20:00 UTC,heat,auto,723,730,730,OR,Klamath Falls,29,True,False,False,Gas
361643,69bce0cead73bc1724ae942dad09e3393da90e97,2020-02-10 15:40:00 UTC,heat,hold,681,745,690,OR,Klamath Falls,40,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
feb_2020_csv = Path("data/day/OR/feb/feb_2020_ave.csv")
feb_2020_csv.parent.mkdir(parents=True, exist_ok=True)
feb_2020_ave.to_csv(feb_2020_csv, header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 day readings for Oregon.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2021-feb-day-OR.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
out_of_range = (feb_2021[setpoint_cols] >= 850) | (feb_2021[setpoint_cols] <= 600)
feb_2021.drop(index=feb_2021[out_of_range.any(axis=1)].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,114702ca6a8da3f42947dd25a6d56a0e60c41541,2021-02-02 18:40:00 UTC,heat,hold,668,671,671,OR,Central point,55,False,False,False,Gas
4,df98dfa78799eecb3c6e4ec08abd443231822f1b,2021-02-11 19:50:00 UTC,heat,hold,617,683,620,OR,Silverton,90,True,False,False,Gas
5,df98dfa78799eecb3c6e4ec08abd443231822f1b,2021-02-12 17:05:00 UTC,heat,hold,616,683,620,OR,Silverton,90,True,False,False,Gas
6,fbd0793fdbaad09b9358ba988d5d27c2a4ea9c0b,2021-02-04 19:00:00 UTC,auto,hold,660,779,680,OR,Newberg,60,True,False,True,Electric
7,114702ca6a8da3f42947dd25a6d56a0e60c41541,2021-02-05 19:30:00 UTC,heat,hold,655,671,671,OR,Central point,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
225312,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-02-10 07:20:00 UTC,heat,hold,715,720,720,OR,Klamath Falls,29,True,False,False,Gas
225313,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-02-18 17:10:00 UTC,heat,hold,708,720,720,OR,Klamath Falls,29,True,False,False,Gas
225314,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-02-04 07:15:00 UTC,heat,hold,728,730,730,OR,Klamath Falls,29,True,False,False,Gas
225315,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2021-02-05 07:00:00 UTC,heat,hold,723,730,730,OR,Klamath Falls,29,True,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Mark the three temperature columns as averages ahead of the group-wise mean.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises TypeError instead.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the aggregated frame; index=True keeps the group keys as leading CSV columns.
# to_csv does not create missing folders, so make sure the target folder exists first.
feb_2021_csv = Path("data/day/OR/feb/feb_2021_ave.csv")
feb_2021_csv.parent.mkdir(parents=True, exist_ok=True)
feb_2021_ave.to_csv(feb_2021_csv, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the CSV files in the February output folder.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined frame reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/OR/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once: growing a frame inside
# the loop re-copies all previously accumulated rows on each pass.
yearly_frames = [pd.read_csv("data/day/OR/feb/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame
# (the result the loop-based version produced when no files were found).
OR_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

OR_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,03caad454a66062fc56b9128d6de2ca5ae4b7996,feb,2017,heat,hold,Portland,680.234848,670.000000,670.000000,5.0,False,False,False
1,05923e8afce99ca323843d899da2f0c48ba57cec,feb,2017,heat,hold,Happy Valley,667.117647,720.529412,700.941176,10.0,False,False,False
2,0624dc8fec23932f314f9e0f97a6c7fb059446b2,feb,2017,auto,auto,Portland,707.533235,782.225997,709.511078,0.0,False,False,False
3,0624dc8fec23932f314f9e0f97a6c7fb059446b2,feb,2017,auto,hold,Portland,717.454790,791.557744,722.641898,0.0,False,False,False
4,099d727a71f6df3adc1add16bab7358831990e2c,feb,2017,heat,hold,Portland,668.000000,780.000000,670.000000,100.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
490,fd6dd3503a19a2386a09f4b25e636ef7cfb34eb3,feb,2021,heat,hold,Albany,715.000000,730.000000,730.000000,19.0,False,False,False
491,fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,feb,2021,heat,hold,Roseburg,679.754266,680.795222,680.331058,20.0,False,False,True
492,fe918247d6bdbd347613dd68f15c85893bb512da,feb,2021,auto,hold,White City,686.166667,675.000000,615.000000,57.0,False,False,False
493,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,feb,2021,heat,hold,Beaverton,695.854167,700.291667,699.854167,5.0,False,False,False


In [67]:
# Save the combined February file; the row index is not written (index=False)
OR_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/OR/OR_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw Oregon "day" extract for June 2017
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2017-jun-day-OR.csv",
)

In [69]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2017.drop(index=jun_2017[out_of_range].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1c828c9306ff8ba3fcd6c4c1df5839adb9b68a33,2017-06-03 15:35:00 UTC,auto,hold,712,765,715,OR,Lebanon,47,False,False,False,Gas
1,1c828c9306ff8ba3fcd6c4c1df5839adb9b68a33,2017-06-03 13:30:00 UTC,auto,hold,728,765,715,OR,Lebanon,47,False,False,False,Gas
2,1c828c9306ff8ba3fcd6c4c1df5839adb9b68a33,2017-06-05 14:20:00 UTC,auto,hold,708,765,715,OR,Lebanon,47,False,False,False,Gas
3,1c828c9306ff8ba3fcd6c4c1df5839adb9b68a33,2017-06-02 17:05:00 UTC,auto,hold,715,765,715,OR,Lebanon,47,False,False,False,Gas
4,1c828c9306ff8ba3fcd6c4c1df5839adb9b68a33,2017-06-03 15:55:00 UTC,auto,hold,714,765,715,OR,Lebanon,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141742,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-06-21 14:35:00 UTC,cool,hold,683,740,740,OR,forest grove,0,False,False,False,Gas
141743,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-06-22 16:15:00 UTC,cool,hold,693,740,740,OR,forest grove,0,False,False,False,Gas
141744,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-06-22 18:00:00 UTC,cool,hold,694,740,740,OR,forest grove,0,False,False,False,Gas
141745,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-06-22 19:30:00 UTC,cool,hold,705,740,740,OR,forest grove,0,False,False,False,Gas


In [70]:
# Label every row with its year and month; both are used as group keys below
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(ave_labels, axis="columns")

In [72]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jun_2017_ave.to_csv(
    path_or_buf="data/day/OR/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw Oregon "day" extract for June 2018
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2018-jun-day-OR.csv",
)

In [75]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2018.drop(index=jun_2018[out_of_range].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dd35868ee6a9a1ba55d65a6141fab45b29a4aac2,2018-06-04 18:20:00 UTC,auto,auto,683,699,669,OR,Cheshire,30,True,False,True,Electric
1,dd35868ee6a9a1ba55d65a6141fab45b29a4aac2,2018-06-10 15:00:00 UTC,auto,auto,660,699,669,OR,Cheshire,30,True,False,True,Electric
3,dd35868ee6a9a1ba55d65a6141fab45b29a4aac2,2018-06-08 18:35:00 UTC,auto,auto,681,699,669,OR,Cheshire,30,True,False,True,Electric
4,dd35868ee6a9a1ba55d65a6141fab45b29a4aac2,2018-06-04 15:10:00 UTC,auto,auto,673,699,669,OR,Cheshire,30,True,False,True,Electric
5,65a70dce251bb809254faa8d86961902e4719cc0,2018-06-08 19:10:00 UTC,auto,hold,685,756,701,OR,Ashland,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
241389,219fa6bfdeca2d936adab6a5d4394fb20284593f,2018-06-30 15:50:00 UTC,auto,hold,731,725,675,OR,Klamath Falls,17,True,False,False,Gas
241390,219fa6bfdeca2d936adab6a5d4394fb20284593f,2018-06-30 16:25:00 UTC,auto,hold,727,725,675,OR,Klamath Falls,17,True,False,False,Gas
241391,219fa6bfdeca2d936adab6a5d4394fb20284593f,2018-06-30 19:10:00 UTC,auto,auto,734,750,700,OR,Klamath Falls,17,True,False,False,Gas
241392,219fa6bfdeca2d936adab6a5d4394fb20284593f,2018-06-30 17:55:00 UTC,auto,auto,738,750,700,OR,Klamath Falls,17,True,False,False,Gas


In [76]:
# Label every row with its year and month; both are used as group keys below
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(ave_labels, axis="columns")

In [78]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jun_2018_ave.to_csv(
    path_or_buf="data/day/OR/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw Oregon "day" extract for June 2019
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2019-jun-day-OR.csv",
)

In [81]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2019.drop(index=jun_2019[out_of_range].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e3af8fcbccf8a65ae71ec083381af39b3c7db7bd,2019-06-08 18:20:00 UTC,heat,hold,693,700,700,OR,Portland,0,True,False,True,Electric
1,32676105dcefc3518965de07308b21cea587bcd8,2019-06-23 14:55:00 UTC,heat,auto,671,700,660,OR,Portland,0,True,False,True,Electric
2,ec990339e6a982961acf69687c69ebff9cf7cc57,2019-06-22 16:10:00 UTC,heat,hold,697,700,700,OR,Portland,55,True,False,True,Electric
3,1753a4cacf76f48dd0cc631192a93019f35d7e14,2019-06-16 14:15:00 UTC,auto,hold,694,700,630,OR,Portland,20,True,False,False,Gas
4,3ed300d9998e3b10a7aec9227feee362f5e17d1c,2019-06-01 13:15:00 UTC,heat,auto,698,700,700,OR,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377790,219fa6bfdeca2d936adab6a5d4394fb20284593f,2019-06-27 19:00:00 UTC,auto,auto,727,760,700,OR,Klamath Falls,17,True,False,False,Gas
377791,219fa6bfdeca2d936adab6a5d4394fb20284593f,2019-06-27 18:25:00 UTC,auto,auto,726,760,700,OR,Klamath Falls,17,True,False,False,Gas
377792,219fa6bfdeca2d936adab6a5d4394fb20284593f,2019-06-08 16:55:00 UTC,auto,auto,709,760,710,OR,Klamath Falls,17,True,False,False,Gas
377793,219fa6bfdeca2d936adab6a5d4394fb20284593f,2019-06-08 17:15:00 UTC,auto,auto,707,760,710,OR,Klamath Falls,17,True,False,False,Gas


In [82]:
# Label every row with its year and month; both are used as group keys below
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(ave_labels, axis="columns")

In [84]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jun_2019_ave.to_csv(
    path_or_buf="data/day/OR/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw Oregon "day" extract for June 2020
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2020-jun-day-OR.csv",
)

In [87]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2020.drop(index=jun_2020[out_of_range].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fbd0793fdbaad09b9358ba988d5d27c2a4ea9c0b,2020-06-19 16:50:00 UTC,auto,hold,703,743,662,OR,Newberg,60,True,False,True,Electric
1,114702ca6a8da3f42947dd25a6d56a0e60c41541,2020-06-16 19:00:00 UTC,heat,hold,657,671,671,OR,Central point,55,False,False,False,Gas
2,df98dfa78799eecb3c6e4ec08abd443231822f1b,2020-06-15 16:30:00 UTC,auto,hold,695,753,733,OR,Silverton,90,True,False,False,Gas
3,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2020-06-02 11:15:00 UTC,auto,hold,681,717,667,OR,Roseburg,90,False,False,False,Gas
4,6ad7aa34fc964bff771cd12f54fa040aa436b610,2020-06-24 16:35:00 UTC,cool,hold,763,810,790,OR,Rogue River,50,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
374921,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-06-20 17:50:00 UTC,cool,hold,742,730,730,OR,Klamath Falls,29,True,False,False,Gas
374922,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-06-20 17:15:00 UTC,cool,hold,750,730,730,OR,Klamath Falls,29,True,False,False,Gas
374923,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-06-20 17:35:00 UTC,cool,hold,746,730,730,OR,Klamath Falls,29,True,False,False,Gas
374924,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-06-02 16:45:00 UTC,cool,hold,725,730,730,OR,Klamath Falls,29,True,False,False,Gas


In [88]:
# Label every row with its year and month; both are used as group keys below
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(ave_labels, axis="columns")

In [90]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jun_2020_ave.to_csv(
    path_or_buf="data/day/OR/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw Oregon "day" extract for June 2021
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2021-jun-day-OR.csv",
)

In [93]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2021.drop(index=jun_2021[out_of_range].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,8f0cff68721f61623e70c5ab7f16efaea52c1581,2021-06-11 19:20:00 UTC,heat,hold,706,680,680,OR,Elmira,75,False,False,False,Gas
2,8f0cff68721f61623e70c5ab7f16efaea52c1581,2021-06-06 19:50:00 UTC,heat,hold,690,680,680,OR,Elmira,75,False,False,False,Gas
3,8f0cff68721f61623e70c5ab7f16efaea52c1581,2021-06-06 19:15:00 UTC,heat,hold,686,680,680,OR,Elmira,75,False,False,False,Gas
4,0d0cf9141a32e41f86fe6fd7539c44bdfb535d74,2021-06-24 16:00:00 UTC,heat,hold,686,690,690,OR,Coos Bay,75,True,False,True,Electric
5,0d0cf9141a32e41f86fe6fd7539c44bdfb535d74,2021-06-05 17:55:00 UTC,heat,hold,694,690,690,OR,Coos Bay,75,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235560,e8cd29c205dfbd5aa461efb3de976b243a38835f,2021-06-05 16:40:00 UTC,auto,hold,692,760,650,OR,Gladstone,100,False,False,False,Gas
235561,e8cd29c205dfbd5aa461efb3de976b243a38835f,2021-06-05 15:45:00 UTC,auto,hold,691,760,650,OR,Gladstone,100,False,False,False,Gas
235562,e8cd29c205dfbd5aa461efb3de976b243a38835f,2021-06-06 17:40:00 UTC,auto,hold,644,760,650,OR,Gladstone,100,False,False,False,Gas
235563,e8cd29c205dfbd5aa461efb3de976b243a38835f,2021-06-05 14:45:00 UTC,auto,hold,693,760,650,OR,Gladstone,100,False,False,False,Gas


In [94]:
# Label every row with its year and month; both are used as group keys below
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(ave_labels, axis="columns")

In [96]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jun_2021_ave.to_csv(
    path_or_buf="data/day/OR/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export as combined CSV

In [98]:
# Collect the CSV file names in the June folder. os.listdir returns names
# in arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/OR/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies all previously accumulated rows on each pass.
yearly_frames = [pd.read_csv("data/day/OR/jun/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty DataFrame
OR_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

OR_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,006b3b1d03de005725492594b7733c3ef67d7dfe,jun,2017,auto,auto,Oregon City,714.644068,748.254237,694.186441,20.0,False,False,False
1,00b089459452edc92a0469d6ea9d74767924f3f5,jun,2017,cool,hold,Beaverton,669.036893,761.382524,761.079612,25.0,True,False,False
2,03caad454a66062fc56b9128d6de2ca5ae4b7996,jun,2017,auto,auto,Portland,727.666667,750.000000,645.000000,5.0,False,False,False
3,03caad454a66062fc56b9128d6de2ca5ae4b7996,jun,2017,auto,hold,Portland,747.333333,760.000000,670.000000,5.0,False,False,False
4,052a8daf4686b3a7f4a807ef3239f57490eb40d2,jun,2017,cool,hold,Forest Grove,715.320513,730.000000,730.000000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
726,fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,jun,2021,cool,hold,Roseburg,761.627907,756.250000,756.366279,20.0,False,False,True
727,fe1baa9f35e9f8ec94a0cd7e1e93b43435a7ee8a,jun,2021,cool,hold,Albany,670.222222,773.777778,773.777778,120.0,False,False,False
728,fe918247d6bdbd347613dd68f15c85893bb512da,jun,2021,cool,hold,White City,691.432836,679.552239,679.059701,57.0,False,False,False
729,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,jun,2021,cool,hold,Beaverton,712.991935,708.838710,708.766129,5.0,False,False,False


In [100]:
# Save the combined June file; the row index is not written (index=False)
OR_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/OR/OR_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw Oregon "day" extract for July 2017
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2017-jul-day-OR.csv",
)

In [102]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2017.drop(index=jul_2017[out_of_range].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,368592eef613ea59348ff70f972c088ff48bd92f,2017-07-06 18:05:00 UTC,auto,hold,706,709,659,OR,Aloha,40,False,False,False,Gas
1,9248bc8a2f1e92e98edb4633dca150eca8f07d6f,2017-07-22 17:40:00 UTC,cool,hold,731,810,740,OR,Corvallis,40,True,False,False,Gas
2,9248bc8a2f1e92e98edb4633dca150eca8f07d6f,2017-07-22 16:50:00 UTC,cool,hold,727,810,740,OR,Corvallis,40,True,False,False,Gas
3,6ad7aa34fc964bff771cd12f54fa040aa436b610,2017-07-05 17:05:00 UTC,cool,hold,766,810,790,OR,Rogue River,50,False,False,True,Electric
4,9248bc8a2f1e92e98edb4633dca150eca8f07d6f,2017-07-22 17:00:00 UTC,cool,hold,727,810,740,OR,Corvallis,40,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164655,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-07-21 18:45:00 UTC,cool,hold,704,750,750,OR,forest grove,0,False,False,False,Gas
164656,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-07-20 19:50:00 UTC,cool,hold,708,750,750,OR,forest grove,0,False,False,False,Gas
164657,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-07-01 17:40:00 UTC,cool,hold,701,750,750,OR,forest grove,0,False,False,False,Gas
164658,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-07-13 17:50:00 UTC,cool,hold,722,760,760,OR,forest grove,0,False,False,False,Gas


In [103]:
# Label every row with its year and month; both are used as group keys below
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(ave_labels, axis="columns")

In [105]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jul_2017_ave.to_csv(
    path_or_buf="data/day/OR/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw Oregon "day" extract for July 2018
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2018-jul-day-OR.csv",
)

In [108]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2018.drop(index=jul_2018[out_of_range].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,65a70dce251bb809254faa8d86961902e4719cc0,2018-07-17 16:10:00 UTC,auto,hold,659,665,605,OR,Ashland,0,True,False,False,Gas
1,65a70dce251bb809254faa8d86961902e4719cc0,2018-07-16 17:05:00 UTC,auto,hold,666,665,615,OR,Ashland,0,True,False,False,Gas
2,65a70dce251bb809254faa8d86961902e4719cc0,2018-07-17 18:50:00 UTC,auto,hold,662,665,605,OR,Ashland,0,True,False,False,Gas
3,d32546856f82383cac4ce8007c048507ca518c28,2018-07-14 18:40:00 UTC,auto,hold,731,795,615,OR,oak grove,50,False,False,False,Gas
4,65a70dce251bb809254faa8d86961902e4719cc0,2018-07-17 17:40:00 UTC,auto,hold,669,665,605,OR,Ashland,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322821,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-07-28 18:50:00 UTC,auto,hold,743,760,710,OR,Klamath Falls,29,True,False,False,Gas
322822,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-07-28 18:25:00 UTC,auto,hold,741,760,710,OR,Klamath Falls,29,True,False,False,Gas
322823,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-07-28 17:45:00 UTC,auto,hold,730,760,710,OR,Klamath Falls,29,True,False,False,Gas
322824,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-07-28 17:35:00 UTC,auto,hold,728,760,710,OR,Klamath Falls,29,True,False,False,Gas


In [109]:
# Label every row with its year and month; both are used as group keys below
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(ave_labels, axis="columns")

In [111]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jul_2018_ave.to_csv(
    path_or_buf="data/day/OR/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw Oregon "day" extract for July 2019
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2019-jul-day-OR.csv",
)

In [114]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2019.drop(index=jul_2019[out_of_range].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,df98dfa78799eecb3c6e4ec08abd443231822f1b,2019-07-31 19:10:00 UTC,cool,hold,742,743,743,OR,Silverton,90,True,False,False,Gas
1,8ad786ec06c05d4ae243023be9b45144a11db7c4,2019-07-24 14:20:00 UTC,cool,auto,635,630,630,OR,Canyon City,10,False,False,False,Gas
2,876b9dfe5bf0540d52e634786e681978524b51a1,2019-07-21 19:15:00 UTC,cool,hold,696,784,747,OR,McMinnville,10,True,False,True,Electric
3,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2019-07-17 14:35:00 UTC,auto,hold,710,738,688,OR,Roseburg,90,False,False,False,Gas
4,876b9dfe5bf0540d52e634786e681978524b51a1,2019-07-16 18:40:00 UTC,cool,hold,693,784,747,OR,McMinnville,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
409558,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-07-13 18:30:00 UTC,auto,hold,753,750,680,OR,Klamath Falls,29,True,False,False,Gas
409559,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-07-15 18:00:00 UTC,auto,hold,747,750,680,OR,Klamath Falls,29,True,False,False,Gas
409560,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-07-12 18:15:00 UTC,auto,auto,752,750,680,OR,Klamath Falls,29,True,False,False,Gas
409561,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-07-13 17:35:00 UTC,auto,hold,761,750,680,OR,Klamath Falls,29,True,False,False,Gas


In [115]:
# Label every row with its year and month; both are used as group keys below
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(ave_labels, axis="columns")

In [117]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jul_2019_ave.to_csv(
    path_or_buf="data/day/OR/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw Oregon "day" extract for July 2020
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2020-jul-day-OR.csv",
)

In [120]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2020.drop(index=jul_2020[out_of_range].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c4640a78e89b6b196e0f0b8b91cbe978b502f0d7,2020-07-15 18:30:00 UTC,cool,auto,628,620,620,OR,Eagle Point,16,False,False,False,Gas
1,0f6950236a3fc5e05032d841d5e3dd47e144ef02,2020-07-20 14:05:00 UTC,auto,hold,747,761,635,OR,PORTLAND,7,False,False,True,Electric
2,c4640a78e89b6b196e0f0b8b91cbe978b502f0d7,2020-07-08 17:55:00 UTC,cool,auto,634,620,620,OR,Eagle Point,16,False,False,False,Gas
3,114702ca6a8da3f42947dd25a6d56a0e60c41541,2020-07-23 19:50:00 UTC,cool,hold,722,721,702,OR,Central point,55,False,False,False,Gas
4,c4640a78e89b6b196e0f0b8b91cbe978b502f0d7,2020-07-01 18:05:00 UTC,cool,auto,624,620,620,OR,Eagle Point,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
382493,a84fd14a4996be166c9b90851342465523897e91,2020-07-06 17:30:00 UTC,heat,hold,698,670,670,OR,Portland,116,False,False,False,Gas
382494,a84fd14a4996be166c9b90851342465523897e91,2020-07-12 17:25:00 UTC,heat,hold,697,670,670,OR,Portland,116,False,False,False,Gas
382495,a84fd14a4996be166c9b90851342465523897e91,2020-07-25 19:55:00 UTC,heat,hold,691,670,670,OR,Portland,116,False,False,False,Gas
382496,a84fd14a4996be166c9b90851342465523897e91,2020-07-24 16:20:00 UTC,heat,hold,690,670,670,OR,Portland,116,False,False,False,Gas


In [121]:
# Label every row with its year and month; both are used as group keys below
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(ave_labels, axis="columns")

In [123]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jul_2020_ave.to_csv(
    path_or_buf="data/day/OR/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw Oregon "day" extract for July 2021
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/OR-day/2021-jul-day-OR.csv",
)

In [126]:
# Remove predetermined outliers before aggregating: discard every row whose
# expected cool OR expected heat temperature is >= 850 or <= 600.
# Rows with a missing value in these columns fail both comparisons and are kept.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2021.drop(index=jul_2021[out_of_range].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fbd0793fdbaad09b9358ba988d5d27c2a4ea9c0b,2021-07-28 18:20:00 UTC,cool,hold,720,734,734,OR,Newberg,60,True,False,True,Electric
1,df98dfa78799eecb3c6e4ec08abd443231822f1b,2021-07-28 16:45:00 UTC,cool,hold,757,755,753,OR,Silverton,90,True,False,False,Gas
2,fbd0793fdbaad09b9358ba988d5d27c2a4ea9c0b,2021-07-24 18:25:00 UTC,cool,hold,717,716,716,OR,Newberg,60,True,False,True,Electric
3,b9218cc02444b2c134490e36cc878268181619ce,2021-07-26 19:35:00 UTC,auto,hold,675,675,625,OR,North Plains,0,False,False,True,Electric
4,fbd0793fdbaad09b9358ba988d5d27c2a4ea9c0b,2021-07-28 19:00:00 UTC,cool,hold,726,734,734,OR,Newberg,60,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199983,fa59ddbc4f617580e56d48d932fead97b7d8c83d,2021-07-27 14:50:00 UTC,cool,hold,738,740,740,OR,Hillsboro,20,False,False,False,Gas
199984,fa59ddbc4f617580e56d48d932fead97b7d8c83d,2021-07-27 16:15:00 UTC,cool,hold,737,740,740,OR,Hillsboro,20,False,False,False,Gas
199985,fa59ddbc4f617580e56d48d932fead97b7d8c83d,2021-07-28 15:30:00 UTC,cool,hold,728,740,740,OR,Hillsboro,20,False,False,False,Gas
199986,fa59ddbc4f617580e56d48d932fead97b7d8c83d,2021-07-05 19:25:00 UTC,cool,hold,727,745,745,OR,Hillsboro,20,False,False,False,Gas


In [127]:
# Label every row with its year and month; both are used as group keys below
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(ave_labels, axis="columns")

In [129]:
# Average the numeric columns within each Identifier/Month/Year/HvacMode/
# CalendarEvent/City group. numeric_only=True excludes text columns such as
# date_time explicitly; pandas >= 2.0 raises a TypeError on them instead of
# silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the group averages to disk; index=True keeps the group keys as CSV columns
jul_2021_ave.to_csv(
    path_or_buf="data/day/OR/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export as combined CSV

In [131]:
# Collect the CSV file names in the July folder. os.listdir returns names
# in arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/OR/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies all previously accumulated rows on each pass.
yearly_frames = [pd.read_csv("data/day/OR/jul/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty DataFrame
OR_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

OR_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,006b3b1d03de005725492594b7733c3ef67d7dfe,jul,2017,auto,auto,Oregon City,724.785714,720.000000,652.857143,20.0,False,False,False
1,00b089459452edc92a0469d6ea9d74767924f3f5,jul,2017,cool,hold,Beaverton,697.667536,773.134289,772.559322,25.0,True,False,False
2,03caad454a66062fc56b9128d6de2ca5ae4b7996,jul,2017,auto,auto,Portland,749.915663,760.337349,651.084337,5.0,False,False,False
3,03caad454a66062fc56b9128d6de2ca5ae4b7996,jul,2017,auto,hold,Portland,760.350000,766.087500,640.000000,5.0,False,False,False
4,04fac6711e3ad53990eebc8a7d198052deb2a9b1,jul,2017,cool,auto,Hood River,741.380952,740.751323,738.608466,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
436,fd6dd3503a19a2386a09f4b25e636ef7cfb34eb3,jul,2021,cool,hold,Albany,724.153846,717.000000,717.000000,19.0,False,False,False
437,fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,jul,2021,cool,hold,Roseburg,741.009009,745.033033,745.057057,20.0,False,False,True
438,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,jul,2021,cool,hold,Beaverton,701.576923,709.990385,709.980769,5.0,False,False,False
439,ff38b7d2e7a775a99129867043a265bf5cbb48b2,jul,2021,auto,hold,Gresham,716.365385,736.346154,696.346154,20.0,True,False,False


In [133]:
# to_csv does not create missing folders, so make sure the state output folder
# exists before writing the combined July file.
os.makedirs("Scraper_Output/State_Month_Day/OR", exist_ok=True)

# index=False: the row labels are only per-file positions and are not exported.
OR_jul.to_csv("Scraper_Output/State_Month_Day/OR/OR_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 Oregon extract into a DataFrame.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2017-aug-day-OR.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(index=aug_2017.loc[is_outlier].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c112fdef3cc591bfc328e7b5f040668e08165ca,2017-08-21 16:35:00 UTC,auto,hold,776,772,752,OR,North Plains,0,True,False,False,Gas
1,6c112fdef3cc591bfc328e7b5f040668e08165ca,2017-08-24 17:20:00 UTC,auto,auto,783,806,644,OR,North Plains,0,True,False,False,Gas
3,6c112fdef3cc591bfc328e7b5f040668e08165ca,2017-08-24 16:50:00 UTC,auto,auto,783,806,644,OR,North Plains,0,True,False,False,Gas
4,ea5eb651c8e70d9cccebe4bb01a228a12f1079da,2017-08-06 19:10:00 UTC,cool,hold,745,758,758,OR,Hood River,0,False,False,False,Gas
5,6c112fdef3cc591bfc328e7b5f040668e08165ca,2017-08-21 17:50:00 UTC,auto,hold,775,772,752,OR,North Plains,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173493,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-08-22 18:20:00 UTC,cool,hold,717,750,750,OR,forest grove,0,False,False,False,Gas
173494,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-08-22 16:30:00 UTC,cool,hold,713,750,750,OR,forest grove,0,False,False,False,Gas
173495,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-08-20 19:45:00 UTC,cool,hold,695,760,760,OR,forest grove,0,False,False,False,Gas
173496,a6c9df0eb0fa81661baef9d1a67507a9837690a5,2017-08-10 19:55:00 UTC,cool,hold,734,760,760,OR,forest grove,0,False,False,False,Gas


In [136]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
aug_2017 = aug_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/aug", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
aug_2017_ave.to_csv("data/day/OR/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Load the August 2018 Oregon extract into a DataFrame.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2018-aug-day-OR.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(index=aug_2018.loc[is_outlier].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a41d1655a42dc8d83a638379cc627a6c3107fa58,2018-08-07 13:30:00 UTC,cool,hold,719,762,762,OR,Sunriver,30,False,False,False,Gas
1,d32546856f82383cac4ce8007c048507ca518c28,2018-08-19 19:15:00 UTC,auto,hold,736,755,705,OR,oak grove,50,False,False,False,Gas
2,a41d1655a42dc8d83a638379cc627a6c3107fa58,2018-08-13 19:30:00 UTC,cool,hold,707,782,782,OR,Sunriver,30,False,False,False,Gas
3,8b80ed37aa10fdf227ad6fb3601047d49d0e049f,2018-08-01 16:35:00 UTC,auto,hold,714,767,708,OR,Stayton,10,True,False,True,Electric
4,a41d1655a42dc8d83a638379cc627a6c3107fa58,2018-08-09 17:05:00 UTC,cool,hold,689,762,762,OR,Sunriver,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
322870,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-08-15 16:05:00 UTC,auto,hold,724,760,710,OR,Klamath Falls,29,True,False,False,Gas
322871,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-08-15 17:45:00 UTC,auto,hold,737,760,710,OR,Klamath Falls,29,True,False,False,Gas
322872,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-08-15 18:15:00 UTC,auto,hold,738,760,710,OR,Klamath Falls,29,True,False,False,Gas
322873,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-08-16 17:30:00 UTC,auto,hold,754,760,710,OR,Klamath Falls,29,True,False,False,Gas


In [142]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
aug_2018 = aug_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/aug", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
aug_2018_ave.to_csv("data/day/OR/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Load the August 2019 Oregon extract into a DataFrame.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2019-aug-day-OR.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(index=aug_2019.loc[is_outlier].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8ad786ec06c05d4ae243023be9b45144a11db7c4,2019-08-04 18:00:00 UTC,cool,hold,628,620,620,OR,Canyon City,10,False,False,False,Gas
1,2300fb4ee7123dad86ce6ac6318cc9156338d0c5,2019-08-06 16:05:00 UTC,auto,hold,761,810,620,OR,PORTLAND,40,True,False,True,Electric
2,8ad786ec06c05d4ae243023be9b45144a11db7c4,2019-08-10 15:35:00 UTC,cool,auto,626,620,620,OR,Canyon City,10,False,False,False,Gas
3,2bc2a0b579a3741fc70ee49b2c34c5849de49bde,2019-08-02 17:20:00 UTC,auto,hold,698,692,642,OR,Stayton,20,False,False,True,Electric
4,9bd7ce0e761eabf64adc070831d3afa3244f32e6,2019-08-16 18:20:00 UTC,auto,hold,768,775,685,OR,Shady Cove,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
419484,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-08-13 18:15:00 UTC,auto,hold,764,760,680,OR,Klamath Falls,29,True,False,False,Gas
419485,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-08-30 17:20:00 UTC,auto,hold,764,760,680,OR,Klamath Falls,29,True,False,False,Gas
419486,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-08-30 19:50:00 UTC,auto,hold,763,760,680,OR,Klamath Falls,29,True,False,False,Gas
419487,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-08-13 16:40:00 UTC,auto,hold,748,760,680,OR,Klamath Falls,29,True,False,False,Gas


In [148]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
aug_2019 = aug_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/aug", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
aug_2019_ave.to_csv("data/day/OR/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Load the August 2020 Oregon extract into a DataFrame.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2020-aug-day-OR.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(index=aug_2020.loc[is_outlier].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,0f6950236a3fc5e05032d841d5e3dd47e144ef02,2020-08-24 16:50:00 UTC,auto,hold,742,752,635,OR,PORTLAND,7,False,False,True,Electric
3,114702ca6a8da3f42947dd25a6d56a0e60c41541,2020-08-05 17:50:00 UTC,cool,hold,737,731,702,OR,Central point,55,False,False,False,Gas
4,74c4cb80267ba7e9bca86d95b6e46b07a55940c9,2020-08-14 19:55:00 UTC,cool,hold,723,722,722,OR,Springfield,19,True,False,False,Gas
6,114702ca6a8da3f42947dd25a6d56a0e60c41541,2020-08-30 19:55:00 UTC,cool,hold,722,721,702,OR,Central point,55,False,False,False,Gas
7,0f6950236a3fc5e05032d841d5e3dd47e144ef02,2020-08-24 15:20:00 UTC,auto,hold,737,752,635,OR,PORTLAND,7,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377433,e8cd29c205dfbd5aa461efb3de976b243a38835f,2020-08-21 16:10:00 UTC,auto,hold,734,735,665,OR,Gladstone,100,False,False,False,Gas
377434,e8cd29c205dfbd5aa461efb3de976b243a38835f,2020-08-30 19:55:00 UTC,auto,hold,685,735,665,OR,Gladstone,100,False,False,False,Gas
377435,e8cd29c205dfbd5aa461efb3de976b243a38835f,2020-08-30 18:05:00 UTC,auto,hold,678,735,665,OR,Gladstone,100,False,False,False,Gas
377436,e8cd29c205dfbd5aa461efb3de976b243a38835f,2020-08-30 16:00:00 UTC,auto,hold,685,745,685,OR,Gladstone,100,False,False,False,Gas


In [154]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
aug_2020 = aug_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/aug", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
aug_2020_ave.to_csv("data/day/OR/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [158]:
# Collect the names of the per-year average CSVs in the August folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file stable between runs and machines.
files = sorted(f for f in os.listdir("data/day/OR/aug/") if f.endswith(".csv"))

# files

In [159]:
# Read every per-year CSV for August, visiting the files in sorted name order
# so the combined row order is reproducible.
aug_frames = [pd.read_csv("data/day/OR/aug/" + file) for file in sorted(files)]

# Concatenate once instead of growing a DataFrame inside the loop: this avoids
# re-copying the accumulated rows on every iteration and avoids concatenating
# onto an empty DataFrame, whose dtype handling is deprecated in pandas.
# With no input files the result is an empty DataFrame, as before.
OR_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

OR_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,006b3b1d03de005725492594b7733c3ef67d7dfe,aug,2017,auto,auto,Oregon City,730.163462,742.846154,678.269231,20.0,False,False,False
1,00b089459452edc92a0469d6ea9d74767924f3f5,aug,2017,cool,hold,Beaverton,715.278226,774.619624,773.611559,25.0,True,False,False
2,052a8daf4686b3a7f4a807ef3239f57490eb40d2,aug,2017,auto,hold,Forest Grove,726.020833,735.000000,665.000000,10.0,False,False,False
3,0624dc8fec23932f314f9e0f97a6c7fb059446b2,aug,2017,auto,auto,Portland,710.452656,737.496536,616.678984,0.0,False,False,False
4,0624dc8fec23932f314f9e0f97a6c7fb059446b2,aug,2017,auto,hold,Portland,717.414830,758.788243,663.389446,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
821,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,aug,2020,cool,auto,Beaverton,723.323529,720.264706,718.929412,5.0,False,False,False
822,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,aug,2020,cool,hold,Beaverton,733.481481,716.296296,716.296296,5.0,False,False,False
823,ff38b7d2e7a775a99129867043a265bf5cbb48b2,aug,2020,auto,hold,Gresham,727.075301,744.662651,712.975904,20.0,True,False,False
824,ff38b7d2e7a775a99129867043a265bf5cbb48b2,aug,2020,cool,auto,Gresham,732.364583,740.000000,755.000000,20.0,True,False,False


In [160]:
# to_csv does not create missing folders, so make sure the state output folder
# exists before writing the combined August file.
os.makedirs("Scraper_Output/State_Month_Day/OR", exist_ok=True)

# index=False: the row labels are only per-file positions and are not exported.
OR_aug.to_csv("Scraper_Output/State_Month_Day/OR/OR_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 Oregon extract into a DataFrame.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2017-dec-day-OR.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(index=dec_2017.loc[is_outlier].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cfeeb226f8297cddd35bf2408bdb7bd4071160d8,2017-12-13 15:45:00 UTC,heat,auto,652,799,651,OR,Dundee,47,False,False,False,Gas
2,97d0f5da7db1e75514930e823feaf6c84421dce2,2017-12-24 17:00:00 UTC,heat,hold,737,761,726,OR,Portland,20,False,False,False,Gas
4,cd71171c504caf17e5e4786c93311b9ff08bdf66,2017-12-15 19:05:00 UTC,heat,auto,705,736,702,OR,Scappoose,27,False,False,False,Gas
10,dccaf065befac19f089838bd56e3d6ca78f374c6,2017-12-11 16:10:00 UTC,heat,auto,675,681,681,OR,corvallis,5,False,False,False,Gas
11,1c927546884d866d0498c730035be2e4464967b2,2017-12-22 19:10:00 UTC,heat,auto,691,687,687,OR,Lincoln City,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237622,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2017-12-02 19:05:00 UTC,heat,hold,709,710,710,OR,Klamath Falls,29,True,False,False,Gas
237623,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2017-12-02 19:25:00 UTC,heat,hold,704,710,710,OR,Klamath Falls,29,True,False,False,Gas
237624,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2017-12-12 15:45:00 UTC,heat,hold,698,710,710,OR,Klamath Falls,29,True,False,False,Gas
237625,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2017-12-02 19:20:00 UTC,heat,hold,706,710,710,OR,Klamath Falls,29,True,False,False,Gas


In [163]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
dec_2017 = dec_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/dec", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
dec_2017_ave.to_csv("data/day/OR/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Load the December 2018 Oregon extract into a DataFrame.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2018-dec-day-OR.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(index=dec_2018.loc[is_outlier].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7474bc9ac147902665559a735c4b7820195e2d9d,2018-12-08 13:10:00 UTC,heat,auto,687,716,685,OR,King City,0,False,False,False,Gas
1,d0fd4191a6b97277a0e5ad601efac94c0b9b3bb6,2018-12-24 19:50:00 UTC,auto,auto,778,840,650,OR,Elkton,40,True,False,True,Electric
2,6c112fdef3cc591bfc328e7b5f040668e08165ca,2018-12-12 19:20:00 UTC,heat,hold,755,761,761,OR,North Plains,0,True,False,False,Gas
3,dc462408624c5f44e41d050ae70ca0bd895f78d0,2018-12-24 15:00:00 UTC,heat,auto,614,630,630,OR,Redmond,0,False,False,False,Gas
5,d667da3526d79b5b1165e06da5ea6013ccfb3f13,2018-12-06 19:50:00 UTC,heat,hold,687,722,722,OR,Saint Helens,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350185,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-12-02 07:15:00 UTC,heat,hold,705,710,710,OR,Klamath Falls,29,True,False,False,Gas
350186,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2018-12-01 07:15:00 UTC,heat,hold,716,720,720,OR,Klamath Falls,29,True,False,False,Gas
350187,69bce0cead73bc1724ae942dad09e3393da90e97,2018-12-08 17:40:00 UTC,heat,hold,685,725,685,OR,Klamath Falls,40,False,False,False,Gas
350188,69bce0cead73bc1724ae942dad09e3393da90e97,2018-12-09 17:10:00 UTC,heat,hold,685,725,685,OR,Klamath Falls,40,False,False,False,Gas


In [169]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
dec_2018 = dec_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/dec", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
dec_2018_ave.to_csv("data/day/OR/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Load the December 2019 Oregon extract into a DataFrame.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2019-dec-day-OR.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(index=dec_2019.loc[is_outlier].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,876b9dfe5bf0540d52e634786e681978524b51a1,2019-12-31 15:30:00 UTC,heat,hold,661,714,670,OR,McMinnville,10,True,False,True,Electric
4,d0fd4191a6b97277a0e5ad601efac94c0b9b3bb6,2019-12-14 19:50:00 UTC,auto,hold,652,840,650,OR,Elkton,40,True,False,True,Electric
5,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2019-12-12 13:45:00 UTC,auto,hold,693,755,705,OR,Roseburg,90,False,False,False,Gas
6,3ba502441ef84b6c417047c5cbfb88e7fa1776c2,2019-12-03 14:30:00 UTC,auto,hold,698,765,705,OR,Roseburg,90,False,False,False,Gas
7,53a8afb9c2869da7d84a186b864ea3c571c2646c,2019-12-18 07:25:00 UTC,heat,hold,670,679,679,OR,Sheridan,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424212,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-12-25 07:05:00 UTC,heat,hold,712,720,720,OR,Klamath Falls,29,True,False,False,Gas
424213,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-12-04 07:05:00 UTC,heat,hold,713,720,720,OR,Klamath Falls,29,True,False,False,Gas
424214,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-12-25 19:55:00 UTC,heat,hold,717,720,720,OR,Klamath Falls,29,True,False,False,Gas
424215,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2019-12-25 19:25:00 UTC,heat,hold,719,720,720,OR,Klamath Falls,29,True,False,False,Gas


In [175]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
dec_2019 = dec_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/dec", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
dec_2019_ave.to_csv("data/day/OR/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Load the December 2020 Oregon extract into a DataFrame.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/OR-day/2020-dec-day-OR.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree Fahrenheit — confirm.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(index=dec_2020.loc[is_outlier].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,114702ca6a8da3f42947dd25a6d56a0e60c41541,2020-12-18 17:45:00 UTC,heat,hold,674,681,681,OR,Central point,55,False,False,False,Gas
1,0f6950236a3fc5e05032d841d5e3dd47e144ef02,2020-12-24 18:15:00 UTC,auto,hold,698,788,698,OR,PORTLAND,7,False,False,True,Electric
2,876b9dfe5bf0540d52e634786e681978524b51a1,2020-12-12 16:20:00 UTC,heat,auto,628,714,630,OR,McMinnville,10,True,False,True,Electric
3,114702ca6a8da3f42947dd25a6d56a0e60c41541,2020-12-09 19:05:00 UTC,heat,hold,666,671,671,OR,Central point,55,False,False,False,Gas
4,876b9dfe5bf0540d52e634786e681978524b51a1,2020-12-10 16:10:00 UTC,heat,hold,645,714,690,OR,McMinnville,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
383056,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-12-08 18:45:00 UTC,heat,hold,716,720,720,OR,Klamath Falls,29,True,False,False,Gas
383057,7fa4b9bccf19f52cac8c9a629651366fd8570a89,2020-12-13 19:35:00 UTC,heat,hold,699,720,720,OR,Klamath Falls,29,True,False,False,Gas
383058,69bce0cead73bc1724ae942dad09e3393da90e97,2020-12-27 18:30:00 UTC,heat,hold,683,725,690,OR,Klamath Falls,40,False,False,False,Gas
383059,69bce0cead73bc1724ae942dad09e3393da90e97,2020-12-27 18:40:00 UTC,heat,hold,690,725,690,OR,Klamath Falls,40,False,False,False,Gas


In [181]:
# Tag every row with its source year and month (both stored as strings) so
# they can be used as grouping keys when the readings are averaged.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Label the three temperature columns as averages. The rename happens before
# the groupby, so the values are still individual readings at this point; the
# "_ave" names only become accurate once the mean has been taken.
dec_2020 = dec_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True explicitly leaves out the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); recent
# pandas versions raise a TypeError instead of silently skipping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists
# first; otherwise a run on a fresh checkout fails here.
os.makedirs("data/day/OR/dec", exist_ok=True)

# index=True writes the group keys (held in the index after the groupby) as
# regular columns of the CSV.
dec_2020_ave.to_csv("data/day/OR/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [185]:
# Collect the names of the per-year average CSVs in the December folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file stable between runs and machines.
files = sorted(f for f in os.listdir("data/day/OR/dec/") if f.endswith(".csv"))

# files

In [186]:
# Read every per-year CSV for December, visiting the files in sorted name order
# so the combined row order is reproducible.
dec_frames = [pd.read_csv("data/day/OR/dec/" + file) for file in sorted(files)]

# Concatenate once instead of growing a DataFrame inside the loop: this avoids
# re-copying the accumulated rows on every iteration and avoids concatenating
# onto an empty DataFrame, whose dtype handling is deprecated in pandas.
# With no input files the result is an empty DataFrame, as before.
OR_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

OR_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,006b3b1d03de005725492594b7733c3ef67d7dfe,dec,2017,heat,auto,Oregon City,716.652778,757.222222,723.194444,20.0,False,False,False
1,00bc4f328673432b1fd2fff05c6a88a846f61605,dec,2017,heat,auto,Portland,656.181818,669.090909,668.590909,20.0,False,False,False
2,00bc4f328673432b1fd2fff05c6a88a846f61605,dec,2017,heat,hold,Portland,646.182160,658.383099,652.127700,20.0,False,False,False
3,03caad454a66062fc56b9128d6de2ca5ae4b7996,dec,2017,heat,auto,Portland,691.159533,691.027237,690.513619,5.0,False,False,False
4,03caad454a66062fc56b9128d6de2ca5ae4b7996,dec,2017,heat,hold,Portland,688.194444,690.000000,690.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
908,fe918247d6bdbd347613dd68f15c85893bb512da,dec,2020,heat,auto,White City,704.694444,745.277778,693.888889,57.0,False,False,False
909,fe918247d6bdbd347613dd68f15c85893bb512da,dec,2020,heat,hold,White City,714.272727,698.181818,698.181818,57.0,False,False,False
910,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,dec,2020,heat,auto,Beaverton,710.416667,740.000000,719.750000,5.0,False,False,False
911,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,dec,2020,heat,hold,Beaverton,701.941176,708.358289,708.181818,5.0,False,False,False


In [187]:
# to_csv does not create missing folders, so make sure the state output folder
# exists before writing the combined December file.
os.makedirs("Scraper_Output/State_Month_Day/OR", exist_ok=True)

# index=False: the row labels are only per-file positions and are not exported.
OR_dec.to_csv("Scraper_Output/State_Month_Day/OR/OR_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined per-month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the names of the combined per-month CSVs in the Oregon output folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file stable between runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/OR/") if f.endswith(".csv")
)

# files

In [189]:
# Read every combined per-month CSV for Oregon, visiting the files in sorted
# name order so the combined row order is reproducible.
month_frames = [
    pd.read_csv("Scraper_Output/State_Month_Day/OR/" + file) for file in sorted(files)
]

# Concatenate once instead of growing a DataFrame inside the loop: this avoids
# re-copying the accumulated rows on every iteration and avoids concatenating
# onto an empty DataFrame, whose dtype handling is deprecated in pandas.
# With no input files the result is an empty DataFrame, as before.
OR_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

OR_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,006b3b1d03de005725492594b7733c3ef67d7dfe,aug,2017,auto,auto,Oregon City,730.163462,742.846154,678.269231,20.0,False,False,False
1,00b089459452edc92a0469d6ea9d74767924f3f5,aug,2017,cool,hold,Beaverton,715.278226,774.619624,773.611559,25.0,True,False,False
2,052a8daf4686b3a7f4a807ef3239f57490eb40d2,aug,2017,auto,hold,Forest Grove,726.020833,735.000000,665.000000,10.0,False,False,False
3,0624dc8fec23932f314f9e0f97a6c7fb059446b2,aug,2017,auto,auto,Portland,710.452656,737.496536,616.678984,0.0,False,False,False
4,0624dc8fec23932f314f9e0f97a6c7fb059446b2,aug,2017,auto,hold,Portland,717.414830,758.788243,663.389446,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3876,fd8e322cdf93026aa75f2e65d3a8ffe6a4cdeb59,jun,2021,cool,hold,Roseburg,761.627907,756.250000,756.366279,20.0,False,False,True
3877,fe1baa9f35e9f8ec94a0cd7e1e93b43435a7ee8a,jun,2021,cool,hold,Albany,670.222222,773.777778,773.777778,120.0,False,False,False
3878,fe918247d6bdbd347613dd68f15c85893bb512da,jun,2021,cool,hold,White City,691.432836,679.552239,679.059701,57.0,False,False,False
3879,ff17c244fe42179fd802221bc9b6c177b3ad7d3e,jun,2021,cool,hold,Beaverton,712.991935,708.838710,708.766129,5.0,False,False,False


In [190]:
# Write the all-months Oregon table one level above the per-month files.
# The header row is written by default; the row labels are left out.
OR_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/OR_all_day.csv", index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery SQL
# queries: print the distinct ProvinceState values found in each monthly frame.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    # August and December only have data for 2017-2020.
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['OR']
Unique jan_2018: ['OR']
Unique jan_2019: ['OR']
Unique jan_2020: ['OR']
Unique jan_2021: ['OR']
Unique feb_2017: ['OR']
Unique feb_2018: ['OR']
Unique feb_2019: ['OR']
Unique feb_2020: ['OR']
Unique feb_2021: ['OR']
Unique jun_2017: ['OR']
Unique jun_2018: ['OR']
Unique jun_2019: ['OR']
Unique jun_2020: ['OR']
Unique jun_2021: ['OR']
Unique jul_2017: ['OR']
Unique jul_2018: ['OR']
Unique jul_2019: ['OR']
Unique jul_2020: ['OR']
Unique jul_2021: ['OR']
Unique aug_2017: ['OR']
Unique aug_2018: ['OR']
Unique aug_2019: ['OR']
Unique aug_2020: ['OR']
Unique dec_2017: ['OR']
Unique dec_2018: ['OR']
Unique dec_2019: ['OR']
Unique dec_2020: ['OR']
