# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 day-level readings for WY.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2017-jan-day-WY.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
jan_2017 = jan_2017.loc[~is_outlier].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-07T16:45:00Z,heat,hold,681,660,660,WY,Rock Springs,25,False,False,False,Gas
1,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-07T17:10:00Z,heat,hold,649,660,660,WY,Rock Springs,25,False,False,False,Gas
2,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-07T16:35:00Z,heat,hold,715,660,660,WY,Rock Springs,25,False,False,False,Gas
3,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-07T16:55:00Z,heat,hold,666,660,660,WY,Rock Springs,25,False,False,False,Gas
4,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-07T16:40:00Z,heat,hold,689,660,660,WY,Rock Springs,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5033,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-08T13:05:00Z,heat,hold,693,700,700,WY,Rock Springs,25,False,False,False,Gas
5034,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-15T15:40:00Z,heat,hold,703,700,700,WY,Rock Springs,25,False,False,False,Gas
5035,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-15T18:30:00Z,heat,hold,693,700,700,WY,Rock Springs,25,False,False,False,Gas
5036,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-01-08T13:45:00Z,heat,hold,832,700,700,WY,Rock Springs,25,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
jan_2017 = jan_2017.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [6]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
16913980a67e1faa9fb0fe491f8d36b4477f965a,Jan,2017,heat,auto,Cheyenne,653.80625,656.375,655.01875,10.0,False,False,False
1af93c740d9d394407a734476ba8bcaf41c2f2dd,Jan,2017,heat,auto,Casper,634.134986,652.181818,639.917355,30.0,True,False,False
1af93c740d9d394407a734476ba8bcaf41c2f2dd,Jan,2017,heat,hold,Casper,633.070886,650.293671,631.321519,30.0,True,False,False
31af9764717cc87207aca52dabacb0bb1e2b8b95,Jan,2017,heat,auto,Gillette,678.065768,740.769272,675.767116,35.0,False,False,False
31af9764717cc87207aca52dabacb0bb1e2b8b95,Jan,2017,heat,hold,Gillette,705.9,674.333333,656.8,35.0,False,False,False
44788b127929a7bbc4a79d2b17a215fd5460cdc1,Jan,2017,heat,auto,Cheyenne,728.082192,741.657534,740.958904,15.0,False,False,False
44788b127929a7bbc4a79d2b17a215fd5460cdc1,Jan,2017,heat,hold,Cheyenne,735.53125,741.0,741.0,15.0,False,False,False
7ab7401981ce316104ff89f1cea82a92a571929d,Jan,2017,heat,auto,Gillette,669.732919,670.186335,670.074534,40.0,False,False,False
7ab7401981ce316104ff89f1cea82a92a571929d,Jan,2017,heat,hold,Gillette,669.972477,671.12844,669.825688,40.0,False,False,False
9318c54b085c228db7076172efce878b8d324baa,Jan,2017,heat,auto,Wyarno,676.010309,679.082474,679.082474,15.0,False,False,False


In [7]:
# Save the January 2017 averages; index=True writes the group keys held in the index.
jan_2017_ave.to_csv(
    path_or_buf="data/day/WY/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the raw January 2018 day-level readings for WY.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2018-jan-day-WY.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
jan_2018 = jan_2018.loc[~is_outlier].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-01-27T13:50:00Z,heat,auto,609,660,660,WY,Evanston,0,False,False,False,Gas
1,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-01-02T13:30:00Z,heat,auto,633,660,660,WY,Evanston,0,False,False,False,Gas
2,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-01-26T15:20:00Z,heat,auto,656,660,660,WY,Evanston,0,False,False,False,Gas
3,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-01-26T16:15:00Z,heat,auto,660,660,660,WY,Evanston,0,False,False,False,Gas
4,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2018-01-07T17:35:00Z,heat,hold,687,660,660,WY,Casper,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24520,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-01-16T14:20:00Z,heat,auto,701,710,710,WY,Gillette,10,False,False,False,Gas
24521,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-01-16T15:00:00Z,heat,auto,709,710,710,WY,Gillette,10,False,False,False,Gas
24522,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-01-13T17:50:00Z,heat,auto,710,710,710,WY,Gillette,10,False,False,False,Gas
24523,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-01-16T14:05:00Z,heat,auto,702,710,710,WY,Gillette,10,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
jan_2018 = jan_2018.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [12]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Save the January 2018 averages; index=True writes the group keys held in the index.
jan_2018_ave.to_csv(
    path_or_buf="data/day/WY/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the raw January 2019 day-level readings for WY.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2019-jan-day-WY.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
jan_2019 = jan_2019.loc[~is_outlier].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5ec6580adf42b467f21b502112c59efb3530f1cc,2019-01-02T13:55:00Z,heat,hold,688,745,660,WY,Cheyenne,8,False,False,False,Gas
1,5ec6580adf42b467f21b502112c59efb3530f1cc,2019-01-02T13:55:00Z,heat,hold,688,745,660,WY,Cheyenne,8,False,False,False,Gas
2,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-01-06T17:30:00Z,heat,hold,673,660,660,WY,Casper,0,False,False,False,Gas
3,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-01-06T18:45:00Z,heat,hold,655,660,660,WY,Casper,0,False,False,False,Gas
4,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-01-06T19:10:00Z,heat,hold,656,660,660,WY,Casper,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30906,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-01-08T18:30:00Z,heat,hold,749,760,760,WY,Rock Springs,25,False,False,False,Gas
30907,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-01-08T19:55:00Z,heat,hold,763,760,760,WY,Rock Springs,25,False,False,False,Gas
30908,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-01-08T18:25:00Z,heat,hold,738,760,760,WY,Rock Springs,25,False,False,False,Gas
30909,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-01-08T19:40:00Z,heat,hold,758,760,760,WY,Rock Springs,25,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
jan_2019 = jan_2019.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [18]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Save the January 2019 averages; index=True writes the group keys held in the index.
jan_2019_ave.to_csv(
    path_or_buf="data/day/WY/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the raw January 2020 day-level readings for WY.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2020-jan-day-WY.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
jan_2020 = jan_2020.loc[~is_outlier].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-01-01T17:55:00Z,heat,hold,712,715,715,WY,Cheyenne,0,True,False,False,Gas
1,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-01-26T18:45:00Z,heat,hold,685,715,715,WY,Cheyenne,0,True,False,False,Gas
2,338d0307f8579900d1f54af0c7475fd0e3a6af35,2020-01-26T18:55:00Z,heat,hold,688,688,688,WY,Green River,0,False,False,False,Gas
3,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-01-01T17:15:00Z,heat,hold,705,715,715,WY,Cheyenne,0,True,False,False,Gas
4,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-01-01T17:20:00Z,heat,hold,706,715,715,WY,Cheyenne,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29966,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-01-12T18:50:00Z,heat,auto,755,750,760,WY,Rock Springs,25,False,False,False,Gas
29967,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-01-05T19:35:00Z,heat,auto,759,750,760,WY,Rock Springs,25,False,False,False,Gas
29968,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-01-05T18:50:00Z,heat,auto,750,750,760,WY,Rock Springs,25,False,False,False,Gas
29969,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-01-05T13:05:00Z,heat,auto,746,750,760,WY,Rock Springs,25,False,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
jan_2020 = jan_2020.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [24]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Save the January 2020 averages; index=True writes the group keys held in the index.
jan_2020_ave.to_csv(
    path_or_buf="data/day/WY/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the raw January 2021 day-level readings for WY.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2021-jan-day-WY.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
jan_2021 = jan_2021.loc[~is_outlier].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-01-11T14:25:00Z,heat,hold,686,732,628,WY,Green River,0,False,False,False,Gas
1,db3d168d1ad94da910a56cc7efd5a591dbe48ca3,2021-01-19T19:15:00Z,heat,hold,716,723,713,WY,Powell,10,True,False,False,Gas
2,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2021-01-27T16:05:00Z,heat,hold,679,699,699,WY,Cheyenne,30,False,False,False,Gas
3,ab20bcf291e58402ba42a43560f2f7f7ca72c776,2021-01-01T13:00:00Z,auto,hold,672,706,656,WY,Cheyenne,29,True,False,False,Gas
4,b68cd409bcc737b83dc2c52e21c19c9480638fd9,2021-01-25T18:45:00Z,heat,hold,654,779,668,WY,Gillette,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22426,31af9764717cc87207aca52dabacb0bb1e2b8b95,2021-01-13T15:35:00Z,heat,hold,710,710,710,WY,Gillette,35,False,False,False,Gas
22427,31af9764717cc87207aca52dabacb0bb1e2b8b95,2021-01-13T17:45:00Z,heat,hold,711,710,710,WY,Gillette,35,False,False,False,Gas
22428,31af9764717cc87207aca52dabacb0bb1e2b8b95,2021-01-13T14:30:00Z,heat,hold,712,710,710,WY,Gillette,35,False,False,False,Gas
22429,44788b127929a7bbc4a79d2b17a215fd5460cdc1,2021-01-19T13:40:00Z,heat,hold,713,710,710,WY,Cheyenne,15,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
jan_2021 = jan_2021.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [30]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Save the January 2021 averages; index=True writes the group keys held in the index.
jan_2021_ave.to_csv(
    path_or_buf="data/day/WY/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the CSV files in the January output folder.
# os.listdir returns names in arbitrary order, so sort them to make the row order
# of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/WY/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed January CSV, then stack them with a single concat.
# Collecting the frames first avoids re-copying the growing table on each iteration
# and avoids seeding the result with an empty DataFrame (recent pandas emits a
# FutureWarning for concatenation with empty entries).
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/WY/jan/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame as the loop did before.
WY_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

WY_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,16913980a67e1faa9fb0fe491f8d36b4477f965a,Jan,2017,heat,auto,Cheyenne,653.806250,656.375000,655.018750,10.0,False,False,False
1,1af93c740d9d394407a734476ba8bcaf41c2f2dd,Jan,2017,heat,auto,Casper,634.134986,652.181818,639.917355,30.0,True,False,False
2,1af93c740d9d394407a734476ba8bcaf41c2f2dd,Jan,2017,heat,hold,Casper,633.070886,650.293671,631.321519,30.0,True,False,False
3,31af9764717cc87207aca52dabacb0bb1e2b8b95,Jan,2017,heat,auto,Gillette,678.065768,740.769272,675.767116,35.0,False,False,False
4,31af9764717cc87207aca52dabacb0bb1e2b8b95,Jan,2017,heat,hold,Gillette,705.900000,674.333333,656.800000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32,e27714a971be3d2318c80f77a38f0635b4421172,Jan,2021,heat,hold,Rock Springs,631.766129,650.000000,631.000000,10.0,False,False,False
33,f045e632eb732cd0703e069e70aef34bb8b50ad0,Jan,2021,heat,hold,Evanston,691.615556,697.043333,697.043333,10.0,False,False,False
34,f3bd7952d2fbb216345ea8b272cbdee983b34ae6,Jan,2021,heat,hold,Casper,684.283688,700.120567,699.914894,40.0,False,False,False
35,f5be20356f7cafa6712707b496d91af1ddbdabe0,Jan,2021,auto,hold,Gillette,674.737681,718.111594,683.998551,10.0,True,False,False


In [34]:
# Write the combined January table; index=False omits the row index from the file.
WY_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WY/WY_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 day-level readings for WY.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2017-feb-day-WY.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
feb_2017 = feb_2017.loc[~is_outlier].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,16913980a67e1faa9fb0fe491f8d36b4477f965a,2017-02-09T18:45:00Z,heat,auto,653,665,659,WY,Cheyenne,10,False,False,False,Gas
14,2a7d4fd1db87baa60518a2b2f1f050805e889a0b,2017-02-01T19:25:00Z,heat,hold,684,760,706,WY,Trail Dr,35,False,False,False,Gas
29,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-02-26T14:45:00Z,auto,hold,743,783,724,WY,Trail Dr,35,False,False,False,Gas
32,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-02-25T19:55:00Z,auto,hold,744,796,744,WY,Trail Dr,35,False,False,False,Gas
53,1af93c740d9d394407a734476ba8bcaf41c2f2dd,2017-02-28T15:10:00Z,heat,auto,625,721,629,WY,Casper,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3097,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-02-23T13:40:00Z,heat,hold,754,760,760,WY,Rock Springs,25,False,False,False,Gas
3098,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-02-23T13:15:00Z,heat,hold,756,760,760,WY,Rock Springs,25,False,False,False,Gas
3099,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-02-23T15:15:00Z,heat,hold,757,760,760,WY,Rock Springs,25,False,False,False,Gas
3100,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-02-23T14:25:00Z,heat,hold,754,760,760,WY,Rock Springs,25,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells use "Jan" (capitalized) while this label is
# lowercase "feb" — confirm which form downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
feb_2017 = feb_2017.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [39]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Save the February 2017 averages; index=True writes the group keys held in the index.
feb_2017_ave.to_csv(
    path_or_buf="data/day/WY/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the raw February 2018 day-level readings for WY.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2018-feb-day-WY.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
feb_2018 = feb_2018.loc[~is_outlier].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2018-02-15T11:55:00Z,heat,hold,725,725,725,WY,Trail Dr,35,False,False,False,Gas
1,9dad86d4f2bedcb59aa71c38c6812466cb5395bc,2018-02-03T15:00:00Z,heat,hold,655,669,669,WY,Worland,5,False,False,False,Gas
3,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2018-02-15T11:40:00Z,heat,hold,720,725,725,WY,Trail Dr,35,False,False,False,Gas
4,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2018-02-21T14:50:00Z,heat,hold,707,715,715,WY,Trail Dr,35,False,False,False,Gas
5,f7cdfd4cc8a741382df3f72255f8368549f3d066,2018-02-11T18:50:00Z,heat,hold,655,653,653,WY,Laramie,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22094,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-02-03T15:35:00Z,heat,auto,759,760,760,WY,Rock Springs,25,False,False,False,Gas
22095,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-02-03T14:35:00Z,heat,auto,740,760,760,WY,Rock Springs,25,False,False,False,Gas
22096,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-02-03T15:15:00Z,heat,auto,758,760,760,WY,Rock Springs,25,False,False,False,Gas
22097,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-02-03T15:20:00Z,heat,auto,759,760,760,WY,Rock Springs,25,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells use "Jan" (capitalized) while this label is
# lowercase "feb" — confirm which form downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
feb_2018 = feb_2018.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [45]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Save the February 2018 averages; index=True writes the group keys held in the index.
feb_2018_ave.to_csv(
    path_or_buf="data/day/WY/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the raw February 2019 day-level readings for WY.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2019-feb-day-WY.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
feb_2019 = feb_2019.loc[~is_outlier].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,74345f495365ac5a65c267f410df97fac944a148,2019-02-27T14:00:00Z,heat,hold,598,650,603,WY,Casper,0,False,False,False,Gas
1,338d0307f8579900d1f54af0c7475fd0e3a6af35,2019-02-23T15:45:00Z,heat,hold,699,704,704,WY,Green River,0,False,False,False,Gas
2,6ab01ca06ae6a5674643e112395b2c684aef14f9,2019-02-13T14:25:00Z,heat,hold,715,812,677,WY,Cheyenne,55,False,False,False,Gas
4,b68cd409bcc737b83dc2c52e21c19c9480638fd9,2019-02-27T19:00:00Z,heat,auto,663,765,652,WY,Gillette,7,False,False,False,Gas
5,b68cd409bcc737b83dc2c52e21c19c9480638fd9,2019-02-27T16:20:00Z,heat,auto,604,790,622,WY,Gillette,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21971,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-02-24T13:10:00Z,heat,auto,754,750,760,WY,Rock Springs,25,False,False,False,Gas
21972,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-02-20T18:50:00Z,heat,hold,759,760,760,WY,Rock Springs,25,False,False,False,Gas
21973,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-02-24T19:30:00Z,heat,auto,759,750,760,WY,Rock Springs,25,False,False,False,Gas
21974,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2019-02-20T18:05:00Z,heat,hold,755,760,760,WY,Rock Springs,25,False,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells use "Jan" (capitalized) while this label is
# lowercase "feb" — confirm which form downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
feb_2019 = feb_2019.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [51]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Save the February 2019 averages; index=True writes the group keys held in the index.
feb_2019_ave.to_csv(
    path_or_buf="data/day/WY/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the raw February 2020 day-level readings for WY.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2020-feb-day-WY.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
feb_2020 = feb_2020.loc[~is_outlier].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,338d0307f8579900d1f54af0c7475fd0e3a6af35,2020-02-08T18:50:00Z,heat,hold,690,650,648,WY,Green River,0,False,False,False,Gas
1,ab20bcf291e58402ba42a43560f2f7f7ca72c776,2020-02-22T15:50:00Z,auto,hold,684,783,733,WY,Cheyenne,29,True,False,False,Gas
2,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-02-13T16:10:00Z,heat,hold,720,745,745,WY,Cheyenne,0,True,False,False,Gas
3,ab20bcf291e58402ba42a43560f2f7f7ca72c776,2020-02-08T15:45:00Z,auto,hold,693,753,703,WY,Cheyenne,29,True,False,False,Gas
4,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-02-15T15:20:00Z,heat,hold,673,745,745,WY,Cheyenne,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25614,f045e632eb732cd0703e069e70aef34bb8b50ad0,2020-02-01T19:50:00Z,heat,hold,706,705,705,WY,Evanston,10,False,False,False,Gas
25615,f045e632eb732cd0703e069e70aef34bb8b50ad0,2020-02-01T19:55:00Z,heat,hold,706,705,705,WY,Evanston,10,False,False,False,Gas
25616,f045e632eb732cd0703e069e70aef34bb8b50ad0,2020-02-09T19:50:00Z,heat,hold,700,705,705,WY,Evanston,10,False,False,False,Gas
25617,f045e632eb732cd0703e069e70aef34bb8b50ad0,2020-02-09T19:55:00Z,heat,hold,705,705,705,WY,Evanston,10,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells use "Jan" (capitalized) while this label is
# lowercase "feb" — confirm which form downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
feb_2020 = feb_2020.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [57]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Save the February 2020 averages; index=True writes the group keys held in the index.
feb_2020_ave.to_csv(
    path_or_buf="data/day/WY/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the raw February 2021 day-level readings for WY.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2021-feb-day-WY.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column (TemperatureExpectedCool
# or TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing value fail
# every comparison and are therefore kept.
expected_temps = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
# .copy() yields an independent frame so later column assignments do not warn.
feb_2021 = feb_2021.loc[~is_outlier].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d47cc2218ecd2b265d190fe1eeb96582e0255951,2021-02-19T16:00:00Z,auto,hold,712,778,692,WY,Cheyenne,0,False,False,False,Gas
1,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-02-07T19:20:00Z,heat,hold,684,682,682,WY,Green River,0,False,False,False,Gas
2,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-02-04T14:25:00Z,heat,hold,696,745,625,WY,Green River,0,False,False,False,Gas
3,3f04cb50110e414b14944a34de26133900b1d5ef,2021-02-13T19:55:00Z,heat,hold,618,739,749,WY,Cheyenne,10,True,False,False,Gas
4,3f04cb50110e414b14944a34de26133900b1d5ef,2021-02-14T16:30:00Z,heat,hold,603,699,699,WY,Cheyenne,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19504,d47cc2218ecd2b265d190fe1eeb96582e0255951,2021-02-05T15:55:00Z,auto,hold,693,750,700,WY,Cheyenne,0,False,False,False,Gas
19505,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2021-02-28T15:45:00Z,heat,hold,700,700,700,WY,Cheyenne,30,False,False,False,Gas
19506,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2021-02-13T15:35:00Z,heat,hold,699,700,700,WY,Cheyenne,30,False,False,False,Gas
19507,d47cc2218ecd2b265d190fe1eeb96582e0255951,2021-02-24T16:05:00Z,auto,hold,690,750,700,WY,Cheyenne,0,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells use "Jan" (capitalized) while this label is
# lowercase "feb" — confirm which form downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated output is labelled.
feb_2021 = feb_2021.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [63]:
# Mean of each numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True leaves the text columns (e.g. date_time, ProvinceState) out of the
# mean; since pandas 2.0 the default is numeric_only=False, which raises TypeError on them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Save the February 2021 averages; index=True writes the group keys held in the index.
feb_2021_ave.to_csv(
    path_or_buf="data/day/WY/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the CSV files in the February output folder.
# os.listdir returns names in arbitrary order, so sort them to make the row order
# of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/WY/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed February CSV, then stack them with a single concat.
# Collecting the frames first avoids re-copying the growing table on each iteration
# and avoids seeding the result with an empty DataFrame (recent pandas emits a
# FutureWarning for concatenation with empty entries).
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/WY/feb/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame as the loop did before.
WY_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

WY_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,16913980a67e1faa9fb0fe491f8d36b4477f965a,feb,2017,heat,auto,Cheyenne,639.250000,660.208333,659.958333,10.0,False,False,False
1,1af93c740d9d394407a734476ba8bcaf41c2f2dd,feb,2017,heat,auto,Casper,628.057432,661.942568,638.635135,30.0,True,False,False
2,1af93c740d9d394407a734476ba8bcaf41c2f2dd,feb,2017,heat,hold,Casper,631.190349,651.721180,628.621984,30.0,True,False,False
3,2a7d4fd1db87baa60518a2b2f1f050805e889a0b,feb,2017,heat,hold,Trail Dr,685.250000,677.500000,649.000000,35.0,False,False,False
4,31af9764717cc87207aca52dabacb0bb1e2b8b95,feb,2017,heat,auto,Gillette,676.901961,720.254902,679.725490,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
28,d47cc2218ecd2b265d190fe1eeb96582e0255951,feb,2021,auto,hold,Cheyenne,701.183288,763.951482,706.053908,0.0,False,False,False
29,d975b1ad1272e42f9eb089583ef55bb8891c93c8,feb,2021,heat,hold,Cheyenne,695.966292,702.247191,702.247191,30.0,False,False,False
30,db3d168d1ad94da910a56cc7efd5a591dbe48ca3,feb,2021,heat,hold,Powell,725.750000,729.916667,729.687500,10.0,True,False,False
31,f045e632eb732cd0703e069e70aef34bb8b50ad0,feb,2021,heat,hold,Evanston,694.592992,701.231806,701.231806,10.0,False,False,False


In [67]:
# Write the combined February file; to_csv does not create missing folders,
# so make sure the output folder exists first.
os.makedirs("Scraper_Output/State_Month_Day/WY", exist_ok=True)
WY_feb.to_csv("Scraper_Output/State_Month_Day/WY/WY_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 day file for WY.
jun_2017 = pd.read_csv(Path("../data_large/WY-day/2017-jun-day-WY.csv"))

In [69]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jun_2017.drop(
    index=jun_2017.index[
        jun_2017['TemperatureExpectedCool'].ge(850)
        | jun_2017['TemperatureExpectedHeat'].ge(850)
        | jun_2017['TemperatureExpectedCool'].le(600)
        | jun_2017['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a7d4fd1db87baa60518a2b2f1f050805e889a0b,2017-06-14T18:50:00Z,heat,hold,707,722,682,WY,Trail Dr,35,False,False,False,Gas
1,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-06-13T14:50:00Z,auto,hold,710,757,707,WY,Trail Dr,35,False,False,False,Gas
2,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-06-14T18:50:00Z,auto,hold,721,786,678,WY,Trail Dr,35,False,False,False,Gas
3,2cc2736dfecd12ede0f334d504dacc109edfa726,2017-06-26T13:20:00Z,auto,auto,688,685,635,WY,Laramie,10,False,False,False,Gas
4,2cc2736dfecd12ede0f334d504dacc109edfa726,2017-06-26T15:15:00Z,auto,hold,695,675,625,WY,Laramie,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-06-04T19:25:00Z,cool,hold,776,760,760,WY,Rock Springs,25,False,False,False,Gas
4996,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-06-04T18:15:00Z,cool,hold,750,760,760,WY,Rock Springs,25,False,False,False,Gas
4997,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-06-04T18:25:00Z,cool,hold,754,760,760,WY,Rock Springs,25,False,False,False,Gas
4998,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2017-06-04T19:20:00Z,cool,hold,777,760,760,WY,Rock Springs,25,False,False,False,Gas


In [70]:
# Label every row with its year and month (as strings) in a single assign call.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jun_2017 = jun_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jun", exist_ok=True)
jun_2017_ave.to_csv("data/day/WY/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the June 2018 day file for WY.
jun_2018 = pd.read_csv(Path("../data_large/WY-day/2018-jun-day-WY.csv"))

In [75]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jun_2018.drop(
    index=jun_2018.index[
        jun_2018['TemperatureExpectedCool'].ge(850)
        | jun_2018['TemperatureExpectedHeat'].ge(850)
        | jun_2018['TemperatureExpectedCool'].le(600)
        | jun_2018['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,338d0307f8579900d1f54af0c7475fd0e3a6af35,2018-06-17T16:50:00Z,cool,hold,715,713,713,WY,Green River,0,False,False,False,Gas
1,338d0307f8579900d1f54af0c7475fd0e3a6af35,2018-06-17T19:20:00Z,cool,hold,720,713,713,WY,Green River,0,False,False,False,Gas
2,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2018-06-03T13:20:00Z,cool,hold,696,755,755,WY,Casper,10,False,False,False,Gas
3,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2018-06-03T14:45:00Z,cool,hold,680,755,755,WY,Casper,10,False,False,False,Gas
4,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2018-06-03T13:10:00Z,cool,hold,694,755,755,WY,Casper,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18995,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-06-08T13:35:00Z,heat,auto,735,750,760,WY,Rock Springs,25,False,False,False,Gas
18996,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-06-08T17:25:00Z,heat,auto,771,750,760,WY,Rock Springs,25,False,False,False,Gas
18997,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-06-27T14:50:00Z,heat,auto,758,750,760,WY,Rock Springs,25,False,False,False,Gas
18998,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-06-14T18:00:00Z,cool,hold,756,760,760,WY,Rock Springs,25,False,False,False,Gas


In [76]:
# Label every row with its year and month (as strings) in a single assign call.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jun_2018 = jun_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jun", exist_ok=True)
jun_2018_ave.to_csv("data/day/WY/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the June 2019 day file for WY.
jun_2019 = pd.read_csv(Path("../data_large/WY-day/2019-jun-day-WY.csv"))

In [81]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jun_2019.drop(
    index=jun_2019.index[
        jun_2019['TemperatureExpectedCool'].ge(850)
        | jun_2019['TemperatureExpectedHeat'].ge(850)
        | jun_2019['TemperatureExpectedCool'].le(600)
        | jun_2019['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2019-06-29T15:40:00Z,cool,hold,720,722,722,WY,Casper,10,False,False,False,Gas
1,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2019-06-28T12:40:00Z,cool,auto,708,727,677,WY,Cheyenne,30,False,False,False,Gas
2,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2019-06-30T14:20:00Z,cool,auto,678,750,677,WY,Cheyenne,30,False,False,False,Gas
3,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2019-06-02T18:25:00Z,cool,hold,713,724,687,WY,Casper,10,False,False,False,Gas
4,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2019-06-29T16:25:00Z,cool,hold,728,722,722,WY,Casper,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26966,a9149145810c584aacd22f8c977f235117275828,2019-06-30T14:10:00Z,cool,hold,771,760,760,WY,Cheyenne,0,False,False,False,Gas
26967,a9149145810c584aacd22f8c977f235117275828,2019-06-30T14:00:00Z,cool,hold,770,760,760,WY,Cheyenne,0,False,False,False,Gas
26968,a9149145810c584aacd22f8c977f235117275828,2019-06-30T14:05:00Z,cool,hold,771,760,760,WY,Cheyenne,0,False,False,False,Gas
26969,a9149145810c584aacd22f8c977f235117275828,2019-06-30T14:20:00Z,heat,hold,772,760,760,WY,Cheyenne,0,False,False,False,Gas


In [82]:
# Label every row with its year and month (as strings) in a single assign call.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jun_2019 = jun_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jun", exist_ok=True)
jun_2019_ave.to_csv("data/day/WY/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the June 2020 day file for WY.
jun_2020 = pd.read_csv(Path("../data_large/WY-day/2020-jun-day-WY.csv"))

In [87]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jun_2020.drop(
    index=jun_2020.index[
        jun_2020['TemperatureExpectedCool'].ge(850)
        | jun_2020['TemperatureExpectedHeat'].ge(850)
        | jun_2020['TemperatureExpectedCool'].le(600)
        | jun_2020['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,338d0307f8579900d1f54af0c7475fd0e3a6af35,2020-06-15 12:45:00 UTC,cool,hold,710,727,727,WY,Green River,0,False,False,False,Gas
2,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-06-30 18:25:00 UTC,cool,hold,715,732,732,WY,Cheyenne,0,True,False,False,Gas
4,338d0307f8579900d1f54af0c7475fd0e3a6af35,2020-06-15 12:50:00 UTC,cool,hold,712,727,727,WY,Green River,0,False,False,False,Gas
5,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-06-15 14:20:00 UTC,heat,hold,716,715,715,WY,Cheyenne,0,True,False,False,Gas
7,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2020-06-06 16:10:00 UTC,cool,auto,734,722,722,WY,Casper,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29330,26956f7054af3cbd1c6945b87a40be93f68123bb,2020-06-03 15:35:00 UTC,auto,auto,707,780,710,WY,Gillette,10,True,False,False,Gas
29331,26956f7054af3cbd1c6945b87a40be93f68123bb,2020-06-05 17:25:00 UTC,auto,auto,712,760,710,WY,Gillette,10,True,False,False,Gas
29332,26956f7054af3cbd1c6945b87a40be93f68123bb,2020-06-03 14:50:00 UTC,auto,auto,704,780,710,WY,Gillette,10,True,False,False,Gas
29333,26956f7054af3cbd1c6945b87a40be93f68123bb,2020-06-03 16:30:00 UTC,auto,auto,718,780,710,WY,Gillette,10,True,False,False,Gas


In [88]:
# Label every row with its year and month (as strings) in a single assign call.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jun_2020 = jun_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jun", exist_ok=True)
jun_2020_ave.to_csv("data/day/WY/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the June 2021 day file for WY.
jun_2021 = pd.read_csv(Path("../data_large/WY-day/2021-jun-day-WY.csv"))

In [93]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jun_2021.drop(
    index=jun_2021.index[
        jun_2021['TemperatureExpectedCool'].ge(850)
        | jun_2021['TemperatureExpectedHeat'].ge(850)
        | jun_2021['TemperatureExpectedCool'].le(600)
        | jun_2021['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f04cb50110e414b14944a34de26133900b1d5ef,2021-06-06T19:35:00Z,cool,hold,693,609,619,WY,Cheyenne,10,True,False,False,Gas
1,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-06-11T13:05:00Z,heat,hold,716,678,678,WY,Green River,0,False,False,False,Gas
2,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-06-11T13:15:00Z,heat,hold,708,678,678,WY,Green River,0,False,False,False,Gas
3,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-06-02T13:20:00Z,cool,hold,736,736,736,WY,Green River,0,False,False,False,Gas
4,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-06-01T13:25:00Z,cool,hold,723,756,612,WY,Green River,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27035,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-06-04T09:30:00Z,cool,hold,742,760,760,WY,Rock Springs,25,False,False,False,Gas
27036,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-06-03T19:15:00Z,cool,hold,764,760,760,WY,Rock Springs,25,False,False,False,Gas
27037,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-06-18T19:05:00Z,cool,hold,785,760,760,WY,Rock Springs,25,False,False,False,Gas
27038,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-06-19T12:35:00Z,cool,hold,741,760,760,WY,Rock Springs,25,False,False,False,Gas


In [94]:
# Label every row with its year and month (as strings) in a single assign call.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jun_2021 = jun_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jun", exist_ok=True)
jun_2021_ave.to_csv("data/day/WY/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files
1. Read in the yearly aggregate CSV files from the state's month folder
2. Concatenate them and export as one combined CSV

In [98]:
# Collect the CSV file names in this month's folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/WY/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies all earlier rows on each pass, and concatenating onto an empty
# DataFrame is deprecated in recent pandas versions.
WY_jun_frames = [pd.read_csv("data/day/WY/jun/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when no files were found.
WY_jun = pd.concat(WY_jun_frames) if WY_jun_frames else pd.DataFrame()

WY_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,16913980a67e1faa9fb0fe491f8d36b4477f965a,jun,2017,cool,hold,Cheyenne,741.541667,740.000000,740.000000,10.0,False,False,False
1,2a7d4fd1db87baa60518a2b2f1f050805e889a0b,jun,2017,heat,auto,Trail Dr,710.090909,710.000000,710.000000,35.0,False,False,False
2,2a7d4fd1db87baa60518a2b2f1f050805e889a0b,jun,2017,heat,hold,Trail Dr,709.542373,710.203390,709.525424,35.0,False,False,False
3,2cc2736dfecd12ede0f334d504dacc109edfa726,jun,2017,auto,auto,Laramie,687.285714,685.000000,635.000000,10.0,False,False,False
4,2cc2736dfecd12ede0f334d504dacc109edfa726,jun,2017,auto,hold,Laramie,692.318182,675.772727,625.772727,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
39,f045e632eb732cd0703e069e70aef34bb8b50ad0,jun,2021,heat,hold,Evanston,678.679245,656.779874,656.779874,10.0,False,False,False
40,f3bd7952d2fbb216345ea8b272cbdee983b34ae6,jun,2021,cool,hold,Casper,724.198529,721.727941,721.507353,40.0,False,False,False
41,f5be20356f7cafa6712707b496d91af1ddbdabe0,jun,2021,auto,hold,Gillette,694.654762,705.000000,675.000000,10.0,True,False,False
42,f5be20356f7cafa6712707b496d91af1ddbdabe0,jun,2021,cool,hold,Gillette,695.628968,705.000000,693.666667,10.0,True,False,False


In [100]:
# Write the combined June file; to_csv does not create missing folders,
# so make sure the output folder exists first.
os.makedirs("Scraper_Output/State_Month_Day/WY", exist_ok=True)
WY_jun.to_csv("Scraper_Output/State_Month_Day/WY/WY_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 day file for WY.
jul_2017 = pd.read_csv(Path("../data_large/WY-day/2017-jul-day-WY.csv"))

In [102]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jul_2017.drop(
    index=jul_2017.index[
        jul_2017['TemperatureExpectedCool'].ge(850)
        | jul_2017['TemperatureExpectedHeat'].ge(850)
        | jul_2017['TemperatureExpectedCool'].le(600)
        | jul_2017['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
112,9dad86d4f2bedcb59aa71c38c6812466cb5395bc,2017-07-29T19:05:00Z,auto,hold,707,725,645,WY,Worland,5,False,False,False,Gas
205,1af93c740d9d394407a734476ba8bcaf41c2f2dd,2017-07-14T15:25:00Z,cool,hold,739,766,646,WY,Casper,30,True,False,False,Gas
206,1af93c740d9d394407a734476ba8bcaf41c2f2dd,2017-07-09T14:55:00Z,cool,hold,725,764,644,WY,Casper,30,True,False,False,Gas
207,1af93c740d9d394407a734476ba8bcaf41c2f2dd,2017-07-02T17:10:00Z,cool,auto,678,680,654,WY,Casper,30,True,False,False,Gas
208,1af93c740d9d394407a734476ba8bcaf41c2f2dd,2017-07-21T15:25:00Z,cool,hold,711,759,654,WY,Casper,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10705,bf6d438eb76928160901cfa1eab34d8634017401,2017-07-15T13:40:00Z,cool,auto,743,740,760,WY,Casper,40,False,False,False,Gas
10706,bf6d438eb76928160901cfa1eab34d8634017401,2017-07-15T15:40:00Z,cool,auto,743,780,760,WY,Casper,40,False,False,False,Gas
10707,bf6d438eb76928160901cfa1eab34d8634017401,2017-07-15T15:55:00Z,cool,auto,747,820,760,WY,Casper,40,False,False,False,Gas
10708,bf6d438eb76928160901cfa1eab34d8634017401,2017-07-15T14:00:00Z,cool,auto,734,740,760,WY,Casper,40,False,False,False,Gas


In [103]:
# Label every row with its year and month (as strings) in a single assign call.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jul_2017 = jul_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jul", exist_ok=True)
jul_2017_ave.to_csv("data/day/WY/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the July 2018 day file for WY.
jul_2018 = pd.read_csv(Path("../data_large/WY-day/2018-jul-day-WY.csv"))

In [108]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jul_2018.drop(
    index=jul_2018.index[
        jul_2018['TemperatureExpectedCool'].ge(850)
        | jul_2018['TemperatureExpectedHeat'].ge(850)
        | jul_2018['TemperatureExpectedCool'].le(600)
        | jul_2018['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,338d0307f8579900d1f54af0c7475fd0e3a6af35,2018-07-22T15:25:00Z,cool,hold,725,683,683,WY,Green River,0,False,False,False,Gas
1,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2018-07-25T13:55:00Z,cool,hold,707,775,775,WY,Cheyenne,30,False,False,False,Gas
2,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2018-07-25T12:20:00Z,cool,hold,707,775,775,WY,Cheyenne,30,False,False,False,Gas
3,338d0307f8579900d1f54af0c7475fd0e3a6af35,2018-07-22T17:20:00Z,cool,hold,680,683,683,WY,Green River,0,False,False,False,Gas
4,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2018-07-26T12:45:00Z,cool,hold,708,715,715,WY,Cheyenne,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22108,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-07-31T13:45:00Z,cool,hold,709,710,710,WY,Gillette,10,False,False,False,Gas
22109,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-07-31T12:50:00Z,cool,hold,707,710,710,WY,Gillette,10,False,False,False,Gas
22110,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-07-31T13:00:00Z,cool,hold,709,710,710,WY,Gillette,10,False,False,False,Gas
22111,5c3385a5b08903ef32083aacfc000fe752b78fdc,2018-07-31T11:50:00Z,cool,hold,700,710,710,WY,Gillette,10,False,False,False,Gas


In [109]:
# Label every row with its year and month (as strings) in a single assign call.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jul_2018 = jul_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jul", exist_ok=True)
jul_2018_ave.to_csv("data/day/WY/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the July 2019 day file for WY.
jul_2019 = pd.read_csv(Path("../data_large/WY-day/2019-jul-day-WY.csv"))

In [114]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jul_2019.drop(
    index=jul_2019.index[
        jul_2019['TemperatureExpectedCool'].ge(850)
        | jul_2019['TemperatureExpectedHeat'].ge(850)
        | jul_2019['TemperatureExpectedCool'].le(600)
        | jul_2019['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,338d0307f8579900d1f54af0c7475fd0e3a6af35,2019-07-28T17:50:00Z,cool,hold,741,726,726,WY,Green River,0,False,False,False,Gas
1,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2019-07-14T15:25:00Z,cool,auto,680,750,677,WY,Cheyenne,30,False,False,False,Gas
2,338d0307f8579900d1f54af0c7475fd0e3a6af35,2019-07-07T16:40:00Z,cool,hold,715,716,716,WY,Green River,0,False,False,False,Gas
3,338d0307f8579900d1f54af0c7475fd0e3a6af35,2019-07-07T15:20:00Z,cool,hold,725,716,716,WY,Green River,0,False,False,False,Gas
4,338d0307f8579900d1f54af0c7475fd0e3a6af35,2019-07-28T18:05:00Z,cool,hold,735,726,726,WY,Green River,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26550,44788b127929a7bbc4a79d2b17a215fd5460cdc1,2019-07-14T18:50:00Z,cool,hold,717,710,710,WY,Cheyenne,15,False,False,False,Gas
26551,44788b127929a7bbc4a79d2b17a215fd5460cdc1,2019-07-14T19:55:00Z,cool,hold,712,710,710,WY,Cheyenne,15,False,False,False,Gas
26552,44788b127929a7bbc4a79d2b17a215fd5460cdc1,2019-07-14T18:55:00Z,cool,hold,716,710,710,WY,Cheyenne,15,False,False,False,Gas
26553,44788b127929a7bbc4a79d2b17a215fd5460cdc1,2019-07-14T18:35:00Z,cool,hold,709,710,710,WY,Cheyenne,15,False,False,False,Gas


In [115]:
# Label every row with its year and month (as strings) in a single assign call.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jul_2019 = jul_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jul", exist_ok=True)
jul_2019_ave.to_csv("data/day/WY/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the July 2020 day file for WY.
jul_2020 = pd.read_csv(Path("../data_large/WY-day/2020-jul-day-WY.csv"))

In [120]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jul_2020.drop(
    index=jul_2020.index[
        jul_2020['TemperatureExpectedCool'].ge(850)
        | jul_2020['TemperatureExpectedHeat'].ge(850)
        | jul_2020['TemperatureExpectedCool'].le(600)
        | jul_2020['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-07-04T18:15:00Z,cool,hold,706,692,692,WY,Cheyenne,0,True,False,False,Gas
3,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-07-08T19:20:00Z,cool,hold,728,682,682,WY,Cheyenne,0,True,False,False,Gas
5,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-07-04T18:05:00Z,cool,hold,709,692,692,WY,Cheyenne,0,True,False,False,Gas
10,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-07-08T19:30:00Z,cool,hold,723,682,682,WY,Cheyenne,0,True,False,False,Gas
12,4bbb3af3bf9fa9aad1b6506c83e556860e848321,2020-07-06T17:35:00Z,cool,auto,694,680,662,WY,Cheyenne,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28532,8f3774e0edc69b802bb139e6708e033261622da5,2020-07-07T13:40:00Z,cool,hold,740,760,760,WY,Casper,10,True,False,False,Gas
28533,8f3774e0edc69b802bb139e6708e033261622da5,2020-07-07T16:20:00Z,cool,hold,758,760,760,WY,Casper,10,True,False,False,Gas
28534,8f3774e0edc69b802bb139e6708e033261622da5,2020-07-30T12:35:00Z,cool,hold,724,760,760,WY,Casper,10,True,False,False,Gas
28535,8f3774e0edc69b802bb139e6708e033261622da5,2020-07-10T18:50:00Z,cool,hold,758,760,760,WY,Casper,10,True,False,False,Gas


In [121]:
# Label every row with its year and month (as strings) in a single assign call.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jul_2020 = jul_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jul", exist_ok=True)
jul_2020_ave.to_csv("data/day/WY/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the July 2021 day file for WY.
jul_2021 = pd.read_csv(Path("../data_large/WY-day/2021-jul-day-WY.csv"))

In [126]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600,
# using one combined mask and a single in-place drop.
jul_2021.drop(
    index=jul_2021.index[
        jul_2021['TemperatureExpectedCool'].ge(850)
        | jul_2021['TemperatureExpectedHeat'].ge(850)
        | jul_2021['TemperatureExpectedHeat'].le(600)
        | jul_2021['TemperatureExpectedCool'].le(600)
    ],
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f04cb50110e414b14944a34de26133900b1d5ef,2021-07-11T19:10:00Z,cool,hold,681,609,619,WY,Cheyenne,10,True,False,False,Gas
1,db3d168d1ad94da910a56cc7efd5a591dbe48ca3,2021-07-15T15:00:00Z,auto,hold,739,753,683,WY,Powell,10,True,False,False,Gas
2,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-07-04T17:55:00Z,cool,hold,719,718,718,WY,Green River,0,False,False,False,Gas
3,245aed2447802fca622bd7dc725d73c388f7a0a0,2021-07-14T15:05:00Z,cool,hold,732,768,745,WY,Cheyenne,15,False,False,False,Gas
4,338d0307f8579900d1f54af0c7475fd0e3a6af35,2021-07-24T19:15:00Z,cool,hold,733,718,718,WY,Green River,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22437,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-07-08T11:20:00Z,cool,hold,732,760,760,WY,Rock Springs,25,False,False,False,Gas
22438,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-07-08T11:30:00Z,cool,hold,732,760,760,WY,Rock Springs,25,False,False,False,Gas
22439,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-07-08T14:20:00Z,cool,hold,741,760,760,WY,Rock Springs,25,False,False,False,Gas
22440,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2021-07-08T12:25:00Z,cool,hold,732,760,760,WY,Rock Springs,25,False,False,False,Gas


In [127]:
# Label every row with its year and month (as strings) in a single assign call.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
jul_2021 = jul_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group.
# numeric_only=True is explicit: pandas >= 2.0 no longer silently drops the
# non-numeric columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# and raises TypeError when asked to average them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Export CSV file (index=True keeps the group keys, which live in the index).
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/WY/jul", exist_ok=True)
jul_2021_ave.to_csv("data/day/WY/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files
1. Read in the yearly aggregate CSV files from the state's month folder
2. Concatenate them and export as one combined CSV

In [131]:
# Collect the CSV file names in this month's folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/WY/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies all earlier rows on each pass, and concatenating onto an empty
# DataFrame is deprecated in recent pandas versions.
WY_jul_frames = [pd.read_csv("data/day/WY/jul/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when no files were found.
WY_jul = pd.concat(WY_jul_frames) if WY_jul_frames else pd.DataFrame()

WY_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,16913980a67e1faa9fb0fe491f8d36b4477f965a,jul,2017,cool,hold,Cheyenne,748.319372,749.424084,749.424084,10.0,False,False,False
1,1af93c740d9d394407a734476ba8bcaf41c2f2dd,jul,2017,cool,auto,Casper,706.038339,779.357827,649.124601,30.0,True,False,False
2,1af93c740d9d394407a734476ba8bcaf41c2f2dd,jul,2017,cool,hold,Casper,714.810345,715.681034,711.810345,30.0,True,False,False
3,2cc2736dfecd12ede0f334d504dacc109edfa726,jul,2017,auto,auto,Laramie,672.204918,685.573770,623.114754,10.0,False,False,False
4,2cc2736dfecd12ede0f334d504dacc109edfa726,jul,2017,auto,hold,Laramie,694.487179,705.474359,655.474359,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32,e27714a971be3d2318c80f77a38f0635b4421172,jul,2021,heat,hold,Rock Springs,690.029668,650.000000,621.000000,10.0,False,False,False
33,f045e632eb732cd0703e069e70aef34bb8b50ad0,jul,2021,cool,hold,Evanston,734.583333,736.879630,736.879630,10.0,False,False,False
34,f3bd7952d2fbb216345ea8b272cbdee983b34ae6,jul,2021,cool,hold,Casper,721.423469,720.076531,720.076531,40.0,False,False,False
35,f7cdfd4cc8a741382df3f72255f8368549f3d066,jul,2021,auto,hold,Laramie,785.666667,770.000000,710.000000,35.0,False,False,False


In [133]:
# Write the combined July frame to disk with a header row and without the index
WY_jul.to_csv(
    "Scraper_Output/State_Month_Day/WY/WY_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August day-level CSV for WY
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2017-aug-day-WY.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2017['TemperatureExpectedCool']
expected_heat = aug_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
aug_2017.drop(aug_2017.loc[is_outlier].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2cc2736dfecd12ede0f334d504dacc109edfa726,2017-08-18T16:00:00Z,cool,hold,702,717,674,WY,Laramie,10,False,False,False,Gas
1,2cc2736dfecd12ede0f334d504dacc109edfa726,2017-08-26T13:05:00Z,cool,hold,691,715,677,WY,Laramie,10,False,False,False,Gas
2,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-01T12:40:00Z,cool,hold,724,763,763,WY,Mills,5,False,False,False,Gas
3,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-21T16:30:00Z,cool,hold,730,751,751,WY,Mills,5,False,False,False,Gas
4,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-10T12:30:00Z,cool,hold,715,746,746,WY,Mills,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12444,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-30T15:50:00Z,cool,auto,734,780,760,WY,Mills,5,False,False,False,Gas
12445,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-31T16:50:00Z,cool,auto,763,780,760,WY,Mills,5,False,False,False,Gas
12446,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-30T14:00:00Z,cool,auto,734,780,760,WY,Mills,5,False,False,False,Gas
12447,8d5e560068447e6cd095d1a35c4435a1fbf49342,2017-08-31T19:35:00Z,cool,auto,783,780,760,WY,Mills,5,False,False,False,Gas


In [136]:
# Tag every row with its year and month (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
aug_2017 = aug_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
aug_2017_ave = aug_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Save the August 2017 averages; index=True writes the group keys out as columns
aug_2017_ave.to_csv(
    "data/day/WY/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the 2018 August day-level CSV for WY
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2018-aug-day-WY.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2018['TemperatureExpectedCool']
expected_heat = aug_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
aug_2018.drop(aug_2018.loc[is_outlier].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9702c844ff5c002bad15c001c0c0c6890fe63fad,2018-08-03T16:25:00Z,cool,hold,706,710,710,WY,Sundance,5,False,False,False,Gas
1,9702c844ff5c002bad15c001c0c0c6890fe63fad,2018-08-03T15:00:00Z,cool,hold,713,710,710,WY,Sundance,5,False,False,False,Gas
2,9702c844ff5c002bad15c001c0c0c6890fe63fad,2018-08-03T16:20:00Z,cool,hold,705,710,710,WY,Sundance,5,False,False,False,Gas
3,9702c844ff5c002bad15c001c0c0c6890fe63fad,2018-08-03T14:35:00Z,cool,hold,718,710,710,WY,Sundance,5,False,False,False,Gas
4,9702c844ff5c002bad15c001c0c0c6890fe63fad,2018-08-03T17:35:00Z,cool,hold,723,710,710,WY,Sundance,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25787,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-08-09T13:50:00Z,heat,hold,655,660,660,WY,Evanston,0,False,False,False,Gas
25788,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-08-08T17:20:00Z,heat,hold,721,660,660,WY,Evanston,0,False,False,False,Gas
25789,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-08-15T13:00:00Z,heat,hold,656,660,660,WY,Evanston,0,False,False,False,Gas
25790,336df4fa2ebfb1b7a1bc23dd117037a5955effa8,2018-08-15T12:40:00Z,heat,hold,659,660,660,WY,Evanston,0,False,False,False,Gas


In [142]:
# Tag every row with its year and month (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
aug_2018 = aug_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
aug_2018_ave = aug_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Save the August 2018 averages; index=True writes the group keys out as columns
aug_2018_ave.to_csv(
    "data/day/WY/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the 2019 August day-level CSV for WY
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2019-aug-day-WY.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2019['TemperatureExpectedCool']
expected_heat = aug_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
aug_2019.drop(aug_2019.loc[is_outlier].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ab20bcf291e58402ba42a43560f2f7f7ca72c776,2019-08-31T16:50:00Z,auto,hold,716,713,663,WY,Cheyenne,29,True,False,False,Gas
1,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2019-08-06T12:15:00Z,cool,auto,669,750,677,WY,Cheyenne,30,False,False,False,Gas
2,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2019-08-18T19:50:00Z,cool,hold,717,712,712,WY,Casper,10,False,False,False,Gas
3,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2019-08-06T13:05:00Z,cool,auto,670,750,677,WY,Cheyenne,30,False,False,False,Gas
4,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2019-08-18T18:40:00Z,cool,hold,715,712,712,WY,Casper,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20447,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-08-03T18:40:00Z,cool,auto,759,760,760,WY,Casper,0,False,False,False,Gas
20448,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-08-03T19:45:00Z,cool,auto,764,760,760,WY,Casper,0,False,False,False,Gas
20449,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-08-03T17:25:00Z,cool,auto,752,760,760,WY,Casper,0,False,False,False,Gas
20450,759b41e328c1a31c0c37d8ee8ac5f45fdd644a17,2019-08-03T19:40:00Z,cool,auto,763,760,760,WY,Casper,0,False,False,False,Gas


In [148]:
# Tag every row with its year and month (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
aug_2019 = aug_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
aug_2019_ave = aug_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Save the August 2019 averages; index=True writes the group keys out as columns
aug_2019_ave.to_csv(
    "data/day/WY/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the 2020 August day-level CSV for WY
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2020-aug-day-WY.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = aug_2020['TemperatureExpectedCool']
expected_heat = aug_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
aug_2020.drop(aug_2020.loc[is_outlier].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2020-08-29T17:25:00Z,cool,auto,746,740,682,WY,Casper,10,False,False,False,Gas
1,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2020-08-30T13:00:00Z,cool,auto,719,740,682,WY,Casper,10,False,False,False,Gas
2,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2020-08-07T15:30:00Z,cool,hold,676,739,739,WY,Cheyenne,30,False,False,False,Gas
3,ab20bcf291e58402ba42a43560f2f7f7ca72c776,2020-08-02T16:50:00Z,auto,hold,674,673,623,WY,Cheyenne,29,True,False,False,Gas
4,0c66b3c257332a7615e55763a8a5add2f19a1aaa,2020-08-30T17:50:00Z,cool,auto,738,740,682,WY,Casper,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25284,b1ab05cfc5e253a7dbb44754bc3b0da3067fee99,2020-08-25T14:00:00Z,cool,hold,697,695,695,WY,Sheridan,10,True,False,False,Gas
25285,b1ab05cfc5e253a7dbb44754bc3b0da3067fee99,2020-08-26T17:55:00Z,cool,hold,694,695,695,WY,Sheridan,10,True,False,False,Gas
25286,b1ab05cfc5e253a7dbb44754bc3b0da3067fee99,2020-08-25T15:20:00Z,cool,hold,697,695,695,WY,Sheridan,10,True,False,False,Gas
25287,b1ab05cfc5e253a7dbb44754bc3b0da3067fee99,2020-08-25T15:15:00Z,cool,hold,700,695,695,WY,Sheridan,10,True,False,False,Gas


In [154]:
# Tag every row with its year and month (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
aug_2020 = aug_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
aug_2020_ave = aug_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Save the August 2020 averages; index=True writes the group keys out as columns
aug_2020_ave.to_csv(
    "data/day/WY/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this month's folder for the state
2. Export as combined CSV

In [158]:
# Collect the CSV file names in the August folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the concatenation order (and the row
# order of the combined file) reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/WY/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each CSV once and concatenate in a single call. Calling pd.concat inside
# the loop re-copies every previously read row on each pass and seeds the result
# with an empty frame, which newer pandas versions warn about.
frames = []
for file in files:
    df = pd.read_csv("data/day/WY/aug/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame (the result
# the original loop produced) when the folder holds no CSV files.
WY_aug = pd.concat(frames) if frames else pd.DataFrame()

WY_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,1af93c740d9d394407a734476ba8bcaf41c2f2dd,aug,2017,cool,auto,Casper,682.731932,805.132743,670.884956,30.0,True,False,False
1,1af93c740d9d394407a734476ba8bcaf41c2f2dd,aug,2017,cool,hold,Casper,688.805556,740.000000,740.000000,30.0,True,False,False
2,2cc2736dfecd12ede0f334d504dacc109edfa726,aug,2017,auto,hold,Laramie,698.172185,716.208609,666.208609,10.0,False,False,False
3,2cc2736dfecd12ede0f334d504dacc109edfa726,aug,2017,cool,auto,Laramie,677.333333,680.000000,660.000000,10.0,False,False,False
4,2cc2736dfecd12ede0f334d504dacc109edfa726,aug,2017,cool,hold,Laramie,697.642202,700.293578,699.550459,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,f3bd7952d2fbb216345ea8b272cbdee983b34ae6,aug,2020,cool,hold,Casper,724.928177,734.381215,690.784530,40.0,False,False,False
63,f5be20356f7cafa6712707b496d91af1ddbdabe0,aug,2020,auto,auto,Gillette,664.744048,660.000000,630.000000,10.0,True,False,False
64,f5be20356f7cafa6712707b496d91af1ddbdabe0,aug,2020,auto,hold,Gillette,696.318182,715.000000,655.000000,10.0,True,False,False
65,f766ec9ff77cacb6b7c07ef6ac6a6f039c751748,aug,2020,auto,auto,Cheyenne,711.280702,760.000000,710.000000,5.0,False,False,False


In [160]:
# Write the combined August frame to disk with a header row and without the index
WY_aug.to_csv(
    "Scraper_Output/State_Month_Day/WY/WY_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December day-level CSV for WY
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2017-dec-day-WY.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2017['TemperatureExpectedCool']
expected_heat = dec_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
dec_2017.drop(dec_2017.loc[is_outlier].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-12-04T15:40:00Z,heat,hold,721,725,725,WY,Trail Dr,35,False,False,False,Gas
2,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-12-30T15:35:00Z,heat,hold,723,725,725,WY,Trail Dr,35,False,False,False,Gas
3,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-12-17T17:20:00Z,heat,hold,714,715,715,WY,Trail Dr,35,False,False,False,Gas
4,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-12-28T16:30:00Z,heat,hold,726,725,725,WY,Trail Dr,35,False,False,False,Gas
5,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2017-12-28T19:45:00Z,heat,hold,736,725,725,WY,Trail Dr,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18097,5c3385a5b08903ef32083aacfc000fe752b78fdc,2017-12-27T17:15:00Z,heat,auto,705,710,710,WY,Gillette,10,False,False,False,Gas
18098,5c3385a5b08903ef32083aacfc000fe752b78fdc,2017-12-28T14:00:00Z,heat,auto,706,710,710,WY,Gillette,10,False,False,False,Gas
18099,5c3385a5b08903ef32083aacfc000fe752b78fdc,2017-12-29T18:45:00Z,heat,hold,702,710,710,WY,Gillette,10,False,False,False,Gas
18100,5c3385a5b08903ef32083aacfc000fe752b78fdc,2017-12-27T15:15:00Z,heat,auto,703,710,710,WY,Gillette,10,False,False,False,Gas


In [163]:
# Tag every row with its year and month (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
dec_2017 = dec_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
dec_2017_ave = dec_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Save the December 2017 averages; index=True writes the group keys out as columns
dec_2017_ave.to_csv(
    "data/day/WY/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the 2018 December day-level CSV for WY
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2018-dec-day-WY.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2018['TemperatureExpectedCool']
expected_heat = dec_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
dec_2018.drop(dec_2018.loc[is_outlier].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8c1fdbfee5c6446be71c7789a3eb56b05093aac1,2018-12-10T14:25:00Z,heat,hold,668,692,652,WY,Rawlins,0,False,False,False,Gas
1,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2018-12-15T15:00:00Z,heat,hold,724,751,716,WY,Trail Dr,35,False,False,False,Gas
2,338d0307f8579900d1f54af0c7475fd0e3a6af35,2018-12-02T19:50:00Z,heat,hold,703,706,706,WY,Green River,0,False,False,False,Gas
3,338d0307f8579900d1f54af0c7475fd0e3a6af35,2018-12-02T18:20:00Z,heat,hold,707,706,706,WY,Green River,0,False,False,False,Gas
4,dfd6614b4f44b374e9c9f7c6d107bb8953ce70b9,2018-12-04T14:35:00Z,heat,hold,716,739,724,WY,Trail Dr,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31317,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-12-24T18:35:00Z,heat,hold,755,760,760,WY,Rock Springs,25,False,False,False,Gas
31318,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-12-08T10:50:00Z,heat,hold,713,760,760,WY,Rock Springs,25,False,False,False,Gas
31319,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-12-24T16:40:00Z,heat,hold,746,760,760,WY,Rock Springs,25,False,False,False,Gas
31320,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2018-12-24T16:35:00Z,heat,hold,738,760,760,WY,Rock Springs,25,False,False,False,Gas


In [169]:
# Tag every row with its year and month (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
dec_2018 = dec_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
dec_2018_ave = dec_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Save the December 2018 averages; index=True writes the group keys out as columns
dec_2018_ave.to_csv(
    "data/day/WY/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the 2019 December day-level CSV for WY
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2019-dec-day-WY.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2019['TemperatureExpectedCool']
expected_heat = dec_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
dec_2019.drop(dec_2019.loc[is_outlier].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,346d6a205129b9d881a6f6b4d15f12a6836eb41a,2019-12-22T14:35:00Z,heat,hold,660,660,660,WY,Laramie,10,False,False,False,Gas
1,346d6a205129b9d881a6f6b4d15f12a6836eb41a,2019-12-22T15:35:00Z,heat,hold,660,660,660,WY,Laramie,10,False,False,False,Gas
2,346d6a205129b9d881a6f6b4d15f12a6836eb41a,2019-12-22T16:10:00Z,heat,hold,654,660,660,WY,Laramie,10,False,False,False,Gas
3,346d6a205129b9d881a6f6b4d15f12a6836eb41a,2019-12-22T13:50:00Z,heat,hold,660,660,660,WY,Laramie,10,False,False,False,Gas
4,346d6a205129b9d881a6f6b4d15f12a6836eb41a,2019-12-22T13:45:00Z,heat,hold,661,660,660,WY,Laramie,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33306,5e37ac02a37fe85e838f75af25a2e05a1df84da1,2019-12-16T18:20:00Z,heat,hold,709,710,710,WY,Gillette,0,True,False,False,Gas
33307,5e37ac02a37fe85e838f75af25a2e05a1df84da1,2019-12-09T19:30:00Z,heat,auto,715,710,710,WY,Gillette,0,True,False,False,Gas
33308,5e37ac02a37fe85e838f75af25a2e05a1df84da1,2019-12-11T12:30:00Z,heat,auto,705,710,710,WY,Gillette,0,True,False,False,Gas
33309,5e37ac02a37fe85e838f75af25a2e05a1df84da1,2019-12-13T14:10:00Z,heat,auto,706,710,710,WY,Gillette,0,True,False,False,Gas


In [175]:
# Tag every row with its year and month (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
dec_2019 = dec_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
dec_2019_ave = dec_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Save the December 2019 averages; index=True writes the group keys out as columns
dec_2019_ave.to_csv(
    "data/day/WY/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the 2020 December day-level CSV for WY
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/WY-day/2020-dec-day-WY.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = dec_2020['TemperatureExpectedCool']
expected_heat = dec_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850)
    | (expected_heat >= 850)
    | (expected_cool <= 600)
    | (expected_heat <= 600)
)
dec_2020.drop(dec_2020.loc[is_outlier].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2020-12-05T13:50:00Z,heat,hold,655,659,659,WY,Cheyenne,30,False,False,False,Gas
1,d975b1ad1272e42f9eb089583ef55bb8891c93c8,2020-12-05T14:30:00Z,heat,hold,660,659,659,WY,Cheyenne,30,False,False,False,Gas
2,b68cd409bcc737b83dc2c52e21c19c9480638fd9,2020-12-09T19:15:00Z,heat,hold,642,702,664,WY,Gillette,7,False,False,False,Gas
3,ab20bcf291e58402ba42a43560f2f7f7ca72c776,2020-12-05T14:00:00Z,auto,auto,717,720,636,WY,Cheyenne,29,True,False,False,Gas
4,338d0307f8579900d1f54af0c7475fd0e3a6af35,2020-12-05T19:50:00Z,heat,hold,682,673,673,WY,Green River,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31429,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-12-15T18:30:00Z,heat,hold,749,760,760,WY,Rock Springs,25,False,False,False,Gas
31430,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-12-15T17:45:00Z,heat,hold,749,760,760,WY,Rock Springs,25,False,False,False,Gas
31431,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-12-19T18:35:00Z,heat,auto,746,740,760,WY,Rock Springs,25,False,False,False,Gas
31432,ac76fecdaa287fefcce1e1e9d8a04222a05f127f,2020-12-19T16:55:00Z,heat,auto,756,740,760,WY,Rock Springs,25,False,False,False,Gas


In [181]:
# Tag every row with its year and month (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
dec_2020 = dec_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them as older versions did.
dec_2020_ave = dec_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Save the December 2020 averages; index=True writes the group keys out as columns
dec_2020_ave.to_csv(
    "data/day/WY/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this month's folder for the state
2. Export as combined CSV

In [185]:
# Collect the CSV file names in the December folder. os.listdir returns entries
# in arbitrary order, so sort them to keep the concatenation order (and the row
# order of the combined file) reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/WY/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each CSV once and concatenate in a single call. Calling pd.concat inside
# the loop re-copies every previously read row on each pass and seeds the result
# with an empty frame, which newer pandas versions warn about.
frames = []
for file in files:
    df = pd.read_csv("data/day/WY/dec/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame (the result
# the original loop produced) when the folder holds no CSV files.
WY_dec = pd.concat(frames) if frames else pd.DataFrame()

WY_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,1526f2d264a2dbabb980283ffd50f085dd156393,dec,2017,heat,auto,Green River,634.166667,820.000000,620.000000,0.0,False,False,False
1,16913980a67e1faa9fb0fe491f8d36b4477f965a,dec,2017,heat,hold,Cheyenne,653.480769,660.000000,660.000000,10.0,False,False,False
2,1af93c740d9d394407a734476ba8bcaf41c2f2dd,dec,2017,heat,auto,Casper,634.593886,652.200873,641.401747,30.0,True,False,False
3,1af93c740d9d394407a734476ba8bcaf41c2f2dd,dec,2017,heat,hold,Casper,656.883117,670.688312,666.155844,30.0,True,False,False
4,1d00854c03d26e989b137715f96f49c087b12e6b,dec,2017,heat,auto,Casper,658.289474,666.842105,660.289474,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
57,f3bd7952d2fbb216345ea8b272cbdee983b34ae6,dec,2020,heat,hold,Casper,697.212121,700.772727,700.454545,40.0,False,False,False
58,f5be20356f7cafa6712707b496d91af1ddbdabe0,dec,2020,auto,hold,Gillette,673.100154,711.795069,681.795069,10.0,True,False,False
59,f766ec9ff77cacb6b7c07ef6ac6a6f039c751748,dec,2020,auto,auto,Cheyenne,690.705882,745.176471,672.294118,5.0,False,False,False
60,f7cdfd4cc8a741382df3f72255f8368549f3d066,dec,2020,heat,hold,Laramie,694.515152,680.000000,680.000000,35.0,False,False,False


In [187]:
# Write the combined December frame to disk with a header row and without the index
WY_dec.to_csv(
    "Scraper_Output/State_Month_Day/WY/WY_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's output folder
2. Export as combined CSV

In [188]:
# Collect the combined month CSV file names for WY. os.listdir returns entries
# in arbitrary order, so sort them to keep the concatenation order (and the row
# order of the combined file) reproducible across machines and runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/WY/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each month-level CSV once and concatenate in a single call. Calling
# pd.concat inside the loop re-copies every previously read row on each pass and
# seeds the result with an empty frame, which newer pandas versions warn about.
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/WY/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame (the result
# the original loop produced) when the folder holds no CSV files.
WY_all = pd.concat(frames) if frames else pd.DataFrame()

WY_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,1af93c740d9d394407a734476ba8bcaf41c2f2dd,aug,2017,cool,auto,Casper,682.731932,805.132743,670.884956,30.0,True,False,False
1,1af93c740d9d394407a734476ba8bcaf41c2f2dd,aug,2017,cool,hold,Casper,688.805556,740.000000,740.000000,30.0,True,False,False
2,2cc2736dfecd12ede0f334d504dacc109edfa726,aug,2017,auto,hold,Laramie,698.172185,716.208609,666.208609,10.0,False,False,False
3,2cc2736dfecd12ede0f334d504dacc109edfa726,aug,2017,cool,auto,Laramie,677.333333,680.000000,660.000000,10.0,False,False,False
4,2cc2736dfecd12ede0f334d504dacc109edfa726,aug,2017,cool,hold,Laramie,697.642202,700.293578,699.550459,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
283,f045e632eb732cd0703e069e70aef34bb8b50ad0,jun,2021,heat,hold,Evanston,678.679245,656.779874,656.779874,10.0,False,False,False
284,f3bd7952d2fbb216345ea8b272cbdee983b34ae6,jun,2021,cool,hold,Casper,724.198529,721.727941,721.507353,40.0,False,False,False
285,f5be20356f7cafa6712707b496d91af1ddbdabe0,jun,2021,auto,hold,Gillette,694.654762,705.000000,675.000000,10.0,True,False,False
286,f5be20356f7cafa6712707b496d91af1ddbdabe0,jun,2021,cool,hold,Gillette,695.628968,705.000000,693.666667,10.0,True,False,False


In [190]:
# Write the all-months WY frame to disk with a header row and without the index
WY_all.to_csv(
    "Scraper_Output/State_Month_Day/WY_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in each month/year frame.
state_check_frames = [
    ("jan_2017", jan_2017),
    ("jan_2018", jan_2018),
    ("jan_2019", jan_2019),
    ("jan_2020", jan_2020),
    ("jan_2021", jan_2021),
    ("feb_2017", feb_2017),
    ("feb_2018", feb_2018),
    ("feb_2019", feb_2019),
    ("feb_2020", feb_2020),
    ("feb_2021", feb_2021),
    ("jun_2017", jun_2017),
    ("jun_2018", jun_2018),
    ("jun_2019", jun_2019),
    ("jun_2020", jun_2020),
    ("jun_2021", jun_2021),
    ("jul_2017", jul_2017),
    ("jul_2018", jul_2018),
    ("jul_2019", jul_2019),
    ("jul_2020", jul_2020),
    ("jul_2021", jul_2021),
    ("aug_2017", aug_2017),
    ("aug_2018", aug_2018),
    ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017),
    ("dec_2018", dec_2018),
    ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
]

for frame_label, month_frame in state_check_frames:
    print(f"Unique {frame_label}: {month_frame['ProvinceState'].unique()}")

Unique jan_2017: ['WY']
Unique jan_2018: ['WY']
Unique jan_2019: ['WY']
Unique jan_2020: ['WY']
Unique jan_2021: ['WY']
Unique feb_2017: ['WY']
Unique feb_2018: ['WY']
Unique feb_2019: ['WY']
Unique feb_2020: ['WY']
Unique feb_2021: ['WY']
Unique jun_2017: ['WY']
Unique jun_2018: ['WY']
Unique jun_2019: ['WY']
Unique jun_2020: ['WY']
Unique jun_2021: ['WY']
Unique jul_2017: ['WY']
Unique jul_2018: ['WY']
Unique jul_2019: ['WY']
Unique jul_2020: ['WY']
Unique jul_2021: ['WY']
Unique aug_2017: ['WY']
Unique aug_2018: ['WY']
Unique aug_2019: ['WY']
Unique aug_2020: ['WY']
Unique dec_2017: ['WY']
Unique dec_2018: ['WY']
Unique dec_2019: ['WY']
Unique dec_2020: ['WY']
