# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the per-reading January 2017 "day" CSV for PA into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2017-jan-day-PA.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2017.drop(jan_2017.index[is_outlier.values], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3d1a50d677a59dc031f974f08163a01f05d25fd0,2017-01-31 17:25:00 UTC,heat,hold,751,750,750,PA,Nazareth,10,False,False,False,Gas
1,f282a293a71e21033dc413ac3af06a9a0a4f0d7f,2017-01-09 15:05:00 UTC,heat,auto,683,824,644,PA,East Bradford,0,False,False,False,Gas
2,d31e5aac8fe93389879b1c1dfd680cb0f10e9b36,2017-01-16 16:35:00 UTC,heat,auto,680,680,680,PA,Exton,0,False,False,False,Gas
3,28c52e72e7e782b68a49b51c1461040964898734,2017-01-16 17:15:00 UTC,heat,hold,738,695,691,PA,Phoenixville,100,False,False,False,Gas
4,33966c38c6fbb54a7485cd14112eae50f03dff26,2017-01-14 18:45:00 UTC,auxHeatOnly,hold,643,650,650,PA,Upper Saucon,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329975,ff748c1355830330c5e5141fa1ffcb740ba69c50,2017-01-21 15:25:00 UTC,heat,auto,694,700,700,PA,Gettysburg,10,True,False,True,Electric
329976,ff748c1355830330c5e5141fa1ffcb740ba69c50,2017-01-21 17:45:00 UTC,heat,auto,684,680,680,PA,Gettysburg,10,True,False,True,Electric
329977,ff748c1355830330c5e5141fa1ffcb740ba69c50,2017-01-09 19:15:00 UTC,heat,hold,692,690,690,PA,Gettysburg,10,True,False,True,Electric
329978,ff748c1355830330c5e5141fa1ffcb740ba69c50,2017-01-31 18:40:00 UTC,heat,hold,691,700,700,PA,Gettysburg,10,True,False,True,Electric


In [4]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_column_names)

In [6]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
jan_2017_ave = jan_2017.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
001540b162bef02b1e52f9a93e03625d9f2492f6,Jan,2017,heat,auto,Tredyffrin,648.539216,650.000000,630.000000,65.0,False,False,True
001540b162bef02b1e52f9a93e03625d9f2492f6,Jan,2017,heat,hold,Tredyffrin,663.396074,659.162818,659.075058,65.0,False,False,True
0212f695761a719f8afa5ac8c450a099acec98ba,Jan,2017,auto,auto,pottstown,697.744186,759.542636,700.457364,20.0,False,False,False
0212f695761a719f8afa5ac8c450a099acec98ba,Jan,2017,auto,hold,pottstown,696.145833,760.041667,709.666667,20.0,False,False,False
0236a2992f6bb3a6c3369d8485cb0f6063a7efd8,Jan,2017,auto,hold,Northampton,615.541667,780.000000,629.458333,40.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
fe31dc5fa2b5868a5bef81c8b3fa7b94fc1ec31a,Jan,2017,heat,auto,Feasterville-Trevose,732.263451,731.558442,731.558442,5.0,False,False,False
ff42754912f9d1247596cf4d40b9cc92c7658c6c,Jan,2017,heat,auto,Allentown,689.825000,692.250000,692.250000,5.0,False,False,False
ff42754912f9d1247596cf4d40b9cc92c7658c6c,Jan,2017,heat,hold,Allentown,688.709770,691.034483,691.034483,5.0,False,False,False
ff748c1355830330c5e5141fa1ffcb740ba69c50,Jan,2017,heat,auto,Gettysburg,696.607527,700.991935,700.440860,10.0,True,False,True


In [7]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
jan_2017_ave.to_csv(path_or_buf="data/day/PA/jan/jan_2017_ave.csv", index=True, header=True)

### 2018 January Day

In [8]:
# Load the per-reading January 2018 "day" CSV for PA into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2018-jan-day-PA.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2018.drop(jan_2018.index[is_outlier.values], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e530eaf0464cb496aa3b10819b507e1b803ce1e,2018-01-03 15:00:00 UTC,heat,hold,708,830,830,PA,Norwood,0,False,False,False,Gas
1,2c44865f4be6194ecd67c29ab744039b731e21d1,2018-01-19 12:25:00 UTC,heat,hold,702,708,708,PA,York,10,False,False,False,Gas
2,cb52b83dd90eb7016fbea1946cb3563546935595,2018-01-17 16:20:00 UTC,auto,hold,684,725,665,PA,Lower Heidelberg,0,False,False,False,Gas
3,333aaa2217aff428d5e6c33be503b44ff085a19f,2018-01-16 14:25:00 UTC,auto,hold,704,810,710,PA,Pittsburgh,70,False,False,False,Gas
4,2c44865f4be6194ecd67c29ab744039b731e21d1,2018-01-05 15:00:00 UTC,heat,hold,706,708,708,PA,York,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
923564,0c3de1a1b51dc3a54e7c3934e5eed0d8b85f9240,2018-01-14 17:50:00 UTC,auto,hold,658,760,660,PA,Pittsburgh,117,False,False,False,Gas
923565,a29d48dc65b4ca21cf0807ca0bb05e665f70b10d,2018-01-24 13:40:00 UTC,heat,auto,690,760,680,PA,Sciota,0,False,False,True,Electric
923566,95de816608d26037c9076dac0a1883098d8d2f7a,2018-01-09 19:50:00 UTC,auto,hold,675,760,680,PA,Easton,0,False,False,False,Gas
923567,c8c73591f6f73415a839ddfff8ee78279fa7d1dc,2018-01-28 15:40:00 UTC,auto,hold,686,760,680,PA,Aston,30,True,False,True,Electric


In [10]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_column_names)

In [12]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
jan_2018_ave = jan_2018.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
jan_2018_ave.to_csv(path_or_buf="data/day/PA/jan/jan_2018_ave.csv", index=True, header=True)

### 2019 January Day

In [14]:
# Load the per-reading January 2019 "day" CSV for PA into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2019-jan-day-PA.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2019.drop(jan_2019.index[is_outlier.values], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,153f92add22d468196158070875c3f4045fa3ba7,2019-01-17 13:20:00 UTC,auto,hold,665,755,665,PA,Essington,50,False,False,False,Gas
1,612232d08ef17a29ae451c51aca96a4eef831780,2019-01-27 14:20:00 UTC,heat,auto,689,694,640,PA,Butler township,0,False,False,False,Gas
2,1a5eabd2e2a302af08c7e29e68a2fdf430dbf9cd,2019-01-28 19:25:00 UTC,heat,hold,683,689,689,PA,Pittsburgh,95,False,False,False,Gas
3,6304b9968a64a7af87435d6cd41e9e9f0ba46efe,2019-01-28 19:05:00 UTC,heat,hold,732,699,699,PA,lancaster,0,False,False,False,Gas
4,42ca22e848bb9a0e25b30819ed555f3d9d424454,2019-01-18 18:45:00 UTC,auto,hold,703,749,699,PA,Philadelphia,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1424947,5fe429cb510da85ef191676e8e758b501bcaafce,2019-01-23 18:15:00 UTC,auto,auto,695,760,690,PA,Philadelphia,70,False,False,False,Gas
1424948,67add1400b5e1e353d0eaecd702486368a415dde,2019-01-10 18:20:00 UTC,heat,auto,681,760,680,PA,Wyncote,115,False,False,False,Gas
1424949,5dbb4948f18954712f0e686a124dbc1dc7447cbd,2019-01-10 18:20:00 UTC,auto,hold,691,760,700,PA,Wrightsville,15,True,False,True,Electric
1424950,8fd0c04b068bc33cdbee0cb72d5d12bdfc51d528,2019-01-05 13:50:00 UTC,heat,auto,647,760,660,PA,Harrisburg,30,True,False,True,Electric


In [16]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_column_names)

In [18]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
jan_2019_ave = jan_2019.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
jan_2019_ave.to_csv(path_or_buf="data/day/PA/jan/jan_2019_ave.csv", index=True, header=True)

### 2020 January Day

In [20]:
# Load the per-reading January 2020 "day" CSV for PA into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2020-jan-day-PA.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2020.drop(jan_2020.index[is_outlier.values], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,75290a3086d130b35ffd59ff5a7014dd46562907,2020-01-17 13:00:00 UTC,heat,auto,693,711,711,PA,Bethel Park,69,True,False,False,Gas
1,5fe429cb510da85ef191676e8e758b501bcaafce,2020-01-23 19:00:00 UTC,heat,auto,734,757,740,PA,Philadelphia,70,False,False,False,Gas
2,549388dcbb5e38a13eece31d877310f4ca14934f,2020-01-05 14:40:00 UTC,auto,hold,666,790,670,PA,Irwin,86,False,False,False,Gas
4,c0c6f1c5c4580d334d936254fe588a4d6f5b0ecc,2020-01-16 12:15:00 UTC,heat,hold,660,665,665,PA,Felton,95,False,False,True,Electric
5,795cd901410d57caa1c749c321880de7b2df17d0,2020-01-21 12:40:00 UTC,heat,hold,718,729,729,PA,Philadelphia,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1564891,1dbe90c0843577ac1eceee8d56b07a702652617c,2020-01-11 12:30:00 UTC,auto,auto,687,760,690,PA,Mountville,10,False,False,True,Electric
1564892,776a0eb7115418bc1be9a817c9b4d0f1a3534e60,2020-01-30 11:50:00 UTC,auto,auto,694,760,700,PA,Bensalem,15,False,False,False,Gas
1564893,d01b11a0af5673455f062aba5f0b2b14557ccba8,2020-01-07 14:40:00 UTC,heat,auto,711,760,670,PA,Milford,120,False,False,False,Gas
1564894,776a0eb7115418bc1be9a817c9b4d0f1a3534e60,2020-01-25 13:35:00 UTC,auto,auto,697,760,700,PA,Bensalem,15,False,False,False,Gas


In [22]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_column_names)

In [24]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
jan_2020_ave = jan_2020.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
jan_2020_ave.to_csv(path_or_buf="data/day/PA/jan/jan_2020_ave.csv", index=True, header=True)

### 2021 January Day

In [26]:
# Load the per-reading January 2021 "day" CSV for PA into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2021-jan-day-PA.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jan_2021.drop(jan_2021.index[is_outlier.values], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,de023a0d9a3fedf6fd41baccf5cbb10338543f43,2021-01-17 13:50:00 UTC,heat,hold,618,630,620,PA,Saylorsburg,20,True,False,True,Electric
2,63d512b446d87294a12c72fdfcf68ffae99c3e4e,2021-01-31 19:35:00 UTC,heat,hold,643,677,677,PA,Lebanon,65,True,False,False,Gas
3,adb96e8514eea0f85be6985ea3765ff0fffe3906,2021-01-16 17:25:00 UTC,heat,hold,664,620,620,PA,King of Prussia,56,False,False,False,Gas
4,89dacd1a308a60af2f94362a7d3bff370f272524,2021-01-07 15:00:00 UTC,heat,hold,655,662,662,PA,Kennett Square,57,False,False,False,Gas
5,8b70bb320377763eabea3d9db8a20da3f46ee787,2021-01-24 16:00:00 UTC,auto,hold,725,830,730,PA,York,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900571,afb477fde586d95f9a690d6c6e452084a2d868e0,2021-01-18 10:55:00 UTC,auto,hold,649,760,650,PA,Lansdale,10,False,False,False,Gas
900572,4eda131c5d0883d40ff2ab864199017cc7fc3e89,2021-01-26 15:35:00 UTC,auto,hold,687,760,690,PA,Gwynedd Valley,40,False,False,False,Gas
900573,93aa9cf538df8ed82e56a57b9c7edb7a436b787a,2021-01-24 10:30:00 UTC,auto,hold,679,760,680,PA,Wayne,40,False,False,False,Gas
900574,94d03089c875997ceda31929a7b9326be3018460,2021-01-15 16:15:00 UTC,auto,hold,699,760,700,PA,East Hempfield,20,False,False,False,Gas


In [28]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_column_names)

In [30]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
jan_2021_ave = jan_2021.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
jan_2021_ave.to_csv(path_or_buf="data/day/PA/jan/jan_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the CSV files in the January output folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/PA/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed CSV first, then concatenate once. Calling pd.concat inside
# the loop re-copies all previously accumulated rows on each pass, and
# concatenating onto an empty DataFrame can emit a FutureWarning on recent
# pandas releases.
yearly_frames = [pd.read_csv("data/day/PA/jan/" + file) for file in files]

# pd.concat raises on an empty list; keep the previous behaviour of ending up
# with an empty frame when the folder holds no CSV files.
# Each file's own row index is retained (no ignore_index), as before.
PA_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

PA_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,001540b162bef02b1e52f9a93e03625d9f2492f6,Jan,2017,heat,auto,Tredyffrin,648.539216,650.000000,630.000000,65.0,False,False,True
1,001540b162bef02b1e52f9a93e03625d9f2492f6,Jan,2017,heat,hold,Tredyffrin,663.396074,659.162818,659.075058,65.0,False,False,True
2,0212f695761a719f8afa5ac8c450a099acec98ba,Jan,2017,auto,auto,pottstown,697.744186,759.542636,700.457364,20.0,False,False,False
3,0212f695761a719f8afa5ac8c450a099acec98ba,Jan,2017,auto,hold,pottstown,696.145833,760.041667,709.666667,20.0,False,False,False
4,0236a2992f6bb3a6c3369d8485cb0f6063a7efd8,Jan,2017,auto,hold,Northampton,615.541667,780.000000,629.458333,40.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1145,ff06e494220daaa501485dcb33646ac03d931271,Jan,2021,heat,hold,Collegeville,668.439744,669.457692,669.457692,0.0,False,False,False
1146,ff748c1355830330c5e5141fa1ffcb740ba69c50,Jan,2021,heat,hold,Gettysburg,701.889597,707.639774,707.639774,10.0,True,False,True
1147,ffd06e2a208d4876d29e70cc6d77c38abe5fe1cb,Jan,2021,heat,hold,Pittsburgh,713.433333,716.488889,716.255556,50.0,True,False,False
1148,ffda5b1af97787564c19df5f554a3e3ccf6362e1,Jan,2021,heat,hold,West Chester,683.829365,687.142857,687.142857,35.0,True,False,True


In [34]:
# Export the combined January frame. index=False leaves out the row index,
# which is not unique because each source file's index was retained.
PA_jan.to_csv(path_or_buf="Scraper_Output/State_Month_Day/PA/PA_jan.csv", index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the per-reading February 2017 "day" CSV for PA into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2017-feb-day-PA.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2017.drop(feb_2017.index[is_outlier.values], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4d3e10810c933047a3c215c1b40f621d79b760fc,2017-02-12 14:40:00 UTC,heat,hold,666,660,660,PA,Mohnton,35,False,False,False,Gas
1,6c7f6ed27f563dd31122e1ceb15838db308156ed,2017-02-01 18:25:00 UTC,heat,hold,683,680,680,PA,Altoona,100,False,False,False,Gas
2,ff42754912f9d1247596cf4d40b9cc92c7658c6c,2017-02-11 15:40:00 UTC,heat,auto,698,700,700,PA,Allentown,5,False,False,False,Gas
3,bf0dc2e94cbe677f88540d9df1f63407a1a715a1,2017-02-03 14:40:00 UTC,heat,hold,707,700,700,PA,Edinboro,45,False,False,False,Gas
4,cae4729640c1851b5b1baf243e96c45410d4db3a,2017-02-04 15:25:00 UTC,heat,auto,702,700,700,PA,East Norriton,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311073,fc51621b979bd5ad5d8023ceab7e20d9b6c68f5c,2017-02-21 14:00:00 UTC,heat,hold,700,700,700,PA,Spring,60,False,False,False,Gas
311074,fc51621b979bd5ad5d8023ceab7e20d9b6c68f5c,2017-02-28 16:00:00 UTC,heat,hold,697,700,700,PA,Spring,60,False,False,False,Gas
311075,fc51621b979bd5ad5d8023ceab7e20d9b6c68f5c,2017-02-02 13:40:00 UTC,heat,auto,714,710,710,PA,Spring,60,False,False,False,Gas
311076,fc51621b979bd5ad5d8023ceab7e20d9b6c68f5c,2017-02-15 14:50:00 UTC,heat,auto,705,710,710,PA,Spring,60,False,False,False,Gas


In [37]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
# NOTE(review): the January cells use a capitalised label ("Jan") while this
# one is lower-case ("feb"). Left unchanged here; confirm which form the
# downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_column_names)

In [39]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
feb_2017_ave = feb_2017.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
feb_2017_ave.to_csv(path_or_buf="data/day/PA/feb/feb_2017_ave.csv", index=True, header=True)

### 2018 February Day

In [41]:
# Load the per-reading February 2018 "day" CSV for PA into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2018-feb-day-PA.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2018.drop(feb_2018.index[is_outlier.values], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d5e08951e788f6ef01487c9617fbd3dcff318642,2018-02-16 14:35:00 UTC,heat,auto,707,755,705,PA,State College,0,True,False,False,Gas
1,8279e0782ae5f9e95c9bc258ced8eed79a308bef,2018-02-08 16:20:00 UTC,auto,hold,702,755,705,PA,Doylestown,20,False,False,False,Gas
2,74a66fc1abad27e8665aabbc8dab79b72061cf25,2018-02-26 19:50:00 UTC,heat,hold,694,695,695,PA,Philadelphia,90,False,False,False,Gas
3,8279e0782ae5f9e95c9bc258ced8eed79a308bef,2018-02-15 19:30:00 UTC,auto,hold,711,755,705,PA,Doylestown,20,False,False,False,Gas
4,8279e0782ae5f9e95c9bc258ced8eed79a308bef,2018-02-09 14:25:00 UTC,auto,hold,699,755,705,PA,Doylestown,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
849938,d4a25ebca884259e4a0349a5b19375468e6602d4,2018-02-20 14:15:00 UTC,auto,auto,732,760,700,PA,Philadelphia,67,False,False,True,Electric
849939,d4a25ebca884259e4a0349a5b19375468e6602d4,2018-02-28 16:45:00 UTC,auto,auto,692,760,700,PA,Philadelphia,67,False,False,True,Electric
849940,89dacd1a308a60af2f94362a7d3bff370f272524,2018-02-03 09:45:00 UTC,auto,auto,699,760,700,PA,Kennett Square,57,False,False,False,Gas
849941,bfaf389fb0de90c2c95d3df2f9ff7f6f7e0e6707,2018-02-12 18:45:00 UTC,auto,hold,746,760,720,PA,Bala-Cynwyd,25,False,False,True,Electric


In [43]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
# NOTE(review): the January cells use a capitalised label ("Jan") while this
# one is lower-case ("feb"). Left unchanged here; confirm which form the
# downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_column_names)

In [45]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
feb_2018_ave = feb_2018.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
feb_2018_ave.to_csv(path_or_buf="data/day/PA/feb/feb_2018_ave.csv", index=True, header=True)

### 2019 February Day

In [47]:
# Load the per-reading February 2019 "day" CSV for PA into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2019-feb-day-PA.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2019.drop(feb_2019.index[is_outlier.values], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5a2437889c1d42e1edbd608c6192d20e67d02b6e,2019-02-22 15:05:00 UTC,heat,hold,688,684,684,PA,Philadelphia,0,False,False,False,Gas
1,cbfb710a59eb43bf1836249a3d8c505f296933ed,2019-02-25 11:40:00 UTC,auto,hold,696,761,698,PA,Lititz,30,False,False,False,Gas
2,6405a68e8b2cc6f1d84ead07b951c458417bf209,2019-02-27 13:10:00 UTC,heat,hold,724,725,725,PA,Emmaus,70,False,False,False,Gas
3,c17e699e7a4d7ee7f5c558c3d4a04492cf269eee,2019-02-27 11:30:00 UTC,heat,hold,731,725,725,PA,Philadelphia,0,False,False,False,Gas
4,1fb856c7c487d571a2f323896d714b01b2062f95,2019-02-21 10:40:00 UTC,heat,hold,668,671,670,PA,Monroeville,60,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
948954,67add1400b5e1e353d0eaecd702486368a415dde,2019-02-15 13:55:00 UTC,heat,auto,719,760,650,PA,Wyncote,115,False,False,False,Gas
948955,4551d4b17a6714627543ecabe73f07fb8d643c6e,2019-02-18 15:05:00 UTC,auto,hold,704,760,700,PA,Dauphin,0,False,False,False,Gas
948956,67add1400b5e1e353d0eaecd702486368a415dde,2019-02-13 13:05:00 UTC,heat,auto,769,760,770,PA,Wyncote,115,False,False,False,Gas
948957,d4a25ebca884259e4a0349a5b19375468e6602d4,2019-02-26 15:00:00 UTC,auto,auto,689,760,700,PA,Philadelphia,67,False,False,True,Electric


In [49]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
# NOTE(review): the January cells use a capitalised label ("Jan") while this
# one is lower-case ("feb"). Left unchanged here; confirm which form the
# downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_column_names)

In [51]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
feb_2019_ave = feb_2019.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
feb_2019_ave.to_csv(path_or_buf="data/day/PA/feb/feb_2019_ave.csv", index=True, header=True)

### 2020 February Day

In [53]:
# Load the per-reading February 2020 "day" CSV for PA into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2020-feb-day-PA.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2020.drop(feb_2020.index[is_outlier.values], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d14e186ec38c2f678b2b1da4cd5b620436c79fd8,2020-02-27 11:40:00 UTC,auto,auto,724,790,730,PA,Upper Providence,20,False,False,False,Gas
1,be513d09d9f516fde1a0d70071b2b9d399a606dd,2020-02-15 19:05:00 UTC,heat,hold,703,715,715,PA,Camp Hill,40,False,False,False,Gas
2,ae7911c69920c4ca1d07d66e74c01bacb73ac6fb,2020-02-16 19:30:00 UTC,auto,auto,736,790,740,PA,Pittsburgh,0,True,False,False,Gas
3,5dbb4948f18954712f0e686a124dbc1dc7447cbd,2020-02-24 13:25:00 UTC,auto,hold,705,840,710,PA,Wrightsville,15,True,False,True,Electric
4,810664f765033c2042762404c802a4220d0dea2b,2020-02-10 15:35:00 UTC,heat,auto,625,610,630,PA,king of prussia,70,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1401667,2f6581d4285aaa3c0a3ce5ea97e2595041ff3a17,2020-02-13 14:15:00 UTC,heat,auto,701,760,710,PA,Wilkinsburg,75,False,False,False,Gas
1401668,1dbe90c0843577ac1eceee8d56b07a702652617c,2020-02-04 12:30:00 UTC,auto,hold,691,760,690,PA,Mountville,10,False,False,True,Electric
1401669,1dbe90c0843577ac1eceee8d56b07a702652617c,2020-02-03 17:35:00 UTC,auto,hold,702,760,690,PA,Mountville,10,False,False,True,Electric
1401670,776a0eb7115418bc1be9a817c9b4d0f1a3534e60,2020-02-05 14:50:00 UTC,auto,auto,700,760,700,PA,Bensalem,15,False,False,False,Gas


In [55]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
# NOTE(review): the January cells use a capitalised label ("Jan") while this
# one is lower-case ("feb"). Left unchanged here; confirm which form the
# downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_column_names)

In [57]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
feb_2020_ave = feb_2020.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
feb_2020_ave.to_csv(path_or_buf="data/day/PA/feb/feb_2020_ave.csv", index=True, header=True)

### 2021 February Day

In [59]:
# Load the per-reading February 2021 "day" CSV for PA into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2021-feb-day-PA.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Rows where a value is missing
# compare False on every test and are therefore kept.
expected_temps = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
feb_2021.drop(feb_2021.index[is_outlier.values], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5ad858ab87ad515720d929cf8608e5eb73e02f4b,2021-02-18 13:05:00 UTC,heat,hold,671,737,737,PA,YORK,50,True,False,False,Gas
1,6dcc4a7477ca39e7878bab608043b03d4b5b0971,2021-02-08 17:10:00 UTC,heat,hold,726,725,725,PA,Palmyra,15,False,False,False,Gas
2,8db45e66ff8b1dcccfc534879014fefbb9531306,2021-02-27 12:00:00 UTC,auto,hold,607,810,610,PA,Lansdale,20,False,False,False,Gas
3,847ebae1e04b6a4f6124afbfc29308983562473b,2021-02-19 15:55:00 UTC,heat,hold,683,686,686,PA,Nazareth,10,True,False,False,Gas
4,9a47409a904ab452b53e5164fee4807f96533005,2021-02-05 10:55:00 UTC,heat,hold,659,665,665,PA,Manchester,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
824740,4351b6b12ff716f33ab291f29bbf62b52c7c17f0,2021-02-20 18:25:00 UTC,auto,hold,717,760,720,PA,Port Matilda,0,False,False,False,Gas
824741,f313d1cadb4a7e6c372ec25dfbccd96b4f38cccc,2021-02-02 14:05:00 UTC,heat,hold,757,760,760,PA,Venetia,30,False,False,False,Gas
824742,93aa9cf538df8ed82e56a57b9c7edb7a436b787a,2021-02-08 17:50:00 UTC,auto,hold,678,760,680,PA,Wayne,40,False,False,False,Gas
824743,94d03089c875997ceda31929a7b9326be3018460,2021-02-13 18:50:00 UTC,auto,hold,695,760,700,PA,East Hempfield,20,False,False,False,Gas


In [61]:
# Stamp each row with its year and month label (both stored as strings);
# they are used as grouping keys when the readings are averaged.
# NOTE(review): the January cells use a capitalised label ("Jan") while this
# one is lower-case ("feb"). Left unchanged here; confirm which form the
# downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Give the three temperature columns an "_ave" suffix up front, so the
# aggregated frame built from them is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_column_names)

In [63]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode,
# CalendarEvent and City.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError when asked to average them, whereas older releases silently
# left them out of the result.
feb_2021_ave = feb_2021.groupby(
    by=['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Write the per-group averages to disk. index=True keeps the six group keys
# (the MultiIndex) as the leading columns of the CSV.
feb_2021_ave.to_csv(path_or_buf="data/day/PA/feb/feb_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the CSV files in the February output folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/PA/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed CSV first, then concatenate once. Calling pd.concat inside
# the loop re-copies all previously accumulated rows on each pass, and
# concatenating onto an empty DataFrame can emit a FutureWarning on recent
# pandas releases.
yearly_frames = [pd.read_csv("data/day/PA/feb/" + file) for file in files]

# pd.concat raises on an empty list; keep the previous behaviour of ending up
# with an empty frame when the folder holds no CSV files.
# Each file's own row index is retained (no ignore_index), as before.
PA_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

PA_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,001540b162bef02b1e52f9a93e03625d9f2492f6,feb,2017,heat,hold,Tredyffrin,684.705882,652.755418,647.693498,65.0,False,False,True
1,004207d68a3ccd055b3e699142250299486128ab,feb,2017,heat,hold,Sewickley,681.911765,750.000000,690.000000,55.0,True,False,False
2,0212f695761a719f8afa5ac8c450a099acec98ba,feb,2017,auto,hold,pottstown,701.593750,760.000000,688.750000,20.0,False,False,False
3,0236a2992f6bb3a6c3369d8485cb0f6063a7efd8,feb,2017,auto,hold,Northampton,642.352941,780.000000,650.000000,40.0,True,False,True
4,025a1d00df108f951005ee0dfe2539d4732aaeaa,feb,2017,heat,hold,Exton,690.400000,681.866667,681.866667,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1093,ff06e494220daaa501485dcb33646ac03d931271,feb,2021,heat,hold,Collegeville,664.339744,665.309295,665.309295,0.0,False,False,False
1094,ff748c1355830330c5e5141fa1ffcb740ba69c50,feb,2021,heat,hold,Gettysburg,703.233513,707.191790,707.191790,10.0,True,False,True
1095,ffd06e2a208d4876d29e70cc6d77c38abe5fe1cb,feb,2021,heat,hold,Pittsburgh,716.854227,720.262391,720.262391,50.0,True,False,False
1096,ffda5b1af97787564c19df5f554a3e3ccf6362e1,feb,2021,heat,hold,West Chester,675.821612,681.595197,681.595197,35.0,True,False,True


In [67]:
# Create the output folder if needed, then write the combined month file.
# index=False: the row labels restart for each yearly file and carry no information.
os.makedirs("Scraper_Output/State_Month_Day/PA", exist_ok=True)
PA_feb.to_csv("Scraper_Output/State_Month_Day/PA/PA_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the 2017 June "day" CSV for PA
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2017-jun-day-PA.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2017.drop(jun_2017[is_outlier].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7b0231d36a14169a35c11dfd81f41dc3bcbc27a8,2017-06-03 18:25:00 UTC,auto,auto,680,770,640,PA,Lower Makefield,39,True,False,True,Electric
1,6fe1f57dd4e98c44b73e3930d9aad68d70cad507,2017-06-24 14:20:00 UTC,cool,hold,728,720,720,PA,Havertown,75,False,False,False,Gas
2,5ecb9b9047e543fa42f9b180be7d5eb2a569654b,2017-06-27 13:40:00 UTC,cool,auto,735,740,720,PA,Broomal,75,False,False,False,Gas
3,f710f513b2da3b0544a867aa5b323d01606a3a00,2017-06-07 19:05:00 UTC,cool,hold,678,720,720,PA,Rydal,45,False,False,False,Gas
4,ce7498af53b69ba9ab84f7e798a955353fa2a2ff,2017-06-23 14:50:00 UTC,cool,auto,719,730,730,PA,Lower Nazareth,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
544377,985c5349c4deb224780513a5a4c84d92fb85d97c,2017-06-29 15:40:00 UTC,auto,hold,737,740,660,PA,Bethlehem township,5,False,False,False,Gas
544378,985c5349c4deb224780513a5a4c84d92fb85d97c,2017-06-28 14:45:00 UTC,auto,hold,738,740,660,PA,Bethlehem township,5,False,False,False,Gas
544379,7d7cd8462eef4f03b2b684011cdae14c63827b06,2017-06-15 16:40:00 UTC,auto,hold,754,800,690,PA,Bethlehem township,0,False,False,False,Gas
544380,985c5349c4deb224780513a5a4c84d92fb85d97c,2017-06-28 10:05:00 UTC,auto,hold,690,740,660,PA,Bethlehem township,5,False,False,False,Gas


In [70]:
# Add year and month as constant columns on every row
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jun_2017 = jun_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jun", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jun_2017_ave.to_csv("data/day/PA/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the 2018 June "day" CSV for PA
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2018-jun-day-PA.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2018.drop(jun_2018[is_outlier].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9014215909d9c288ad2b3d595c9e4c533cbeb1db,2018-06-18 13:10:00 UTC,auto,hold,721,715,655,PA,Chester Springs,5,False,False,False,Gas
1,d8ebed78dcadcb63faed3cd5a2f7bbfd71786813,2018-06-24 19:00:00 UTC,auto,hold,752,754,704,PA,Pittsburgh,100,True,False,False,Gas
2,f9aa961ec87d12c0deff2aa08a32e274db5211cc,2018-06-28 17:55:00 UTC,auto,hold,760,755,685,PA,Philadelphia,80,True,False,True,Electric
3,d3d1bffd3437f163765f83be6880684f5fc25cfc,2018-06-23 15:05:00 UTC,auto,hold,705,703,653,PA,Royersford,115,False,False,False,Gas
4,6129d78aaab9214b3c297ba843d88b46526b0631,2018-06-30 19:10:00 UTC,cool,auto,741,720,685,PA,Monroeville,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1116952,93144b40d83879a6abcb231dc5160adddd681014,2018-06-19 12:35:00 UTC,cool,auto,760,760,760,PA,Forks,5,False,False,False,Gas
1116953,aa07128a7206c45b865d3f674247f922d538d965,2018-06-07 16:55:00 UTC,cool,hold,663,760,760,PA,Jefferson Hills,0,False,False,False,Gas
1116954,93144b40d83879a6abcb231dc5160adddd681014,2018-06-24 16:25:00 UTC,cool,auto,762,760,760,PA,Forks,5,False,False,False,Gas
1116955,c6d4ca48885197b475eeb24b6e3ddc2056c973ea,2018-06-24 12:55:00 UTC,cool,hold,709,760,760,PA,Huntingdon Valley,60,False,False,False,Gas


In [76]:
# Add year and month as constant columns on every row
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jun_2018 = jun_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jun", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jun_2018_ave.to_csv("data/day/PA/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the 2019 June "day" CSV for PA
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2019-jun-day-PA.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2019.drop(jun_2019[is_outlier].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fc21b9dced05f4aeb3a14cfff7ca27f786c6ad08,2019-06-14 12:10:00 UTC,auto,hold,728,753,673,PA,Downingtown,40,False,False,True,Electric
1,1a5eabd2e2a302af08c7e29e68a2fdf430dbf9cd,2019-06-05 16:00:00 UTC,heat,hold,737,650,635,PA,Pittsburgh,95,False,False,False,Gas
2,c519914aec3ddd77696c5bdb54164646c8aac69a,2019-06-05 19:35:00 UTC,auto,auto,697,702,652,PA,Wind Gap,0,False,False,False,Gas
3,2208af46c7277b9a7ff0228afda73fc6945b3c36,2019-06-14 13:45:00 UTC,cool,hold,678,721,721,PA,Pittsburgh,40,False,False,False,Gas
4,f2fb37d984c1793dff1e600c65b1ec2e5f1bca64,2019-06-05 12:45:00 UTC,auto,hold,746,760,669,PA,Pittsburgh,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1587736,756cf54b4bd92de0f490db08da6b34c1da37ecd1,2019-06-07 14:30:00 UTC,cool,hold,760,760,760,PA,Philadelphia,0,False,False,False,Gas
1587737,7cfcbbda9f1178e03b8bcae8af97e2c36ebf6140,2019-06-07 09:55:00 UTC,cool,auto,727,760,760,PA,Stroud,46,False,False,False,Gas
1587738,cdb135f55af34e3417e170da572308087d31e58c,2019-06-13 12:15:00 UTC,cool,auto,712,760,760,PA,Southampton,20,True,False,False,Gas
1587739,5ef52d8170064e8e5e06b4ca8fde1f2980774bb6,2019-06-29 17:45:00 UTC,cool,hold,760,760,760,PA,Abington,0,False,False,False,Gas


In [82]:
# Add year and month as constant columns on every row
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jun_2019 = jun_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jun", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jun_2019_ave.to_csv("data/day/PA/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the 2020 June "day" CSV for PA
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2020-jun-day-PA.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2020.drop(jun_2020[is_outlier].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4c240bb84b974f9c839340a4cae4afaea49663d4,2020-06-13 19:10:00 UTC,cool,auto,687,800,717,PA,Philadelphia,9,True,False,False,Gas
1,17d4b4a310d1a7599fc310ba6f1c6097d481dbe4,2020-06-11 13:30:00 UTC,cool,hold,728,725,725,PA,Columbia,0,False,False,False,Gas
2,eabfdd36f31d1703b04a3823486f407a20f8e4ca,2020-06-10 12:00:00 UTC,cool,hold,741,745,745,PA,Philadelphia,5,False,False,False,Gas
3,e08974f2ceed0b0e9f831bc18e4e66e0c08fc1fb,2020-06-20 19:50:00 UTC,cool,hold,720,692,692,PA,allison park,25,False,False,False,Gas
4,78d8cadcca93a3fc690e80cac18a46dc3db49e2b,2020-06-14 11:45:00 UTC,cool,hold,723,745,745,PA,Young,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1581281,9d20fa945301d6ca6eee6f8995ac1d00862a0e09,2020-06-11 18:45:00 UTC,cool,hold,758,760,760,PA,Philadelphia,40,True,False,False,Gas
1581282,137df0b84fd2ad429f3b39e5119eeba8e0ab9dd6,2020-06-13 12:30:00 UTC,cool,hold,749,760,760,PA,Collegeville,20,False,False,False,Gas
1581283,c1346b07258dc9a05ecbe5a8267e93c7cd074176,2020-06-21 13:00:00 UTC,cool,hold,765,760,760,PA,Greensburg,9,False,False,True,Electric
1581284,9c4ca6174630ba6d2ebbad1c28a18fb89f7fce0f,2020-06-19 17:35:00 UTC,cool,hold,754,760,760,PA,Philadelphia,65,False,False,False,Gas


In [88]:
# Add year and month as constant columns on every row
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jun_2020 = jun_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jun", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jun_2020_ave.to_csv("data/day/PA/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the 2021 June "day" CSV for PA
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2021-jun-day-PA.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2021.drop(jun_2021[is_outlier].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f32ed57bf1d9a43b38e8a7472724ab64cba96cfb,2021-06-24 17:40:00 UTC,cool,hold,736,739,739,PA,Philadelphia,0,False,False,False,Gas
1,1273d856ef788fcbc0f18b60999ba98b8f7a58c0,2021-06-21 16:10:00 UTC,auto,hold,697,695,625,PA,Gilbertsville,5,False,False,False,Gas
2,37f2d005efe0b8aac58b8bcb49f95378d96966fc,2021-06-26 16:05:00 UTC,cool,hold,744,745,745,PA,Coatesville,50,False,False,True,Electric
3,36362e8c16e17bc149cd8cd9e69d312166355682,2021-06-28 14:55:00 UTC,cool,hold,687,689,662,PA,Philadelphia,15,False,False,False,Gas
4,670a683eb7c051e85de0b9c395bc187b198852ba,2021-06-29 07:15:00 UTC,cool,hold,758,771,771,PA,Canonsburg,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
921571,a9eab4a49678a83d683047cb88ac5494d9009515,2021-06-13 13:55:00 UTC,cool,hold,756,760,760,PA,Derry,20,False,False,True,Electric
921572,dfbf627d62eb85aac76cb8b74aa99eb19efff1cd,2021-06-25 16:35:00 UTC,cool,hold,757,790,760,PA,Harmony,28,True,False,True,Electric
921573,dfce3b4f7c68c296103502a708c542f559c40a88,2021-06-29 16:10:00 UTC,cool,hold,782,760,760,PA,Paoli,70,False,False,False,Gas
921574,f024a0853dc35bdca119cfd459b9a220b4b0c8b4,2021-06-15 11:25:00 UTC,cool,hold,756,760,760,PA,East Vincent,10,False,False,False,Gas


In [94]:
# Add year and month as constant columns on every row
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jun_2021 = jun_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jun", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jun_2021_ave.to_csv("data/day/PA/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every per-year CSV file from this month's folder for the state
2. Export as combined CSV

In [98]:
# Collect the CSV file names in the month folder. os.listdir returns entries in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/PA/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then concatenate once. Growing a frame with
# concat inside a loop re-copies all earlier rows on each pass, and seeding it
# with an empty DataFrame relies on concat behaviour that pandas has deprecated.
frames = [pd.read_csv("data/day/PA/jun/" + file) for file in files]

# Fall back to an empty frame when no files were found (concat rejects an empty list).
PA_jun = pd.concat(frames) if frames else pd.DataFrame()

PA_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,000cb4018ac842aa5428402d14cccb4c63045d08,jun,2017,cool,auto,Middletown,728.909091,730.454545,645.909091,27.0,False,False,True
1,000cb4018ac842aa5428402d14cccb4c63045d08,jun,2017,cool,hold,Middletown,716.849421,724.135135,722.687259,27.0,False,False,True
2,001540b162bef02b1e52f9a93e03625d9f2492f6,jun,2017,cool,hold,Tredyffrin,703.326955,702.470882,702.470882,65.0,False,False,True
3,004207d68a3ccd055b3e699142250299486128ab,jun,2017,cool,hold,Sewickley,719.937063,724.475524,713.286713,55.0,True,False,False
4,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,jun,2017,cool,auto,Upper Nazareth,749.328014,762.864362,732.611702,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1334,ff06e494220daaa501485dcb33646ac03d931271,jun,2021,cool,hold,Collegeville,667.592949,667.375000,667.375000,0.0,False,False,False
1335,ff42754912f9d1247596cf4d40b9cc92c7658c6c,jun,2021,auto,hold,Allentown,711.000000,705.000000,655.000000,5.0,False,False,False
1336,ff748c1355830330c5e5141fa1ffcb740ba69c50,jun,2021,cool,hold,Gettysburg,745.020884,756.147791,743.637751,10.0,True,False,True
1337,ff748c1355830330c5e5141fa1ffcb740ba69c50,jun,2021,heat,hold,Gettysburg,729.113636,734.840909,734.022727,10.0,True,False,True


In [100]:
# Create the output folder if needed, then write the combined month file.
# index=False: the row labels restart for each yearly file and carry no information.
os.makedirs("Scraper_Output/State_Month_Day/PA", exist_ok=True)
PA_jun.to_csv("Scraper_Output/State_Month_Day/PA/PA_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the 2017 July "day" CSV for PA
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2017-jul-day-PA.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2017.drop(jul_2017[is_outlier].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,30fbb6729c960f000ed6bd7714824fc3a0e4c656,2017-07-07 14:50:00 UTC,cool,auto,771,770,690,PA,Whitpain,30,True,False,True,Electric
1,57e1be76674ac4d0fb725dfc82df71824f656fb4,2017-07-07 13:50:00 UTC,cool,hold,741,720,720,PA,Lower Gwynedd,30,False,False,False,Gas
2,8279e0782ae5f9e95c9bc258ced8eed79a308bef,2017-07-12 17:35:00 UTC,auto,hold,706,700,640,PA,Doylestown,20,False,False,False,Gas
3,a9ecdab84f90f68bad1b43506bf18c7e2dd2ff93,2017-07-10 19:30:00 UTC,cool,hold,743,740,740,PA,Baldwin borough,60,False,False,False,Gas
4,890933c96861ca3f11e4bf6990068e0bb498e666,2017-07-22 15:50:00 UTC,cool,hold,713,710,700,PA,Pocono Pines,30,True,True,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661053,8a2c1c06d6f8b7847e7566d48a09023e77646270,2017-07-02 16:55:00 UTC,auto,auto,743,740,660,PA,Bethlehem township,20,False,False,False,Gas
661054,8a2c1c06d6f8b7847e7566d48a09023e77646270,2017-07-19 14:10:00 UTC,auto,hold,735,740,660,PA,Bethlehem township,20,False,False,False,Gas
661055,5a13322a5c575df5e580ca660625a44727532fd3,2017-07-11 16:40:00 UTC,cool,auto,800,710,710,PA,Bethlehem township,0,False,False,False,Gas
661056,5a13322a5c575df5e580ca660625a44727532fd3,2017-07-13 14:35:00 UTC,cool,hold,740,650,650,PA,Bethlehem township,0,False,False,False,Gas


In [103]:
# Add year and month as constant columns on every row
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jul_2017 = jul_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jul", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jul_2017_ave.to_csv("data/day/PA/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the 2018 July "day" CSV for PA
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2018-jul-day-PA.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2018.drop(jul_2018[is_outlier].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8f76428ac24ce047b6a93fcb11831d074965afff,2018-07-10 12:50:00 UTC,cool,hold,684,718,718,PA,Sewickley,90,False,False,False,Gas
1,93aa9cf538df8ed82e56a57b9c7edb7a436b787a,2018-07-28 11:05:00 UTC,auto,hold,707,705,655,PA,Wayne,40,False,False,False,Gas
2,8f76428ac24ce047b6a93fcb11831d074965afff,2018-07-09 17:05:00 UTC,cool,hold,684,678,678,PA,Sewickley,90,False,False,False,Gas
3,8279e0782ae5f9e95c9bc258ced8eed79a308bef,2018-07-14 13:45:00 UTC,auto,auto,713,710,631,PA,Doylestown,20,False,False,False,Gas
4,c9c5d4e27478c70bdf50bac49742cda3f276bf7f,2018-07-03 13:20:00 UTC,auto,auto,738,725,685,PA,Bala-Cynwyd,25,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1325416,94ac1b46c0cbee95ca97e02d01086a91deb95a00,2018-07-10 17:25:00 UTC,cool,auto,762,760,760,PA,Montgomery,10,False,False,False,Gas
1325417,6552d0b122dabf1ce3d42a4211e951be1aba2476,2018-07-18 15:15:00 UTC,cool,auto,761,760,760,PA,Kennedy,0,False,False,False,Gas
1325418,caf7bac533f750f576e85067c8a43570f6602ab8,2018-07-17 11:20:00 UTC,cool,auto,764,760,760,PA,Philadelphia,0,False,False,True,Electric
1325419,8b70bb320377763eabea3d9db8a20da3f46ee787,2018-07-10 19:00:00 UTC,cool,hold,769,760,760,PA,York,5,False,False,False,Gas


In [109]:
# Add year and month as constant columns on every row
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jul_2018 = jul_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jul", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jul_2018_ave.to_csv("data/day/PA/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the 2019 July "day" CSV for PA
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2019-jul-day-PA.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2019.drop(jul_2019[is_outlier].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,31a340cd5c1e924484b07b4fc4912b21f63a601d,2019-07-06 15:50:00 UTC,cool,auto,738,730,729,PA,Lancaster,9,True,False,False,Gas
1,dae6b5283103d0f51c2170513b67ad01525fd865,2019-07-16 14:35:00 UTC,auto,hold,743,742,692,PA,York township,45,False,False,False,Gas
2,bd8581f250a3f04eedb872dde35a56e812563b69,2019-07-22 14:05:00 UTC,cool,hold,732,730,719,PA,Coraopolis,90,False,False,False,Gas
3,4eda131c5d0883d40ff2ab864199017cc7fc3e89,2019-07-31 17:15:00 UTC,auto,hold,747,731,681,PA,Gwynedd Valley,40,False,False,False,Gas
4,3e89aa8e0b4f2270386ac7914fe71cf4035a025a,2019-07-22 18:50:00 UTC,cool,hold,747,760,748,PA,Pittsburgh,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1846007,c0037d1940e19583b4b69f78c803b1ace09bb931,2019-07-24 15:05:00 UTC,cool,hold,757,760,760,PA,Macungie,17,False,False,False,Gas
1846008,4ed17c000e9a7f63b9cdf738463012ded3a4c6d8,2019-07-02 18:05:00 UTC,cool,auto,762,760,760,PA,Lancaster,25,False,False,True,Electric
1846009,852b7fc6d7d78b47e96ee7c9093bf11f74e0272e,2019-07-23 16:55:00 UTC,cool,auto,759,760,760,PA,Mars,10,False,False,False,Gas
1846010,5ef52d8170064e8e5e06b4ca8fde1f2980774bb6,2019-07-16 11:20:00 UTC,cool,hold,750,760,760,PA,Abington,0,False,False,False,Gas


In [115]:
# Add year and month as constant columns on every row
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jul_2019 = jul_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jul", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jul_2019_ave.to_csv("data/day/PA/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the 2020 July "day" CSV for PA
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2020-jul-day-PA.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2020.drop(jul_2020[is_outlier].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,f9775dfeab3e8e4d4732a1311f6ad4f16756d380,2020-07-26 18:25:00 UTC,cool,hold,705,665,665,PA,Bensalem,69,True,False,True,Electric
3,f2fb37d984c1793dff1e600c65b1ec2e5f1bca64,2020-07-13 13:40:00 UTC,cool,hold,780,790,789,PA,Pittsburgh,50,False,False,False,Gas
4,79c2be39f7ac3fd8b941bd205c3063ca07411365,2020-07-05 14:40:00 UTC,auto,hold,689,682,612,PA,New Castle,17,False,False,False,Gas
5,6563cc06981b18e3afddd876942d26d42f66fa4d,2020-07-23 14:50:00 UTC,cool,auto,752,760,716,PA,Hatfield,69,True,False,False,Gas
6,d9817298db1503b86c1b0b7f2395922c4248e161,2020-07-30 19:20:00 UTC,auto,hold,722,715,665,PA,Easton,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1830026,dd372fefae0ed019a4764a154a9273ae4ac7c1de,2020-07-14 11:45:00 UTC,cool,auto,762,760,760,PA,Philadelphia,0,False,False,False,Gas
1830027,ffdf0f4bb190103795d9fbea6a3afe4fd1d2236c,2020-07-02 12:50:00 UTC,cool,auto,758,760,760,PA,Media,30,False,False,False,Gas
1830028,ec3408f22f93f35cdb0178530c526fdfe734b91b,2020-07-29 10:45:00 UTC,cool,hold,744,760,760,PA,South Fayette,0,False,False,False,Gas
1830029,e3b0f7532a1a6fca7886cde164e23bbce8786bc8,2020-07-06 15:20:00 UTC,cool,auto,740,760,760,PA,Mechanicsburg,19,True,False,False,Gas


In [121]:
# Add year and month as constant columns on every row
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jul_2020 = jul_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jul", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jul_2020_ave.to_csv("data/day/PA/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the 2021 July "day" CSV for PA
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/PA-day/2021-jul-day-PA.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2021.drop(jul_2021[is_outlier].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d4eaa08eab148a375bf9cffd58bdd0a5d0da7d97,2021-07-23 10:55:00 UTC,cool,hold,710,713,713,PA,Glenshaw,50,False,False,False,Gas
2,b6ee24e4351fcef86a6e17428e4962e6cb0e4a8a,2021-07-17 16:35:00 UTC,cool,hold,769,776,776,PA,Dover,27,False,False,False,Gas
4,cdd6d2b9e37ba92716594f4ee4a12a89921493af,2021-07-08 15:00:00 UTC,cool,hold,727,729,729,PA,Northampton,10,False,False,False,Gas
5,75e8c48b57c4accfa0ccefea76837ce13c0e8e53,2021-07-26 12:55:00 UTC,cool,hold,717,722,722,PA,New Cumberland,19,True,False,False,Gas
6,795cd901410d57caa1c749c321880de7b2df17d0,2021-07-07 10:30:00 UTC,cool,hold,758,760,756,PA,Philadelphia,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
935658,a536d7fc5a161e7dae17dfd72b3e8e55006ac5c3,2021-07-09 19:50:00 UTC,cool,hold,750,760,760,PA,Pittsburgh,77,False,False,False,Gas
935659,624b844ba586b512ec8f773b7d7b801c5557501b,2021-07-14 17:30:00 UTC,cool,hold,762,760,760,PA,Huntingdon Valley,30,False,False,False,Gas
935660,3c5c19363dd3eae958f5d9e07d7e31764a534b7a,2021-07-11 12:20:00 UTC,cool,hold,749,760,760,PA,Philadelphia,120,False,False,False,Gas
935661,b29131585b28b66ef57530799aa2bcaeaacc4b27,2021-07-21 11:40:00 UTC,cool,hold,760,760,760,PA,Harrisburg,100,False,False,False,Gas


In [127]:
# Add year and month as constant columns on every row
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation
jul_2021 = jul_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState);
# without it pandas >= 2.0 raises a TypeError when it tries to average them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file
# Create the output folder first so the export also works when it does not exist yet.
os.makedirs("data/day/PA/jul", exist_ok=True)
# index=True keeps the group-by keys, which live in the index after the aggregation.
jul_2021_ave.to_csv("data/day/PA/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every per-year CSV file from this month's folder for the state
2. Export as combined CSV

In [131]:
# Collect the CSV file names in the month folder. os.listdir returns entries in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/PA/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then concatenate once. Growing a frame with
# concat inside a loop re-copies all earlier rows on each pass, and seeding it
# with an empty DataFrame relies on concat behaviour that pandas has deprecated.
frames = [pd.read_csv("data/day/PA/jul/" + file) for file in files]

# Fall back to an empty frame when no files were found (concat rejects an empty list).
PA_jul = pd.concat(frames) if frames else pd.DataFrame()

PA_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,000cb4018ac842aa5428402d14cccb4c63045d08,jul,2017,cool,auto,Middletown,761.097222,760.000000,650.000000,27.0,False,False,True
1,001540b162bef02b1e52f9a93e03625d9f2492f6,jul,2017,cool,hold,Tredyffrin,713.064103,712.246964,712.246964,65.0,False,False,True
2,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,jul,2017,cool,auto,Upper Nazareth,738.311680,743.767060,732.029528,0.0,False,False,False
3,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,jul,2017,cool,hold,Upper Nazareth,738.625718,745.123995,745.029851,0.0,False,False,False
4,0212f695761a719f8afa5ac8c450a099acec98ba,jul,2017,auto,hold,pottstown,728.645161,720.322581,670.322581,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1122,ff42754912f9d1247596cf4d40b9cc92c7658c6c,jul,2021,auto,hold,Allentown,729.500000,730.000000,620.000000,5.0,False,False,False
1123,ff467cd1a4b4b98a9116b2e0488608b1031994b2,jul,2021,auto,hold,Philadelphia,720.500000,710.000000,630.000000,0.0,True,False,False
1124,ff748c1355830330c5e5141fa1ffcb740ba69c50,jul,2021,cool,hold,Gettysburg,757.713962,766.920218,748.135993,10.0,True,False,True
1125,ffda5b1af97787564c19df5f554a3e3ccf6362e1,jul,2021,auto,hold,West Chester,750.784508,755.004965,675.491559,35.0,True,False,True


In [133]:
# Write the combined July data; the row index is left out of the file.
PA_jul_csv = "Scraper_Output/State_Month_Day/PA/PA_jul.csv"
PA_jul.to_csv(PA_jul_csv, index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state
aug_2017_source = "../data_large/PA-day/2017-aug-day-PA.csv"
aug_2017 = pd.read_csv(aug_2017_source)

In [135]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(aug_2017[outlier_rows].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e78f8e1995a5427e1c9afb54c21ebf4b9c26d2cd,2017-08-12 15:00:00 UTC,cool,auto,682,680,680,PA,Mahoning Township,20,False,False,False,Gas
1,e77fad866ee240dcd4a5e86a79f91f5c9dc2d88c,2017-08-27 10:25:00 UTC,cool,auto,747,650,670,PA,Downingtown,20,False,False,False,Gas
2,93144b40d83879a6abcb231dc5160adddd681014,2017-08-04 11:20:00 UTC,cool,hold,692,690,690,PA,Forks,5,False,False,False,Gas
3,4add341e0b5b20b8535c8560f6186d85bff88fc9,2017-08-27 19:45:00 UTC,cool,auto,691,720,640,PA,holland,0,False,False,False,Gas
4,25a41c758e0dad375337d15d1d2b8c38efa503c5,2017-08-20 12:25:00 UTC,auto,auto,809,810,630,PA,Cheltenham,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
621865,5a13322a5c575df5e580ca660625a44727532fd3,2017-08-01 17:50:00 UTC,cool,hold,755,650,650,PA,Bethlehem township,0,False,False,False,Gas
621866,43b45bb3a423c0f9a5b76e1c065af2f99554eaa6,2017-08-30 12:50:00 UTC,cool,hold,750,740,740,PA,Bethlehem township,0,False,False,False,Gas
621867,5a13322a5c575df5e580ca660625a44727532fd3,2017-08-04 13:15:00 UTC,cool,hold,764,660,660,PA,Bethlehem township,0,False,False,False,Gas
621869,985c5349c4deb224780513a5a4c84d92fb85d97c,2017-08-18 19:45:00 UTC,cool,hold,744,750,750,PA,Bethlehem township,5,False,False,False,Gas


In [136]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_column_names)

In [138]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
aug_2017_csv = "data/day/PA/aug/aug_2017_ave.csv"
aug_2017_ave.to_csv(aug_2017_csv, index=True, header=True)

### 2018 August Day

In [140]:
# Read in month csv for state
aug_2018_source = "../data_large/PA-day/2018-aug-day-PA.csv"
aug_2018 = pd.read_csv(aug_2018_source)

In [141]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(aug_2018[outlier_rows].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,309b8a0fa407f745fe8b11057e88d6db6f0c7481,2018-08-26 11:20:00 UTC,auto,auto,758,805,755,PA,Landenberg,20,False,False,False,Gas
1,5d3402384fc275a251772d3e18c521baa0a726cf,2018-08-03 12:05:00 UTC,cool,auto,727,725,695,PA,Collegeville,5,False,False,True,Electric
2,8f76428ac24ce047b6a93fcb11831d074965afff,2018-08-28 16:10:00 UTC,cool,hold,706,708,708,PA,Sewickley,90,False,False,False,Gas
3,783aa185cf0e9abf19ac0d935c4546d8b783a9e4,2018-08-30 12:50:00 UTC,cool,hold,664,673,646,PA,Nether Providence,60,False,False,False,Gas
4,2c44865f4be6194ecd67c29ab744039b731e21d1,2018-08-26 15:00:00 UTC,cool,hold,716,708,708,PA,York,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1333573,c64b282a2f1d05483a9de8b66b90a8e354afc151,2018-08-02 13:10:00 UTC,cool,auto,716,760,760,PA,Erie,30,False,False,False,Gas
1333574,d48f2bc1d814465f967caa5a301dd337d0ecd315,2018-08-27 14:05:00 UTC,cool,auto,738,760,760,PA,Oreland,80,False,False,False,Gas
1333575,f32ed57bf1d9a43b38e8a7472724ab64cba96cfb,2018-08-10 19:05:00 UTC,cool,hold,779,760,760,PA,Philadelphia,0,False,False,False,Gas
1333576,82c3c515d558064832ff0e00f6223ccb39a8f6d4,2018-08-01 17:40:00 UTC,cool,auto,764,760,760,PA,Hanover,27,False,False,False,Gas


In [142]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_column_names)

In [144]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
aug_2018_csv = "data/day/PA/aug/aug_2018_ave.csv"
aug_2018_ave.to_csv(aug_2018_csv, index=True, header=True)

### 2019 August Day

In [146]:
# Read in month csv for state
aug_2019_source = "../data_large/PA-day/2019-aug-day-PA.csv"
aug_2019 = pd.read_csv(aug_2019_source)

In [147]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(aug_2019[outlier_rows].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,92a629020f37c331e59faaccd4280469f063f5c2,2019-08-04 13:50:00 UTC,cool,hold,745,735,735,PA,Fogelsville,35,True,False,True,Electric
1,bb6c280dc9f1b83ad05cbfeb1ae979bcb33eb110,2019-08-16 19:50:00 UTC,cool,auto,745,740,735,PA,Mohnton,16,True,False,False,Gas
2,c3c0b277cf96aeae05db9ceb3c463cf04e36b41d,2019-08-02 11:00:00 UTC,auto,hold,738,735,675,PA,Hatfield township,40,False,False,True,Electric
3,373b8568ac5bf6b6596eab5c50138960df40d3ff,2019-08-23 14:15:00 UTC,cool,hold,735,735,735,PA,Reading,57,False,False,False,Gas
4,5e17fda39acf8d0af72a98696915695f4cc95bb8,2019-08-24 13:15:00 UTC,cool,hold,728,736,736,PA,Lansdale,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1724018,6552d0b122dabf1ce3d42a4211e951be1aba2476,2019-08-21 19:30:00 UTC,cool,auto,760,760,760,PA,Kennedy,0,False,False,False,Gas
1724019,26e7c3b8521ea22d1e99898ebe023223fd0f4808,2019-08-26 10:05:00 UTC,cool,hold,714,760,760,PA,Warminster,25,False,False,False,Gas
1724020,6552d0b122dabf1ce3d42a4211e951be1aba2476,2019-08-31 12:15:00 UTC,cool,auto,754,760,760,PA,Kennedy,0,False,False,False,Gas
1724021,6d70b5ed8dd77b294987debd1b614eba1b6bbb23,2019-08-30 17:35:00 UTC,cool,auto,774,760,760,PA,Southampton,20,True,False,False,Gas


In [148]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_column_names)

In [150]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
aug_2019_csv = "data/day/PA/aug/aug_2019_ave.csv"
aug_2019_ave.to_csv(aug_2019_csv, index=True, header=True)

### 2020 August Day

In [152]:
# Read in month csv for state
aug_2020_source = "../data_large/PA-day/2020-aug-day-PA.csv"
aug_2020 = pd.read_csv(aug_2020_source)

In [153]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(aug_2020[outlier_rows].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,29da8499f630fb8a75e6ae6314b27335bea3f29e,2020-08-21 19:35:00 UTC,auto,hold,702,702,652,PA,Irwin,30,False,False,True,Electric
2,fb54c72523324eb5ee028017672f399346bed149,2020-08-19 18:55:00 UTC,auto,hold,766,770,671,PA,East Washington,45,True,False,False,Gas
3,a085ba02953e4a665f297252ff104f0bac757b73,2020-08-07 17:25:00 UTC,auto,hold,700,695,625,PA,McMurray,0,False,False,False,Gas
4,0deaa0e64064ca89c74b70f6119fa19c65a00b0b,2020-08-24 19:50:00 UTC,cool,hold,765,759,759,PA,Mc Kees Rocks,70,False,False,True,Electric
5,eabfdd36f31d1703b04a3823486f407a20f8e4ca,2020-08-11 10:45:00 UTC,cool,hold,747,745,745,PA,Philadelphia,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1717116,c1346b07258dc9a05ecbe5a8267e93c7cd074176,2020-08-15 12:50:00 UTC,cool,hold,765,760,760,PA,Greensburg,9,False,False,True,Electric
1717117,e17f58d4de3aff2c18d34be2491c784e43efc464,2020-08-17 12:00:00 UTC,cool,hold,760,760,760,PA,Philadelphia,40,False,False,False,Gas
1717118,61b0fdfc2d8846c84faf93f64c7d8f455be84634,2020-08-29 16:20:00 UTC,cool,hold,765,760,760,PA,Berwyn,0,True,False,False,Gas
1717119,c1346b07258dc9a05ecbe5a8267e93c7cd074176,2020-08-19 11:00:00 UTC,cool,hold,761,760,760,PA,Greensburg,9,False,False,True,Electric


In [154]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_column_names)

In [156]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
aug_2020_csv = "data/day/PA/aug/aug_2020_ave.csv"
aug_2020_ave.to_csv(aug_2020_csv, index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from the month's folder
2. Export as combined CSV

In [158]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined CSV reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/PA/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and stack them into a single dataframe.
# The frames are collected first and concatenated once: this avoids re-copying
# the accumulated rows on every iteration and avoids concatenating onto an
# empty DataFrame, whose handling is deprecated in newer pandas versions.
frames = [pd.read_csv("data/day/PA/aug/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
PA_aug = pd.concat(frames) if frames else pd.DataFrame()

PA_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,001540b162bef02b1e52f9a93e03625d9f2492f6,aug,2017,cool,hold,Tredyffrin,722.392006,720.718678,720.718678,65.0,False,False,True
1,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,aug,2017,cool,auto,Upper Nazareth,758.875983,797.432314,748.148472,0.0,False,False,False
2,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,aug,2017,cool,hold,Upper Nazareth,743.080427,747.943060,747.938790,0.0,False,False,False
3,0212f695761a719f8afa5ac8c450a099acec98ba,aug,2017,auto,hold,pottstown,731.628571,730.257143,680.257143,20.0,False,False,False
4,025a1d00df108f951005ee0dfe2539d4732aaeaa,aug,2017,auto,hold,Exton,686.098434,689.355705,630.000000,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2179,ff748c1355830330c5e5141fa1ffcb740ba69c50,aug,2020,cool,hold,Gettysburg,737.084467,745.122947,740.819334,10.0,True,False,True
2180,ffd06e2a208d4876d29e70cc6d77c38abe5fe1cb,aug,2020,cool,auto,Pittsburgh,735.636364,734.545455,734.545455,50.0,True,False,False
2181,ffd06e2a208d4876d29e70cc6d77c38abe5fe1cb,aug,2020,cool,hold,Pittsburgh,737.570342,730.007605,730.007605,50.0,True,False,False
2182,ffdf0f4bb190103795d9fbea6a3afe4fd1d2236c,aug,2020,cool,auto,Media,741.076379,746.902405,746.893918,30.0,False,False,False


In [160]:
# Write the combined August data; the row index is left out of the file.
PA_aug_csv = "Scraper_Output/State_Month_Day/PA/PA_aug.csv"
PA_aug.to_csv(PA_aug_csv, index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state
dec_2017_source = "../data_large/PA-day/2017-dec-day-PA.csv"
dec_2017 = pd.read_csv(dec_2017_source)

In [162]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(dec_2017[outlier_rows].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2c44865f4be6194ecd67c29ab744039b731e21d1,2017-12-18 16:50:00 UTC,heat,hold,708,708,708,PA,York,10,False,False,False,Gas
1,dc158f4526df06f7cb0dfbe73a63c2a7e2f2e1e7,2017-12-06 13:15:00 UTC,heat,hold,742,742,742,PA,Lancaster,30,False,False,False,Gas
2,ab4599c12b073026f1c87d0b34227dd9ef0ee0c5,2017-12-17 17:05:00 UTC,heat,hold,662,701,660,PA,Honey Brook,10,False,False,False,Gas
3,eb185faee2e8f9da58763e87105984eef379eac0,2017-12-13 18:40:00 UTC,auto,auto,660,775,665,PA,Plum,40,False,False,False,Gas
4,9dc19d7d9a10f8ab7dea59ccd8f9c2a4d2417dc3,2017-12-24 14:40:00 UTC,auto,hold,724,805,725,PA,Danville,37,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
904548,bd3cb8acbb0a616f2944655ee9f08caabce4d960,2017-12-05 19:05:00 UTC,auto,hold,668,760,670,PA,Pittsburgh,0,False,False,False,Gas
904549,753f57314f40be12d294bcc308b2a10ef5dc2dd6,2017-12-31 15:55:00 UTC,auto,hold,676,760,670,PA,Springfield,70,False,False,False,Gas
904550,e488869099a8e46dc3d7208a2be9000b1ef33766,2017-12-20 15:35:00 UTC,heat,auto,663,760,660,PA,Millvale,105,False,False,False,Gas
904551,cdb7073c92de959de2172325b070011062325189,2017-12-22 12:45:00 UTC,heat,hold,728,760,730,PA,Narberth,50,False,False,False,Gas


In [163]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_column_names)

In [165]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
dec_2017_csv = "data/day/PA/dec/dec_2017_ave.csv"
dec_2017_ave.to_csv(dec_2017_csv, index=True, header=True)

### 2018 December Day

In [167]:
# Read in month csv for state
dec_2018_source = "../data_large/PA-day/2018-dec-day-PA.csv"
dec_2018 = pd.read_csv(dec_2018_source)

In [168]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(dec_2018[outlier_rows].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4483918555d57d823f32787d4f1549b6fa9bdbed,2018-12-19 16:35:00 UTC,heat,hold,708,685,685,PA,Northampton,10,False,False,False,Gas
1,f9eaf5201c34eaff9d069b4d76e3c5cc7aa03b80,2018-12-17 11:45:00 UTC,heat,hold,709,734,734,PA,West Chester,20,False,False,False,Gas
2,b502ae7458414c662a49021f1f2c11c33a50036c,2018-12-13 13:15:00 UTC,heat,hold,701,692,692,PA,Pittsburgh,27,False,False,False,Gas
3,4483918555d57d823f32787d4f1549b6fa9bdbed,2018-12-22 19:40:00 UTC,heat,hold,702,705,705,PA,Northampton,10,False,False,False,Gas
4,af7c46ffb23f7881590213f4ddbd5f32647146c0,2018-12-12 19:00:00 UTC,auto,hold,715,830,720,PA,Fox Chapel,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358636,c072b0507a01c08bcbe0d3d7898b0a5ee91ec43a,2018-12-05 15:00:00 UTC,auto,auto,649,760,650,PA,Pittsburgh,68,False,False,False,Gas
1358637,d6e5be5bf3761632c26b7c92a53badc224e0943e,2018-12-03 19:05:00 UTC,auto,hold,699,760,700,PA,Lower Saucon,15,False,False,False,Gas
1358638,1df8aec3f586916f13c4a778c18bd21d25f1af4e,2018-12-07 12:20:00 UTC,auto,auto,713,760,710,PA,Macungie,5,False,False,False,Gas
1358639,b761db70457d2dce50cf136a8cbf4e9f031319c1,2018-12-30 13:50:00 UTC,auto,auto,679,760,680,PA,Philadelphia,30,False,False,False,Gas


In [169]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_column_names)

In [171]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
dec_2018_csv = "data/day/PA/dec/dec_2018_ave.csv"
dec_2018_ave.to_csv(dec_2018_csv, index=True, header=True)

### 2019 December Day

In [173]:
# Read in month csv for state
dec_2019_source = "../data_large/PA-day/2019-dec-day-PA.csv"
dec_2019 = pd.read_csv(dec_2019_source)

In [174]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(dec_2019[outlier_rows].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,1c702378ee8f70bee73b1aaf907764c514104ab0,2019-12-15 18:10:00 UTC,auto,hold,707,755,705,PA,Lower Macungie,5,False,False,False,Gas
2,84210a4a3fb286bdb02955443661d44fe73ea9b5,2019-12-05 18:40:00 UTC,heat,hold,699,705,705,PA,Collegeville,27,False,False,False,Gas
3,3c443dedb191cd51e67cb12fb80be0c19047f6f0,2019-12-21 19:15:00 UTC,heat,hold,749,724,724,PA,Royersford,10,False,False,False,Gas
4,af7c46ffb23f7881590213f4ddbd5f32647146c0,2019-12-21 18:55:00 UTC,auto,auto,722,810,720,PA,Fox Chapel,50,False,False,False,Gas
5,3b20b4647d4c84aaa14584807ddcfb5b46ad589d,2019-12-09 12:35:00 UTC,heat,auto,641,640,640,PA,Moon Township,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1616421,e9b1af5baa6d3fc77af7d9200800b616b423b106,2019-12-03 16:25:00 UTC,auto,hold,697,760,720,PA,Wilkes-Barre,20,False,False,True,Electric
1616422,e9b1af5baa6d3fc77af7d9200800b616b423b106,2019-12-03 14:25:00 UTC,auto,hold,710,760,720,PA,Wilkes-Barre,20,False,False,True,Electric
1616423,0d332d41d64f90ad8a9de9061bddaa66908e6a9d,2019-12-06 07:45:00 UTC,auto,hold,714,760,720,PA,Uniontown,60,False,False,False,Gas
1616424,183d3a8a81e6cb6c4979467725268cc6aaef6715,2019-12-03 11:35:00 UTC,auto,auto,654,760,650,PA,Philadelphia,80,False,False,False,Gas


In [175]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_column_names)

In [177]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
dec_2019_csv = "data/day/PA/dec/dec_2019_ave.csv"
dec_2019_ave.to_csv(dec_2019_csv, index=True, header=True)

### 2020 December Day

In [179]:
# Read in month csv for state
dec_2020_source = "../data_large/PA-day/2020-dec-day-PA.csv"
dec_2020 = pd.read_csv(dec_2020_source)

In [180]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either expected setpoint (cool or heat) is >= 850
# or <= 600. Rows are dropped in place, so the original index labels are kept.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(dec_2020[outlier_rows].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,c317000a7b64e9bc8b42ff23b04c0c10e9e97104,2020-12-12 08:00:00 UTC,heat,auto,669,653,653,PA,Pittsburgh,120,False,False,False,Gas
3,c93c8eee536dd209712ed415e4977edfdd115425,2020-12-03 14:15:00 UTC,heat,hold,719,723,723,PA,Irwin,0,False,False,False,Gas
5,bda31a5683e11bcc196decafd919ecf330fa5c14,2020-12-06 10:25:00 UTC,heat,hold,630,640,640,PA,York,40,False,False,True,Electric
6,fe9cc385438553fd2e0b3c8076b52242646957c5,2020-12-04 13:15:00 UTC,heat,auto,662,697,660,PA,East Bradford,30,True,False,True,Electric
8,3483904010e3434a4ece22953ab95c816ab0ab93,2020-12-19 13:05:00 UTC,heat,auto,660,663,660,PA,Lancaster,69,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1405739,e9b1af5baa6d3fc77af7d9200800b616b423b106,2020-12-23 12:50:00 UTC,auto,hold,696,760,700,PA,Wilkes-Barre,20,False,False,True,Electric
1405741,a57edba8ea198b9cd98f03abe79369ad19115dc4,2020-12-12 18:55:00 UTC,auto,hold,677,760,670,PA,Philadelphia,100,True,False,True,Electric
1405742,eaec091048a17ea1174e2ee5842a6ce28087d71b,2020-12-01 17:30:00 UTC,heat,auto,754,760,760,PA,Upper Gwynedd,9,True,False,False,Gas
1405743,94d03089c875997ceda31929a7b9326be3018460,2020-12-04 15:30:00 UTC,auto,auto,695,760,700,PA,East Hempfield,20,False,False,False,Gas


In [181]:
# Add year and month
# Both values are stored as strings; they are used as group keys when aggregating.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Rename columns to label the aggregates
# (the three temperature columns are averaged per group afterwards, hence "_ave")
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_column_names)

In [183]:
# Average the readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Export CSV file
# The group keys live in the index, so the index is written out with the data.
dec_2020_csv = "data/day/PA/dec/dec_2020_ave.csv"
dec_2020_ave.to_csv(dec_2020_csv, index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from the month's folder
2. Export as combined CSV

In [185]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined CSV reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/PA/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and stack them into a single dataframe.
# The frames are collected first and concatenated once: this avoids re-copying
# the accumulated rows on every iteration and avoids concatenating onto an
# empty DataFrame, whose handling is deprecated in newer pandas versions.
frames = [pd.read_csv("data/day/PA/dec/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
PA_dec = pd.concat(frames) if frames else pd.DataFrame()

PA_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,000cb4018ac842aa5428402d14cccb4c63045d08,dec,2017,auto,auto,Middletown,627.172414,700.896552,630.896552,27.0,False,False,True
1,000cb4018ac842aa5428402d14cccb4c63045d08,dec,2017,auto,hold,Middletown,650.064516,734.746237,652.253763,27.0,False,False,True
2,001540b162bef02b1e52f9a93e03625d9f2492f6,dec,2017,heat,hold,Tredyffrin,651.365217,655.375362,652.636232,65.0,False,False,True
3,004207d68a3ccd055b3e699142250299486128ab,dec,2017,heat,hold,Sewickley,696.789474,739.368421,702.157895,55.0,True,False,False
4,00ae320f0e8a0bffe1cc1c8b86ec914a5f8fd7ff,dec,2017,heat,auto,Laureldale,654.648649,672.049550,657.004505,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2178,ffd06e2a208d4876d29e70cc6d77c38abe5fe1cb,dec,2020,heat,hold,Pittsburgh,698.080495,701.362229,701.362229,50.0,True,False,False
2179,ffda5b1af97787564c19df5f554a3e3ccf6362e1,dec,2020,heat,auto,West Chester,670.745614,670.000000,670.000000,35.0,True,False,True
2180,ffda5b1af97787564c19df5f554a3e3ccf6362e1,dec,2020,heat,hold,West Chester,682.747863,690.000000,690.000000,35.0,True,False,True
2181,ffdf0f4bb190103795d9fbea6a3afe4fd1d2236c,dec,2020,heat,auto,Media,656.623853,651.927752,651.182339,30.0,False,False,False


In [187]:
# Write the combined December data; the row index is left out of the file.
PA_dec_csv = "Scraper_Output/State_Month_Day/PA/PA_dec.csv"
PA_dec.to_csv(PA_dec_csv, index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined CSV reproducible between runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/PA/") if f.endswith(".csv")
)

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and stack them into a single dataframe.
# The frames are collected first and concatenated once: this avoids re-copying
# the accumulated rows on every iteration and avoids concatenating onto an
# empty DataFrame, whose handling is deprecated in newer pandas versions.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/PA/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
PA_all = pd.concat(frames) if frames else pd.DataFrame()

PA_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,001540b162bef02b1e52f9a93e03625d9f2492f6,aug,2017,cool,hold,Tredyffrin,722.392006,720.718678,720.718678,65.0,False,False,True
1,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,aug,2017,cool,auto,Upper Nazareth,758.875983,797.432314,748.148472,0.0,False,False,False
2,011de1ca8e0ee82d48ca2ac4cb1c182657d50248,aug,2017,cool,hold,Upper Nazareth,743.080427,747.943060,747.938790,0.0,False,False,False
3,0212f695761a719f8afa5ac8c450a099acec98ba,aug,2017,auto,hold,pottstown,731.628571,730.257143,680.257143,20.0,False,False,False
4,025a1d00df108f951005ee0dfe2539d4732aaeaa,aug,2017,auto,hold,Exton,686.098434,689.355705,630.000000,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9352,ff06e494220daaa501485dcb33646ac03d931271,jun,2021,cool,hold,Collegeville,667.592949,667.375000,667.375000,0.0,False,False,False
9353,ff42754912f9d1247596cf4d40b9cc92c7658c6c,jun,2021,auto,hold,Allentown,711.000000,705.000000,655.000000,5.0,False,False,False
9354,ff748c1355830330c5e5141fa1ffcb740ba69c50,jun,2021,cool,hold,Gettysburg,745.020884,756.147791,743.637751,10.0,True,False,True
9355,ff748c1355830330c5e5141fa1ffcb740ba69c50,jun,2021,heat,hold,Gettysburg,729.113636,734.840909,734.022727,10.0,True,False,True


In [190]:
# Write the combined data for the whole state; the row index is left out of the file.
PA_all_csv = "Scraper_Output/State_Month_Day/PA_all_day.csv"
PA_all.to_csv(PA_all_csv, index=False, header=True)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries

# Years checked for each month, in the order the results are printed.
years_by_month = {
    "jan": range(2017, 2022),
    "feb": range(2017, 2022),
    "jun": range(2017, 2022),
    "jul": range(2017, 2022),
    "aug": range(2017, 2021),
    "dec": range(2017, 2021),
}

# Each month/year frame is a notebook-level variable named "<month>_<year>",
# so it is looked up by name instead of repeating one print line per frame.
for month, years in years_by_month.items():
    for year in years:
        frame_name = f"{month}_{year}"
        states = globals()[frame_name]['ProvinceState'].unique()
        print(f"Unique {frame_name}: {states}")

Unique jan_2017: ['PA']
Unique jan_2018: ['PA']
Unique jan_2019: ['PA']
Unique jan_2020: ['PA']
Unique jan_2021: ['PA']
Unique feb_2017: ['PA']
Unique feb_2018: ['PA']
Unique feb_2019: ['PA']
Unique feb_2020: ['PA']
Unique feb_2021: ['PA']
Unique jun_2017: ['PA']
Unique jun_2018: ['PA']
Unique jun_2019: ['PA']
Unique jun_2020: ['PA']
Unique jun_2021: ['PA']
Unique jul_2017: ['PA']
Unique jul_2018: ['PA']
Unique jul_2019: ['PA']
Unique jul_2020: ['PA']
Unique jul_2021: ['PA']
Unique aug_2017: ['PA']
Unique aug_2018: ['PA']
Unique aug_2019: ['PA']
Unique aug_2020: ['PA']
Unique dec_2017: ['PA']
Unique dec_2018: ['PA']
Unique dec_2019: ['PA']
Unique dec_2020: ['PA']
