# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQueary

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 4 years

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Read in month csv for state
jan_2017 = pd.read_csv("../data_large/MD-day/2017-jan-day-MD.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c7a15d8b3eec2322c388fc0149aa76ff42f486e1,2017-01-08 18:20:00 UTC,auto,hold,731,755,735,MD,Hanover,5,False,False,False,Gas
1,ac3e5ee381a06b159a3f5d1e15f396df855dba42,2017-01-07 19:55:00 UTC,auto,auto,706,745,695,MD,Fort Washington,15,False,False,False,Gas
2,ae43f2fa819c437eb032626ffe24f626a9c28392,2017-01-11 19:50:00 UTC,heat,hold,774,770,770,MD,Beltsville,55,True,False,False,Gas
3,67e4b525e8b070705d003a10ab69acce1ec68f04,2017-01-21 17:10:00 UTC,heat,hold,671,660,660,MD,Elkton,0,False,False,False,Gas
4,9515cb66ca25b80b6310cb0da4d99b9e715e6ee5,2017-01-17 17:30:00 UTC,heat,hold,703,700,700,MD,Kingsville,15,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
226404,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-01-09 12:25:00 UTC,auto,hold,712,770,700,MD,Clarksville,5,False,False,False,Gas
226405,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-01-09 15:20:00 UTC,auto,hold,696,770,700,MD,Clarksville,5,False,False,False,Gas
226406,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-01-21 16:40:00 UTC,auto,auto,689,770,690,MD,Clarksville,5,False,False,False,Gas
226407,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-01-13 18:40:00 UTC,auto,auto,698,770,700,MD,Clarksville,5,False,False,False,Gas


In [4]:
# Add year and month

jan_2017["Year"] = "2017"
jan_2017["Month"] = "Jan"

In [5]:
# Rename columns to label the aggregates

jan_2017 = jan_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [6]:
jan_2017_ave = jan_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0004ddd9fb43232e8457ed12b578e20c5e1b33b0,Jan,2017,heat,auto,Parkville,686.065217,689.304348,688.956522,50.0,False,False,False
0004ddd9fb43232e8457ed12b578e20c5e1b33b0,Jan,2017,heat,hold,Parkville,682.976562,681.726562,682.179688,50.0,False,False,False
01d52ed97d0be11254d11570a034879724567b3b,Jan,2017,heat,hold,Laurel,719.468750,720.000000,720.000000,5.0,False,False,False
047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,Jan,2017,heat,auto,White Plains,731.971246,732.193291,732.193291,15.0,False,False,False
047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,Jan,2017,heat,hold,White Plains,736.566169,737.644776,737.644776,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,Jan,2017,auto,hold,Clarksville,703.235772,770.000000,702.601626,5.0,False,False,False
ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,Jan,2017,heat,auto,Clarksville,720.000000,770.000000,719.000000,5.0,False,False,False
ffceb92e7d91925adb0905cdb576c31fa579cceb,Jan,2017,heat,hold,Bethesda,681.000000,805.000000,618.000000,0.0,False,False,False
ffee15cd197d28701219f8af65ba1c5d39c77257,Jan,2017,heat,auto,Nottingham,739.235294,744.500000,743.588235,35.0,False,False,True


In [7]:
# Export CSV file

jan_2017_ave.to_csv("data/day/MD/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Read in month csv for state
jan_2018 = pd.read_csv("../data_large/MD-day/2018-jan-day-MD.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bd071f31439eb356d91da7ebeb5cd839a7f3d266,2018-01-29 19:00:00 UTC,auto,auto,699,755,705,MD,Huntingtown,5,False,False,True,Electric
1,b71d2abbd1899927db080983f55c2752b9494698,2018-01-07 13:30:00 UTC,auto,auto,676,725,675,MD,Monkton,27,False,False,True,Electric
2,b71d2abbd1899927db080983f55c2752b9494698,2018-01-23 16:40:00 UTC,auto,auto,697,725,675,MD,Monkton,27,False,False,True,Electric
3,f1b3504ad289f5a70982df5be4aa56bbaa7c5b68,2018-01-29 14:00:00 UTC,auto,hold,696,810,700,MD,Baltimore,120,False,False,False,Gas
4,f22b7f5c337392f62a8bb7227524435530579769,2018-01-03 15:45:00 UTC,heat,hold,724,725,725,MD,Germantown,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1265046,d8b74725127f42d15f9f9ca104979333f6e0488a,2018-01-16 16:30:00 UTC,heat,auto,818,760,820,MD,Waldorf,30,True,False,True,Electric
1265047,d8b74725127f42d15f9f9ca104979333f6e0488a,2018-01-11 15:10:00 UTC,heat,hold,812,760,810,MD,Waldorf,30,True,False,True,Electric
1265048,d037ac5f6cc1591a4e6f6ef0e5a7f3833a868de5,2018-01-10 13:25:00 UTC,heat,auto,677,760,680,MD,Elkridge,27,False,False,False,Gas
1265049,f188552e62caa5aa45a34f78cb5666d9374c2eb6,2018-01-07 14:35:00 UTC,heat,auto,747,760,760,MD,Baltimore,67,False,False,False,Gas


In [10]:
# Add year and month

jan_2018["Year"] = "2018"
jan_2018["Month"] = "Jan"


In [11]:
# Rename columns to label the aggregates

jan_2018 = jan_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [12]:
jan_2018_ave = jan_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2018_ave

In [13]:
# Export CSV file

jan_2018_ave.to_csv("data/day/MD/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Read in month csv for state
jan_2019 = pd.read_csv("../data_large/MD-day/2019-jan-day-MD.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2019-01-10 13:15:00 UTC,heat,hold,707,709,709,MD,Upper Marlboro,70,True,False,True,Electric
2,7dbfdc568400a68d1c5ffe5e4b93e3bf8bafaa06,2019-01-15 11:50:00 UTC,heat,hold,720,727,727,MD,Pasadena,0,False,False,False,Gas
4,f5af96f6d4cada481cfd7a215fa671a8c136d181,2019-01-18 17:25:00 UTC,heat,hold,722,728,728,MD,Frederick,15,False,False,False,Gas
5,7c019bd036c184ca47e6a7bf3470301c37108e6b,2019-01-20 12:40:00 UTC,heat,auto,619,620,620,MD,Annapolis,30,True,False,True,Electric
6,5c360954a3813f14906a8f031c016e011e755223,2019-01-06 12:50:00 UTC,auto,auto,723,788,725,MD,Glen Burnie,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2365300,2d80d675c0f84c0a8a8165d9f1c0e273dd2fa5e4,2019-01-06 17:35:00 UTC,auto,auto,710,760,710,MD,Spring Spring,55,False,False,False,Gas
2365301,9288b6c38216f504cf768d1a0a859a70734dbfb0,2019-01-12 16:25:00 UTC,auto,hold,735,760,740,MD,Frederick,8,False,False,False,Gas
2365302,e2bdf54b0e3c1eac9d9a4216b3899a6e5243e8d3,2019-01-20 14:20:00 UTC,heat,auto,757,760,760,MD,Baltimore,40,False,False,False,Gas
2365303,f7db271c25950038e80ef7bb45d135e78a37104d,2019-01-19 18:10:00 UTC,heat,auto,745,760,750,MD,Capitol Heights,7,False,False,False,Gas


In [16]:
# Add year and month

jan_2019["Year"] = "2019"
jan_2019["Month"] = "Jan"


In [17]:
# Rename columns to label the aggregates

jan_2019 = jan_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [18]:
jan_2019_ave = jan_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2019_ave

In [19]:
# Export CSV file

jan_2019_ave.to_csv("data/day/MD/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Read in month csv for state
jan_2020 = pd.read_csv("../data_large/MD-day/2020-jan-day-MD.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dcaf837ae46bdca756fa6d6fd49a68a9067cc390,2020-01-27 17:15:00 UTC,heat,hold,660,726,726,MD,Fort Washington,57,False,False,False,Gas
1,e84be45f91d9607ee0020d9fffbec315b9d8e8c2,2020-01-19 13:40:00 UTC,auto,auto,703,725,675,MD,Odenton,28,True,False,True,Electric
2,3b4d8b8cc450150f4648106c21d2586f6b574648,2020-01-28 12:35:00 UTC,heat,hold,662,698,660,MD,Baltimore,0,True,False,False,Gas
3,7a87b4c41b19c5a9be2f863ab3da48e0ff767fc6,2020-01-02 17:50:00 UTC,heat,auto,749,808,750,MD,Baltimore,0,False,False,False,Gas
4,296f1897cffab374089199beee2930ea61e4d11c,2020-01-28 17:45:00 UTC,heat,hold,694,699,699,MD,Silver Spring,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2564595,5cd4c943eaf4f735c690a12262c2c6315b394758,2020-01-22 17:30:00 UTC,heat,hold,755,760,760,MD,Middle River,0,False,False,False,Gas
2564596,cc84be0e3ae6c2570e2812fb0d7c985c24139f97,2020-01-25 17:30:00 UTC,heat,auto,771,760,760,MD,BALTIMORE,0,True,False,False,Gas
2564597,cd301984d9bce09f61fdcbad699a027c9fcfd501,2020-01-15 11:25:00 UTC,auto,hold,706,760,680,MD,University Park,75,False,False,False,Gas
2564598,dfe02297737c1a90148750fc1061288a0fa65344,2020-01-27 14:20:00 UTC,auto,auto,710,760,710,MD,Elkridge,10,False,False,True,Electric


In [22]:
# Add year and month

jan_2020["Year"] = "2020"
jan_2020["Month"] = "Jan"


In [23]:
# Rename columns to label the aggregates

jan_2020 = jan_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [24]:
jan_2020_ave = jan_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2020_ave

In [25]:
# Export CSV file

jan_2020_ave.to_csv("data/day/MD/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Read in month csv for state
jan_2021 = pd.read_csv("../data_large/MD-day/2021-jan-day-MD.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3474b6cac02eb80a1ba6814bdcf58f4cdce8a579,2021-01-11 18:00:00 UTC,heat,hold,668,643,643,MD,Baltimore,60,False,False,False,Gas
1,6bbbdb10c20ed0b590bb7eeae907266b39daf3c2,2021-01-08 17:00:00 UTC,auto,hold,716,786,736,MD,Bowie,30,True,False,True,Electric
2,be3c89c8930d4bdae5bb262013c017bd1e05a1ea,2021-01-09 18:00:00 UTC,heat,hold,720,735,735,MD,Prince Frederick,0,True,False,True,Electric
3,a62fa52b61eee8208d776e8e0316fe407ef3f24f,2021-01-23 14:25:00 UTC,heat,hold,688,709,709,MD,Kensington,20,True,False,True,Electric
4,a62fa52b61eee8208d776e8e0316fe407ef3f24f,2021-01-06 19:20:00 UTC,heat,hold,683,703,703,MD,Kensington,20,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1541599,fe463fff6abc21caae15c430a8240dcc96c09582,2021-01-26 16:10:00 UTC,auto,hold,691,765,695,MD,Waldorf,5,False,False,False,Gas
1541600,fe463fff6abc21caae15c430a8240dcc96c09582,2021-01-26 19:10:00 UTC,auto,hold,693,765,695,MD,Waldorf,5,False,False,False,Gas
1541601,bbd53e6af066bf4372ecaf6a3ecf1085c340b4c8,2021-01-01 13:20:00 UTC,auto,hold,713,765,715,MD,Waldorf,29,True,False,True,Electric
1541602,7825e3260d91aa5aa7f37fd0abbbbfe796c553c2,2021-01-04 15:00:00 UTC,auto,hold,712,765,715,MD,Waldorf,5,False,False,False,Gas


In [28]:
# Add year and month

jan_2021["Year"] = "2021"
jan_2021["Month"] = "Jan"


In [29]:
# Rename columns to label the aggregates

jan_2021 = jan_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [30]:
jan_2021_ave = jan_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2021_ave

In [31]:
# Export CSV file

jan_2021_ave.to_csv("data/day/MD/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/MD/jan/") if f.endswith(".csv")]

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_jan = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/MD/jan/" + file)
    MD_jan = pd.concat([MD_jan, df])
    
MD_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,Jan,2017,heat,auto,Parkville,686.065217,689.304348,688.956522,50.0,False,False,False
1,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,Jan,2017,heat,hold,Parkville,682.976562,681.726562,682.179688,50.0,False,False,False
2,01d52ed97d0be11254d11570a034879724567b3b,Jan,2017,heat,hold,Laurel,719.468750,720.000000,720.000000,5.0,False,False,False
3,047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,Jan,2017,heat,auto,White Plains,731.971246,732.193291,732.193291,15.0,False,False,False
4,047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,Jan,2017,heat,hold,White Plains,736.566169,737.644776,737.644776,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1647,ff85b807aa25b50795dec4b1c4e3524319d86652,Jan,2021,auto,hold,Hollywood,644.409091,783.000000,648.000000,19.0,True,False,True
1648,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,Jan,2021,heat,hold,Joppa,704.722222,710.097222,709.166667,10.0,False,False,False
1649,ffbef6a03db908871b5b118c67ae6257bcbbcbd5,Jan,2021,heat,hold,Baltimore,723.825000,740.000000,740.000000,7.0,False,False,False
1650,ffda40b5a71bc787707b1b2a35a9c73df958cf93,Jan,2021,heat,hold,Kensington,702.166460,699.446377,699.446377,50.0,True,False,False


In [34]:
MD_jan.to_csv("Scraper_Output/State_Month_Day/MD/MD_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Read in month csv for state
feb_2017 = pd.read_csv("../data_large/MD-day/2017-feb-day-MD.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,5c31bce56cbf2230632ec9544f991b847b53dc33,2017-02-27 11:45:00 UTC,heat,hold,632,650,630,MD,West River,15,True,False,False,Gas
2,7a7f2d03bcbef8cea65767b01bdd45a1f7977ce2,2017-02-09 19:25:00 UTC,heat,hold,693,690,690,MD,Baltimore,5,False,False,False,Gas
4,4cc7bd5fdce3b42f94f0c13534bf630bfb732577,2017-02-26 12:35:00 UTC,heat,auto,710,720,720,MD,Laurel,40,True,False,True,Electric
5,f3d41738234a96263b6ca0d25b7c7d823eb2dd0b,2017-02-11 17:40:00 UTC,heat,auto,679,680,680,MD,Potomac,50,False,False,False,Gas
6,8f416881411da4fe8e209b90b7dc3a844ccec68f,2017-02-06 17:15:00 UTC,heat,hold,717,720,720,MD,Clarksville,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192180,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-02-19 17:25:00 UTC,auto,auto,721,750,700,MD,Clarksville,5,False,False,False,Gas
192181,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-02-28 16:55:00 UTC,auto,auto,707,750,700,MD,Clarksville,5,False,False,False,Gas
192182,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-02-28 19:10:00 UTC,auto,auto,715,750,700,MD,Clarksville,5,False,False,False,Gas
192183,ffb8b56e94edd9fcd3d2c716c646fbfa1c20f22b,2017-02-25 13:20:00 UTC,auto,auto,725,750,700,MD,Clarksville,5,False,False,False,Gas


In [37]:
# Add year and month

feb_2017["Year"] = "2017"
feb_2017["Month"] = "feb"

In [38]:
# Rename columns to label the aggregates

feb_2017 = feb_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [39]:
feb_2017_ave = feb_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2017_ave

In [40]:
# Export CSV file

feb_2017_ave.to_csv("data/day/MD/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Read in month csv for state
feb_2018 = pd.read_csv("../data_large/MD-day/2018-feb-day-MD.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,60e6f5405be4c5de63907758af8d5bd3b27f7aea,2018-02-17 18:00:00 UTC,heat,auto,752,775,755,MD,Bowie,0,False,False,True,Electric
2,c24e6e0c562212ff8c5ca8c11f615e8c1721e54d,2018-02-26 16:55:00 UTC,heat,hold,714,715,715,MD,Annapolis,0,False,False,True,Electric
4,1722849fde10d8d0b29eac69913b0cfd5f6cc069,2018-02-22 16:00:00 UTC,auto,auto,679,725,675,MD,Clarksburg,8,False,False,False,Gas
5,b5e97060a134ec5d70363d12f2a54a79b8fb108f,2018-02-17 13:30:00 UTC,heat,auto,702,709,709,MD,BALTIMORE,0,False,False,False,Gas
6,55d456b87141a44ad25c798a96aabebc9dc047a1,2018-02-23 17:35:00 UTC,heat,hold,713,716,716,MD,Damascus,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1242609,d8d546521932a23ba1ea20fb2784cc5009336775,2018-02-12 15:30:00 UTC,heat,hold,759,760,760,MD,Chesapeake Beach,38,False,False,True,Electric
1242610,04f208b45c0cecb9412eb205335c620c8499502b,2018-02-07 16:30:00 UTC,auto,hold,671,760,670,MD,Clarksburg,5,False,False,False,Gas
1242611,169a4d7f3a79e8f9025de83ed0d00ec78ce7adfa,2018-02-25 16:40:00 UTC,auto,auto,712,760,710,MD,Odenton,60,False,False,False,Gas
1242612,8e6b7684487a0c1e8fc9601ceed10d9522d0330c,2018-02-09 15:35:00 UTC,heat,hold,755,760,760,MD,Clarksburg,35,True,False,True,Electric


In [43]:
# Add year and month

feb_2018["Year"] = "2018"
feb_2018["Month"] = "feb"


In [44]:
# Rename columns to label the aggregates

feb_2018 = feb_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [45]:
feb_2018_ave = feb_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2018_ave

In [46]:
# Export CSV file

feb_2018_ave.to_csv("data/day/MD/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Read in month csv for state
feb_2019 = pd.read_csv("../data_large/MD-day/2019-feb-day-MD.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,22cdba02c1762f4b8475ee47419a8bac57b1e025,2019-02-19 12:35:00 UTC,auto,hold,734,785,735,MD,Monrovia,0,False,False,False,Gas
1,e979b300779639557a9c056d9e19495b2f75470a,2019-02-16 15:20:00 UTC,heat,auto,660,681,660,MD,Ellicott City,30,True,False,True,Electric
2,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2019-02-27 18:35:00 UTC,heat,hold,713,709,709,MD,Upper Marlboro,70,True,False,True,Electric
3,9f3e84f8e57e9a1e776ccc95ff66f6c40697df59,2019-02-24 18:30:00 UTC,auto,auto,676,725,675,MD,Mechanicsville,20,True,False,True,Electric
4,22cdba02c1762f4b8475ee47419a8bac57b1e025,2019-02-24 18:50:00 UTC,auto,hold,733,785,735,MD,Monrovia,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1639786,b990e97a1347febf4cd45ed4a92bc3aa748a4bbc,2019-02-25 18:55:00 UTC,heat,hold,790,760,760,MD,Upper Marlboro,0,False,False,False,Gas
1639787,5f3add1008571aeea005e02f5cd478d7d77d8f04,2019-02-18 15:00:00 UTC,heat,auto,735,760,718,MD,Columbia,0,True,False,False,Gas
1639788,5d2bfdf61cc4358f0331c8e6f35e58544557ca8b,2019-02-12 12:45:00 UTC,heat,auto,757,760,760,MD,Silver Spring,49,False,False,False,Gas
1639789,fe463fff6abc21caae15c430a8240dcc96c09582,2019-02-18 17:05:00 UTC,auto,hold,704,760,700,MD,Waldorf,5,False,False,False,Gas


In [49]:
# Add year and month

feb_2019["Year"] = "2019"
feb_2019["Month"] = "feb"


In [50]:
# Rename columns to label the aggregates

feb_2019 = feb_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [51]:
feb_2019_ave = feb_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2019_ave

In [52]:
# Export CSV file

feb_2019_ave.to_csv("data/day/MD/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Read in month csv for state
feb_2020 = pd.read_csv("../data_large/MD-day/2020-feb-day-MD.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,239d3e7a6c305df2be3686fe08d54813af066938,2020-02-14 17:05:00 UTC,heat,hold,709,734,716,MD,Clarksburg,40,True,False,True,Electric
1,bf40d5f523db91e00cacdd813aac6b4500dfbfbc,2020-02-16 17:25:00 UTC,heat,auto,660,655,660,MD,Silver Spring,70,True,False,False,Gas
3,2df7b5b78e3d3cb6f1c501207676d3d5835f0373,2020-02-23 17:45:00 UTC,heat,auto,700,656,690,MD,Silver Spring,50,True,False,False,Gas
4,375df20d48bcb4a751665a6bd7f3659a9825a8b0,2020-02-06 11:55:00 UTC,auxHeatOnly,hold,722,735,735,MD,Waldorf,0,True,False,True,Electric
5,3474b6cac02eb80a1ba6814bdcf58f4cdce8a579,2020-02-17 15:15:00 UTC,heat,hold,641,645,645,MD,Baltimore,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2304426,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2020-02-27 13:30:00 UTC,auto,hold,710,765,715,MD,Upper Marlboro,70,True,False,True,Electric
2304427,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2020-02-28 13:45:00 UTC,auto,hold,708,765,715,MD,Upper Marlboro,70,True,False,True,Electric
2304428,f440fba925f695e20c70400cd4f65c7e1cde7392,2020-02-08 17:30:00 UTC,auto,hold,733,765,715,MD,Reisterstown,9,True,False,False,Gas
2304430,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2020-02-22 19:20:00 UTC,auto,hold,706,765,715,MD,Upper Marlboro,70,True,False,True,Electric


In [55]:
# Add year and month

feb_2020["Year"] = "2020"
feb_2020["Month"] = "feb"


In [56]:
# Rename columns to label the aggregates

feb_2020 = feb_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [57]:
feb_2020_ave = feb_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2020_ave

In [58]:
# Export CSV file

feb_2020_ave.to_csv("data/day/MD/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Read in month csv for state
feb_2021 = pd.read_csv("../data_large/MD-day/2021-feb-day-MD.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3e97012140db70a8fcf5c3f6e4098809c1940512,2021-02-15 19:35:00 UTC,auto,hold,712,787,717,MD,Parkville,0,False,False,False,Gas
1,af0f70bdb9551bb11a68ef4c7ea0eed54569787b,2021-02-02 19:50:00 UTC,heat,hold,705,702,702,MD,Rockville,30,False,False,False,Gas
3,c3033f089e06da9232840d0ac164ec11877f6b56,2021-02-05 13:55:00 UTC,heat,hold,679,683,683,MD,Millersville,30,True,False,False,Gas
4,e73c39e0e5144b515efa0ba6453727d4c36c96ef,2021-02-02 16:20:00 UTC,heat,hold,643,649,649,MD,Bel Air,30,False,False,False,Gas
5,631c42704191fca33668b48763d505b6f90c5452,2021-02-07 10:45:00 UTC,heat,hold,703,707,707,MD,Clarksville,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1361326,bbd53e6af066bf4372ecaf6a3ecf1085c340b4c8,2021-02-21 18:10:00 UTC,auto,hold,712,765,715,MD,Waldorf,29,True,False,True,Electric
1361327,fe463fff6abc21caae15c430a8240dcc96c09582,2021-02-05 18:20:00 UTC,auto,hold,722,765,695,MD,Waldorf,5,False,False,False,Gas
1361328,fe463fff6abc21caae15c430a8240dcc96c09582,2021-02-04 13:30:00 UTC,auto,hold,691,765,695,MD,Waldorf,5,False,False,False,Gas
1361329,bbd53e6af066bf4372ecaf6a3ecf1085c340b4c8,2021-02-20 14:15:00 UTC,auto,hold,715,765,715,MD,Waldorf,29,True,False,True,Electric


In [61]:
# Add year and month

feb_2021["Year"] = "2021"
feb_2021["Month"] = "feb"


In [62]:
# Rename columns to label the aggregates

feb_2021 = feb_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [63]:
feb_2021_ave = feb_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2021_ave

In [64]:
# Export CSV file

feb_2021_ave.to_csv("data/day/MD/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/MD/feb/") if f.endswith(".csv")]

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_feb = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/MD/feb/" + file)
    MD_feb = pd.concat([MD_feb, df])
    
MD_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,feb,2017,heat,auto,Parkville,669.414634,670.195122,670.195122,50.0,False,False,False
1,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,feb,2017,heat,hold,Parkville,687.869565,689.459627,689.378882,50.0,False,False,False
2,047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,feb,2017,auto,auto,White Plains,719.989474,740.136842,720.136842,15.0,False,False,False
3,047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,feb,2017,auto,hold,White Plains,729.263581,745.154930,717.847082,15.0,False,False,False
4,047e8a12282fb7a3852f4e6a8b1b69cd7807cb9c,feb,2017,cool,auto,White Plains,732.068627,750.000000,770.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1566,ff70f4424d819ab49fdeb63dc28490f4adb7ab51,feb,2021,heat,hold,College Park,708.812500,720.145833,719.708333,0.0,True,False,False
1567,ff85b807aa25b50795dec4b1c4e3524319d86652,feb,2021,auto,hold,Hollywood,653.555556,760.000000,660.000000,19.0,True,False,True
1568,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,feb,2021,heat,hold,Joppa,706.492308,714.061538,713.507692,10.0,False,False,False
1569,ffda40b5a71bc787707b1b2a35a9c73df958cf93,feb,2021,heat,hold,Kensington,695.003663,693.818681,693.640110,50.0,True,False,False


In [67]:
MD_feb.to_csv("Scraper_Output/State_Month_Day/MD/MD_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Read in month csv for state
jun_2017 = pd.read_csv("../data_large/MD-day/2017-jun-day-MD.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,2017-06-10 14:40:00 UTC,cool,auto,737,720,677,MD,Parkville,50,False,False,False,Gas
1,27e6c733af6abbef612d53f6e46362f50002aeae,2017-06-21 10:55:00 UTC,auto,hold,722,720,670,MD,Clarksburg,5,False,False,False,Gas
2,27e6c733af6abbef612d53f6e46362f50002aeae,2017-06-11 10:35:00 UTC,auto,hold,739,740,690,MD,Clarksburg,5,False,False,False,Gas
3,1760b1cd256662d00ac6bff09de285e58c4888fc,2017-06-22 12:40:00 UTC,auto,auto,778,786,736,MD,Elkridge,5,False,False,False,Gas
4,691f4eaa06db47447a8e0503a60f1e3aaa2bdf9a,2017-06-07 17:25:00 UTC,cool,auto,743,780,710,MD,Baltimore,105,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371834,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-06-30 11:55:00 UTC,cool,auto,722,720,720,MD,Olney,5,False,False,False,Gas
371835,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-06-17 17:05:00 UTC,cool,auto,711,710,720,MD,Olney,5,False,False,False,Gas
371836,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-06-08 14:40:00 UTC,cool,auto,693,700,730,MD,Olney,5,False,False,False,Gas
371837,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-06-05 11:45:00 UTC,cool,hold,711,710,710,MD,Olney,5,False,False,False,Gas


In [70]:
# Add year and month

jun_2017["Year"] = "2017"
jun_2017["Month"] = "jun"

In [71]:
# Rename columns to label the aggregates

jun_2017 = jun_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [72]:
jun_2017_ave = jun_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2017_ave

In [73]:
# Export CSV file

jun_2017_ave.to_csv("data/day/MD/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Read in month csv for state
jun_2018 = pd.read_csv("../data_large/MD-day/2018-jun-day-MD.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1407104ec776a3e0c9920a7d10457287a8127863,2018-06-15 16:15:00 UTC,auto,auto,691,685,635,MD,Point of Rocks,10,False,False,False,Gas
1,7abdce09c57b7d93e3176ce3d151358818901628,2018-06-21 16:55:00 UTC,auto,hold,736,733,663,MD,Chevy Chase Section,0,False,False,False,Gas
2,6ece373f6bd9e1fc98e07d1af302ce641d46aed0,2018-06-19 11:00:00 UTC,cool,hold,718,726,726,MD,Fulton,5,False,False,True,Electric
3,99a58261451ef06e212eae2a14d19ca8d271c98c,2018-06-19 16:00:00 UTC,cool,hold,743,715,715,MD,Bowie,10,False,False,False,Gas
4,986140bc78ae9ba924b5b4d8d7150e02ec9d4f1b,2018-06-29 18:30:00 UTC,auto,hold,739,735,645,MD,Halethorpe,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1778170,8da83daee15d0d5b80891f84c794853e7a4da8c7,2018-06-11 17:05:00 UTC,cool,hold,716,760,760,MD,Laurel,20,False,False,False,Gas
1778171,6bc016183d235cf489f14c15a784c1709182d370,2018-06-25 14:50:00 UTC,cool,auto,766,760,760,MD,Elkridge,20,False,False,False,Gas
1778172,0f855a8366ccb3c48a016ce2cea417637dac292a,2018-06-25 12:40:00 UTC,cool,hold,762,760,760,MD,Towson,25,False,False,False,Gas
1778173,88d4af384e2cc3fde8b6a3383b68fdd66edc6077,2018-06-10 18:05:00 UTC,cool,hold,764,760,760,MD,Sykesville,37,False,False,True,Electric


In [76]:
# Add year and month

jun_2018["Year"] = "2018"
jun_2018["Month"] = "jun"

In [77]:
# Rename columns to label the aggregates

jun_2018 = jun_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [78]:
jun_2018_ave = jun_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2018_ave

In [79]:
# Export CSV file

jun_2018_ave.to_csv("data/day/MD/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Read in month csv for state
jun_2019 = pd.read_csv("../data_large/MD-day/2019-jun-day-MD.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6b16bdfa2c66e5b11fc24cc1436851694220ad3f,2019-06-23 15:20:00 UTC,cool,auto,780,810,810,MD,Baltimore,0,False,False,False,Gas
1,2261994d8cb2d65df0d0e700d120094c40b8b170,2019-06-24 19:50:00 UTC,auto,hold,718,702,652,MD,Glen Burnie,60,False,False,False,Gas
2,76d05188b8e53054d40a66b9a8c5d847e2ef7ea4,2019-06-05 17:35:00 UTC,cool,hold,704,705,705,MD,Brooklyn,50,False,False,False,Gas
3,f08b33dc13258ba4a5192e819bc917facf59c372,2019-06-29 14:50:00 UTC,cool,hold,781,781,741,MD,Owings Mills,30,True,False,True,Electric
4,109ee62e8260f06a53c2bda02000b1f6e2c45872,2019-06-23 08:50:00 UTC,cool,hold,733,735,735,MD,Lutherville Timonium,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2711577,506551463f84cbe68a15958fbc0ba7c7291b4bfd,2019-06-07 13:05:00 UTC,cool,hold,775,760,760,MD,Baltimore,7,False,False,False,Gas
2711578,784fc0aa063d7989a139a68583d42adf9b7a0903,2019-06-20 17:55:00 UTC,cool,hold,762,760,760,MD,Baltimore,0,False,False,False,Gas
2711579,323f615a717add6981b126798c7fe1e2d479f254,2019-06-10 10:50:00 UTC,cool,auto,740,760,760,MD,Towson,0,False,False,False,Gas
2711580,5bcd6f87cafac76dd7e895d75202d01e7a09e4cd,2019-06-29 11:35:00 UTC,cool,hold,760,760,760,MD,Glen Burnie,5,True,False,False,Gas


In [82]:
# Add year and month

jun_2019["Year"] = "2019"
jun_2019["Month"] = "jun"

In [83]:
# Rename columns to label the aggregates

jun_2019 = jun_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [84]:
jun_2019_ave = jun_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2019_ave

In [85]:
# Export CSV file

jun_2019_ave.to_csv("data/day/MD/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Read in month csv for state
jun_2020 = pd.read_csv("../data_large/MD-day/2020-jun-day-MD.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7abdce09c57b7d93e3176ce3d151358818901628,2020-06-16 19:10:00 UTC,auto,hold,736,730,658,MD,Chevy Chase Section,0,False,False,False,Gas
1,744ab43b6923cdbce0ac9b55599a05f922ae20cd,2020-06-28 15:20:00 UTC,cool,auto,778,800,703,MD,Baltimore,0,True,False,True,Electric
2,0b895a55129ee104ae831a7b353b604e66b87b5f,2020-06-19 13:30:00 UTC,cool,hold,743,745,745,MD,Gwynne Oak,48,True,False,True,Electric
3,b23917a9ccd7c5c495969397657f340d8e802033,2020-06-07 18:35:00 UTC,cool,hold,753,743,743,MD,Columbia,50,False,False,False,Gas
4,87d51152901507fce151539d45f695bd0e2db303,2020-06-20 12:00:00 UTC,cool,hold,724,745,745,MD,Germantown,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2665446,621c7ee605fcb379c16b44d0685fd9074568ec3a,2020-06-17 17:05:00 UTC,cool,hold,754,760,760,MD,Port Deposit,10,False,False,False,Gas
2665447,13947032cb6927d9d4055be798218fa86e4f463d,2020-06-25 19:05:00 UTC,cool,hold,743,760,760,MD,Baltimore,99,True,False,True,Electric
2665448,88d4af384e2cc3fde8b6a3383b68fdd66edc6077,2020-06-05 12:40:00 UTC,cool,hold,761,760,760,MD,Sykesville,37,False,False,True,Electric
2665449,3b4d8b8cc450150f4648106c21d2586f6b574648,2020-06-01 14:30:00 UTC,cool,hold,689,760,760,MD,Baltimore,0,True,False,False,Gas


In [88]:
# Add year and month

jun_2020["Year"] = "2020"
jun_2020["Month"] = "jun"

In [89]:
# Rename columns to label the aggregates

jun_2020 = jun_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [90]:
jun_2020_ave = jun_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2020_ave

In [91]:
# Export CSV file

jun_2020_ave.to_csv("data/day/MD/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Read in month csv for state
jun_2021 = pd.read_csv("../data_large/MD-day/2021-jun-day-MD.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7a3d4016078b85cd612ec67e4f7089bea0238f30,2021-06-08 10:55:00 UTC,cool,hold,748,745,745,MD,Annapolis,10,False,False,False,Gas
1,a43aae75ebe8a32e813c12507411d4766f25fe16,2021-06-17 16:05:00 UTC,cool,hold,751,738,738,MD,Baltimore,0,False,False,False,Gas
2,f440fba925f695e20c70400cd4f65c7e1cde7392,2021-06-30 19:20:00 UTC,cool,hold,779,724,724,MD,Reisterstown,9,True,False,False,Gas
3,9cbf6a9d6cdbeda924b5c5e9a19796638bb43661,2021-06-25 18:10:00 UTC,cool,hold,756,830,830,MD,Abingdon,10,False,False,False,Gas
4,c3033f089e06da9232840d0ac164ec11877f6b56,2021-06-18 18:10:00 UTC,cool,hold,740,740,713,MD,Millersville,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1630454,ca13b95911fbb0f407da4523b09ab210c63223de,2021-06-25 10:05:00 UTC,cool,hold,766,760,760,MD,baltimore,60,False,False,False,Gas
1630455,a879f6644b233bbc8c932205e2a3de019fe9afd6,2021-06-29 11:55:00 UTC,cool,hold,759,760,760,MD,Bowie,30,True,False,True,Electric
1630456,de21b93f99fcafac047be51b88bc9c0fc26ca5c1,2021-06-27 11:55:00 UTC,cool,hold,757,760,760,MD,Bel Air,40,False,False,False,Gas
1630457,2af163a8325aea90ef8cf9ac9fc2ef81b67b396c,2021-06-11 14:10:00 UTC,cool,hold,706,760,760,MD,Towson,60,False,False,False,Gas


In [94]:
# Add year and month

jun_2021["Year"] = "2021"
jun_2021["Month"] = "jun"

In [95]:
# Rename columns to label the aggregates

jun_2021 = jun_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [96]:
jun_2021_ave = jun_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2021_ave

In [97]:
# Export CSV file

jun_2021_ave.to_csv("data/day/MD/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/MD/jun/") if f.endswith(".csv")]

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_jun = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/MD/jun/" + file)
    MD_jun = pd.concat([MD_jun, df])
    
MD_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,jun,2017,cool,auto,Parkville,722.796296,730.342593,676.814815,50.0,False,False,False
1,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,jun,2017,cool,hold,Parkville,712.072581,717.604839,717.201613,50.0,False,False,False
2,00169fef3ce1e3a7aaaa39f9f28d8db467527197,jun,2017,cool,auto,Columbia,748.600000,771.500000,678.000000,40.0,True,False,True
3,01d52ed97d0be11254d11570a034879724567b3b,jun,2017,auto,hold,Laurel,805.000000,820.000000,620.000000,5.0,False,False,False
4,023c0fa05b58c503df8b3d752d52e60ae207fab7,jun,2017,cool,auto,Germantown,730.320000,729.880000,714.400000,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1906,ff85b807aa25b50795dec4b1c4e3524319d86652,jun,2021,auto,hold,Hollywood,763.314815,770.000000,640.000000,19.0,True,False,True
1907,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,jun,2021,cool,hold,Joppa,748.283889,754.575536,747.000000,10.0,False,False,False
1908,ffc3c4181ceca3a2853802359f9737063bb307f9,jun,2021,cool,hold,Dundalk,740.162511,740.000000,740.000000,0.0,False,False,False
1909,ffda40b5a71bc787707b1b2a35a9c73df958cf93,jun,2021,cool,hold,Kensington,742.930009,746.050744,745.990376,50.0,True,False,False


In [100]:
MD_jun.to_csv("Scraper_Output/State_Month_Day/MD/MD_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Read in month csv for state
jul_2017 = pd.read_csv("../data_large/MD-day/2017-jul-day-MD.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,22576ed9350de3cdb5d34b652ceb59a6ff245a67,2017-07-04 18:10:00 UTC,auto,auto,748,720,670,MD,Silver Spring,65,False,False,False,Gas
1,37e84b57dab0dabaa59b7f65bcdaebd29b66af47,2017-07-24 14:10:00 UTC,cool,hold,815,840,790,MD,Laurel,10,True,False,True,Electric
2,75fb5fcade86b4d7cea8c38ae85fbd7260fa656d,2017-07-03 16:55:00 UTC,cool,auto,754,750,680,MD,Centreville,5,False,False,True,Electric
3,41b671126ba91a14e84955e841cc0849dce34a36,2017-07-09 11:25:00 UTC,auto,auto,680,680,630,MD,Crofton,0,False,False,False,Gas
4,d796654e8be2ba59810d04860cd95f93e013a244,2017-07-09 16:15:00 UTC,auto,auto,753,750,680,MD,Gaithersburg,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
467242,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-07-26 13:05:00 UTC,cool,auto,717,730,740,MD,Olney,5,False,False,False,Gas
467243,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-07-30 19:25:00 UTC,cool,auto,726,720,730,MD,Olney,5,False,False,False,Gas
467244,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-07-09 12:40:00 UTC,cool,auto,720,720,710,MD,Olney,5,False,False,False,Gas
467245,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-07-21 11:30:00 UTC,cool,auto,724,720,730,MD,Olney,5,False,False,False,Gas


In [103]:
# Add year and month

jul_2017["Year"] = "2017"
jul_2017["Month"] = "jul"

In [104]:
# Rename columns to label the aggregates

jul_2017 = jul_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [105]:
jul_2017_ave = jul_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2017_ave

In [106]:
# Export CSV file

jul_2017_ave.to_csv("data/day/MD/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Read in month csv for state
jul_2018 = pd.read_csv("../data_large/MD-day/2018-jul-day-MD.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a181db4899ee877d925188069eb64bd33aeaf57,2018-07-04 17:40:00 UTC,cool,hold,719,717,717,MD,Abingdon,30,False,False,True,Electric
2,6e7a6f2a4f0b988b65d34840201f63a0d32b25d6,2018-07-06 19:45:00 UTC,cool,auto,748,741,741,MD,Hyattsville,0,False,False,False,Gas
3,562ce4ec0b9229aef1cbddd75d281b0e5f53bb27,2018-07-29 18:50:00 UTC,auto,hold,787,780,610,MD,Elkridge,10,False,False,False,Gas
4,74f070132daaaf0f438b0c9a481c5832dc7229a7,2018-07-16 11:05:00 UTC,cool,hold,692,701,701,MD,Phoenix,0,True,False,True,Electric
5,7f6e2c226e7c5f3ca024f346ca73b6cb0d77d3a6,2018-07-23 19:10:00 UTC,auto,hold,752,749,699,MD,Chevy Chase Section,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2110089,d3a194f0b7b67c1c2fe41f57dce2034612ee6ff1,2018-07-31 14:20:00 UTC,cool,auto,735,760,760,MD,Parkville,25,True,False,True,Electric
2110090,3206ef5bef01d027c1a68486de70c6fa40ac9d0d,2018-07-09 12:55:00 UTC,cool,auto,734,760,760,MD,Churchville,0,True,False,True,Electric
2110091,ccc7306d0cff36d2a3c94ae8f6793dd23f2d97b9,2018-07-28 18:35:00 UTC,cool,hold,764,760,760,MD,Laurel,0,True,False,True,Electric
2110092,826bcdb1a49911f211e01ab3f0414537556deeff,2018-07-15 18:30:00 UTC,cool,hold,707,760,760,MD,Huntingtown,7,False,False,True,Electric


In [109]:
# Add year and month

jul_2018["Year"] = "2018"
jul_2018["Month"] = "jul"

In [110]:
# Rename columns to label the aggregates

jul_2018 = jul_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [111]:
jul_2018_ave = jul_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2018_ave

In [112]:
# Export CSV file

jul_2018_ave.to_csv("data/day/MD/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Read in month csv for state
jul_2019 = pd.read_csv("../data_large/MD-day/2019-jul-day-MD.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,43e9fce3c503c32828db296fa6a516b61630069e,2019-07-04 15:40:00 UTC,auto,hold,740,695,635,MD,Brunswick,7,False,False,False,Gas
1,76049b0ec85c4c154521e7a2477ed40607aa82c1,2019-07-11 11:15:00 UTC,cool,hold,680,679,679,MD,Burtonsville,30,True,False,True,Electric
2,9699eb71c2724a2157e3644038e6793f8852b60b,2019-07-22 19:05:00 UTC,auto,hold,722,712,622,MD,Frederick,0,False,False,False,Gas
3,36caeda4e49889b1f1344741501033c520de54d0,2019-07-14 18:35:00 UTC,cool,hold,721,711,711,MD,Windsor Mill,60,True,False,True,Electric
5,f08b33dc13258ba4a5192e819bc917facf59c372,2019-07-10 12:20:00 UTC,cool,hold,771,771,741,MD,Owings Mills,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2985169,1f2f15b49c11699cfbef8a4c6815ea4df1844b23,2019-07-22 19:25:00 UTC,cool,hold,760,760,760,MD,Annapolis,20,False,False,False,Gas
2985170,744ab43b6923cdbce0ac9b55599a05f922ae20cd,2019-07-18 15:35:00 UTC,cool,auto,752,760,760,MD,Baltimore,0,True,False,True,Electric
2985171,71cfaeccf42ff28c334bb4c87124ae0f147bc498,2019-07-11 10:30:00 UTC,cool,hold,749,760,760,MD,Monrovia,117,True,False,True,Electric
2985172,9b155daba36a5ee93a146f63a5b9434b15d0a9bd,2019-07-28 15:20:00 UTC,cool,auto,749,760,760,MD,Mount Airy,20,False,False,False,Gas


In [115]:
# Add year and month

jul_2019["Year"] = "2019"
jul_2019["Month"] = "jul"

In [116]:
# Rename columns to label the aggregates

jul_2019 = jul_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [117]:
jul_2019_ave = jul_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2019_ave

In [118]:
# Export CSV file

jul_2019_ave.to_csv("data/day/MD/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Read in month csv for state
jul_2020 = pd.read_csv("../data_large/MD-day/2020-jul-day-MD.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2c67a6937a19f2a1527d27eb9141bbd7f067939f,2020-07-27 16:50:00 UTC,cool,auto,734,750,698,MD,Waldorf,37,True,False,True,Electric
1,4d48de681f406c1635c190376e5a1e1304d9ef34,2020-07-18 14:35:00 UTC,auto,hold,716,715,645,MD,Havre de Grace,0,True,False,False,Gas
2,4d47c5b772b75a468332d204573675b5d41e2193,2020-07-27 17:05:00 UTC,cool,hold,747,747,747,MD,Severn,0,True,False,False,Gas
3,e46ac1a819d2e10492315c86bcf313182ec03d73,2020-07-19 18:55:00 UTC,cool,auto,753,750,704,MD,Baltimore,69,False,False,False,Gas
4,c3033f089e06da9232840d0ac164ec11877f6b56,2020-07-29 11:50:00 UTC,cool,hold,732,730,713,MD,Millersville,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2928856,9b0d56321b3ad87fa73e0372790429d09b8ebfe1,2020-07-17 12:50:00 UTC,cool,auto,757,760,760,MD,Edgewater,10,True,False,True,Electric
2928857,6a522902026bcfff36a59d08ba56d236900c630f,2020-07-01 16:25:00 UTC,cool,hold,761,760,760,MD,Jarrettsville,35,True,False,True,Electric
2928858,cdb615fe8869055eef95779f0ff15110b47ba93b,2020-07-31 14:30:00 UTC,cool,auto,741,760,760,MD,Baltimore,120,False,False,False,Gas
2928859,2bfad921726766c150a27a6632aca16097b9901c,2020-07-04 16:00:00 UTC,cool,hold,760,760,760,MD,Severn,0,False,False,False,Gas


In [121]:
# Add year and month

jul_2020["Year"] = "2020"
jul_2020["Month"] = "jul"

In [122]:
# Rename columns to label the aggregates

jul_2020 = jul_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [123]:
jul_2020_ave = jul_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2020_ave

In [124]:
# Export CSV file

jul_2020_ave.to_csv("data/day/MD/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Read in month csv for state
jul_2021 = pd.read_csv("../data_large/MD-day/2021-jul-day-MD.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,737fc5dfe3e37c3aebb19d1b9a7e32073e905cde,2021-07-04 17:25:00 UTC,cool,hold,725,731,731,MD,Saint Michaels,15,True,False,True,Electric
1,4ca7c49a192b344325453bf2f7d67b645548abf0,2021-07-29 11:35:00 UTC,cool,hold,749,770,768,MD,Sykesville,69,True,False,True,Electric
2,1ac88a5e2f284378ceb4529bce0fd90b18f40913,2021-07-11 17:45:00 UTC,auto,hold,714,710,620,MD,Towson,49,False,False,False,Gas
3,1ac88a5e2f284378ceb4529bce0fd90b18f40913,2021-07-16 10:40:00 UTC,auto,hold,711,710,620,MD,Towson,49,False,False,False,Gas
4,a3fd93892cab55b83fc08b7cd5b459bc555a7a46,2021-07-07 14:35:00 UTC,cool,hold,708,701,701,MD,Windsor Mill,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1578525,d9d9b5f061299a7e4e8a28a6d51715e530272b71,2021-07-16 19:20:00 UTC,cool,hold,767,760,760,MD,Catonsville,0,True,False,True,Electric
1578526,43b3834f62b159348e260e698023f839c3eb6140,2021-07-26 09:25:00 UTC,cool,hold,760,760,760,MD,Reisterstown,20,False,False,False,Gas
1578527,d3a194f0b7b67c1c2fe41f57dce2034612ee6ff1,2021-07-04 18:50:00 UTC,cool,hold,754,760,760,MD,Parkville,25,True,False,True,Electric
1578528,87075c44ab0c604487ca9e5dfc985dc4d3bd5c8d,2021-07-26 17:40:00 UTC,cool,hold,753,760,760,MD,Bowie,50,False,False,False,Gas


In [127]:
# Add year and month

jul_2021["Year"] = "2021"
jul_2021["Month"] = "jul"

In [128]:
# Rename columns to label the aggregates

jul_2021 = jul_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [129]:
jul_2021_ave = jul_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2021_ave

In [130]:
# Export CSV file

jul_2021_ave.to_csv("data/day/MD/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/MD/jul/") if f.endswith(".csv")]

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_jul = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/MD/jul/" + file)
    MD_jul = pd.concat([MD_jul, df])
    
MD_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,jul,2017,cool,auto,Parkville,774.550661,766.017621,625.000000,50.0,False,False,False
1,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,jul,2017,cool,hold,Parkville,742.333333,740.000000,740.000000,50.0,False,False,False
2,00169fef3ce1e3a7aaaa39f9f28d8db467527197,jul,2017,cool,auto,Columbia,742.593750,741.387500,720.062500,40.0,True,False,True
3,00169fef3ce1e3a7aaaa39f9f28d8db467527197,jul,2017,cool,hold,Columbia,748.000000,740.000000,740.000000,40.0,True,False,True
4,004d65377c96d3ad22f52b1f4cec9f5cd105ed0a,jul,2017,cool,auto,Silver Spring,750.520629,758.222004,687.151277,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1647,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,jul,2021,cool,hold,Joppa,756.621156,748.408364,741.644526,10.0,False,False,False
1648,ffc3c4181ceca3a2853802359f9737063bb307f9,jul,2021,cool,hold,Dundalk,746.624395,745.500000,745.500000,0.0,False,False,False
1649,ffceb92e7d91925adb0905cdb576c31fa579cceb,jul,2021,cool,hold,Bethesda,761.541667,769.958333,759.833333,0.0,False,False,False
1650,ffda40b5a71bc787707b1b2a35a9c73df958cf93,jul,2021,cool,hold,Kensington,750.083647,748.846926,748.846926,50.0,True,False,False


In [133]:
MD_jul.to_csv("Scraper_Output/State_Month_Day/MD/MD_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state
aug_2017 = pd.read_csv("../data_large/MD-day/2017-aug-day-MD.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,30940a4830010ff13af61076188305ad0e7240d6,2017-08-19 12:50:00 UTC,cool,auto,756,780,620,MD,Edgewater,70,False,False,True,Electric
1,5815e608f2d10d0fee8bfe03c4f406bb194637e3,2017-08-08 11:25:00 UTC,auto,auto,762,824,617,MD,Annapolis,50,True,False,True,Electric
2,1898c8a5970a8921430505982e7e64f116db1b77,2017-08-15 18:15:00 UTC,auto,auto,726,740,720,MD,Laurel,27,False,False,False,Gas
3,5815e608f2d10d0fee8bfe03c4f406bb194637e3,2017-08-06 10:35:00 UTC,auto,auto,767,824,617,MD,Annapolis,50,True,False,True,Electric
4,e1bf068db280eb2e0b84a538e31b952d31e872bc,2017-08-02 12:15:00 UTC,auto,auto,723,720,630,MD,Towson,76,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
483111,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-08-04 17:30:00 UTC,cool,auto,724,720,730,MD,Olney,5,False,False,False,Gas
483112,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-08-21 16:10:00 UTC,cool,hold,731,730,730,MD,Olney,5,False,False,False,Gas
483113,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-08-05 19:25:00 UTC,cool,auto,723,720,730,MD,Olney,5,False,False,False,Gas
483114,fee19de70085c1b9f4a17bbafb328aec3c59035e,2017-08-21 14:20:00 UTC,cool,hold,733,730,730,MD,Olney,5,False,False,False,Gas


In [136]:
# Add year and month

aug_2017["Year"] = "2017"
aug_2017["Month"] = "aug"

In [137]:
# Rename columns to label the aggregates

aug_2017 = aug_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [138]:
aug_2017_ave = aug_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2017_ave

In [139]:
# Export CSV file

aug_2017_ave.to_csv("data/day/MD/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Read in month csv for state
aug_2018 = pd.read_csv("../data_large/MD-day/2018-aug-day-MD.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a1d74b4829981b26f90afd441b20d3c1b0afc3ab,2018-08-10 14:30:00 UTC,cool,hold,790,785,785,MD,Sykesville,20,False,False,True,Electric
1,cdd6be7dd62e2addfd0b7212b5412a9e25193ced,2018-08-20 16:35:00 UTC,cool,hold,777,775,775,MD,Rockville,50,False,False,True,Electric
2,d276ba97673b38c15d24337dbf332f0a7b4e141e,2018-08-21 13:00:00 UTC,cool,hold,770,775,775,MD,Walkersville,45,False,False,True,Electric
3,4df9ae31870b89c5500c0b838aaa05acd00bfada,2018-08-12 16:45:00 UTC,auto,hold,725,722,672,MD,Germantown,35,True,False,True,Electric
5,74f070132daaaf0f438b0c9a481c5832dc7229a7,2018-08-03 18:00:00 UTC,cool,hold,709,701,701,MD,Phoenix,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2163297,23b009510ec69ed9b47663eb0cd2b880d6bc7a56,2018-08-09 14:25:00 UTC,cool,auto,764,760,760,MD,Severna Park,10,False,False,False,Gas
2163298,4826cab25abd7e31db0e08850c0c4c5606a10881,2018-08-05 13:15:00 UTC,cool,hold,764,760,760,MD,Abingdon,0,False,False,False,Gas
2163299,72f8e509feeb09ba817d595a5597fffddce6c927,2018-08-30 15:15:00 UTC,cool,auto,754,760,760,MD,Waldorf,0,True,False,False,Gas
2163300,b5034ddaef92f7234e73acf542cc3f1eb1e3d652,2018-08-17 19:30:00 UTC,cool,auto,744,740,760,MD,Baltimore,15,False,False,False,Gas


In [142]:
# Add year and month

aug_2018["Year"] = "2018"
aug_2018["Month"] = "aug"

In [143]:
# Rename columns to label the aggregates

aug_2018 = aug_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [144]:
aug_2018_ave = aug_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2018_ave

In [145]:
# Export CSV file

aug_2018_ave.to_csv("data/day/MD/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Read in month csv for state
aug_2019 = pd.read_csv("../data_large/MD-day/2019-aug-day-MD.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f22b7f5c337392f62a8bb7227524435530579769,2019-08-13 15:35:00 UTC,cool,hold,737,735,735,MD,Germantown,20,True,False,False,Gas
1,375df20d48bcb4a751665a6bd7f3659a9825a8b0,2019-08-24 13:20:00 UTC,cool,hold,745,755,755,MD,Waldorf,0,True,False,True,Electric
3,28ab14f90ed6164841ca061425f170f251c6252c,2019-08-25 07:45:00 UTC,auto,hold,745,752,672,MD,Gaithersburg,25,True,False,True,Electric
4,ab13f5733f30494ec43a56abe8531a12d81e9e6e,2019-08-19 19:55:00 UTC,cool,auto,809,830,830,MD,Baltimore,7,True,False,True,Electric
5,d9353facda8fe07919fc3a74c3bcb30ca9f7f5df,2019-08-01 08:45:00 UTC,auto,hold,757,755,645,MD,Lutherville Timonium,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2930240,fa9d877902b6069cd45e138b3860c897848b2f17,2019-08-11 17:05:00 UTC,cool,auto,766,760,760,MD,Odenton,20,True,False,False,Gas
2930241,5daf6f023fa69a07d61ce5b014c5024dc2208cc7,2019-08-25 13:40:00 UTC,cool,hold,745,760,760,MD,Suitland,30,True,False,False,Gas
2930242,16ff91d380cf4a0f470de697aa3518f9a405481a,2019-08-07 11:10:00 UTC,cool,hold,764,760,760,MD,Bel Air,15,False,False,False,Gas
2930243,bcf3d76a72f1d7a1d5e3542ae73ad59632cce595,2019-08-26 13:15:00 UTC,cool,auto,749,780,760,MD,Davidsonville,0,True,False,True,Electric


In [148]:
# Add year and month

aug_2019["Year"] = "2019"
aug_2019["Month"] = "aug"

In [149]:
# Rename columns to label the aggregates

aug_2019 = aug_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [150]:
aug_2019_ave = aug_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2019_ave

In [151]:
# Export CSV file

aug_2019_ave.to_csv("data/day/MD/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Read in month csv for state
aug_2020 = pd.read_csv("../data_large/MD-day/2020-aug-day-MD.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dd04c21a3be33b5d7b3781502f8e284fc146cec7,2020-08-12 12:30:00 UTC,auto,auto,779,780,697,MD,Burtonsville,0,False,False,False,Gas
1,b07fcb78a6d10ac77c1598e26fd8576cd3f6766d,2020-08-30 14:45:00 UTC,auto,auto,691,692,642,MD,Columbia,40,True,False,True,Electric
2,e4102f3878a1fbb96cc77f86c94fcf9c4f8da9c1,2020-08-09 17:55:00 UTC,auto,hold,761,735,735,MD,SILVER SPRING,0,False,False,False,Gas
3,ae1d45a4d4107ce45d0d4c25afc6aa1594372261,2020-08-17 14:50:00 UTC,cool,hold,757,753,753,MD,Upper Marlboro,18,False,False,False,Gas
4,9c9effaa602666d1b3bd52d84dd7edd27bf0d4a5,2020-08-16 12:00:00 UTC,cool,auto,743,790,786,MD,Crownsville,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2865927,bca839d9da4fe0df47754d506a96dfec14fbf381,2020-08-17 15:05:00 UTC,cool,hold,750,760,760,MD,Clarksville,20,False,False,False,Gas
2865928,1f2d3257b33ef555e218c68ca51d0dd477d9591e,2020-08-23 12:30:00 UTC,cool,auto,762,760,760,MD,Bowie,10,False,False,False,Gas
2865929,5b820ddb3bcd775a530fb07118979035a91fcf6d,2020-08-30 15:25:00 UTC,cool,auto,754,760,760,MD,Baltimore,0,True,False,True,Electric
2865930,7fe4a4d53108de380b3d15bac9451396064fd54c,2020-08-01 13:00:00 UTC,cool,hold,761,760,760,MD,Bel Air,0,True,False,True,Electric


In [154]:
# Add year and month

aug_2020["Year"] = "2020"
aug_2020["Month"] = "aug"

In [155]:
# Rename columns to label the aggregates

aug_2020 = aug_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [156]:
aug_2020_ave = aug_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2020_ave

In [157]:
# Export CSV file

aug_2020_ave.to_csv("data/day/MD/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [158]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/MD/aug/") if f.endswith(".csv")]

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_aug = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/MD/aug/" + file)
    MD_aug = pd.concat([MD_aug, df])
    
MD_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,aug,2017,cool,auto,Parkville,728.833333,730.000000,690.000000,50.0,False,False,False
1,00169fef3ce1e3a7aaaa39f9f28d8db467527197,aug,2017,auto,auto,Columbia,737.026087,744.034783,677.600000,40.0,True,False,True
2,004d65377c96d3ad22f52b1f4cec9f5cd105ed0a,aug,2017,cool,auto,Silver Spring,767.514151,791.693396,650.004717,10.0,False,False,False
3,0115d4565dcf7c1441e7f8d0e5d998c2f5bc07e9,aug,2017,cool,auto,California,742.893333,745.266667,742.586667,57.0,False,False,False
4,0115d4565dcf7c1441e7f8d0e5d998c2f5bc07e9,aug,2017,cool,hold,California,717.312500,719.100000,719.090625,57.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3129,ff865a14deda41a5964cd0e6ab1298d87cdec273,aug,2020,cool,hold,Chevy Chase,776.243902,768.365854,767.439024,60.0,True,False,True
3130,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,aug,2020,cool,auto,Joppa,768.124194,763.916129,745.580645,10.0,False,False,False
3131,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,aug,2020,cool,hold,Joppa,760.884191,758.784926,744.955423,10.0,False,False,False
3132,ffc3c4181ceca3a2853802359f9737063bb307f9,aug,2020,cool,hold,Dundalk,721.225816,720.000000,720.000000,0.0,False,False,False


In [160]:
MD_aug.to_csv("Scraper_Output/State_Month_Day/MD/MD_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state
dec_2017 = pd.read_csv("../data_large/MD-day/2017-dec-day-MD.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,3e61d201a1e2b284db66417194c7d049e6f59c80,2017-12-13 16:35:00 UTC,heat,auto,695,678,671,MD,Brunswick,7,False,False,False,Gas
2,3bbe7f33fd271b1d387e17431997f6bd618c3beb,2017-12-30 18:00:00 UTC,auto,auto,719,775,725,MD,Centreville,10,True,False,True,Electric
3,12ad57fbad384a692dedf3f552f7277f46902b7b,2017-12-14 17:20:00 UTC,heat,hold,719,716,716,MD,Frederick,0,False,False,False,Gas
5,45c5268867f98622b3f475973eb548c75807cf9a,2017-12-17 12:10:00 UTC,heat,hold,759,762,762,MD,Rockville,56,False,False,False,Gas
6,e3ea31b4875509a2ca55be19c75c4b2762a3bdd8,2017-12-31 16:10:00 UTC,auto,auto,688,765,695,MD,Prince Frederick,15,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1052071,d6479d69b9dc3e03b09e64f2a8a1b4c88c440578,2017-12-12 16:50:00 UTC,heat,hold,746,760,740,MD,Edgewood,60,False,False,False,Gas
1052072,5e8d128faf9ab9f0cc77cb29dbc8047694261dbf,2017-12-13 14:30:00 UTC,heat,auto,779,760,760,MD,Riverdale Park,5,False,False,False,Gas
1052073,d6479d69b9dc3e03b09e64f2a8a1b4c88c440578,2017-12-27 10:50:00 UTC,heat,auto,731,760,730,MD,Edgewood,60,False,False,False,Gas
1052074,4d47c5b772b75a468332d204573675b5d41e2193,2017-12-27 14:40:00 UTC,heat,auto,759,760,760,MD,Severn,0,True,False,False,Gas


In [163]:
# Add year and month

dec_2017["Year"] = "2017"
dec_2017["Month"] = "dec"

In [164]:
# Rename columns to label the aggregates

dec_2017 = dec_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [165]:
dec_2017_ave = dec_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2017_ave

In [166]:
# Export CSV file

dec_2017_ave.to_csv("data/day/MD/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Read in month csv for state
dec_2018 = pd.read_csv("../data_large/MD-day/2018-dec-day-MD.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4d041c8121f3582a911c6f3a6ad58589e0726556,2018-12-09 15:20:00 UTC,heat,hold,671,675,675,MD,Pikesville,50,False,False,False,Gas
1,93f2fc9e20707d3ffac308754b780386fb32c464,2018-12-06 13:30:00 UTC,heat,hold,702,705,705,MD,Westminster,0,False,False,False,Gas
2,c3033f089e06da9232840d0ac164ec11877f6b56,2018-12-18 15:40:00 UTC,heat,hold,684,683,683,MD,Millersville,30,True,False,False,Gas
3,3663cb3454b8ba4ceec667c1cb4fe716143c1feb,2018-12-08 17:30:00 UTC,heat,auto,670,725,675,MD,Rosedale,20,False,False,False,Gas
4,9a181db4899ee877d925188069eb64bd33aeaf57,2018-12-15 13:50:00 UTC,heat,hold,656,705,655,MD,Abingdon,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2284699,04f95f4f44e8588450a5fd39c9aa2c1c570df435,2018-12-23 14:55:00 UTC,auto,auto,710,760,710,MD,Lanham,5,False,False,False,Gas
2284700,6d5b891c96249393fdf525cc25fa34a3964c7dab,2018-12-17 12:55:00 UTC,auto,hold,676,760,680,MD,Baltimore,90,False,False,False,Gas
2284701,904aa7ad4bb23e52621d63992b8de1a8a21dfd1a,2018-12-07 13:40:00 UTC,heat,auto,736,760,740,MD,Gaithersburg,30,False,False,False,Gas
2284702,fae78dc4a04d9e76139f601b0365d8984dc9586c,2018-12-27 17:35:00 UTC,auto,hold,679,760,680,MD,Hanover,10,True,False,False,Gas


In [169]:
# Add year and month

dec_2018["Year"] = "2018"
dec_2018["Month"] = "dec"

In [170]:
# Rename columns to label the aggregates

dec_2018 = dec_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [171]:
dec_2018_ave = dec_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2018_ave

In [172]:
# Export CSV file

dec_2018_ave.to_csv("data/day/MD/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Read in month csv for state
dec_2019 = pd.read_csv("../data_large/MD-day/2019-dec-day-MD.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,109ee62e8260f06a53c2bda02000b1f6e2c45872,2019-12-21 15:40:00 UTC,heat,hold,737,735,735,MD,Lutherville Timonium,0,False,False,False,Gas
1,270ef1906145491045a55c90337fe2ef22b48c00,2019-12-26 13:25:00 UTC,heat,auto,625,630,630,MD,Boyds,0,False,False,False,Gas
2,6a3dfc82c10c9f601d9b70a030e32e5101c292a6,2019-12-05 15:40:00 UTC,heat,hold,708,705,705,MD,Baltimore,0,True,False,False,Gas
3,10179f469adcc2826f5a1b233a2e3c2ca0e2cf62,2019-12-13 14:45:00 UTC,auto,hold,723,785,715,MD,Brandywine,0,False,False,True,Electric
5,a5a7d64c50c2e2a92aff532688ea92ee6db30590,2019-12-21 17:55:00 UTC,auto,hold,699,830,700,MD,Frederick,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2599701,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2019-12-24 17:55:00 UTC,auto,hold,753,765,715,MD,Upper Marlboro,70,True,False,True,Electric
2599702,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2019-12-30 17:30:00 UTC,auto,hold,726,765,715,MD,Upper Marlboro,70,True,False,True,Electric
2599703,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2019-12-16 16:30:00 UTC,auto,hold,712,765,715,MD,Upper Marlboro,70,True,False,True,Electric
2599704,ee6369d2ad4a41d43d689d0e3c13f9f4928e6c98,2019-12-23 16:15:00 UTC,auto,hold,713,765,715,MD,Upper Marlboro,70,True,False,True,Electric


In [175]:
# Add year and month

dec_2019["Year"] = "2019"
dec_2019["Month"] = "dec"

In [176]:
# Rename columns to label the aggregates

dec_2019 = dec_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [177]:
dec_2019_ave = dec_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2019_ave

In [178]:
# Export CSV file

dec_2019_ave.to_csv("data/day/MD/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Read in month csv for state
dec_2020 = pd.read_csv("../data_large/MD-day/2020-dec-day-MD.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,946d99f4c6014ed0b6e9ba82a776770b422d0ea1,2020-12-18 14:05:00 UTC,auto,hold,705,779,709,MD,Bethesda,0,False,False,False,Gas
1,f1cdec351558c09754a945bbcece876272b6d273,2020-12-24 17:30:00 UTC,heat,hold,639,648,640,MD,Baltimore,0,False,False,False,Gas
2,7a160bc1942e4d216ae3a5ab7afbe416dc3f1028,2020-12-08 13:05:00 UTC,heat,hold,759,764,764,MD,Ellicott City,20,False,False,False,Gas
3,946d99f4c6014ed0b6e9ba82a776770b422d0ea1,2020-12-14 12:35:00 UTC,auto,hold,705,779,709,MD,Bethesda,0,False,False,False,Gas
4,bbd53e6af066bf4372ecaf6a3ecf1085c340b4c8,2020-12-29 14:35:00 UTC,auto,hold,711,765,715,MD,Waldorf,29,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2256799,51734cd9e270b841a1aab039739781f9e000f7c2,2020-12-24 19:40:00 UTC,auto,hold,711,760,710,MD,Rockville,60,False,False,False,Gas
2256800,0d86d9dee1d461b13e9e99feb36b70f6dbb635e6,2020-12-08 13:30:00 UTC,heat,hold,691,760,760,MD,Jessup,15,False,False,False,Gas
2256801,fa9d877902b6069cd45e138b3860c897848b2f17,2020-12-07 13:20:00 UTC,heat,auto,754,760,760,MD,Odenton,20,True,False,False,Gas
2256802,3397b4e26c815611af66a7cca6717c57e8ef4da9,2020-12-20 18:55:00 UTC,heat,hold,757,760,760,MD,Baltimore,10,False,False,False,Gas


In [181]:
# Add year and month

dec_2020["Year"] = "2020"
dec_2020["Month"] = "dec"

In [182]:
# Rename columns to label the aggregates

dec_2020 = dec_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [183]:
dec_2020_ave = dec_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2020_ave

In [184]:
# Export CSV file

dec_2020_ave.to_csv("data/day/MD/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [185]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/MD/dec/") if f.endswith(".csv")]

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_dec = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/MD/dec/" + file)
    MD_dec = pd.concat([MD_dec, df])
    
MD_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,dec,2017,heat,auto,Parkville,705.250000,710.000000,710.000000,50.0,False,False,False
1,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,dec,2017,heat,hold,Parkville,690.139410,691.656836,691.595174,50.0,False,False,False
2,00169fef3ce1e3a7aaaa39f9f28d8db467527197,dec,2017,auto,hold,Columbia,697.673469,759.836735,700.653061,40.0,True,False,True
3,003a3b5c65e4767907f528a81c3bc75c08aefb02,dec,2017,heat,auto,Belcamp,680.498902,681.803954,681.803954,10.0,False,False,False
4,003a3b5c65e4767907f528a81c3bc75c08aefb02,dec,2017,heat,hold,Belcamp,691.576427,692.289134,692.289134,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3069,ffbef6a03db908871b5b118c67ae6257bcbbcbd5,dec,2020,heat,auto,Baltimore,744.729167,730.562500,760.270833,7.0,False,False,False
3070,ffbef6a03db908871b5b118c67ae6257bcbbcbd5,dec,2020,heat,hold,Baltimore,734.068226,744.231969,744.231969,7.0,False,False,False
3071,ffda40b5a71bc787707b1b2a35a9c73df958cf93,dec,2020,heat,hold,Kensington,702.669425,696.819197,696.819197,50.0,True,False,False
3072,ffee15cd197d28701219f8af65ba1c5d39c77257,dec,2020,heat,auto,Nottingham,719.770000,722.660000,720.760000,35.0,False,False,True


In [187]:
MD_dec.to_csv("Scraper_Output/State_Month_Day/MD/MD_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [188]:
# Create variable for files in directory
files = [f for f in os.listdir("Scraper_Output/State_Month_Day/MD/") if f.endswith(".csv")]

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
MD_all = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/MD/" + file)
    MD_all = pd.concat([MD_all, df])
    
MD_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0004ddd9fb43232e8457ed12b578e20c5e1b33b0,aug,2017,cool,auto,Parkville,728.833333,730.000000,690.000000,50.0,False,False,False
1,00169fef3ce1e3a7aaaa39f9f28d8db467527197,aug,2017,auto,auto,Columbia,737.026087,744.034783,677.600000,40.0,True,False,True
2,004d65377c96d3ad22f52b1f4cec9f5cd105ed0a,aug,2017,cool,auto,Silver Spring,767.514151,791.693396,650.004717,10.0,False,False,False
3,0115d4565dcf7c1441e7f8d0e5d998c2f5bc07e9,aug,2017,cool,auto,California,742.893333,745.266667,742.586667,57.0,False,False,False
4,0115d4565dcf7c1441e7f8d0e5d998c2f5bc07e9,aug,2017,cool,hold,California,717.312500,719.100000,719.090625,57.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12146,ff85b807aa25b50795dec4b1c4e3524319d86652,jun,2021,auto,hold,Hollywood,763.314815,770.000000,640.000000,19.0,True,False,True
12147,ffa4f9cef40f20a7379efc733d1daf8f5e07a551,jun,2021,cool,hold,Joppa,748.283889,754.575536,747.000000,10.0,False,False,False
12148,ffc3c4181ceca3a2853802359f9737063bb307f9,jun,2021,cool,hold,Dundalk,740.162511,740.000000,740.000000,0.0,False,False,False
12149,ffda40b5a71bc787707b1b2a35a9c73df958cf93,jun,2021,cool,hold,Kensington,742.930009,746.050744,745.990376,50.0,True,False,False


In [190]:
MD_all.to_csv("Scraper_Output/State_Month_Day/MD_all_day.csv", header=True, index=False)

In [191]:
# Datacheck to mMDe sure state was selected correctly in BQ sql queries

print(f"Unique jan_2017: {jan_2017['ProvinceState'].unique()}")
print(f"Unique jan_2018: {jan_2018['ProvinceState'].unique()}")
print(f"Unique jan_2019: {jan_2019['ProvinceState'].unique()}")
print(f"Unique jan_2020: {jan_2020['ProvinceState'].unique()}")
print(f"Unique jan_2021: {jan_2021['ProvinceState'].unique()}")
print(f"Unique feb_2017: {feb_2017['ProvinceState'].unique()}")
print(f"Unique feb_2018: {feb_2018['ProvinceState'].unique()}")
print(f"Unique feb_2019: {feb_2019['ProvinceState'].unique()}")
print(f"Unique feb_2020: {feb_2020['ProvinceState'].unique()}")
print(f"Unique feb_2021: {feb_2021['ProvinceState'].unique()}")
print(f"Unique jun_2017: {jun_2017['ProvinceState'].unique()}")
print(f"Unique jun_2018: {jun_2018['ProvinceState'].unique()}")
print(f"Unique jun_2019: {jun_2019['ProvinceState'].unique()}")
print(f"Unique jun_2020: {jun_2020['ProvinceState'].unique()}")
print(f"Unique jun_2021: {jun_2021['ProvinceState'].unique()}")
print(f"Unique jul_2017: {jul_2017['ProvinceState'].unique()}")
print(f"Unique jul_2018: {jul_2018['ProvinceState'].unique()}")
print(f"Unique jul_2019: {jul_2019['ProvinceState'].unique()}")
print(f"Unique jul_2020: {jul_2020['ProvinceState'].unique()}")
print(f"Unique jul_2021: {jul_2021['ProvinceState'].unique()}")
print(f"Unique aug_2017: {aug_2017['ProvinceState'].unique()}")
print(f"Unique aug_2018: {aug_2018['ProvinceState'].unique()}")
print(f"Unique aug_2019: {aug_2019['ProvinceState'].unique()}")
print(f"Unique aug_2020: {aug_2020['ProvinceState'].unique()}")
print(f"Unique dec_2017: {dec_2017['ProvinceState'].unique()}")
print(f"Unique dec_2018: {dec_2018['ProvinceState'].unique()}")
print(f"Unique dec_2019: {dec_2019['ProvinceState'].unique()}")
print(f"Unique dec_2020: {dec_2020['ProvinceState'].unique()}")

Unique jan_2017: ['MD']
Unique jan_2018: ['MD']
Unique jan_2019: ['MD']
Unique jan_2020: ['MD']
Unique jan_2021: ['MD']
Unique feb_2017: ['MD']
Unique feb_2018: ['MD']
Unique feb_2019: ['MD']
Unique feb_2020: ['MD']
Unique feb_2021: ['MD']
Unique jun_2017: ['MD']
Unique jun_2018: ['MD']
Unique jun_2019: ['MD']
Unique jun_2020: ['MD']
Unique jun_2021: ['MD']
Unique jul_2017: ['MD']
Unique jul_2018: ['MD']
Unique jul_2019: ['MD']
Unique jul_2020: ['MD']
Unique jul_2021: ['MD']
Unique aug_2017: ['MD']
Unique aug_2018: ['MD']
Unique aug_2019: ['MD']
Unique aug_2020: ['MD']
Unique dec_2017: ['MD']
Unique dec_2018: ['MD']
Unique dec_2019: ['MD']
Unique dec_2020: ['MD']
