# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Read in the Virginia (VA) CSV for January 2017.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2017-jan-day-VA.csv")

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = jan_2017['TemperatureExpectedCool']
heat = jan_2017['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
jan_2017 = jan_2017.loc[~is_outlier].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2338eba44376ad2ce887d5b3b3622971fe27e0cc,2017-01-02 12:20:00 UTC,heat,hold,669,670,670,VA,Springfield,25,False,False,False,Gas
1,b5cd346ae3c9d570969bb52f326a058cd205e771,2017-01-07 17:45:00 UTC,heat,hold,656,650,650,VA,Mc Lean,0,False,False,False,Gas
2,942524f800895b04aae9e16d43e583615181a285,2017-01-28 18:20:00 UTC,heat,hold,665,670,670,VA,Richmond,95,False,False,False,Gas
3,7ca5c2ee4f7ca7385250245b9d04d00429fa4099,2017-01-09 12:45:00 UTC,heat,hold,617,650,620,VA,Fairfax,45,False,False,False,Gas
4,c568ed146fe482bd4171ece03c4d93eca2ff6adf,2017-01-11 12:50:00 UTC,auto,auto,736,820,740,VA,Yorktown,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
416570,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-01-29 19:15:00 UTC,heat,hold,668,680,680,VA,Louisa,10,True,False,True,Electric
416571,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-01-12 17:35:00 UTC,heat,auto,706,700,700,VA,Louisa,10,True,False,True,Electric
416572,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-01-11 15:15:00 UTC,heat,auto,677,680,680,VA,Louisa,10,True,False,True,Electric
416573,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-01-07 14:15:00 UTC,heat,auto,677,690,690,VA,Louisa,10,True,False,True,Electric


In [4]:
# Tag every reading with its year and month (both are used as group keys later).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_labels)

In [6]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00a77c90abdcc1ac45fa8c839401cc6a0539b164,Jan,2017,heat,auto,Alexandria,667.766423,670.000000,670.000000,10.0,False,False,True
00a77c90abdcc1ac45fa8c839401cc6a0539b164,Jan,2017,heat,hold,Alexandria,668.466667,670.000000,670.000000,10.0,False,False,True
01565e9263a0f04e74fcddc27ba8f0e88631879a,Jan,2017,heat,auto,Arlington,700.125000,700.000000,700.000000,0.0,False,False,True
01565e9263a0f04e74fcddc27ba8f0e88631879a,Jan,2017,heat,hold,Arlington,687.393782,686.544041,686.544041,0.0,False,False,True
02436b66b29e9db0a27b3b51998acd715aeeae61,Jan,2017,heat,hold,Dumfries,682.485479,677.651016,677.651016,30.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
fe88be868dc807c92729125c3aea0c89fa4ee2f7,Jan,2017,heat,hold,Midlothian,664.138889,665.111111,665.111111,40.0,True,False,False
fefc13703d10404cb0b19907eed5e8adbbd3787d,Jan,2017,heat,auto,Centreville,730.523077,747.415385,746.892308,30.0,True,False,False
fefc13703d10404cb0b19907eed5e8adbbd3787d,Jan,2017,heat,hold,Centreville,726.839286,730.000000,730.000000,30.0,True,False,False
ff8d2f6b238eda3bd3dbd2dd5268c48d5d933d11,Jan,2017,auto,hold,Charlottesville,705.558140,768.813953,707.593023,6.0,False,False,False


In [7]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/jan/jan_2017_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
jan_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 January Day

In [8]:
# Read in the Virginia (VA) CSV for January 2018.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2018-jan-day-VA.csv")

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = jan_2018['TemperatureExpectedCool']
heat = jan_2018['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
jan_2018 = jan_2018.loc[~is_outlier].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,94594179aa4df7d678763966f9486d383692d992,2018-01-06 16:30:00 UTC,heat,hold,608,640,610,VA,Midlothian,26,False,False,False,Gas
2,878e0f9a22116eb6aaf934bd29f002e2f32795a7,2018-01-12 12:55:00 UTC,heat,hold,693,689,689,VA,Manassas,10,False,False,False,Gas
3,2db09f0565dce26c744a9b0e7cfefccef86767fd,2018-01-18 14:15:00 UTC,heat,hold,678,685,685,VA,Vienna,0,False,False,False,Gas
4,abd7107a62f2e9f8eb11c860502dd5678d0d79a4,2018-01-16 15:45:00 UTC,heat,hold,674,679,679,VA,Arlington,30,False,False,False,Gas
5,f37bf0b3bdca9b3ba762641ce828e7015b3969ac,2018-01-22 12:30:00 UTC,heat,hold,690,675,675,VA,Clifton,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1241779,be6ae1d40b80e4f5285cb59e1169bcb9cc0b28bb,2018-01-11 17:50:00 UTC,auto,auto,720,765,715,VA,Fredericksburg,15,False,False,False,Gas
1241780,b08dfd647623f1f50997e14775def3732e966ceb,2018-01-13 19:45:00 UTC,auto,auto,722,765,715,VA,Remington,30,False,False,True,Electric
1241781,292ca56e0d6896bf5b76534ed855e344901bcc82,2018-01-10 19:40:00 UTC,auto,auto,715,765,715,VA,Alexandria,0,False,False,False,Gas
1241782,f6b7b9258d06417f516197b659af6a3f2aa19e2c,2018-01-28 19:50:00 UTC,auto,hold,660,765,625,VA,Alexandria,25,False,False,True,Electric


In [10]:
# Tag every reading with its year and month (both are used as group keys later).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_labels)

In [12]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/jan/jan_2018_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
jan_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 January Day

In [14]:
# Read in the Virginia (VA) CSV for January 2019.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2019-jan-day-VA.csv")

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = jan_2019['TemperatureExpectedCool']
heat = jan_2019['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
jan_2019 = jan_2019.loc[~is_outlier].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,deed2021619223c3aac240c4e16563ed07fe9c8c,2019-01-21 17:00:00 UTC,heat,hold,693,687,687,VA,Burke,50,False,False,True,Electric
1,6d351e86eabd51277749c8966081a149c1fd5623,2019-01-28 13:45:00 UTC,heat,hold,643,640,640,VA,Fairfax,15,True,False,True,Electric
2,f3f67912c283659caf8b2ff77fe6f9c0311a39ec,2019-01-08 18:25:00 UTC,heat,auto,702,719,690,VA,Scottsville,30,True,False,True,Electric
3,25a1449b413456f29b986777a062a4cccd5057e0,2019-01-06 18:45:00 UTC,heat,auto,631,630,630,VA,Forest,20,False,False,True,Electric
4,40d6e753b0333cb6bcc4fddba35a7eb763584357,2019-01-10 19:40:00 UTC,heat,auto,680,671,671,VA,McLean,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1857520,5a17b637a8a87db90db735de28224d207cd3f2d7,2019-01-30 19:50:00 UTC,auto,hold,699,760,705,VA,Virginia Beach,0,False,False,False,Gas
1857521,0548d730717b31c0b094ec3642b4272fb24d8ed6,2019-01-26 17:15:00 UTC,auto,hold,706,760,710,VA,Herndon,30,True,False,True,Electric
1857522,d5923959fe027e1d9d651a052e66d3a944948d60,2019-01-02 14:15:00 UTC,cool,auto,760,760,640,VA,Hampton,5,True,False,False,Gas
1857523,8ea083b8f7f0ecf8da28e0eb3e7d8cd6b2e4b89e,2019-01-16 14:45:00 UTC,auto,hold,696,760,690,VA,Broadlands,20,False,False,False,Gas


In [16]:
# Tag every reading with its year and month (both are used as group keys later).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_labels)

In [18]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/jan/jan_2019_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
jan_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 January Day

In [20]:
# Read in the Virginia (VA) CSV for January 2020.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2020-jan-day-VA.csv")

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = jan_2020['TemperatureExpectedCool']
heat = jan_2020['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
jan_2020 = jan_2020.loc[~is_outlier].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2eb4403b37ded60a5f0b475aee19c4d011065300,2020-01-16 18:50:00 UTC,heat,hold,690,689,689,VA,Midlothian,40,True,False,True,Electric
1,9db77cd49560cc7879630092ba76c41072c29c11,2020-01-18 13:05:00 UTC,auto,hold,723,759,729,VA,Virginia Beach,29,True,False,False,Gas
2,3e505bc5b1aabe38722711261242073f2634eaa7,2020-01-22 19:10:00 UTC,heat,hold,748,749,749,VA,Richmond,10,False,False,False,Gas
3,7abdb4748c435412468be0c1d6e6b3f0c452ff34,2020-01-18 17:40:00 UTC,heat,hold,740,742,742,VA,Alexandria,30,True,False,False,Gas
5,9fc5c187e35a02a6d83d873318366536820c29d8,2020-01-26 13:35:00 UTC,auto,auto,703,755,705,VA,Alexandria,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1945936,61df5370646c3f32022cd5442fcf7743445a9a44,2020-01-01 11:55:00 UTC,auto,hold,708,760,710,VA,Sterling,30,True,False,True,Electric
1945937,ca4bb6195e6c49ef0b194a034b2f66fa87bf043c,2020-01-24 19:20:00 UTC,heat,hold,746,760,760,VA,Reston,49,False,False,False,Gas
1945938,0bb5a17a193f56321319b72cea049cf71051c86c,2020-01-01 12:50:00 UTC,heat,hold,758,760,760,VA,Centreville,30,True,False,True,Electric
1945939,8b98e6c155d259783fa3f624317b8ca7e2be20d2,2020-01-18 18:05:00 UTC,auto,auto,677,760,680,VA,Chantilly,5,False,False,False,Gas


In [22]:
# Tag every reading with its year and month (both are used as group keys later).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_labels)

In [24]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/jan/jan_2020_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
jan_2020_ave.to_csv(out_path, header=True, index=True)

### 2021 January Day

In [26]:
# Read in the Virginia (VA) CSV for January 2021.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2021-jan-day-VA.csv")

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = jan_2021['TemperatureExpectedCool']
heat = jan_2021['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
jan_2021 = jan_2021.loc[~is_outlier].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,352588780e2ed90204f4102f7c95cc195ac06d9d,2021-01-25 18:40:00 UTC,auto,hold,692,755,695,VA,Springfield,47,True,False,True,Electric
1,be789d83c037c74b85ab886b2801a146ea0d7011,2021-01-06 15:20:00 UTC,auto,hold,712,766,716,VA,Richmond,35,False,False,True,Electric
2,a10ea823ef027bf33af64544fac0297590dd6d3f,2021-01-01 18:25:00 UTC,heat,hold,635,640,640,VA,Norfolk,50,False,False,True,Electric
3,b246d88d29be97c9b465b7f8d18b21a8088aad4b,2021-01-13 17:50:00 UTC,heat,hold,688,689,689,VA,Chesapeake,0,True,False,True,Electric
4,352588780e2ed90204f4102f7c95cc195ac06d9d,2021-01-26 18:30:00 UTC,auto,hold,691,755,695,VA,Springfield,47,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1199193,ace0d4dea763ad999ea74349da8bd2bf56814f44,2021-01-22 19:15:00 UTC,cool,hold,754,760,760,VA,Arlington,15,False,False,False,Gas
1199194,1b6101a801f16ce9ddde7e783b1b15d46fa9396d,2021-01-17 07:45:00 UTC,auto,hold,704,760,710,VA,Ashburn,10,False,False,False,Gas
1199195,e65dc4d739a7b59628fb70061297ccacac5ea322,2021-01-23 11:40:00 UTC,auto,hold,707,760,710,VA,Stafford,0,False,False,False,Gas
1199196,407256864d03be91c294ef1fce2659b43c7961de,2021-01-23 17:45:00 UTC,auto,hold,666,760,670,VA,Chester,35,True,False,True,Electric


In [28]:
# Tag every reading with its year and month (both are used as group keys later).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_labels)

In [30]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/jan/jan_2021_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
jan_2021_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV file names in the January folder.
# sorted() pins the order: os.listdir() returns entries in arbitrary order, which
# would otherwise let the row order of the combined file vary between machines.
files = sorted(f for f in os.listdir("data/day/VA/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and stack them into a single frame.
# The frames are collected first and concatenated once: calling pd.concat inside
# the loop re-copies all accumulated rows on every pass, and starting from an
# empty DataFrame triggers a FutureWarning in recent pandas releases.
yearly_frames = [pd.read_csv("data/day/VA/jan/" + file) for file in files]

# pd.concat([]) raises ValueError, so fall back to an empty frame for an empty folder.
VA_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

VA_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00a77c90abdcc1ac45fa8c839401cc6a0539b164,Jan,2017,heat,auto,Alexandria,667.766423,670.000000,670.000000,10.0,False,False,True
1,00a77c90abdcc1ac45fa8c839401cc6a0539b164,Jan,2017,heat,hold,Alexandria,668.466667,670.000000,670.000000,10.0,False,False,True
2,01565e9263a0f04e74fcddc27ba8f0e88631879a,Jan,2017,heat,auto,Arlington,700.125000,700.000000,700.000000,0.0,False,False,True
3,01565e9263a0f04e74fcddc27ba8f0e88631879a,Jan,2017,heat,hold,Arlington,687.393782,686.544041,686.544041,0.0,False,False,True
4,02436b66b29e9db0a27b3b51998acd715aeeae61,Jan,2017,heat,hold,Dumfries,682.485479,677.651016,677.651016,30.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1436,fefc13703d10404cb0b19907eed5e8adbbd3787d,Jan,2021,heat,hold,Centreville,716.029586,718.674556,718.674556,30.0,True,False,False
1437,ff020a06635306a6879f02330063b6bd5c6af5d8,Jan,2021,heat,hold,Chesterfield,670.164686,675.795416,670.905772,10.0,False,False,False
1438,ff089a5b75a6c44545cf3e1017feedb5ac35a8f9,Jan,2021,heat,hold,Great Bridge East,674.932489,674.696203,674.506329,5.0,True,False,True
1439,ffb5ebb46f7c8a83ff8e2b541c0e6acf6a5fdb7b,Jan,2021,auto,hold,reston,722.800000,780.000000,706.000000,0.0,True,False,True


In [34]:
# Write the combined January file; index=False leaves the row index out of the CSV.
out_path = Path("Scraper_Output/State_Month_Day/VA/VA_jan.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
VA_jan.to_csv(out_path, header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Read in the Virginia (VA) CSV for February 2017.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2017-feb-day-VA.csv")

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = feb_2017['TemperatureExpectedCool']
heat = feb_2017['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
feb_2017 = feb_2017.loc[~is_outlier].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3109112d2dfa1a63e8ef49228d5bde9989cca566,2017-02-03 16:25:00 UTC,auto,hold,684,741,691,VA,Virginia Beach,50,True,False,True,Electric
1,358db71f54ddd62d4f7d336e71bb208b1fbe7dbf,2017-02-08 12:30:00 UTC,heat,hold,678,650,650,VA,New Kent,0,True,False,True,Electric
3,b1155c05f7557a2bd421315d63c760c08527a637,2017-02-03 13:05:00 UTC,heat,hold,723,730,730,VA,Spotsylvania,10,False,False,False,Gas
4,2810e2f57a3f062aa3e45ec31a1214930d4343bd,2017-02-02 10:20:00 UTC,heat,hold,701,700,700,VA,Bristow,0,False,False,False,Gas
5,9b18dee75a040d612ca69425eb3a948970817814,2017-02-09 19:05:00 UTC,auto,auto,674,820,650,VA,Richmond,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
392640,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-02-27 14:40:00 UTC,heat,auto,687,700,700,VA,Louisa,10,True,False,True,Electric
392641,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-02-05 16:00:00 UTC,heat,auto,693,710,690,VA,Louisa,10,True,False,True,Electric
392642,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-02-05 17:00:00 UTC,heat,auto,687,710,690,VA,Louisa,10,True,False,True,Electric
392643,f90cd05382325ea89152a8dd52ea6eb4f37c23c9,2017-02-19 19:30:00 UTC,heat,auto,687,750,630,VA,Louisa,10,True,False,True,Electric


In [37]:
# Tag every reading with its year and month (both are used as group keys later).
# NOTE(review): the month label is lowercase "feb" here, while the January cells
# use "Jan" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_labels)

In [39]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/feb/feb_2017_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
feb_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 February Day

In [41]:
# Read in the Virginia (VA) CSV for February 2018.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2018-feb-day-VA.csv")

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = feb_2018['TemperatureExpectedCool']
heat = feb_2018['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
feb_2018 = feb_2018.loc[~is_outlier].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8ea083b8f7f0ecf8da28e0eb3e7d8cd6b2e4b89e,2018-02-13 14:35:00 UTC,heat,auto,677,672,672,VA,Broadlands,20,False,False,False,Gas
1,3109112d2dfa1a63e8ef49228d5bde9989cca566,2018-02-09 12:45:00 UTC,auto,hold,663,712,662,VA,Virginia Beach,50,True,False,True,Electric
2,6f2db8c1a12b5d627574775416621b86c5503e86,2018-02-19 19:20:00 UTC,heat,hold,697,685,685,VA,Smithfield,5,True,False,False,Gas
3,2b39754d589008a2cb2e2d2e7dc0e9577487f07a,2018-02-16 16:00:00 UTC,heat,hold,712,655,655,VA,Leesburg,15,False,False,False,Gas
4,3c365d21f889ba746296c0d49d0318f316611c0b,2018-02-01 18:25:00 UTC,heat,hold,732,735,735,VA,Arlington,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1131516,039bc94edbd9bb165f308b7ea283e2b899230ccd,2018-02-04 16:35:00 UTC,auto,auto,715,765,715,VA,Woodbridge,40,True,False,True,Electric
1131517,f6b7b9258d06417f516197b659af6a3f2aa19e2c,2018-02-27 12:25:00 UTC,auto,hold,653,765,655,VA,Alexandria,25,False,False,True,Electric
1131518,78b50534978b303eb97e898c6b96d72aa9942a6e,2018-02-04 17:05:00 UTC,auto,hold,683,765,685,VA,Manassas,10,True,False,False,Gas
1131519,0454b7a766065ccd43a4997c9d52da6728bafe57,2018-02-20 12:35:00 UTC,auto,hold,703,765,695,VA,Aldie,0,False,False,False,Gas


In [43]:
# Tag every reading with its year and month (both are used as group keys later).
# NOTE(review): the month label is lowercase "feb" here, while the January cells
# use "Jan" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_labels)

In [45]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/feb/feb_2018_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
feb_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 February Day

In [47]:
# Read in the Virginia (VA) CSV for February 2019.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2019-feb-day-VA.csv")

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = feb_2019['TemperatureExpectedCool']
heat = feb_2019['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
feb_2019 = feb_2019.loc[~is_outlier].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e2a0e93bb4ae057a23f7e3ed9463f924ade356b,2019-02-01 16:25:00 UTC,heat,hold,692,695,695,VA,Lancaster,70,False,False,True,Electric
1,d9b13c4b82f4da8143948f0e683196cf3dad81de,2019-02-12 12:10:00 UTC,auto,auto,624,640,620,VA,Mechanicsville,0,False,False,False,Gas
2,25a1449b413456f29b986777a062a4cccd5057e0,2019-02-24 19:00:00 UTC,heat,auto,627,630,630,VA,Forest,20,False,False,True,Electric
3,62f6e4b12acc1a8e6735f07a1879a3d1c593c6fc,2019-02-08 14:00:00 UTC,auto,auto,672,726,676,VA,McLean,10,False,False,False,Gas
4,12cb6e38971d70c6d93b71f9e06f4df798b251e4,2019-02-08 19:00:00 UTC,heat,hold,778,781,781,VA,Lansdowne,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1224386,eacd654d4435c993639c84330fd455ecbc18bf87,2019-02-10 17:30:00 UTC,heat,auto,758,760,760,VA,NORFOLK,60,True,False,False,Gas
1224387,67fe2b748f8c7eb63162a94591f8034ed2d255d4,2019-02-21 17:15:00 UTC,auto,hold,682,760,680,VA,Aldie,0,False,False,False,Gas
1224388,706c0d26a28e155db49e744846267df22f8b44b8,2019-02-13 18:05:00 UTC,auto,hold,710,760,710,VA,Sterling,35,False,False,False,Gas
1224389,1b6101a801f16ce9ddde7e783b1b15d46fa9396d,2019-02-05 14:35:00 UTC,heat,auto,694,760,700,VA,Ashburn,10,False,False,False,Gas


In [49]:
# Tag every reading with its year and month (both are used as group keys later).
# NOTE(review): the month label is lowercase "feb" here, while the January cells
# use "Jan" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_labels)

In [51]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/feb/feb_2019_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
feb_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 February Day

In [53]:
# Read in the Virginia (VA) CSV for February 2020.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2020-feb-day-VA.csv")

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = feb_2020['TemperatureExpectedCool']
heat = feb_2020['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
feb_2020 = feb_2020.loc[~is_outlier].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a8cd7b2a53ed897aca6e9197aacae6b2d0d80633,2020-02-10 17:25:00 UTC,heat,auto,665,791,670,VA,Ashburn,5,True,False,False,Gas
1,d337a87b1ba89bd3f18558d67d67d83e92371094,2020-02-02 14:35:00 UTC,heat,auto,702,685,700,VA,Stafford,15,False,False,False,Gas
2,548a26413dcf552ac67655061c39bddeee887b16,2020-02-15 15:35:00 UTC,auto,hold,716,765,715,VA,Woodbridge,6,False,False,False,Gas
3,be789d83c037c74b85ab886b2801a146ea0d7011,2020-02-21 18:20:00 UTC,auto,hold,725,777,727,VA,Richmond,35,False,False,True,Electric
4,ecef526ccd08506062f671eb77601e56f98579d0,2020-02-06 19:25:00 UTC,heat,hold,714,715,715,VA,Woodstock,90,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1739322,1d2f8655f8bb7cf5bfb33d7912b1ecea3e0a949e,2020-02-11 19:50:00 UTC,auto,hold,703,760,700,VA,Virginia Beach,30,False,False,False,Gas
1739323,8d2f2e03b58c343d9e155e2cc617fc59e049fc41,2020-02-26 15:35:00 UTC,auto,hold,715,760,720,VA,Arlington,0,False,False,False,Gas
1739324,d08343b0dd6004bad411a0b949b5a07206d029ae,2020-02-23 17:40:00 UTC,auto,hold,665,760,670,VA,Chesapeake,25,False,False,False,Gas
1739325,37129b2259faaa30d6012e4f7b4ba276735fcd65,2020-02-06 16:00:00 UTC,auto,auto,704,760,710,VA,Moseley,0,False,False,False,Gas


In [55]:
# Tag every reading with its year and month (both are used as group keys later).
# NOTE(review): the month label is lowercase "feb" here, while the January cells
# use "Jan" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_labels)

In [57]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/feb/feb_2020_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
feb_2020_ave.to_csv(out_path, header=True, index=True)

### 2021 February Day

In [59]:
# Read in the Virginia (VA) CSV for February 2021.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/VA-day/2021-feb-day-VA.csv")

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# are kept, because comparisons against NaN evaluate to False.
cool = feb_2021['TemperatureExpectedCool']
heat = feb_2021['TemperatureExpectedHeat']
is_outlier = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)

# One boolean filter replaces four in-place drop() passes; .copy() yields an
# independent frame so later column assignments do not warn about a slice.
feb_2021 = feb_2021.loc[~is_outlier].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,23e7fad079092c6e39b5111c9fbcddcab5251d19,2021-02-05 18:10:00 UTC,auto,hold,698,746,696,VA,Mechanicsville,7,False,False,False,Gas
1,38602cafee7b7dbc6e5384aa756960947631fdbb,2021-02-12 13:20:00 UTC,heat,hold,705,706,706,VA,Virginia Beach,15,False,False,False,Gas
2,5cf0593a0d860c484fb72522e5e758d4a741acd8,2021-02-09 18:20:00 UTC,auto,hold,756,819,759,VA,Chantilly,19,False,False,False,Gas
3,563521ca669c55888b2290ddf1396e2800100bbb,2021-02-17 11:00:00 UTC,heat,hold,651,655,655,VA,Virginia Beach,20,True,False,False,Gas
4,bed0179a1ab54509559410ca22037fca9ffbbb5a,2021-02-25 13:15:00 UTC,heat,hold,719,718,718,VA,Winchester,116,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1032794,b156e45a037de6ce17e895c159d0eadbc957afdf,2021-02-24 19:40:00 UTC,auto,hold,704,760,680,VA,Virginia Beach,0,False,False,False,Gas
1032795,239ddeb3046c130d91f313428d2d345d93db83b6,2021-02-17 13:20:00 UTC,heat,hold,732,760,760,VA,Dumfries,0,True,False,True,Electric
1032796,18ac5d8126309ebe2b015532ddf7f73f87b94283,2021-02-12 14:45:00 UTC,auto,hold,689,760,690,VA,Henrico,20,False,False,False,Gas
1032797,494ea66a89443cd71ee3f7c402f818914af3e317,2021-02-24 13:00:00 UTC,auto,hold,712,760,710,VA,Stafford,0,False,False,False,Gas


In [61]:
# Tag every reading with its year and month (both are used as group keys later).
# NOTE(review): the month label is lowercase "feb" here, while the January cells
# use "Jan" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_labels)

In [63]:
# Average the numeric columns per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export CSV file
# index=True writes the group keys, which live in the index of the aggregated frame.
out_path = Path("data/day/VA/feb/feb_2021_ave.csv")
# Create the output folder if needed so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
feb_2021_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV file names in the February folder.
# sorted() pins the order: os.listdir() returns entries in arbitrary order, which
# would otherwise let the row order of the combined file vary between machines.
files = sorted(f for f in os.listdir("data/day/VA/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and stack them into a single frame.
# The frames are collected first and concatenated once: calling pd.concat inside
# the loop re-copies all accumulated rows on every pass, and starting from an
# empty DataFrame triggers a FutureWarning in recent pandas releases.
yearly_frames = [pd.read_csv("data/day/VA/feb/" + file) for file in files]

# pd.concat([]) raises ValueError, so fall back to an empty frame for an empty folder.
VA_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

VA_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00a80b7651daeb021a619256a237cba1d0a29ed8,feb,2017,heat,auto,Centreville,710.955556,710.000000,709.800000,25.0,False,False,False
1,00a80b7651daeb021a619256a237cba1d0a29ed8,feb,2017,heat,hold,Centreville,703.509434,700.075472,700.000000,25.0,False,False,False
2,01565e9263a0f04e74fcddc27ba8f0e88631879a,feb,2017,heat,auto,Arlington,680.400000,680.000000,680.000000,0.0,False,False,True
3,01565e9263a0f04e74fcddc27ba8f0e88631879a,feb,2017,heat,hold,Arlington,680.333333,680.000000,680.000000,0.0,False,False,True
4,015a3a183f8d2d338ba35a9a2a49be020601cfb2,feb,2017,auto,auto,Leesburg,700.400000,770.000000,690.000000,55.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1368,ff089a5b75a6c44545cf3e1017feedb5ac35a8f9,feb,2021,heat,hold,Great Bridge East,674.296089,664.022346,664.022346,5.0,True,False,True
1369,ff0967ebe76dabcdf7b633a6ebed3ec7d356de61,feb,2021,heat,hold,Manassas,707.600000,710.000000,710.000000,0.0,True,False,True
1370,ff141dbbec8ce3fd6bbb5e788d0720cf4b8c088d,feb,2021,auto,hold,Manassas,698.162162,757.243243,699.981982,48.0,True,False,True
1371,ff6eb356ac37289e094952e9eabb626cd15970d7,feb,2021,heat,hold,Stafford,686.533898,686.644068,686.423729,10.0,True,False,False


In [67]:
# Write the combined all-years February table. index=False leaves out the row
# labels, which are just each source file's own row numbers and repeat across years.
VA_feb.to_csv("Scraper_Output/State_Month_Day/VA/VA_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Read in month csv for state: raw VA rows for June 2017, one row per
# Identifier/date_time reading; they are averaged per month further down.
jun_2017 = pd.read_csv("../data_large/VA-day/2017-jun-day-VA.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2017.drop(jun_2017[outlier_rows].index, inplace = True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3529150459985e907d0627edac85ea064da5403b,2017-06-28 18:25:00 UTC,cool,auto,706,710,720,VA,Hopewell,0,True,False,True,Electric
1,8fc8c64d5d8ea328f529117c28fd3f63e8b40141,2017-06-21 14:20:00 UTC,cool,hold,727,730,730,VA,Middletown,10,True,False,True,Electric
2,e9d8f6269aa4367b99945fe01cfd91c1ca899f65,2017-06-06 12:45:00 UTC,cool,hold,742,790,790,VA,Yorktown,40,False,False,False,Gas
3,356bb8583606d38fbd60c301d1fe3697f8d0520e,2017-06-18 18:05:00 UTC,cool,auto,721,720,660,VA,Danville,20,True,False,True,Electric
4,ecef526ccd08506062f671eb77601e56f98579d0,2017-06-23 12:55:00 UTC,cool,auto,743,780,740,VA,Woodstock,90,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
733631,b8f15cc3f2f43a2bcfb11e407ff473e6b60ca508,2017-06-04 14:05:00 UTC,cool,hold,713,710,710,VA,North Chesterfield,0,False,False,False,Gas
733632,41a6d807585a1a2abb7592fff7d2a349fc68eda7,2017-06-10 19:30:00 UTC,cool,hold,750,750,750,VA,North Chesterfield,5,False,False,True,Electric
733633,b8f15cc3f2f43a2bcfb11e407ff473e6b60ca508,2017-06-22 15:20:00 UTC,cool,hold,722,720,720,VA,North Chesterfield,0,False,False,False,Gas
733634,176df46422e877eafa284327bda947b01cb5b139,2017-06-21 18:25:00 UTC,auto,hold,741,740,680,VA,North Chesterfield,35,True,False,True,Electric


In [70]:
# Tag every row with its source year and month (both stored as strings).
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=ave_labels)

In [72]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jun_2017_ave = jun_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file: June 2017 averages go into the "jun" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jun_2017_ave.to_csv("data/day/VA/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Read in month csv for state: raw VA rows for June 2018, one row per
# Identifier/date_time reading; they are averaged per month further down.
jun_2018 = pd.read_csv("../data_large/VA-day/2018-jun-day-VA.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2018.drop(jun_2018[outlier_rows].index, inplace = True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b52a9583b92a46d41402b79894a8facf1e49aa13,2018-06-01 16:25:00 UTC,auto,hold,714,713,643,VA,Danville,27,True,False,True,Electric
1,27950108015fae5f01983218a3cbcd51506f0f1f,2018-06-01 14:15:00 UTC,auto,hold,692,695,645,VA,Suffolk,10,False,False,False,Gas
2,e2ebbf15dd29ec276babbda89839720e4010576c,2018-06-20 10:40:00 UTC,cool,auto,763,777,772,VA,Troy,20,True,False,True,Electric
3,1331c46df9b8376619c8c54e420ba84949d0ac88,2018-06-27 17:40:00 UTC,auto,auto,684,685,655,VA,Salem,7,False,False,True,Electric
4,11207989bf9af6de868d7f72d215a26f3b365b74,2018-06-13 18:10:00 UTC,auto,hold,721,719,639,VA,Ashburn,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1647161,59a3996c03dfcbbe724832eb54c0d4748966a659,2018-06-07 11:15:00 UTC,cool,hold,749,760,760,VA,Fredericksburg,10,True,False,True,Electric
1647162,708477530bea05a11fdf13a7a8c70fb2d8d41cdd,2018-06-05 13:45:00 UTC,auto,hold,755,810,760,VA,Fredericksburg,20,True,False,True,Electric
1647163,f6b7b9258d06417f516197b659af6a3f2aa19e2c,2018-06-28 15:00:00 UTC,cool,hold,764,760,760,VA,Alexandria,25,False,False,True,Electric
1647164,59a3996c03dfcbbe724832eb54c0d4748966a659,2018-06-16 13:55:00 UTC,cool,auto,757,760,760,VA,Fredericksburg,10,True,False,True,Electric


In [76]:
# Tag every row with its source year and month (both stored as strings).
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=ave_labels)

In [78]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jun_2018_ave = jun_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file: June 2018 averages go into the "jun" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jun_2018_ave.to_csv("data/day/VA/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Read in month csv for state: raw VA rows for June 2019, one row per
# Identifier/date_time reading; they are averaged per month further down.
jun_2019 = pd.read_csv("../data_large/VA-day/2019-jun-day-VA.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2019.drop(jun_2019[outlier_rows].index, inplace = True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d464da8a612a1f9b116ffc67e19a7fdf9816fc7e,2019-06-16 19:10:00 UTC,cool,auto,747,730,674,VA,Burke,45,False,False,True,Electric
1,b004cc86467377464f0eb2c609a75f5cd52dc7f3,2019-06-18 18:40:00 UTC,auto,hold,740,742,662,VA,Richmond,60,False,False,True,Electric
2,40d6e753b0333cb6bcc4fddba35a7eb763584357,2019-06-01 18:10:00 UTC,cool,auto,744,743,743,VA,McLean,60,False,False,False,Gas
3,1845000ebfd822ce1b7fe13b626c19dcd34a5e59,2019-06-29 14:50:00 UTC,cool,hold,713,717,717,VA,Portsmouth,60,True,False,True,Electric
4,83521fc2bb03faa74b0dcc6e992dc477d131d8de,2019-06-17 14:05:00 UTC,auto,hold,744,752,672,VA,Midlothian,17,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2148922,2501862b5b2b0a91230131ac1b533b4f50cdae20,2019-06-22 19:10:00 UTC,cool,hold,764,760,760,VA,Wolf Trap,20,False,False,False,Gas
2148923,67b6a6918386107e1501210c7a3c3c510126d468,2019-06-18 15:45:00 UTC,cool,hold,760,760,760,VA,Fredericksburg,15,False,False,False,Gas
2148924,bd8d7c1bdc9f516e1540474fb143488e025c4ce8,2019-06-30 17:40:00 UTC,cool,hold,764,760,760,VA,Wolf Trap,20,False,False,False,Gas
2148925,d01464f64635a3a1dba094483007e2452f8ee2f1,2019-06-04 18:00:00 UTC,cool,hold,739,760,760,VA,Chesapeake,0,True,False,True,Electric


In [82]:
# Tag every row with its source year and month (both stored as strings).
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=ave_labels)

In [84]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jun_2019_ave = jun_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file: June 2019 averages go into the "jun" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jun_2019_ave.to_csv("data/day/VA/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Read in month csv for state: raw VA rows for June 2020, one row per
# Identifier/date_time reading; they are averaged per month further down.
jun_2020 = pd.read_csv("../data_large/VA-day/2020-jun-day-VA.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2020.drop(jun_2020[outlier_rows].index, inplace = True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,94c6d8217b6ac5114a7e1ef9a22275e6d88037fc,2020-06-10 15:00:00 UTC,cool,hold,726,723,723,VA,Vienna,50,False,False,False,Gas
1,fd917e664f40739300a52703fd0db729816de1ce,2020-06-17 12:10:00 UTC,cool,auto,764,764,668,VA,Alexandria,75,False,False,False,Gas
2,7d73ef6138abbde630517376aaf9c85d7332b4a0,2020-06-10 12:25:00 UTC,cool,auto,757,759,759,VA,Henrico,20,False,False,False,Gas
3,6bd668c10bf7d0f8f786c39bef4ccf945d9f66fc,2020-06-19 11:35:00 UTC,cool,hold,725,732,732,VA,Staunton,80,False,False,True,Electric
4,f7d56938cdcc528977797ae2462ec38652d83d3e,2020-06-21 10:45:00 UTC,auto,auto,730,730,677,VA,Mechanicville,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2188497,03c707ccc0c790343a9b58d4991168d5f849c97a,2020-06-29 18:35:00 UTC,cool,hold,768,760,760,VA,Lancaster,10,True,False,True,Electric
2188498,6b62df55fa832fc874ef3afa53bd391da1dd9921,2020-06-01 16:05:00 UTC,cool,auto,737,760,760,VA,Roanoke,10,False,False,True,Electric
2188499,fd917e664f40739300a52703fd0db729816de1ce,2020-06-25 12:55:00 UTC,cool,hold,762,760,760,VA,Alexandria,75,False,False,False,Gas
2188500,913b3298d7921aea96903a145ed93504db539bab,2020-06-04 12:15:00 UTC,cool,hold,760,760,760,VA,Woodbridge,0,True,False,False,Gas


In [88]:
# Tag every row with its source year and month (both stored as strings).
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=ave_labels)

In [90]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jun_2020_ave = jun_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file: June 2020 averages go into the "jun" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jun_2020_ave.to_csv("data/day/VA/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Read in month csv for state: raw VA rows for June 2021, one row per
# Identifier/date_time reading; they are averaged per month further down.
jun_2021 = pd.read_csv("../data_large/VA-day/2021-jun-day-VA.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jun_2021.drop(jun_2021[outlier_rows].index, inplace = True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ab03019bfb800f0b503ddf5ad1f54b31ee3a9e99,2021-06-18 18:55:00 UTC,auto,hold,722,714,654,VA,Virginia Beach,40,True,False,True,Electric
1,7c9d78e17b29da7d3c10a381bd9c3c4b3285e183,2021-06-04 18:05:00 UTC,auto,hold,645,633,603,VA,Melfa,0,False,False,True,Electric
2,2b77648a3f3e65bbbd75cffdb4c8933ce05aa922,2021-06-19 17:40:00 UTC,cool,hold,745,755,755,VA,Henrico,0,True,False,True,Electric
3,3c365d21f889ba746296c0d49d0318f316611c0b,2021-06-26 11:20:00 UTC,cool,hold,744,745,745,VA,Arlington,60,False,False,False,Gas
4,0bb5a17a193f56321319b72cea049cf71051c86c,2021-06-25 16:00:00 UTC,auto,hold,769,778,728,VA,Centreville,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1187240,c761f8853c105f51305b36bc7855436ed68a19db,2021-06-14 19:25:00 UTC,cool,hold,771,760,760,VA,Chesapeake,30,False,False,True,Electric
1187241,2501862b5b2b0a91230131ac1b533b4f50cdae20,2021-06-03 11:30:00 UTC,cool,hold,744,760,760,VA,Wolf Trap,20,False,False,False,Gas
1187242,91d12ae6919d3b1e8132097c462bb7f05d612709,2021-06-26 13:55:00 UTC,cool,hold,763,760,760,VA,Chesapeake,30,False,False,False,Gas
1187243,dd65e9a17a94b5eb9db248e0ad98bb4c39c52842,2021-06-23 19:50:00 UTC,cool,hold,763,760,760,VA,Hampton,0,False,False,False,Gas


In [94]:
# Tag every row with its source year and month (both stored as strings).
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=ave_labels)

In [96]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jun_2021_ave = jun_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file: June 2021 averages go into the "jun" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jun_2021_ave.to_csv("data/day/VA/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files
1. Read in every per-year average CSV from this month's folder for the state
2. Concatenate them and export as one combined CSV

In [98]:
# Create variable for files in directory: the per-year average CSVs for June.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/VA/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file for the month, then concatenate once.
# Growing a frame with pd.concat inside the loop re-copies all earlier rows on
# every pass; collecting the pieces first avoids that and keeps the empty seed
# frame out of the concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/VA/jun/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSVs (the result the loop-based version produced).
VA_jun = pd.concat(frames) if frames else pd.DataFrame()

VA_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00a80b7651daeb021a619256a237cba1d0a29ed8,jun,2017,auto,auto,Centreville,711.489933,710.046980,650.000000,25.0,False,False,False
1,00a80b7651daeb021a619256a237cba1d0a29ed8,jun,2017,auto,hold,Centreville,709.658974,710.935897,650.000000,25.0,False,False,False
2,00a80b7651daeb021a619256a237cba1d0a29ed8,jun,2017,cool,auto,Centreville,735.527933,738.343575,712.477654,25.0,False,False,False
3,00a80b7651daeb021a619256a237cba1d0a29ed8,jun,2017,cool,hold,Centreville,719.569330,717.023326,717.023326,25.0,False,False,False
4,01565e9263a0f04e74fcddc27ba8f0e88631879a,jun,2017,cool,auto,Arlington,721.841667,720.000000,720.000000,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1553,ff141dbbec8ce3fd6bbb5e788d0720cf4b8c088d,jun,2021,auto,hold,Manassas,725.489796,730.142857,680.000000,48.0,True,False,True
1554,ff6eb356ac37289e094952e9eabb626cd15970d7,jun,2021,cool,hold,Stafford,707.921569,706.225490,705.647059,10.0,True,False,False
1555,ff8d2f6b238eda3bd3dbd2dd5268c48d5d933d11,jun,2021,auto,hold,Charlottesville,699.484463,698.827684,668.827684,6.0,False,False,False
1556,ff8d4a7cc516fb17641deba1e511d7b7089022ca,jun,2021,cool,hold,Arlington,740.454545,721.727273,717.727273,5.0,False,False,True


In [100]:
# Write the combined all-years June table. index=False leaves out the row
# labels, which are just each source file's own row numbers and repeat across years.
VA_jun.to_csv("Scraper_Output/State_Month_Day/VA/VA_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Read in month csv for state: raw VA rows for July 2017, one row per
# Identifier/date_time reading; they are averaged per month further down.
jul_2017 = pd.read_csv("../data_large/VA-day/2017-jul-day-VA.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2017.drop(jul_2017[outlier_rows].index, inplace = True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,46eac8217ebec13ac9c732f6750756e6f04ede91,2017-07-14 19:50:00 UTC,auto,hold,746,740,680,VA,Earlysville,40,False,False,True,Electric
1,0822a16d06f7cc87d1f7d539f0b1bb3a9e3d8645,2017-07-13 17:50:00 UTC,auto,hold,735,730,660,VA,Waynesboro,35,False,False,False,Gas
2,4b4eff79c0fcdfe5dd085258be9f4ce7f32ff607,2017-07-16 18:35:00 UTC,auto,hold,754,735,685,VA,McLean,50,False,False,False,Gas
3,e5f3ec818fda9c06783dd1bbd268befa0758420f,2017-07-30 10:50:00 UTC,cool,auto,740,750,740,VA,Lorton,45,False,False,False,Gas
4,6b3cfb9f3e1e92985d4b18ca7cef0feb62ceb7b3,2017-07-10 15:45:00 UTC,cool,auto,750,740,730,VA,North Canton,55,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
864257,176df46422e877eafa284327bda947b01cb5b139,2017-07-06 15:10:00 UTC,cool,auto,724,720,680,VA,North Chesterfield,35,True,False,True,Electric
864258,b8f15cc3f2f43a2bcfb11e407ff473e6b60ca508,2017-07-28 16:10:00 UTC,cool,hold,720,720,720,VA,North Chesterfield,0,False,False,False,Gas
864259,b8f15cc3f2f43a2bcfb11e407ff473e6b60ca508,2017-07-03 12:35:00 UTC,cool,hold,727,730,730,VA,North Chesterfield,0,False,False,False,Gas
864260,176df46422e877eafa284327bda947b01cb5b139,2017-07-26 19:15:00 UTC,cool,hold,723,715,665,VA,North Chesterfield,35,True,False,True,Electric


In [103]:
# Tag every row with its source year and month (both stored as strings).
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=ave_labels)

In [105]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jul_2017_ave = jul_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file: July 2017 averages go into the "jul" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jul_2017_ave.to_csv("data/day/VA/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Read in month csv for state: raw VA rows for July 2018, one row per
# Identifier/date_time reading; they are averaged per month further down.
jul_2018 = pd.read_csv("../data_large/VA-day/2018-jul-day-VA.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2018.drop(jul_2018[outlier_rows].index, inplace = True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b36817760b8ccb0f23a80d256f7c39ab1aa5d7e4,2018-07-29 16:30:00 UTC,cool,hold,728,745,745,VA,Riner,0,False,False,False,Gas
2,e84a24098fb1def7b096b201d186669b51b17661,2018-07-29 17:00:00 UTC,cool,auto,698,700,672,VA,Dunn Loring,45,False,False,False,Gas
3,a48a0418842b33ccfed990a7e9f885b79d972c74,2018-07-22 14:05:00 UTC,cool,hold,697,732,732,VA,Norfolk,0,False,False,False,Gas
4,9c7e09cc81efde119ac23a0b3c5dfcf82f60a19e,2018-07-04 19:35:00 UTC,auto,hold,754,742,642,VA,Alexandria,10,False,False,True,Electric
5,b8ed112d01627539f400c00d32bd333d8d558345,2018-07-18 17:05:00 UTC,cool,hold,742,735,735,VA,Glen Allen,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1821093,6ba2372a13fac26a7e8f71558636dab8b9ac57fc,2018-07-03 17:50:00 UTC,cool,auto,762,760,760,VA,Fredericksburg,0,False,False,False,Gas
1821094,bfe683aaff1a529afe11c8aa7a6a4f1f21b89084,2018-07-29 12:30:00 UTC,cool,auto,762,760,760,VA,Williamsburg,15,False,False,False,Gas
1821095,99362518a9ce2fdcb8233f6bc438f7c0c2e9024b,2018-07-05 11:05:00 UTC,cool,auto,757,760,760,VA,Leesburg,20,False,False,False,Gas
1821096,503c37e1bc1dce2cd5da64b328e22b4203401ac0,2018-07-12 11:25:00 UTC,cool,hold,743,760,760,VA,Arlington,0,False,False,True,Electric


In [109]:
# Tag every row with its source year and month (both stored as strings).
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=ave_labels)

In [111]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jul_2018_ave = jul_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file: July 2018 averages go into the "jul" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jul_2018_ave.to_csv("data/day/VA/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Read in month csv for state: raw VA rows for July 2019, one row per
# Identifier/date_time reading; they are averaged per month further down.
jul_2019 = pd.read_csv("../data_large/VA-day/2019-jul-day-VA.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2019.drop(jul_2019[outlier_rows].index, inplace = True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbc78efbe436e2a0d2d3715e73d8581f497e5e7c,2019-07-29 10:40:00 UTC,cool,hold,727,741,701,VA,Collinsville,19,True,False,True,Electric
1,e9a01d0abaf193a46dfc9d7bd961ae75eec3107f,2019-07-03 11:50:00 UTC,cool,auto,737,740,738,VA,Ashburn,5,False,False,False,Gas
2,765aee4d34d45b5dd01acb0cc23694956600cfd3,2019-07-02 14:15:00 UTC,cool,hold,690,689,689,VA,Bristow,20,True,False,False,Gas
3,be17616703f6a04c7ea74535333412d050a795a8,2019-07-17 15:20:00 UTC,cool,hold,742,728,728,VA,Manassas,50,False,False,False,Gas
4,302c8d302f45ff970ded77466fb31051f401b8ab,2019-07-29 15:10:00 UTC,auto,hold,719,715,655,VA,Stafford,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2284199,4e9399b85e89a054881cccabaa978f1a914baebd,2019-07-21 15:00:00 UTC,cool,hold,765,760,760,VA,Seaford,0,False,False,False,Gas
2284200,f79175020897cf143cf747269cd85706e9990ff2,2019-07-13 18:40:00 UTC,cool,hold,778,760,760,VA,Hayes,40,False,False,True,Electric
2284201,b3cf4c8762c0d47818167ffd7e36f9f6f983f80a,2019-07-15 18:00:00 UTC,cool,hold,770,760,760,VA,Midlothian,0,False,False,False,Gas
2284202,26bd99a06a1fdfc0fb44a75ac587c990619d3afd,2019-07-27 13:15:00 UTC,cool,auto,741,740,760,VA,Stafford,40,True,False,True,Electric


In [115]:
# Tag every row with its source year and month (both stored as strings).
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=ave_labels)

In [117]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jul_2019_ave = jul_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file: July 2019 averages go into the "jul" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jul_2019_ave.to_csv("data/day/VA/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Read in month csv for state: raw VA rows for July 2020, one row per
# Identifier/date_time reading; they are averaged per month further down.
jul_2020 = pd.read_csv("../data_large/VA-day/2020-jul-day-VA.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2020.drop(jul_2020[outlier_rows].index, inplace = True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e027923309495757cc5a1e4dfd485cfcf13ec9a2,2020-07-07 16:55:00 UTC,cool,hold,708,708,708,VA,herndon,10,True,False,False,Gas
2,bd5167fe9a212cc5eaa903a2eb0a0a21b4dd1f48,2020-07-31 12:30:00 UTC,cool,auto,758,760,731,VA,Dry Fork,10,True,False,True,Electric
3,da66459c69d7b3e70a23b507465fec3322a0fbe2,2020-07-20 16:15:00 UTC,cool,hold,755,752,752,VA,Arlington,45,False,False,True,Electric
4,bc1354e091aa18d3c84c7435ed83990c01e35bed,2020-07-05 18:55:00 UTC,cool,hold,736,729,729,VA,Arlington,9,True,False,False,Gas
5,4f9cdf3263dc37e29e7d06c77c75b96dfd4c0dbf,2020-07-26 15:40:00 UTC,auto,hold,746,740,610,VA,Virginia Beach,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2323428,68c96efd6a2729d9b21b96d65a1a2a3b7f1f6fed,2020-07-16 19:00:00 UTC,cool,hold,770,760,760,VA,South Norfolk,60,True,False,True,Electric
2323429,2f77d899688bb3144fa113d2440855afae79dc67,2020-07-20 18:30:00 UTC,cool,auto,763,760,760,VA,Manassas,10,False,False,False,Gas
2323430,3bfc609fddce5c756ffbed16a80f2e3d7ed3651a,2020-07-20 10:35:00 UTC,cool,hold,758,760,760,VA,Sterling,45,False,False,False,Gas
2323431,c7c29bee044a83da554b68383114273693cf48f6,2020-07-21 12:10:00 UTC,cool,hold,759,760,760,VA,Virginia Beach,0,False,False,True,Electric


In [121]:
# Tag every row with its source year and month (both stored as strings).
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=ave_labels)

In [123]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jul_2020_ave = jul_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file: July 2020 averages go into the "jul" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jul_2020_ave.to_csv("data/day/VA/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Read in month csv for state: raw VA rows for July 2021, one row per
# Identifier/date_time reading; they are averaged per month further down.
jul_2021 = pd.read_csv("../data_large/VA-day/2021-jul-day-VA.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected temperature (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
jul_2021.drop(jul_2021[outlier_rows].index, inplace = True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,378d7ba3f021eab723364a8bacfb3e10a9a27d96,2021-07-24 14:05:00 UTC,auto,hold,730,725,635,VA,Sterling,9,False,False,True,Electric
1,563521ca669c55888b2290ddf1396e2800100bbb,2021-07-29 18:40:00 UTC,cool,hold,736,722,722,VA,Virginia Beach,20,True,False,False,Gas
2,f3f67912c283659caf8b2ff77fe6f9c0311a39ec,2021-07-20 17:05:00 UTC,cool,hold,754,750,729,VA,Scottsville,30,True,False,True,Electric
3,38602cafee7b7dbc6e5384aa756960947631fdbb,2021-07-07 15:05:00 UTC,auto,hold,706,696,646,VA,Virginia Beach,15,False,False,False,Gas
4,58fd9105b9fd64bfc45f0b2a3678ea7d425b3661,2021-07-02 14:50:00 UTC,auto,hold,737,742,692,VA,Midlothian,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1227343,91d12ae6919d3b1e8132097c462bb7f05d612709,2021-07-10 15:05:00 UTC,cool,hold,782,760,760,VA,Chesapeake,30,False,False,False,Gas
1227344,fd917e664f40739300a52703fd0db729816de1ce,2021-07-23 11:35:00 UTC,cool,hold,763,760,760,VA,Alexandria,75,False,False,False,Gas
1227345,48f2eb93866fd2cf5da4367ccc629846f90c06c6,2021-07-14 12:50:00 UTC,cool,hold,764,760,760,VA,Sterling,0,True,False,False,Gas
1227346,6b346e0085ce90d836d19574883192870851f7f1,2021-07-14 19:35:00 UTC,cool,hold,761,760,760,VA,Chesapeake,20,False,False,False,Gas


In [127]:
# Tag every row with its source year and month (both stored as strings).
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Label the three temperature columns as averages ("_ave") ahead of the
# monthly aggregation.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=ave_labels)

In [129]:
# Monthly mean of every numeric column per Identifier / HvacMode /
# CalendarEvent / City (Month and Year are constant within this frame).
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them as older versions did.
jul_2021_ave = jul_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file: July 2021 averages go into the "jul" folder that the
# combine step scans for *.csv files.
# index=True writes the group keys (Identifier, Month, Year, HvacMode,
# CalendarEvent, City), which live in the index after the groupby.

jul_2021_ave.to_csv("data/day/VA/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files
1. Read in every per-year average CSV from this month's folder for the state
2. Concatenate them and export as one combined CSV

In [131]:
# Create variable for files in directory: the per-year average CSVs for July.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/VA/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file for the month, then concatenate once.
# Growing a frame with pd.concat inside the loop re-copies all earlier rows on
# every pass; collecting the pieces first avoids that and keeps the empty seed
# frame out of the concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/VA/jul/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSVs (the result the loop-based version produced).
VA_jul = pd.concat(frames) if frames else pd.DataFrame()

VA_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0093e2266316dab9e25b0e8c51876169485cae66,jul,2017,auto,auto,Virginia Beach,722.000000,720.000000,660.000000,40.0,True,False,True
1,00a77c90abdcc1ac45fa8c839401cc6a0539b164,jul,2017,cool,auto,Alexandria,682.750000,691.250000,651.500000,10.0,False,False,True
2,00a80b7651daeb021a619256a237cba1d0a29ed8,jul,2017,cool,auto,Centreville,749.542289,747.465174,727.639303,25.0,False,False,False
3,00a80b7651daeb021a619256a237cba1d0a29ed8,jul,2017,cool,hold,Centreville,731.273196,729.335481,729.304124,25.0,False,False,False
4,01565e9263a0f04e74fcddc27ba8f0e88631879a,jul,2017,cool,hold,Arlington,722.848980,720.614286,720.610204,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1382,ff020a06635306a6879f02330063b6bd5c6af5d8,jul,2021,cool,hold,Chesterfield,774.779570,773.268817,772.225806,10.0,False,False,False
1383,ff0967ebe76dabcdf7b633a6ebed3ec7d356de61,jul,2021,cool,hold,Manassas,786.242105,787.964912,787.964912,0.0,True,False,True
1384,ff6eb356ac37289e094952e9eabb626cd15970d7,jul,2021,cool,hold,Stafford,703.383333,696.000000,696.000000,10.0,True,False,False
1385,ff8d2f6b238eda3bd3dbd2dd5268c48d5d933d11,jul,2021,auto,hold,Charlottesville,692.447368,689.210526,659.210526,6.0,False,False,False


In [133]:
# Export the combined month file; create the folder first because to_csv
# does not create missing directories.
out_path = Path("Scraper_Output/State_Month_Day/VA/VA_jul.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
VA_jul.to_csv(out_path, header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the raw August 2017 CSV for VA
aug_2017 = pd.read_csv(Path("../data_large/VA-day") / "2017-aug-day-VA.csv")

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(aug_2017.index[out_of_range], inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0822a16d06f7cc87d1f7d539f0b1bb3a9e3d8645,2017-08-29 19:55:00 UTC,cool,hold,710,710,710,VA,Waynesboro,35,False,False,False,Gas
1,3da1837f9b3fd01b73b89fb9b513b89ce28acf50,2017-08-22 13:05:00 UTC,cool,hold,737,740,740,VA,Woodstock,0,False,False,True,Electric
3,3b03aad92e218e881a92991c7b045a060079d5c0,2017-08-17 19:30:00 UTC,cool,hold,720,720,720,VA,Dayton,105,False,False,True,Electric
6,ecc0d432ab275feb06816aa8051f300f60fcb6af,2017-08-28 13:10:00 UTC,cool,hold,745,750,750,VA,chesapeake,20,False,False,False,Gas
7,0b7e50f090e52a6160fed4965e42e267f791c80e,2017-08-05 16:10:00 UTC,cool,hold,721,720,720,VA,Portsmouth,45,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
879949,b8f15cc3f2f43a2bcfb11e407ff473e6b60ca508,2017-08-30 16:10:00 UTC,cool,hold,721,720,720,VA,North Chesterfield,0,False,False,False,Gas
879950,176df46422e877eafa284327bda947b01cb5b139,2017-08-11 13:15:00 UTC,cool,auto,733,730,700,VA,North Chesterfield,35,True,False,True,Electric
879951,176df46422e877eafa284327bda947b01cb5b139,2017-08-26 16:55:00 UTC,cool,auto,732,730,700,VA,North Chesterfield,35,True,False,True,Electric
879952,b8f15cc3f2f43a2bcfb11e407ff473e6b60ca508,2017-08-31 12:10:00 UTC,cool,hold,718,720,720,VA,North Chesterfield,0,False,False,False,Gas


In [136]:
# Label every row with its year and month (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" to label the aggregates
aug_2017 = aug_2017.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [138]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/aug/aug_2017_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
aug_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 August Day

In [140]:
# Load the raw August 2018 CSV for VA
aug_2018 = pd.read_csv(Path("../data_large/VA-day") / "2018-aug-day-VA.csv")

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(aug_2018.index[out_of_range], inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9632cd7c43fff2b4ca85e579ae8850e41c1eb6b2,2018-08-05 10:55:00 UTC,auto,hold,682,685,615,VA,Roanoke,0,False,False,False,Gas
1,0a561dc73251310f92cb74e708de2d34b03e9a68,2018-08-28 14:30:00 UTC,cool,hold,726,725,725,VA,Chesapeake,107,True,False,True,Electric
2,2ffa929f769735bdf57c0492bae1e30e428c96c5,2018-08-31 16:40:00 UTC,auto,hold,728,733,683,VA,Warrenton,37,True,False,True,Electric
3,42e69cbd9416291fa296b0dfa24c835f6f2932d2,2018-08-18 13:50:00 UTC,auto,hold,727,727,677,VA,Springfield,0,False,False,False,Gas
4,64601bc789b8c6416632946ccad6082dd3546f53,2018-08-07 13:05:00 UTC,cool,auto,745,730,705,VA,Potomac Falls,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1874457,e1c3e18143bc90fe0632c0bdf04286484e95563c,2018-08-15 13:20:00 UTC,cool,hold,756,760,760,VA,Charlottesville,0,False,False,False,Gas
1874458,25ab2f88046af31b48c09be49a54e54c2d022143,2018-08-07 15:15:00 UTC,cool,hold,762,760,760,VA,Aldie,5,True,False,True,Electric
1874459,25ab2f88046af31b48c09be49a54e54c2d022143,2018-08-27 19:40:00 UTC,cool,hold,768,760,760,VA,Aldie,5,True,False,True,Electric
1874460,665c35af017ba5688f5a4e588df5d00db62a82ea,2018-08-17 19:40:00 UTC,cool,hold,758,760,760,VA,Charlottesville,35,False,False,False,Gas


In [142]:
# Label every row with its year and month (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" to label the aggregates
aug_2018 = aug_2018.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [144]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/aug/aug_2018_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
aug_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 August Day

In [146]:
# Load the raw August 2019 CSV for VA
aug_2019 = pd.read_csv(Path("../data_large/VA-day") / "2019-aug-day-VA.csv")

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(aug_2019.index[out_of_range], inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,4741d6a5489f7da6a913d80786aed2eebc986a8e,2019-08-21 12:35:00 UTC,cool,auto,711,717,717,VA,Forest,25,True,False,True,Electric
2,40d6e753b0333cb6bcc4fddba35a7eb763584357,2019-08-31 18:45:00 UTC,cool,auto,761,761,761,VA,McLean,60,False,False,False,Gas
3,51d08afcc751e99ee04d345bd0380dc6cf0fcc1b,2019-08-08 14:30:00 UTC,cool,hold,785,788,788,VA,Lake Ridge,20,False,False,False,Gas
4,51d08afcc751e99ee04d345bd0380dc6cf0fcc1b,2019-08-06 16:55:00 UTC,cool,hold,752,752,752,VA,Lake Ridge,20,False,False,False,Gas
5,61df5370646c3f32022cd5442fcf7743445a9a44,2019-08-24 17:50:00 UTC,auto,hold,735,732,682,VA,Sterling,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2267922,6f087db3618b53f72c150c48370addf8e3333933,2019-08-13 14:05:00 UTC,cool,auto,769,760,760,VA,Spotsylvania,10,True,False,True,Electric
2267923,9e878c30cc457a171f6f881922162411e02c380f,2019-08-14 14:50:00 UTC,cool,auto,756,760,760,VA,Round Hill,0,True,False,True,Electric
2267924,86961cd3f98d97dc9cd0a6f6b5af8a1e8c681e2f,2019-08-03 18:25:00 UTC,cool,auto,763,760,760,VA,Round Hill,0,False,False,True,Electric
2267925,20fcb3f394406b777c740b8785057c7b26541cb3,2019-08-25 16:25:00 UTC,cool,auto,740,760,760,VA,Chesapeake,16,False,False,False,Gas


In [148]:
# Label every row with its year and month (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" to label the aggregates
aug_2019 = aug_2019.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [150]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/aug/aug_2019_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
aug_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 August Day

In [152]:
# Load the raw August 2020 CSV for VA
aug_2020 = pd.read_csv(Path("../data_large/VA-day") / "2020-aug-day-VA.csv")

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(aug_2020.index[out_of_range], inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3109112d2dfa1a63e8ef49228d5bde9989cca566,2020-08-22 16:25:00 UTC,cool,hold,719,695,695,VA,Virginia Beach,50,True,False,True,Electric
1,f2d73e283a88b8903f76fc71efe0e7f4e1a595b9,2020-08-22 16:40:00 UTC,auto,hold,682,684,664,VA,Newport News,60,False,False,False,Gas
2,c66ec21069427f244d2ab9743facf98437e41a17,2020-08-09 17:05:00 UTC,auto,hold,719,720,610,VA,Norfolk,60,False,False,False,Gas
3,3b12faadd3729ecd9c828fdf011fe03b13ef13c9,2020-08-22 18:40:00 UTC,auto,hold,725,725,645,VA,Arlington,95,False,False,False,Gas
4,3694a0298527e0e710d721cc1a9ad714cd9eb359,2020-08-14 19:25:00 UTC,cool,hold,719,703,703,VA,Chesapeake,47,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2202098,1c3a380a2cd3e2b1072150dec6672c4573c92ac7,2020-08-13 13:45:00 UTC,cool,hold,769,760,760,VA,Warrenton,0,False,False,True,Electric
2202099,556316db1ab6cda4c15fa0681262fe524187b7f4,2020-08-30 11:35:00 UTC,cool,auto,732,760,760,VA,Cape Charles,15,False,False,True,Electric
2202100,f792611d82b789bdc1b6e189b4afbc25bd9e5815,2020-08-25 19:15:00 UTC,cool,hold,767,760,760,VA,Alexandria,30,False,False,False,Gas
2202101,0bb5a17a193f56321319b72cea049cf71051c86c,2020-08-15 14:25:00 UTC,cool,auto,763,760,760,VA,Centreville,30,True,False,True,Electric


In [154]:
# Label every row with its year and month (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" to label the aggregates
aug_2020 = aug_2020.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [156]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/aug/aug_2020_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
aug_2020_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export them as one combined CSV

In [158]:
# List the CSV files in the month folder. os.listdir returns entries in
# arbitrary order, so sort them to get a reproducible row order when combined.
files = sorted(f for f in os.listdir("data/day/VA/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside a loop re-copies the accumulated rows on every iteration.
frames = [pd.read_csv("data/day/VA/aug/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
VA_aug = pd.concat(frames) if frames else pd.DataFrame()

VA_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00a80b7651daeb021a619256a237cba1d0a29ed8,aug,2017,cool,hold,Centreville,755.503518,756.622111,756.619095,25.0,False,False,False
1,01565e9263a0f04e74fcddc27ba8f0e88631879a,aug,2017,cool,hold,Arlington,721.609174,720.056881,720.042202,0.0,False,False,True
2,02436b66b29e9db0a27b3b51998acd715aeeae61,aug,2017,auto,auto,Dumfries,725.275362,725.000000,675.000000,30.0,True,False,True
3,02436b66b29e9db0a27b3b51998acd715aeeae61,aug,2017,auto,hold,Dumfries,718.162058,717.746952,645.701160,30.0,True,False,True
4,02bd25a2eb19543bf06464390c599dee686ffc39,aug,2017,cool,hold,Fairfax,731.935484,730.000000,730.000000,17.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2609,ff6eb356ac37289e094952e9eabb626cd15970d7,aug,2020,cool,hold,Stafford,730.150000,730.000000,730.000000,10.0,True,False,False
2610,ff8d2f6b238eda3bd3dbd2dd5268c48d5d933d11,aug,2020,auto,auto,Charlottesville,695.016529,688.586777,658.586777,6.0,False,False,False
2611,ff8d2f6b238eda3bd3dbd2dd5268c48d5d933d11,aug,2020,auto,hold,Charlottesville,704.602317,697.953668,664.942085,6.0,False,False,False
2612,ff8d4a7cc516fb17641deba1e511d7b7089022ca,aug,2020,cool,auto,Arlington,796.066667,790.000000,650.000000,5.0,False,False,True


In [160]:
# Export the combined month file; create the folder first because to_csv
# does not create missing directories.
out_path = Path("Scraper_Output/State_Month_Day/VA/VA_aug.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
VA_aug.to_csv(out_path, header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw December 2017 CSV for VA
dec_2017 = pd.read_csv(Path("../data_large/VA-day") / "2017-dec-day-VA.csv")

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(dec_2017.index[out_of_range], inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,b1a790812746721e0f78f6620b47bd67bbe86c29,2017-12-08 16:50:00 UTC,heat,hold,700,685,685,VA,Leesburg,15,False,False,False,Gas
2,6f2db8c1a12b5d627574775416621b86c5503e86,2017-12-27 12:20:00 UTC,heat,hold,693,695,695,VA,Smithfield,5,True,False,False,Gas
3,027485824b2510f4bc679b9cb34e946d5eb3d58f,2017-12-24 12:45:00 UTC,heat,hold,703,695,695,VA,Virginia Beach,30,False,False,False,Gas
4,f8021088f78539c9d69b2fabec816c94eaf1576f,2017-12-26 15:15:00 UTC,auto,hold,699,715,665,VA,Arlington,30,False,False,False,Gas
5,6d351e86eabd51277749c8966081a149c1fd5623,2017-12-03 18:35:00 UTC,heat,hold,637,630,630,VA,Fairfax,15,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1174354,6f2db8c1a12b5d627574775416621b86c5503e86,2017-12-10 12:20:00 UTC,auto,hold,714,765,715,VA,Smithfield,5,True,False,False,Gas
1174355,ae71d0d11bdcff11224cad45f561be59e3dc001d,2017-12-10 13:40:00 UTC,auto,auto,706,765,715,VA,Stafford,40,True,False,True,Electric
1174356,3c2ed412fe9c6397204c0d86e49f123e497e2195,2017-12-17 18:25:00 UTC,auto,auto,712,765,715,VA,Fairfax,47,False,False,True,Electric
1174357,f6b7b9258d06417f516197b659af6a3f2aa19e2c,2017-12-24 14:20:00 UTC,auto,hold,665,765,645,VA,Alexandria,25,False,False,True,Electric


In [163]:
# Label every row with its year and month (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" to label the aggregates
dec_2017 = dec_2017.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [165]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/dec/dec_2017_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
dec_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 December Day

In [167]:
# Load the raw December 2018 CSV for VA
dec_2018 = pd.read_csv(Path("../data_large/VA-day") / "2018-dec-day-VA.csv")

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(dec_2018.index[out_of_range], inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,deed2021619223c3aac240c4e16563ed07fe9c8c,2018-12-07 13:50:00 UTC,heat,hold,701,707,707,VA,Burke,50,False,False,True,Electric
1,8eb641427a0e1a8ef34bb9aef0adb5d3edd8f984,2018-12-13 12:15:00 UTC,heat,hold,714,719,719,VA,Suffolk,15,False,False,False,Gas
2,1fadd22df0807108d6d578b633ca0ebb753dc53e,2018-12-31 13:55:00 UTC,heat,hold,720,734,734,VA,Alexandria,0,True,False,True,Electric
3,3d3290eb3a9b665b7c0fd12df189eaf41b4a5312,2018-12-19 17:30:00 UTC,auto,hold,721,840,720,VA,Nokesville,50,True,False,True,Electric
4,450ec4ff8de6345f7612eb7365cba7f9ddfeab79,2018-12-05 15:00:00 UTC,auto,hold,702,765,715,VA,Bealeton,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1835566,074e66186bbfa503443351bd1db0ebfd6583139b,2018-12-18 15:35:00 UTC,heat,hold,761,760,760,VA,Virginia Beach,20,False,False,True,Electric
1835567,f894d3f4980ce6923cc3e580b87cbe77469adef8,2018-12-30 18:55:00 UTC,auto,hold,663,760,660,VA,Fredericksburg,5,False,False,False,Gas
1835568,bf0b3c07b796089c26c82d5f9b6f4ccf3aac8fca,2018-12-30 12:40:00 UTC,heat,auto,759,760,760,VA,Yorktown,20,False,False,False,Gas
1835569,b6505b91da414a330ea05d0bee0f955f76f54958,2018-12-03 18:50:00 UTC,heat,hold,700,760,760,VA,Nokesville,17,False,False,False,Gas


In [169]:
# Label every row with its year and month (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" to label the aggregates
dec_2018 = dec_2018.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [171]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/dec/dec_2018_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
dec_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 December Day

In [173]:
# Load the raw December 2019 CSV for VA
dec_2019 = pd.read_csv(Path("../data_large/VA-day") / "2019-dec-day-VA.csv")

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(dec_2019.index[out_of_range], inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8f1b94227d91c3e4fafdc1da914a284e8dfdcfe3,2019-12-01 10:10:00 UTC,heat,hold,683,685,685,VA,Fredericksburg,45,True,False,True,Electric
1,d4b9ac6d12ad4757f36254aa9be7b13ec5692c4a,2019-12-09 18:00:00 UTC,auto,hold,724,779,709,VA,Chesapeake,20,True,False,False,Gas
2,78b50534978b303eb97e898c6b96d72aa9942a6e,2019-12-27 07:55:00 UTC,auto,hold,717,810,720,VA,Manassas,10,True,False,False,Gas
3,7abdb4748c435412468be0c1d6e6b3f0c452ff34,2019-12-30 18:20:00 UTC,heat,auto,727,732,730,VA,Alexandria,30,True,False,False,Gas
4,05840072f4671b450b05123abbc79480fea86e5e,2019-12-07 18:00:00 UTC,heat,hold,677,689,689,VA,Chesapeake,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2009532,94a7d9345d3f75e5b9aaf6a915e34451aa79d319,2019-12-24 10:55:00 UTC,auto,auto,706,765,715,VA,Midlothian,15,True,False,True,Electric
2009533,94a7d9345d3f75e5b9aaf6a915e34451aa79d319,2019-12-17 12:15:00 UTC,auto,auto,707,765,715,VA,Midlothian,15,True,False,True,Electric
2009534,b7078aea9eb83f91b526b66baf74e446cf03ba24,2019-12-12 12:00:00 UTC,heat,auto,742,765,740,VA,Hampton,0,False,False,False,Gas
2009535,a23c8854a7a56aed204e6058008ec06fc4aec17f,2019-12-04 16:10:00 UTC,auto,hold,689,765,695,VA,Hopewell,79,False,False,True,Electric


In [175]:
# Label every row with its year and month (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" to label the aggregates
dec_2019 = dec_2019.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [177]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/dec/dec_2019_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
dec_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 December Day

In [179]:
# Load the raw December 2020 CSV for VA
dec_2020 = pd.read_csv(Path("../data_large/VA-day") / "2020-dec-day-VA.csv")

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(dec_2020.index[out_of_range], inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,54e01ccee5770672dd4ff60dff9bdcdb7460ce41,2020-12-04 15:40:00 UTC,heat,hold,729,736,736,VA,Virginia Beach,25,False,False,False,Gas
2,e289d959fb5f4af872aa8e7a3b5f475ad29ee93f,2020-12-05 19:35:00 UTC,auto,hold,690,775,695,VA,Virginia Beach,55,False,False,False,Gas
3,080ca5ccc36bac34354b2e8ed5aa6350520f0659,2020-12-05 15:00:00 UTC,heat,hold,734,739,739,VA,Virginia Beach,5,False,False,True,Electric
4,a00997f1fb387b96f9e9d7f5e37f04fe3e05438e,2020-12-30 16:20:00 UTC,auto,hold,700,747,697,VA,Vinton,0,True,False,False,Gas
5,da66459c69d7b3e70a23b507465fec3322a0fbe2,2020-12-06 12:05:00 UTC,heat,hold,694,698,698,VA,Arlington,45,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1714402,0b7e50f090e52a6160fed4965e42e267f791c80e,2020-12-08 13:55:00 UTC,auto,hold,711,765,715,VA,Portsmouth,45,False,False,True,Electric
1714403,ebd964262bbb27a29a94fc65080fe2fdb2ded76c,2020-12-16 12:20:00 UTC,auto,hold,706,765,705,VA,Richmond,5,False,False,False,Gas
1714404,bfb8e9abcfcc9981b7a85b31539cc8ab615e8c63,2020-12-12 17:15:00 UTC,heat,auto,711,765,715,VA,Boones Mill,20,False,False,True,Electric
1714406,ebd964262bbb27a29a94fc65080fe2fdb2ded76c,2020-12-17 12:20:00 UTC,auto,hold,704,765,705,VA,Richmond,5,False,False,False,Gas


In [181]:
# Label every row with its year and month (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" to label the aggregates
dec_2020 = dec_2020.rename(columns={
    col: f"{col}_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [183]:
# Average the numeric columns per Identifier / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError
# on them by default instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file. The group keys live in the index, so index=True writes them.
out_path = Path("data/day/VA/dec/dec_2020_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)  # to_csv does not create missing folders
dec_2020_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export them as one combined CSV

In [185]:
# List the CSV files in the month folder. os.listdir returns entries in
# arbitrary order, so sort them to get a reproducible row order when combined.
files = sorted(f for f in os.listdir("data/day/VA/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside a loop re-copies the accumulated rows on every iteration.
frames = [pd.read_csv("data/day/VA/dec/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
VA_dec = pd.concat(frames) if frames else pd.DataFrame()

VA_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0071062a9d7cf61a15599e4818290e802f205631,dec,2017,heat,auto,Arlington,716.333333,740.416667,739.583333,10.0,False,False,True
1,0093e2266316dab9e25b0e8c51876169485cae66,dec,2017,auto,hold,Virginia Beach,734.142857,780.000000,749.214286,40.0,True,False,True
2,00a77c90abdcc1ac45fa8c839401cc6a0539b164,dec,2017,heat,hold,Alexandria,673.043478,680.000000,680.000000,10.0,False,False,True
3,00a80b7651daeb021a619256a237cba1d0a29ed8,dec,2017,heat,hold,Centreville,692.319734,693.496205,693.496205,25.0,False,False,False
4,00e6fa301b2ce8163c0122b419f864442dd204c6,dec,2017,heat,auto,Chesapeake,664.980556,688.888889,688.888889,60.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2742,ff020a06635306a6879f02330063b6bd5c6af5d8,dec,2020,heat,auto,Chesterfield,667.632275,669.986772,669.531746,10.0,False,False,False
2743,ff020a06635306a6879f02330063b6bd5c6af5d8,dec,2020,heat,hold,Chesterfield,676.828571,678.180451,675.778195,10.0,False,False,False
2744,ff089a5b75a6c44545cf3e1017feedb5ac35a8f9,dec,2020,heat,hold,Great Bridge East,690.541667,690.500000,689.875000,5.0,True,False,True
2745,ff093cf124aea558c90a2f764ade9d755b346ac6,dec,2020,heat,auto,Gainesville,716.435294,710.000000,710.000000,20.0,False,False,False


In [187]:
# Export the combined month file; create the folder first because to_csv
# does not create missing directories.
out_path = Path("Scraper_Output/State_Month_Day/VA/VA_dec.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
VA_dec.to_csv(out_path, header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly files from the state's folder
2. Export them as one combined CSV for the state

In [188]:
# List the combined monthly CSV files for the state. os.listdir returns entries
# in arbitrary order, so sort them to get a reproducible row order when combined.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/VA/") if f.endswith(".csv")
)

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; calling pd.concat
# inside a loop re-copies the accumulated rows on every iteration.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/VA/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
VA_all = pd.concat(frames) if frames else pd.DataFrame()

VA_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00a80b7651daeb021a619256a237cba1d0a29ed8,aug,2017,cool,hold,Centreville,755.503518,756.622111,756.619095,25.0,False,False,False
1,01565e9263a0f04e74fcddc27ba8f0e88631879a,aug,2017,cool,hold,Arlington,721.609174,720.056881,720.042202,0.0,False,False,True
2,02436b66b29e9db0a27b3b51998acd715aeeae61,aug,2017,auto,auto,Dumfries,725.275362,725.000000,675.000000,30.0,True,False,True
3,02436b66b29e9db0a27b3b51998acd715aeeae61,aug,2017,auto,hold,Dumfries,718.162058,717.746952,645.701160,30.0,True,False,True
4,02bd25a2eb19543bf06464390c599dee686ffc39,aug,2017,cool,hold,Fairfax,731.935484,730.000000,730.000000,17.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11049,ff141dbbec8ce3fd6bbb5e788d0720cf4b8c088d,jun,2021,auto,hold,Manassas,725.489796,730.142857,680.000000,48.0,True,False,True
11050,ff6eb356ac37289e094952e9eabb626cd15970d7,jun,2021,cool,hold,Stafford,707.921569,706.225490,705.647059,10.0,True,False,False
11051,ff8d2f6b238eda3bd3dbd2dd5268c48d5d933d11,jun,2021,auto,hold,Charlottesville,699.484463,698.827684,668.827684,6.0,False,False,False
11052,ff8d4a7cc516fb17641deba1e511d7b7089022ca,jun,2021,cool,hold,Arlington,740.454545,721.727273,717.727273,5.0,False,False,True


In [190]:
# Export the combined state file one level above the monthly folder, so it is
# not picked up when the monthly folder is listed again. Create the folder
# first because to_csv does not create missing directories.
out_path = Path("Scraper_Output/State_Month_Day/VA_all_day.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
VA_all.to_csv(out_path, header=True, index=False)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in every monthly frame.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['VA']
Unique jan_2018: ['VA']
Unique jan_2019: ['VA']
Unique jan_2020: ['VA']
Unique jan_2021: ['VA']
Unique feb_2017: ['VA']
Unique feb_2018: ['VA']
Unique feb_2019: ['VA']
Unique feb_2020: ['VA']
Unique feb_2021: ['VA']
Unique jun_2017: ['VA']
Unique jun_2018: ['VA']
Unique jun_2019: ['VA']
Unique jun_2020: ['VA']
Unique jun_2021: ['VA']
Unique jul_2017: ['VA']
Unique jul_2018: ['VA']
Unique jul_2019: ['VA']
Unique jul_2020: ['VA']
Unique jul_2021: ['VA']
Unique aug_2017: ['VA']
Unique aug_2018: ['VA']
Unique aug_2019: ['VA']
Unique aug_2020: ['VA']
Unique dec_2017: ['VA']
Unique dec_2018: ['VA']
Unique dec_2019: ['VA']
Unique dec_2020: ['VA']
