# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQueary

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 4 years

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Read in month csv for state
jan_2017 = pd.read_csv("../data_large/VT-day/2017-jan-day-VT.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2017.drop(jan_2017[jan_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-11T13:05:00Z,heat,hold,684,686,686,VT,Newfane,50,False,False,False,Gas
2,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-05T13:55:00Z,heat,hold,709,766,612,VT,Newfane,50,False,False,False,Gas
3,bc7f182018bb08d874582111d575271a5b08222d,2017-01-14T19:55:00Z,heat,auto,616,808,616,VT,Hinesburg,75,False,False,False,Gas
5,5b7f5dbb625ed5102fc3e91ac27731652483eafd,2017-01-07T18:50:00Z,heat,auto,555,786,652,VT,Newfane,0,False,False,False,Gas
6,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-26T13:30:00Z,heat,hold,699,706,706,VT,Newfane,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31050,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-03T13:25:00Z,heat,hold,702,720,720,VT,Newfane,50,False,False,False,Gas
31051,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-29T16:20:00Z,heat,auto,728,730,730,VT,Newfane,50,False,False,False,Gas
31052,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-29T15:55:00Z,heat,auto,710,730,730,VT,Newfane,50,False,False,False,Gas
31053,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-01-29T16:05:00Z,heat,auto,723,730,730,VT,Newfane,50,False,False,False,Gas


In [4]:
# Add year and month

jan_2017["Year"] = "2017"
jan_2017["Month"] = "Jan"

In [5]:
# Rename columns to label the aggregates

jan_2017 = jan_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [6]:
jan_2017_ave = jan_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
08e9d7365bb8f03903827359fb15dc779f0d72c8,Jan,2017,heat,hold,Stowe,672.75,680.0,680.0,15.0,False,False,False
0ddcca6e74610999ee10239e72330c797274efa8,Jan,2017,auto,hold,South Burlington,701.0,780.0,733.5,5.0,False,False,False
0ddcca6e74610999ee10239e72330c797274efa8,Jan,2017,heat,hold,South Burlington,691.0,750.0,750.0,5.0,False,False,False
0e51eab7b259c251d7b77f7174b1bee53fb83ff3,Jan,2017,heat,auto,westfield,647.872065,767.616194,655.378947,55.0,False,False,False
0e51eab7b259c251d7b77f7174b1bee53fb83ff3,Jan,2017,heat,hold,westfield,668.363971,677.058824,677.058824,55.0,False,False,False
13ac09831b625d07ec7e86d8d3961168f6204fe6,Jan,2017,auto,hold,South Burlington,667.956522,815.869565,649.608696,5.0,False,False,False
13ac09831b625d07ec7e86d8d3961168f6204fe6,Jan,2017,heat,hold,South Burlington,677.0,750.0,750.0,5.0,False,False,False
1ab5d6516027bf2645cf937303b886cde871a36e,Jan,2017,heat,auto,Hinesburg,642.666667,665.666667,638.611111,35.0,False,False,False
1ab5d6516027bf2645cf937303b886cde871a36e,Jan,2017,heat,hold,Hinesburg,645.166667,694.166667,669.166667,35.0,False,False,False
202b20f2fdcb124df820af1fd71955548fab26ff,Jan,2017,heat,auto,South Burlington,649.413401,650.035398,650.035398,0.0,True,False,False


In [7]:
# Export CSV file

jan_2017_ave.to_csv("data/day/VT/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Read in month csv for state
jan_2018 = pd.read_csv("../data_large/VT-day/2018-jan-day-VT.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2018.drop(jan_2018[jan_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9d377e2a176dfe7292b9ca0b5e145dadc364e4a9,2018-01-16 19:35:00 UTC,heat,hold,673,675,675,UT,Wellsville,0,False,False,False,Gas
1,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2018-01-04 14:40:00 UTC,heat,hold,659,665,665,UT,Midway,10,False,False,False,Gas
2,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2018-01-03 14:10:00 UTC,heat,hold,656,655,655,UT,Midway,10,False,False,False,Gas
3,7edc4e0e5f8a8d2959c82ec9345484ea1c326957,2018-01-08 16:40:00 UTC,heat,hold,666,665,665,UT,Midway,10,False,False,False,Gas
5,afc12a7d91651a4c8b7e0a2e75ac1635daae0cec,2018-01-30 14:00:00 UTC,auto,auto,683,840,670,UT,Riverton,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
668647,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-02 14:00:00 UTC,heat,hold,755,760,760,UT,West Valley City,47,False,False,False,Gas
668648,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-05 19:00:00 UTC,heat,hold,755,760,760,UT,West Valley City,47,False,False,False,Gas
668649,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-02 13:35:00 UTC,heat,hold,753,760,760,UT,West Valley City,47,False,False,False,Gas
668650,ed614b030c7fc181cc41e1b46c7f166fe6ba8c13,2018-01-04 15:00:00 UTC,heat,hold,754,760,760,UT,West Valley City,47,False,False,False,Gas


In [10]:
# Add year and month

jan_2018["Year"] = "2018"
jan_2018["Month"] = "Jan"


In [11]:
# Rename columns to label the aggregates

jan_2018 = jan_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [12]:
jan_2018_ave = jan_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2018_ave

In [13]:
# Export CSV file

jan_2018_ave.to_csv("data/day/VT/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Read in month csv for state
jan_2019 = pd.read_csv("../data_large/VT-day/2019-jan-day-VT.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2019.drop(jan_2019[jan_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8c9560256e24bd3bd9cc777165a1c0ecd1fc1e9d,2019-01-06 19:25:00 UTC,heat,hold,749,750,750,VT,Winooski,105,False,False,False,Gas
2,f6c46fd20d2c1b73d78b31025e35646280dee31b,2019-01-13 15:25:00 UTC,heat,hold,676,699,668,VT,Rutland,50,True,False,False,Gas
3,8c9560256e24bd3bd9cc777165a1c0ecd1fc1e9d,2019-01-13 18:10:00 UTC,heat,auto,771,730,780,VT,Winooski,105,False,False,False,Gas
5,46f977bf6b32f3c8c3bce774e4fd6eda8d92db21,2019-01-13 18:05:00 UTC,heat,auto,679,696,678,VT,Peru,5,False,False,True,Electric
6,fb9e6433ec69f11def9740170e6b089149814b76,2019-01-28 11:30:00 UTC,heat,hold,702,762,666,VT,Burlington,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124018,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2019-01-19 15:40:00 UTC,heat,auto,739,740,740,VT,Colchester,0,False,False,False,Gas
124019,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2019-01-31 19:30:00 UTC,heat,auto,735,740,740,VT,Colchester,0,False,False,False,Gas
124020,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2019-01-19 14:40:00 UTC,heat,auto,743,740,740,VT,Colchester,0,False,False,False,Gas
124021,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2019-01-19 16:30:00 UTC,heat,auto,734,740,740,VT,Colchester,0,False,False,False,Gas


In [16]:
# Add year and month

jan_2019["Year"] = "2019"
jan_2019["Month"] = "Jan"


In [17]:
# Rename columns to label the aggregates

jan_2019 = jan_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [18]:
jan_2019_ave = jan_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2019_ave

In [19]:
# Export CSV file

jan_2019_ave.to_csv("data/day/VT/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Read in month csv for state
jan_2020 = pd.read_csv("../data_large/VT-day/2020-jan-day-VT.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2020.drop(jan_2020[jan_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,10c71a921bd6171381c0ee1080795b3cd0e8299d,2020-01-11 12:15:00 UTC,heat,auto,658,656,623,VT,Plymouth,70,False,False,False,Gas
1,f0e456d9578ab0c4553079e8c0abdd06d0a24936,2020-01-21 16:00:00 UTC,heat,hold,710,775,689,VT,Stowe,30,False,False,False,Gas
2,740b64e1cafcfbc98a8ee89cd0a518eee4163aa1,2020-01-18 13:25:00 UTC,heat,hold,633,679,677,VT,Peru,5,False,False,True,Electric
3,e0e23265d37b59b96750ef2355a86e25695673b7,2020-01-01 17:45:00 UTC,heat,hold,689,745,667,VT,Woodstock,20,False,False,False,Gas
4,463e091043b222b3e88af924abd32a8b3e5042e2,2020-01-30 19:45:00 UTC,heat,hold,677,714,658,VT,Thetford,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129099,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-01-05 14:50:00 UTC,heat,hold,716,740,740,VT,Concord,0,False,False,False,Gas
129100,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-01-06 18:25:00 UTC,heat,hold,740,740,740,VT,Concord,0,False,False,False,Gas
129101,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-01-06 13:50:00 UTC,heat,hold,729,740,740,VT,Concord,0,False,False,False,Gas
129102,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-01-05 18:10:00 UTC,heat,hold,723,740,740,VT,Concord,0,False,False,False,Gas


In [22]:
# Add year and month

jan_2020["Year"] = "2020"
jan_2020["Month"] = "Jan"


In [23]:
# Rename columns to label the aggregates

jan_2020 = jan_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [24]:
jan_2020_ave = jan_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2020_ave

In [25]:
# Export CSV file

jan_2020_ave.to_csv("data/day/VT/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Read in month csv for state
jan_2021 = pd.read_csv("../data_large/VT-day/2021-jan-day-VT.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
jan_2021.drop(jan_2021[jan_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-01-06 18:45:00 UTC,heat,hold,683,685,685,VT,Milton,15,False,False,False,Gas
1,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-01-08 19:15:00 UTC,heat,hold,683,685,685,VT,Milton,15,False,False,False,Gas
2,0bff4db5b845e75add7155a51150efdad848627f,2021-01-22 19:20:00 UTC,heat,hold,740,780,780,VT,Ludlow,88,False,False,False,Gas
3,e0e23265d37b59b96750ef2355a86e25695673b7,2021-01-02 14:10:00 UTC,heat,hold,682,710,674,VT,Woodstock,20,False,False,False,Gas
4,463e091043b222b3e88af924abd32a8b3e5042e2,2021-01-24 15:00:00 UTC,heat,hold,663,703,682,VT,Thetford,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88278,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2021-01-24 13:35:00 UTC,heat,hold,717,740,740,VT,Hartford,0,True,False,False,Gas
88279,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2021-01-02 14:55:00 UTC,heat,hold,735,740,740,VT,Hartford,0,True,False,False,Gas
88280,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2021-01-02 15:45:00 UTC,heat,hold,739,740,740,VT,Hartford,0,True,False,False,Gas
88281,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2021-01-02 12:55:00 UTC,heat,hold,707,740,740,VT,Hartford,0,True,False,False,Gas


In [28]:
# Add year and month

jan_2021["Year"] = "2021"
jan_2021["Month"] = "Jan"


In [29]:
# Rename columns to label the aggregates

jan_2021 = jan_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [30]:
jan_2021_ave = jan_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jan_2021_ave

In [31]:
# Export CSV file

jan_2021_ave.to_csv("data/day/VT/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/VT/jan/") if f.endswith(".csv")]

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_jan = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/VT/jan/" + file)
    VT_jan = pd.concat([VT_jan, df])
    
VT_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,08e9d7365bb8f03903827359fb15dc779f0d72c8,Jan,2017,heat,hold,Stowe,672.750000,680.000000,680.000000,15.0,False,False,False
1,0ddcca6e74610999ee10239e72330c797274efa8,Jan,2017,auto,hold,South Burlington,701.000000,780.000000,733.500000,5.0,False,False,False
2,0ddcca6e74610999ee10239e72330c797274efa8,Jan,2017,heat,hold,South Burlington,691.000000,750.000000,750.000000,5.0,False,False,False
3,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,Jan,2017,heat,auto,westfield,647.872065,767.616194,655.378947,55.0,False,False,False
4,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,Jan,2017,heat,hold,westfield,668.363971,677.058824,677.058824,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,fae2077e3bd726479b0d4f7381810a11ddfd6945,Jan,2021,heat,hold,Concord,697.392591,700.000000,700.000000,0.0,False,False,False
79,fb25c89e80abf37ee43b1d4c3b1aa4aaf4e948fb,Jan,2021,heat,hold,Underhill Center,651.400560,659.249300,655.144258,30.0,False,False,False
80,fb5559f697fdfaa37875b00d94ac7a600e0d864c,Jan,2021,heat,hold,Brownsville,668.243902,672.658537,672.658537,99.0,False,False,True
81,fb5f7cfad04c7e8e661e103399b09700809a96fb,Jan,2021,heat,hold,Woodstock,671.948113,688.825472,688.113208,20.0,False,False,False


In [34]:
VT_jan.to_csv("Scraper_Output/State_Month_Day/VT/VT_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Read in month csv for state
feb_2017 = pd.read_csv("../data_large/VT-day/2017-feb-day-VT.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2017.drop(feb_2017[feb_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-02-14T17:40:00Z,heat,hold,710,706,706,VT,Newfane,50,False,False,False,Gas
1,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-02-14T15:10:00Z,heat,hold,706,706,706,VT,Newfane,50,False,False,False,Gas
2,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-02-14T18:25:00Z,heat,hold,712,706,706,VT,Newfane,50,False,False,False,Gas
3,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-02-10T16:20:00Z,heat,hold,701,696,696,VT,Newfane,50,False,False,False,Gas
4,e9042a7ed72dc1ec47946a7b3e214edd1d26090f,2017-02-14T13:50:00Z,heat,hold,703,706,706,VT,Newfane,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29220,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,2017-02-11T09:45:00Z,heat,hold,712,720,720,VT,westfield,55,False,False,False,Gas
29221,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,2017-02-11T10:55:00Z,heat,hold,718,720,720,VT,westfield,55,False,False,False,Gas
29222,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,2017-02-11T07:15:00Z,heat,hold,718,720,720,VT,westfield,55,False,False,False,Gas
29223,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,2017-02-11T10:00:00Z,heat,hold,712,720,720,VT,westfield,55,False,False,False,Gas


In [37]:
# Add year and month

feb_2017["Year"] = "2017"
feb_2017["Month"] = "feb"

In [38]:
# Rename columns to label the aggregates

feb_2017 = feb_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [39]:
feb_2017_ave = feb_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2017_ave

In [40]:
# Export CSV file

feb_2017_ave.to_csv("data/day/VT/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Read in month csv for state
feb_2018 = pd.read_csv("../data_large/VT-day/2018-feb-day-VT.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2018.drop(feb_2018[feb_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
4,6a5c72f744242e0fe1b5c89734fb53e692a08655,2018-02-24 12:55:00 UTC,heat,auto,698,728,665,VT,Charlotte,120,False,False,False,Gas
7,463e091043b222b3e88af924abd32a8b3e5042e2,2018-02-25 17:05:00 UTC,heat,hold,662,702,702,VT,Thetford,120,False,False,False,Gas
14,e0e23265d37b59b96750ef2355a86e25695673b7,2018-02-25 19:00:00 UTC,heat,hold,683,766,664,VT,Woodstock,20,False,False,False,Gas
15,e99523eee5fe9681d563152bd286b13ef86eb975,2018-02-24 17:00:00 UTC,heat,auto,681,740,692,VT,Braintree,30,False,False,False,Gas
35,e0e23265d37b59b96750ef2355a86e25695673b7,2018-02-21 17:20:00 UTC,heat,hold,669,747,657,VT,Woodstock,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82431,e44893029fdab02205cf581b038d85c7d63535cc,2018-02-18 12:20:00 UTC,heat,hold,726,740,740,VT,wilmington,10,False,False,False,Gas
82432,e44893029fdab02205cf581b038d85c7d63535cc,2018-02-18 16:00:00 UTC,heat,hold,753,740,740,VT,wilmington,10,False,False,False,Gas
82433,e44893029fdab02205cf581b038d85c7d63535cc,2018-02-19 19:05:00 UTC,heat,hold,736,740,740,VT,wilmington,10,False,False,False,Gas
82434,e44893029fdab02205cf581b038d85c7d63535cc,2018-02-19 14:05:00 UTC,heat,hold,741,740,740,VT,wilmington,10,False,False,False,Gas


In [43]:
# Add year and month

feb_2018["Year"] = "2018"
feb_2018["Month"] = "feb"


In [44]:
# Rename columns to label the aggregates

feb_2018 = feb_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [45]:
feb_2018_ave = feb_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2018_ave

In [46]:
# Export CSV file

feb_2018_ave.to_csv("data/day/VT/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Read in month csv for state
feb_2019 = pd.read_csv("../data_large/VT-day/2019-feb-day-VT.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2019.drop(feb_2019[feb_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1ca3b950e95b5c6a276165eff5b341271c21f12d,2019-02-02 18:25:00 UTC,heat,auto,700,760,685,VT,Williston North,30,True,False,False,Gas
1,a99b7e08d0379ed03cb1de3ed8a6ff5fb0e30ead,2019-02-03 15:20:00 UTC,heat,hold,666,688,664,VT,Burlington,105,True,False,False,Gas
2,612fe095174db97db714495219be142a32832056,2019-02-17 14:45:00 UTC,heat,hold,669,675,675,VT,Mount Holly,50,False,False,False,Gas
3,6523b6fc085e8cf35bbf6e26d50dedb349511a7b,2019-02-09 17:50:00 UTC,heat,auto,698,688,688,VT,Colchester,20,True,False,False,Gas
4,2a41fbabce7e2de5b4604808f90017db24700f6e,2019-02-10 13:00:00 UTC,heat,auto,613,650,641,VT,Waterville,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
86960,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2019-02-04 11:15:00 UTC,heat,hold,739,740,740,VT,Hartford,0,True,False,False,Gas
86961,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2019-02-04 11:10:00 UTC,heat,hold,737,740,740,VT,Hartford,0,True,False,False,Gas
86962,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2019-02-04 11:10:00 UTC,heat,hold,737,740,740,VT,Hartford,0,True,False,False,Gas
86963,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,2019-02-13 13:15:00 UTC,heat,hold,686,740,740,VT,Hartford,0,True,False,False,Gas


In [49]:
# Add year and month

feb_2019["Year"] = "2019"
feb_2019["Month"] = "feb"


In [50]:
# Rename columns to label the aggregates

feb_2019 = feb_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [51]:
feb_2019_ave = feb_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2019_ave

In [52]:
# Export CSV file

feb_2019_ave.to_csv("data/day/VT/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Read in month csv for state
feb_2020 = pd.read_csv("../data_large/VT-day/2020-feb-day-VT.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2020.drop(feb_2020[feb_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a5c72f744242e0fe1b5c89734fb53e692a08655,2020-02-04 13:40:00 UTC,heat,auto,689,736,655,VT,Charlotte,120,False,False,False,Gas
1,6aa9c20beb412b4653fc173a9f0e985c5aa96c8e,2020-02-09 18:10:00 UTC,heat,auto,676,650,679,VT,Proctor,110,True,False,False,Gas
2,1e85c47c9e77a77a67e16e53865aed852d284e23,2020-02-26 11:40:00 UTC,heat,auto,635,694,694,VT,Thetford Center,60,True,False,False,Gas
3,0bff4db5b845e75add7155a51150efdad848627f,2020-02-02 13:55:00 UTC,heat,hold,691,650,641,VT,Ludlow,88,False,False,False,Gas
4,1e85c47c9e77a77a67e16e53865aed852d284e23,2020-02-26 11:40:00 UTC,heat,auto,635,694,694,VT,Thetford Center,60,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
119763,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-02-20 13:10:00 UTC,heat,auto,716,740,740,VT,Concord,0,False,False,False,Gas
119764,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-02-20 11:05:00 UTC,heat,auto,719,740,740,VT,Concord,0,False,False,False,Gas
119765,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-02-22 07:35:00 UTC,heat,auto,728,740,740,VT,Concord,0,False,False,False,Gas
119766,fae2077e3bd726479b0d4f7381810a11ddfd6945,2020-02-20 19:15:00 UTC,heat,auto,738,740,740,VT,Concord,0,False,False,False,Gas


In [55]:
# Add year and month

feb_2020["Year"] = "2020"
feb_2020["Month"] = "feb"


In [56]:
# Rename columns to label the aggregates

feb_2020 = feb_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [57]:
feb_2020_ave = feb_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2020_ave

In [58]:
# Export CSV file

feb_2020_ave.to_csv("data/day/VT/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Read in month csv for state
feb_2021 = pd.read_csv("../data_large/VT-day/2021-feb-day-VT.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
feb_2021.drop(feb_2021[feb_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ca7bd09b630d8b445a2802cc54e5b3610f3faf66,2021-02-18 19:15:00 UTC,heat,hold,697,774,702,VT,Saint Albans,30,True,False,False,Gas
1,e0e23265d37b59b96750ef2355a86e25695673b7,2021-02-02 14:30:00 UTC,heat,hold,686,721,679,VT,Woodstock,20,False,False,False,Gas
2,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-02-05 14:45:00 UTC,heat,hold,688,685,685,VT,Milton,15,False,False,False,Gas
5,e0e23265d37b59b96750ef2355a86e25695673b7,2021-02-12 17:15:00 UTC,heat,hold,699,699,687,VT,Woodstock,20,False,False,False,Gas
6,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-02-06 15:25:00 UTC,heat,hold,687,685,685,VT,Milton,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73211,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2021-02-02 14:35:00 UTC,heat,hold,687,690,690,VT,Weston,15,False,False,False,Gas
73212,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2021-02-01 12:15:00 UTC,heat,hold,681,690,690,VT,Weston,15,False,False,False,Gas
73213,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2021-02-02 19:10:00 UTC,heat,hold,700,690,690,VT,Weston,15,False,False,False,Gas
73214,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2021-02-01 19:30:00 UTC,heat,hold,682,690,690,VT,Weston,15,False,False,False,Gas


In [61]:
# Add year and month

feb_2021["Year"] = "2021"
feb_2021["Month"] = "feb"


In [62]:
# Rename columns to label the aggregates

feb_2021 = feb_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [63]:
feb_2021_ave = feb_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# feb_2021_ave

In [64]:
# Export CSV file

feb_2021_ave.to_csv("data/day/VT/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/VT/feb/") if f.endswith(".csv")]

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_feb = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/VT/feb/" + file)
    VT_feb = pd.concat([VT_feb, df])
    
VT_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,08e5ea6a99f568b84b88969934ce6fd54f4a8359,feb,2017,heat,auto,Bethel,703.135854,800.000000,638.571429,0.0,False,False,False
1,08e5ea6a99f568b84b88969934ce6fd54f4a8359,feb,2017,heat,hold,Bethel,714.000516,661.018576,661.018576,0.0,False,False,False
2,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,feb,2017,heat,auto,westfield,650.822004,724.374776,658.271020,55.0,False,False,False
3,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,feb,2017,heat,hold,westfield,681.716418,687.246269,687.246269,55.0,False,False,False
4,13ac09831b625d07ec7e86d8d3961168f6204fe6,feb,2017,auto,hold,South Burlington,659.793103,820.344828,650.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,fb25c89e80abf37ee43b1d4c3b1aa4aaf4e948fb,feb,2021,heat,hold,Underhill Center,648.278571,655.682143,652.783929,30.0,False,False,False
72,fb5559f697fdfaa37875b00d94ac7a600e0d864c,feb,2021,cool,hold,Brownsville,692.833333,670.333333,661.000000,99.0,False,False,True
73,fb5559f697fdfaa37875b00d94ac7a600e0d864c,feb,2021,heat,hold,Brownsville,697.571429,730.428571,721.142857,99.0,False,False,True
74,fb5f7cfad04c7e8e661e103399b09700809a96fb,feb,2021,heat,hold,Woodstock,676.497409,691.393782,691.129534,20.0,False,False,False


In [67]:
VT_feb.to_csv("Scraper_Output/State_Month_Day/VT/VT_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Read in month csv for state
jun_2017 = pd.read_csv("../data_large/VT-day/2017-jun-day-VT.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2017.drop(jun_2017[jun_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7cf01f9297d88d3d00700bf97cf8051cd99241f8,2017-06-06 19:50:00 UTC,heat,hold,693,695,695,VT,Woodford,10,False,False,True,Electric
1,b55bdc8ba9fb07580b1e047e052a9b017d45c626,2017-06-17 12:40:00 UTC,heat,hold,655,722,722,VT,Jericho,27,False,False,False,Gas
2,7cf01f9297d88d3d00700bf97cf8051cd99241f8,2017-06-06 19:45:00 UTC,heat,hold,694,695,695,VT,Woodford,10,False,False,True,Electric
3,7cf01f9297d88d3d00700bf97cf8051cd99241f8,2017-06-06 19:30:00 UTC,heat,hold,693,695,695,VT,Woodford,10,False,False,True,Electric
4,7cf01f9297d88d3d00700bf97cf8051cd99241f8,2017-06-06 19:20:00 UTC,heat,hold,685,695,695,VT,Woodford,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37230,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2017-06-25 17:40:00 UTC,cool,hold,742,740,740,VT,Richmond,50,False,False,False,Gas
37231,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2017-06-25 16:25:00 UTC,cool,hold,736,740,740,VT,Richmond,50,False,False,False,Gas
37232,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2017-06-25 15:50:00 UTC,cool,hold,744,740,740,VT,Richmond,50,False,False,False,Gas
37233,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2017-06-25 18:25:00 UTC,cool,hold,740,740,740,VT,Richmond,50,False,False,False,Gas


In [70]:
# Add year and month

jun_2017["Year"] = "2017"
jun_2017["Month"] = "jun"

In [71]:
# Rename columns to label the aggregates

jun_2017 = jun_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [72]:
jun_2017_ave = jun_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2017_ave

In [73]:
# Export CSV file

jun_2017_ave.to_csv("data/day/VT/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Read in month csv for state
jun_2018 = pd.read_csv("../data_large/VT-day/2018-jun-day-VT.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2018.drop(jun_2018[jun_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,51e33fc01a3f8c3a3b2d7e60c49dfe4593119b0d,2018-06-01 09:40:00 UTC,cool,hold,731,742,742,VT,Williston,20,False,False,False,Gas
1,51e33fc01a3f8c3a3b2d7e60c49dfe4593119b0d,2018-06-01 10:15:00 UTC,cool,hold,732,742,742,VT,Williston,20,False,False,False,Gas
2,51e33fc01a3f8c3a3b2d7e60c49dfe4593119b0d,2018-06-01 11:05:00 UTC,cool,hold,733,742,742,VT,Williston,20,False,False,False,Gas
3,028cb525d267c3428a3b3c872643302a06d19401,2018-06-15 13:30:00 UTC,heat,hold,693,770,754,VT,Thetford,120,False,False,False,Gas
4,51e33fc01a3f8c3a3b2d7e60c49dfe4593119b0d,2018-06-01 09:50:00 UTC,cool,hold,732,742,742,VT,Williston,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72488,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2018-06-29 19:55:00 UTC,cool,hold,765,740,740,VT,Richmond,50,False,False,False,Gas
72489,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2018-06-16 17:15:00 UTC,cool,hold,735,740,740,VT,Richmond,50,False,False,False,Gas
72490,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2018-06-16 17:35:00 UTC,cool,hold,740,740,740,VT,Richmond,50,False,False,False,Gas
72491,b1e5ee0e5f54079b59e15528166e6b515e6c597c,2018-06-16 18:05:00 UTC,cool,hold,746,740,740,VT,Richmond,50,False,False,False,Gas


In [76]:
# Add year and month

jun_2018["Year"] = "2018"
jun_2018["Month"] = "jun"

In [77]:
# Rename columns to label the aggregates

jun_2018 = jun_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [78]:
jun_2018_ave = jun_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2018_ave

In [79]:
# Export CSV file

jun_2018_ave.to_csv("data/day/VT/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Read in month csv for state
jun_2019 = pd.read_csv("../data_large/VT-day/2019-jun-day-VT.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2019.drop(jun_2019[jun_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7cf01f9297d88d3d00700bf97cf8051cd99241f8,2019-06-02 18:05:00 UTC,cool,auto,717,705,655,VT,Woodford,10,False,False,True,Electric
1,6a5c72f744242e0fe1b5c89734fb53e692a08655,2019-06-05 13:25:00 UTC,heat,auto,716,735,667,VT,Charlotte,120,False,False,False,Gas
3,7cf01f9297d88d3d00700bf97cf8051cd99241f8,2019-06-02 11:05:00 UTC,cool,auto,683,705,655,VT,Woodford,10,False,False,True,Electric
4,6a5c72f744242e0fe1b5c89734fb53e692a08655,2019-06-25 16:00:00 UTC,heat,auto,718,731,681,VT,Charlotte,120,False,False,False,Gas
5,647073dd061dfdbda60d6ebe18579d91c09c5323,2019-06-19 19:00:00 UTC,cool,auto,752,750,750,VT,Milton,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90055,d58be700034341c053836c3763723e9a90b90ed2,2019-06-13 13:55:00 UTC,cool,hold,709,740,740,VT,Essex Junction,55,False,False,False,Gas
90056,d58be700034341c053836c3763723e9a90b90ed2,2019-06-12 15:45:00 UTC,cool,hold,726,740,740,VT,Essex Junction,55,False,False,False,Gas
90057,d58be700034341c053836c3763723e9a90b90ed2,2019-06-12 13:10:00 UTC,cool,hold,706,740,740,VT,Essex Junction,55,False,False,False,Gas
90058,d58be700034341c053836c3763723e9a90b90ed2,2019-06-12 16:15:00 UTC,cool,hold,728,740,740,VT,Essex Junction,55,False,False,False,Gas


In [82]:
# Add year and month

jun_2019["Year"] = "2019"
jun_2019["Month"] = "jun"

In [83]:
# Rename columns to label the aggregates

jun_2019 = jun_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [84]:
jun_2019_ave = jun_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2019_ave

In [85]:
# Export CSV file

jun_2019_ave.to_csv("data/day/VT/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Read in month csv for state
jun_2020 = pd.read_csv("../data_large/VT-day/2020-jun-day-VT.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2020.drop(jun_2020[jun_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
201,7fa021bd983c34f07a6ab6cb5cf89b2a51fdc387,2020-06-26 19:20:00 UTC,heat,auto,710,650,610,VT,Bethel,0,False,False,False,Gas
202,7fa021bd983c34f07a6ab6cb5cf89b2a51fdc387,2020-06-21 15:50:00 UTC,heat,auto,785,650,610,VT,Bethel,0,False,False,False,Gas
203,7fa021bd983c34f07a6ab6cb5cf89b2a51fdc387,2020-06-25 10:45:00 UTC,heat,auto,659,650,610,VT,Bethel,0,False,False,False,Gas
204,7fa021bd983c34f07a6ab6cb5cf89b2a51fdc387,2020-06-21 11:20:00 UTC,heat,auto,726,650,610,VT,Bethel,0,False,False,False,Gas
205,7fa021bd983c34f07a6ab6cb5cf89b2a51fdc387,2020-06-20 15:15:00 UTC,heat,auto,723,650,610,VT,Bethel,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81110,662953f85593ffcb52ca9d8569604ec908481b51,2020-06-28 08:45:00 UTC,cool,hold,649,650,650,VT,South Burlington,0,False,False,False,Gas
81111,662953f85593ffcb52ca9d8569604ec908481b51,2020-06-16 07:45:00 UTC,cool,hold,650,650,650,VT,South Burlington,0,False,False,False,Gas
81112,662953f85593ffcb52ca9d8569604ec908481b51,2020-06-21 09:35:00 UTC,cool,hold,651,650,650,VT,South Burlington,0,False,False,False,Gas
81113,662953f85593ffcb52ca9d8569604ec908481b51,2020-06-03 08:15:00 UTC,cool,hold,643,650,650,VT,South Burlington,0,False,False,False,Gas


In [88]:
# Add year and month

jun_2020["Year"] = "2020"
jun_2020["Month"] = "jun"

In [89]:
# Rename columns to label the aggregates

jun_2020 = jun_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [90]:
jun_2020_ave = jun_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2020_ave

In [91]:
# Export CSV file

jun_2020_ave.to_csv("data/day/VT/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Read in month csv for state
jun_2021 = pd.read_csv("../data_large/VT-day/2021-jun-day-VT.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)
jun_2021.drop(jun_2021[jun_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-06-06 12:50:00 UTC,cool,hold,748,750,750,VT,Milton,15,False,False,False,Gas
1,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-06-06 13:30:00 UTC,cool,hold,752,750,750,VT,Milton,15,False,False,False,Gas
2,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2021-06-07 17:40:00 UTC,cool,hold,740,741,741,VT,Brownsville,99,False,False,True,Electric
3,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2021-06-07 18:15:00 UTC,cool,hold,746,741,741,VT,Brownsville,99,False,False,True,Electric
4,647073dd061dfdbda60d6ebe18579d91c09c5323,2021-06-06 13:45:00 UTC,cool,hold,754,750,750,VT,Milton,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50952,d58be700034341c053836c3763723e9a90b90ed2,2021-06-09 12:50:00 UTC,cool,hold,746,740,740,VT,Essex Junction,55,False,False,False,Gas
50953,d58be700034341c053836c3763723e9a90b90ed2,2021-06-09 13:55:00 UTC,cool,hold,740,740,740,VT,Essex Junction,55,False,False,False,Gas
50954,d58be700034341c053836c3763723e9a90b90ed2,2021-06-09 12:45:00 UTC,cool,hold,749,740,740,VT,Essex Junction,55,False,False,False,Gas
50955,d58be700034341c053836c3763723e9a90b90ed2,2021-06-09 11:10:00 UTC,cool,hold,740,740,740,VT,Essex Junction,55,False,False,False,Gas


In [94]:
# Add year and month

jun_2021["Year"] = "2021"
jun_2021["Month"] = "jun"

In [95]:
# Rename columns to label the aggregates

jun_2021 = jun_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [96]:
jun_2021_ave = jun_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jun_2021_ave

In [97]:
# Export CSV file

jun_2021_ave.to_csv("data/day/VT/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/VT/jun/") if f.endswith(".csv")]

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_jun = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/VT/jun/" + file)
    VT_jun = pd.concat([VT_jun, df])
    
VT_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,08e5ea6a99f568b84b88969934ce6fd54f4a8359,jun,2017,heat,auto,Bethel,769.149254,680.000000,670.000000,0.0,False,False,False
1,0ddcca6e74610999ee10239e72330c797274efa8,jun,2017,auto,hold,South Burlington,747.066667,740.000000,630.000000,5.0,False,False,False
2,1cc2b77c1f9dd8971c8050cb5604220405ec8404,jun,2017,auto,auto,Colchester,706.246085,704.995526,654.995526,0.0,True,False,False
3,1cc2b77c1f9dd8971c8050cb5604220405ec8404,jun,2017,auto,hold,Colchester,713.229851,719.820896,663.014925,0.0,True,False,False
4,202b20f2fdcb124df820af1fd71955548fab26ff,jun,2017,auto,hold,South Burlington,662.876761,694.000000,604.000000,0.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
32,f0da960a3fa374f1f719439150052e38c531d274,jun,2021,heat,hold,Hartland,706.868311,650.000000,640.000000,0.0,False,False,False
33,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,jun,2021,heat,hold,Hartford,681.296025,662.963389,646.174686,0.0,True,False,False
34,fb25c89e80abf37ee43b1d4c3b1aa4aaf4e948fb,jun,2021,heat,hold,Underhill Center,692.355408,650.000000,620.000000,30.0,False,False,False
35,fb5559f697fdfaa37875b00d94ac7a600e0d864c,jun,2021,cool,hold,Brownsville,741.208333,745.562500,744.062500,99.0,False,False,True


In [100]:
VT_jun.to_csv("Scraper_Output/State_Month_Day/VT/VT_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Read in month csv for state
jul_2017 = pd.read_csv("../data_large/VT-day/2017-jul-day-VT.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2017.drop(jul_2017[jul_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0ddcca6e74610999ee10239e72330c797274efa8,2017-07-30 18:40:00 UTC,auto,auto,755,725,675,VT,South Burlington,5,False,False,False,Gas
3,13ac09831b625d07ec7e86d8d3961168f6204fe6,2017-07-30 19:40:00 UTC,auto,hold,736,793,642,VT,South Burlington,5,False,False,False,Gas
4,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,2017-07-02 18:10:00 UTC,heat,auto,757,786,627,VT,westfield,55,False,False,False,Gas
5,662953f85593ffcb52ca9d8569604ec908481b51,2017-07-08 15:40:00 UTC,cool,hold,670,694,652,VT,South Burlington,0,False,False,False,Gas
6,662953f85593ffcb52ca9d8569604ec908481b51,2017-07-09 11:10:00 UTC,cool,hold,657,677,653,VT,South Burlington,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38296,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-07-22 18:30:00 UTC,auto,hold,721,720,650,VT,Colchester,0,True,False,False,Gas
38297,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-07-16 16:15:00 UTC,auto,hold,718,720,650,VT,Colchester,0,True,False,False,Gas
38298,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-07-18 17:55:00 UTC,auto,hold,721,720,650,VT,Colchester,0,True,False,False,Gas
38299,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-07-10 18:40:00 UTC,auto,hold,719,720,650,VT,Colchester,0,True,False,False,Gas


In [103]:
# Add year and month

jul_2017["Year"] = "2017"
jul_2017["Month"] = "jul"

In [104]:
# Rename columns to label the aggregates

jul_2017 = jul_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [105]:
jul_2017_ave = jul_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2017_ave

In [106]:
# Export CSV file

jul_2017_ave.to_csv("data/day/VT/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Read in month csv for state
jul_2018 = pd.read_csv("../data_large/VT-day/2018-jul-day-VT.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2018.drop(jul_2018[jul_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2018-07-07 11:45:00 UTC,cool,hold,705,760,760,VT,Williston,17,False,False,False,Gas
1,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2018-07-07 15:00:00 UTC,cool,hold,710,760,760,VT,Williston,17,False,False,False,Gas
2,028cb525d267c3428a3b3c872643302a06d19401,2018-07-31 17:05:00 UTC,heat,hold,706,750,750,VT,Thetford,120,False,False,False,Gas
3,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2018-07-07 16:15:00 UTC,cool,hold,733,760,760,VT,Williston,17,False,False,False,Gas
4,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2018-07-07 16:35:00 UTC,cool,hold,736,760,760,VT,Williston,17,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74822,fb5f7cfad04c7e8e661e103399b09700809a96fb,2018-07-30 15:00:00 UTC,cool,hold,703,740,740,VT,Woodstock,20,False,False,False,Gas
74823,fb5f7cfad04c7e8e661e103399b09700809a96fb,2018-07-30 15:35:00 UTC,cool,hold,708,740,740,VT,Woodstock,20,False,False,False,Gas
74824,fb5f7cfad04c7e8e661e103399b09700809a96fb,2018-07-30 14:50:00 UTC,cool,hold,702,740,740,VT,Woodstock,20,False,False,False,Gas
74825,fb5f7cfad04c7e8e661e103399b09700809a96fb,2018-07-30 15:10:00 UTC,cool,hold,704,740,740,VT,Woodstock,20,False,False,False,Gas


In [109]:
# Add year and month

jul_2018["Year"] = "2018"
jul_2018["Month"] = "jul"

In [110]:
# Rename columns to label the aggregates

jul_2018 = jul_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [111]:
jul_2018_ave = jul_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2018_ave

In [112]:
# Export CSV file

jul_2018_ave.to_csv("data/day/VT/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Read in month csv for state
jul_2019 = pd.read_csv("../data_large/VT-day/2019-jul-day-VT.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2019.drop(jul_2019[jul_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-07-22 16:35:00 UTC,cool,hold,778,780,780,VT,Williston,17,False,False,False,Gas
2,46f977bf6b32f3c8c3bce774e4fd6eda8d92db21,2019-07-06 18:45:00 UTC,cool,hold,768,780,755,VT,Peru,5,False,False,True,Electric
3,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-07-23 10:25:00 UTC,cool,hold,753,780,780,VT,Williston,17,False,False,False,Gas
4,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-07-29 15:50:00 UTC,cool,hold,770,780,780,VT,Williston,17,False,False,False,Gas
5,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-07-15 13:10:00 UTC,cool,hold,751,760,760,VT,Williston,17,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97220,fb5f7cfad04c7e8e661e103399b09700809a96fb,2019-07-18 15:10:00 UTC,cool,hold,683,690,680,VT,Woodstock,20,False,False,False,Gas
97221,0ddcca6e74610999ee10239e72330c797274efa8,2019-07-10 10:50:00 UTC,auto,hold,738,760,690,VT,South Burlington,5,False,False,False,Gas
97222,0ddcca6e74610999ee10239e72330c797274efa8,2019-07-23 16:55:00 UTC,auto,hold,753,800,690,VT,South Burlington,5,False,False,False,Gas
97223,0ddcca6e74610999ee10239e72330c797274efa8,2019-07-23 16:30:00 UTC,auto,hold,751,800,690,VT,South Burlington,5,False,False,False,Gas


In [115]:
# Add year and month

jul_2019["Year"] = "2019"
jul_2019["Month"] = "jul"

In [116]:
# Rename columns to label the aggregates

jul_2019 = jul_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [117]:
jul_2019_ave = jul_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2019_ave

In [118]:
# Export CSV file

jul_2019_ave.to_csv("data/day/VT/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Read in month csv for state
jul_2020 = pd.read_csv("../data_large/VT-day/2020-jul-day-VT.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
jul_2020.drop(jul_2020[jul_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f8be51699465a30909d988ecfea0c8d244103c4f,2020-07-11 10:25:00 UTC,cool,auto,787,770,770,VT,Bristol,10,True,False,True,Electric
1,f8be51699465a30909d988ecfea0c8d244103c4f,2020-07-11 10:35:00 UTC,cool,auto,786,770,770,VT,Bristol,10,True,False,True,Electric
2,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-07-25 18:25:00 UTC,cool,hold,718,715,715,VT,Brownsville,99,False,False,True,Electric
3,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-07-28 19:40:00 UTC,cool,hold,727,735,735,VT,Brownsville,99,False,False,True,Electric
4,740b64e1cafcfbc98a8ee89cd0a518eee4163aa1,2020-07-25 19:50:00 UTC,cool,hold,763,760,760,VT,Peru,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74986,568ea4ab739671edacf364f79bcecd1c69ae5ce9,2020-07-28 14:50:00 UTC,cool,hold,735,740,740,VT,Peru,5,False,False,True,Electric
74987,568ea4ab739671edacf364f79bcecd1c69ae5ce9,2020-07-30 14:00:00 UTC,cool,auto,743,740,740,VT,Peru,5,False,False,True,Electric
74988,568ea4ab739671edacf364f79bcecd1c69ae5ce9,2020-07-11 14:40:00 UTC,cool,hold,715,740,740,VT,Peru,5,False,False,True,Electric
74989,568ea4ab739671edacf364f79bcecd1c69ae5ce9,2020-07-30 14:25:00 UTC,cool,auto,739,740,740,VT,Peru,5,False,False,True,Electric


In [121]:
# Add year and month

jul_2020["Year"] = "2020"
jul_2020["Month"] = "jul"

In [122]:
# Rename columns to label the aggregates

jul_2020 = jul_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [123]:
jul_2020_ave = jul_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2020_ave

In [124]:
# Export CSV file

jul_2020_ave.to_csv("data/day/VT/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Read in month csv for state
jul_2021 = pd.read_csv("../data_large/VT-day/2021-jul-day-VT.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedCool'] >= 850].index, inplace = True)
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedHeat'] <= 600].index, inplace = True)
jul_2021.drop(jul_2021[jul_2021['TemperatureExpectedCool'] <= 600].index, inplace = True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2021-07-27 18:50:00 UTC,cool,hold,727,711,711,VT,Brownsville,99,False,False,True,Electric
1,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2021-07-27 19:40:00 UTC,cool,hold,715,711,711,VT,Brownsville,99,False,False,True,Electric
2,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2021-07-27 18:00:00 UTC,cool,hold,752,711,711,VT,Brownsville,99,False,False,True,Electric
3,51e33fc01a3f8c3a3b2d7e60c49dfe4593119b0d,2021-07-10 13:35:00 UTC,cool,hold,711,725,698,VT,Williston,20,False,False,False,Gas
4,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2021-07-27 19:25:00 UTC,cool,hold,712,711,711,VT,Brownsville,99,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52549,0ddcca6e74610999ee10239e72330c797274efa8,2021-07-27 16:05:00 UTC,auto,hold,781,760,690,VT,South Burlington,5,False,False,False,Gas
52550,0ddcca6e74610999ee10239e72330c797274efa8,2021-07-27 14:00:00 UTC,auto,hold,769,760,690,VT,South Burlington,5,False,False,False,Gas
52551,0ddcca6e74610999ee10239e72330c797274efa8,2021-07-27 18:00:00 UTC,auto,hold,781,760,690,VT,South Burlington,5,False,False,False,Gas
52552,0ddcca6e74610999ee10239e72330c797274efa8,2021-07-27 16:45:00 UTC,auto,hold,781,760,690,VT,South Burlington,5,False,False,False,Gas


In [127]:
# Add year and month

jul_2021["Year"] = "2021"
jul_2021["Month"] = "jul"

In [128]:
# Rename columns to label the aggregates

jul_2021 = jul_2021.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [129]:
jul_2021_ave = jul_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# jul_2021_ave

In [130]:
# Export CSV file

jul_2021_ave.to_csv("data/day/VT/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/VT/jul/") if f.endswith(".csv")]

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_jul = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/VT/jul/" + file)
    VT_jul = pd.concat([VT_jul, df])
    
VT_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,08e5ea6a99f568b84b88969934ce6fd54f4a8359,jul,2017,heat,auto,Bethel,773.991228,680.000000,670.000000,0.0,False,False,False
1,0ddcca6e74610999ee10239e72330c797274efa8,jul,2017,auto,auto,South Burlington,742.192308,748.221154,668.971154,5.0,False,False,False
2,0ddcca6e74610999ee10239e72330c797274efa8,jul,2017,auto,hold,South Burlington,738.533333,747.600000,639.333333,5.0,False,False,False
3,0ddcca6e74610999ee10239e72330c797274efa8,jul,2017,cool,hold,South Burlington,765.750000,730.000000,730.000000,5.0,False,False,False
4,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,jul,2017,heat,auto,westfield,758.517730,780.113475,649.567376,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,e38e8fa6c8b52a7e30d0b2f5eb78f25d92edafd8,jul,2021,cool,hold,Essex,698.365854,701.780488,701.646341,5.0,True,False,False
18,f0da960a3fa374f1f719439150052e38c531d274,jul,2021,heat,hold,Hartland,668.539474,650.000000,640.000000,0.0,False,False,False
19,fb25c89e80abf37ee43b1d4c3b1aa4aaf4e948fb,jul,2021,heat,hold,Underhill Center,673.931034,650.000000,649.310345,30.0,False,False,False
20,fb5559f697fdfaa37875b00d94ac7a600e0d864c,jul,2021,cool,hold,Brownsville,726.250000,711.625000,710.000000,99.0,False,False,True


In [133]:
VT_jul.to_csv("Scraper_Output/State_Month_Day/VT/VT_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state
aug_2017 = pd.read_csv("../data_large/VT-day/2017-aug-day-VT.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2017.drop(aug_2017[aug_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6f76dee396823efaa4928dd76539ab3f46489ee6,2017-08-25T19:15:00Z,heat,auto,733,660,660,VT,Bethel,0,False,False,False,Gas
1,6f76dee396823efaa4928dd76539ab3f46489ee6,2017-08-05T10:55:00Z,heat,auto,757,660,660,VT,Bethel,0,False,False,False,Gas
2,6f76dee396823efaa4928dd76539ab3f46489ee6,2017-08-18T15:50:00Z,heat,auto,724,660,660,VT,Bethel,0,False,False,False,Gas
3,6f76dee396823efaa4928dd76539ab3f46489ee6,2017-08-19T10:45:00Z,heat,auto,734,660,660,VT,Bethel,0,False,False,False,Gas
4,6f76dee396823efaa4928dd76539ab3f46489ee6,2017-08-14T18:20:00Z,heat,auto,784,660,660,VT,Bethel,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32015,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-08-07T12:10:00Z,auto,hold,704,700,650,VT,Colchester,0,True,False,False,Gas
32016,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-08-01T18:40:00Z,auto,hold,704,700,650,VT,Colchester,0,True,False,False,Gas
32017,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-08-22T11:10:00Z,auto,hold,714,710,650,VT,Colchester,0,True,False,False,Gas
32018,1cc2b77c1f9dd8971c8050cb5604220405ec8404,2017-08-31T18:40:00Z,auto,hold,700,700,650,VT,Colchester,0,True,False,False,Gas


In [136]:
# Add year and month

aug_2017["Year"] = "2017"
aug_2017["Month"] = "aug"

In [137]:
# Rename columns to label the aggregates

aug_2017 = aug_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [138]:
aug_2017_ave = aug_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2017_ave

In [139]:
# Export CSV file

aug_2017_ave.to_csv("data/day/VT/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Read in month csv for state
aug_2018 = pd.read_csv("../data_large/VT-day/2018-aug-day-VT.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2018.drop(aug_2018[aug_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,028cb525d267c3428a3b3c872643302a06d19401,2018-08-02 15:50:00 UTC,heat,hold,730,770,770,VT,Thetford,120,False,False,False,Gas
1,028cb525d267c3428a3b3c872643302a06d19401,2018-08-02 15:40:00 UTC,heat,hold,728,770,770,VT,Thetford,120,False,False,False,Gas
2,028cb525d267c3428a3b3c872643302a06d19401,2018-08-03 16:10:00 UTC,heat,hold,720,741,658,VT,Thetford,120,False,False,False,Gas
3,028cb525d267c3428a3b3c872643302a06d19401,2018-08-02 14:10:00 UTC,heat,hold,717,770,770,VT,Thetford,120,False,False,False,Gas
4,028cb525d267c3428a3b3c872643302a06d19401,2018-08-02 14:45:00 UTC,heat,hold,724,770,770,VT,Thetford,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73091,0ddcca6e74610999ee10239e72330c797274efa8,2018-08-22 15:10:00 UTC,auto,hold,734,740,690,VT,South Burlington,5,False,False,False,Gas
73092,0ddcca6e74610999ee10239e72330c797274efa8,2018-08-22 13:10:00 UTC,auto,hold,743,740,690,VT,South Burlington,5,False,False,False,Gas
73093,0ddcca6e74610999ee10239e72330c797274efa8,2018-08-15 17:10:00 UTC,auto,auto,763,760,690,VT,South Burlington,5,False,False,False,Gas
73094,0ddcca6e74610999ee10239e72330c797274efa8,2018-08-22 14:10:00 UTC,auto,hold,729,740,690,VT,South Burlington,5,False,False,False,Gas


In [142]:
# Add year and month

aug_2018["Year"] = "2018"
aug_2018["Month"] = "aug"

In [143]:
# Rename columns to label the aggregates

aug_2018 = aug_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [144]:
aug_2018_ave = aug_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2018_ave

In [145]:
# Export CSV file

aug_2018_ave.to_csv("data/day/VT/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Read in month csv for state
aug_2019 = pd.read_csv("../data_large/VT-day/2019-aug-day-VT.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2019.drop(aug_2019[aug_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-08-01 15:50:00 UTC,cool,hold,773,780,780,VT,Williston,17,False,False,False,Gas
1,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-08-01 10:30:00 UTC,cool,hold,754,780,780,VT,Williston,17,False,False,False,Gas
2,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-08-14 19:50:00 UTC,cool,hold,773,780,780,VT,Williston,17,False,False,False,Gas
3,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-08-30 10:20:00 UTC,cool,hold,743,780,780,VT,Williston,17,False,False,False,Gas
4,bbe5199cf833df39722e4f7e78091f2d5b945fd5,2019-08-20 09:40:00 UTC,cool,hold,757,780,780,VT,Williston,17,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77691,fb5f7cfad04c7e8e661e103399b09700809a96fb,2019-08-29 14:20:00 UTC,cool,hold,695,715,715,VT,Woodstock,20,False,False,False,Gas
77692,fb5f7cfad04c7e8e661e103399b09700809a96fb,2019-08-28 16:10:00 UTC,cool,hold,726,735,735,VT,Woodstock,20,False,False,False,Gas
77693,fb5f7cfad04c7e8e661e103399b09700809a96fb,2019-08-22 15:45:00 UTC,cool,hold,709,695,695,VT,Woodstock,20,False,False,False,Gas
77694,fb5f7cfad04c7e8e661e103399b09700809a96fb,2019-08-22 15:40:00 UTC,cool,hold,707,695,695,VT,Woodstock,20,False,False,False,Gas


In [148]:
# Add year and month

aug_2019["Year"] = "2019"
aug_2019["Month"] = "aug"

In [149]:
# Rename columns to label the aggregates

aug_2019 = aug_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [150]:
aug_2019_ave = aug_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2019_ave

In [151]:
# Export CSV file

aug_2019_ave.to_csv("data/day/VT/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Read in month csv for state
aug_2020 = pd.read_csv("../data_large/VT-day/2020-aug-day-VT.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
aug_2020.drop(aug_2020[aug_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-08-11 12:30:00 UTC,cool,hold,742,745,745,VT,Brownsville,99,False,False,True,Electric
1,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-08-11 13:55:00 UTC,cool,hold,749,745,745,VT,Brownsville,99,False,False,True,Electric
2,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-08-10 19:10:00 UTC,cool,hold,747,735,735,VT,Brownsville,99,False,False,True,Electric
3,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-08-10 19:50:00 UTC,cool,hold,736,735,735,VT,Brownsville,99,False,False,True,Electric
4,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2020-08-01 19:55:00 UTC,cool,hold,733,735,735,VT,Brownsville,99,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68271,d58be700034341c053836c3763723e9a90b90ed2,2020-08-15 16:25:00 UTC,cool,hold,741,740,740,VT,Essex Junction,55,False,False,False,Gas
68272,d58be700034341c053836c3763723e9a90b90ed2,2020-08-16 11:30:00 UTC,cool,hold,739,740,740,VT,Essex Junction,55,False,False,False,Gas
68273,d58be700034341c053836c3763723e9a90b90ed2,2020-08-15 16:35:00 UTC,cool,hold,735,740,740,VT,Essex Junction,55,False,False,False,Gas
68274,d58be700034341c053836c3763723e9a90b90ed2,2020-08-16 11:25:00 UTC,cool,hold,738,740,740,VT,Essex Junction,55,False,False,False,Gas


In [154]:
# Add year and month

aug_2020["Year"] = "2020"
aug_2020["Month"] = "aug"

In [155]:
# Rename columns to label the aggregates

aug_2020 = aug_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [156]:
aug_2020_ave = aug_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# aug_2020_ave

In [157]:
# Export CSV file

aug_2020_ave.to_csv("data/day/VT/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [158]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/VT/aug/") if f.endswith(".csv")]

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_aug = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/VT/aug/" + file)
    VT_aug = pd.concat([VT_aug, df])
    
VT_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0ddcca6e74610999ee10239e72330c797274efa8,aug,2017,auto,auto,South Burlington,742.555556,754.444444,631.666667,5.0,False,False,False
1,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,aug,2017,heat,auto,westfield,743.811321,780.226415,649.141509,55.0,False,False,False
2,1cc2b77c1f9dd8971c8050cb5604220405ec8404,aug,2017,auto,hold,Colchester,708.294367,708.367959,650.026606,0.0,True,False,False
3,202b20f2fdcb124df820af1fd71955548fab26ff,aug,2017,cool,hold,South Burlington,714.122807,716.153846,716.153846,0.0,True,False,False
4,27f0ecb02e0966f44199b02555ae4662c9cbf6d2,aug,2017,heat,hold,Essex,682.000000,754.000000,640.000000,50.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
35,e38e8fa6c8b52a7e30d0b2f5eb78f25d92edafd8,aug,2020,cool,auto,Essex,646.058824,651.176471,650.882353,5.0,True,False,False
36,e38e8fa6c8b52a7e30d0b2f5eb78f25d92edafd8,aug,2020,cool,hold,Essex,689.973684,687.815789,687.657895,5.0,True,False,False
37,f8be51699465a30909d988ecfea0c8d244103c4f,aug,2020,cool,hold,Bristol,775.000000,777.000000,692.000000,10.0,True,False,True
38,fb5559f697fdfaa37875b00d94ac7a600e0d864c,aug,2020,cool,hold,Brownsville,746.445946,738.243243,737.905405,99.0,False,False,True


In [160]:
VT_aug.to_csv("Scraper_Output/State_Month_Day/VT/VT_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state
dec_2017 = pd.read_csv("../data_large/VT-day/2017-dec-day-VT.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2017.drop(dec_2017[dec_2017['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d462513f2ab60ff6e7adb3b1856da797689dca90,2017-12-25 14:55:00 UTC,heat,hold,663,750,750,VT,Burlington,60,False,False,False,Gas
1,d462513f2ab60ff6e7adb3b1856da797689dca90,2017-12-22 17:25:00 UTC,heat,hold,579,750,750,VT,Burlington,60,False,False,False,Gas
2,96fa575c91d1b6b302557250abefde9f8d00252f,2017-12-29 18:00:00 UTC,heat,auto,687,702,642,VT,Burlington,60,True,False,False,Gas
3,d462513f2ab60ff6e7adb3b1856da797689dca90,2017-12-22 17:35:00 UTC,heat,hold,580,750,750,VT,Burlington,60,False,False,False,Gas
4,d462513f2ab60ff6e7adb3b1856da797689dca90,2017-12-25 15:15:00 UTC,heat,hold,668,750,750,VT,Burlington,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67821,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2017-12-19 12:35:00 UTC,heat,auto,742,740,740,VT,Colchester,0,False,False,False,Gas
67822,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2017-12-26 16:35:00 UTC,heat,auto,736,740,740,VT,Colchester,0,False,False,False,Gas
67823,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2017-12-21 16:35:00 UTC,heat,auto,728,740,740,VT,Colchester,0,False,False,False,Gas
67824,6f61787f517726a2f49e4b8fc65b4eda0ae311a2,2017-12-18 12:45:00 UTC,heat,auto,741,740,740,VT,Colchester,0,False,False,False,Gas


In [163]:
# Add year and month

dec_2017["Year"] = "2017"
dec_2017["Month"] = "dec"

In [164]:
# Rename columns to label the aggregates

dec_2017 = dec_2017.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [165]:
dec_2017_ave = dec_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2017_ave

In [166]:
# Export CSV file

dec_2017_ave.to_csv("data/day/VT/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Read in month csv for state
dec_2018 = pd.read_csv("../data_large/VT-day/2018-dec-day-VT.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2018.drop(dec_2018[dec_2018['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5b7f5dbb625ed5102fc3e91ac27731652483eafd,2018-12-16 18:55:00 UTC,heat,auto,608,760,760,VT,Newfane,0,False,False,False,Gas
9,ca7bd09b630d8b445a2802cc54e5b3610f3faf66,2018-12-25 15:00:00 UTC,heat,auto,657,788,684,VT,Saint Albans,30,True,False,False,Gas
12,028cb525d267c3428a3b3c872643302a06d19401,2018-12-06 15:05:00 UTC,heat,auto,662,715,675,VT,Thetford,120,False,False,False,Gas
17,10c71a921bd6171381c0ee1080795b3cd0e8299d,2018-12-19 12:55:00 UTC,heat,hold,648,682,655,VT,Plymouth,70,False,False,False,Gas
19,6a5c72f744242e0fe1b5c89734fb53e692a08655,2018-12-19 15:30:00 UTC,heat,auto,694,734,657,VT,Charlotte,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112327,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2018-12-23 18:45:00 UTC,heat,hold,694,690,690,VT,Weston,15,False,False,False,Gas
112328,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2018-12-24 12:05:00 UTC,heat,hold,664,690,690,VT,Weston,15,False,False,False,Gas
112329,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2018-12-23 18:25:00 UTC,heat,hold,695,690,690,VT,Weston,15,False,False,False,Gas
112330,2ba5bba25731e900128c57ec9f931dd3ac5dd750,2018-12-24 12:25:00 UTC,heat,hold,657,690,690,VT,Weston,15,False,False,False,Gas


In [169]:
# Add year and month

dec_2018["Year"] = "2018"
dec_2018["Month"] = "dec"

In [170]:
# Rename columns to label the aggregates

dec_2018 = dec_2018.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [171]:
dec_2018_ave = dec_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2018_ave

In [172]:
# Export CSV file

dec_2018_ave.to_csv("data/day/VT/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Read in month csv for state
dec_2019 = pd.read_csv("../data_large/VT-day/2019-dec-day-VT.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2019.drop(dec_2019[dec_2019['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
4,fb5559f697fdfaa37875b00d94ac7a600e0d864c,2019-12-01 12:20:00 UTC,heat,hold,688,700,695,VT,Brownsville,99,False,False,True,Electric
5,8c9560256e24bd3bd9cc777165a1c0ecd1fc1e9d,2019-12-19 12:35:00 UTC,heat,auto,726,780,760,VT,Winooski,105,False,False,False,Gas
8,8c9560256e24bd3bd9cc777165a1c0ecd1fc1e9d,2019-12-20 13:15:00 UTC,heat,hold,717,780,780,VT,Winooski,105,False,False,False,Gas
9,8c9560256e24bd3bd9cc777165a1c0ecd1fc1e9d,2019-12-14 17:15:00 UTC,heat,hold,744,750,750,VT,Winooski,105,False,False,False,Gas
11,8c9560256e24bd3bd9cc777165a1c0ecd1fc1e9d,2019-12-20 09:50:00 UTC,heat,hold,713,780,780,VT,Winooski,105,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123289,6f76dee396823efaa4928dd76539ab3f46489ee6,2019-12-14 16:15:00 UTC,heat,auto,653,730,730,VT,Bethel,0,False,False,False,Gas
123290,6f76dee396823efaa4928dd76539ab3f46489ee6,2019-12-14 16:10:00 UTC,heat,auto,657,730,730,VT,Bethel,0,False,False,False,Gas
123291,d58be700034341c053836c3763723e9a90b90ed2,2019-12-26 16:00:00 UTC,heat,hold,733,740,740,VT,Essex Junction,55,False,False,False,Gas
123292,d58be700034341c053836c3763723e9a90b90ed2,2019-12-26 15:55:00 UTC,heat,hold,736,740,740,VT,Essex Junction,55,False,False,False,Gas


In [175]:
# Add year and month

dec_2019["Year"] = "2019"
dec_2019["Month"] = "dec"

In [176]:
# Rename columns to label the aggregates

dec_2019 = dec_2019.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [177]:
dec_2019_ave = dec_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2019_ave

In [178]:
# Export CSV file

dec_2019_ave.to_csv("data/day/VT/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Read in month csv for state
dec_2020 = pd.read_csv("../data_large/VT-day/2020-dec-day-VT.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# TemperatureExpectedCool >= 850
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedCool'] >= 850].index, inplace = True)
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedHeat'] >= 850].index, inplace = True)

# TemperatureExpectedHeat <= 600
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedCool'] <= 600].index, inplace = True)
dec_2020.drop(dec_2020[dec_2020['TemperatureExpectedHeat'] <= 600].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a41fbabce7e2de5b4604808f90017db24700f6e,2020-12-17 12:05:00 UTC,heat,auto,572,650,603,VT,Waterville,120,False,False,False,Gas
1,647073dd061dfdbda60d6ebe18579d91c09c5323,2020-12-07 16:10:00 UTC,heat,hold,678,693,682,VT,Milton,15,False,False,False,Gas
2,112d47ef77218a577e255d5857a6c719b7870f74,2020-12-15 12:35:00 UTC,heat,auto,618,702,604,VT,Londonderry,40,False,False,False,Gas
3,f2c4a89bc84b59cc6c62e423b3b5e281135c565f,2020-12-17 18:50:00 UTC,heat,hold,618,650,615,VT,Jericho,25,False,False,False,Gas
4,6a5c72f744242e0fe1b5c89734fb53e692a08655,2020-12-12 14:45:00 UTC,heat,auto,718,722,713,VT,Charlotte,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
107192,d58be700034341c053836c3763723e9a90b90ed2,2020-12-15 16:45:00 UTC,heat,hold,740,740,740,VT,Essex Junction,55,False,False,False,Gas
107193,d58be700034341c053836c3763723e9a90b90ed2,2020-12-15 17:00:00 UTC,heat,hold,733,740,740,VT,Essex Junction,55,False,False,False,Gas
107194,d58be700034341c053836c3763723e9a90b90ed2,2020-12-27 16:20:00 UTC,heat,hold,735,740,740,VT,Essex Junction,55,False,False,False,Gas
107195,d58be700034341c053836c3763723e9a90b90ed2,2020-12-27 15:25:00 UTC,heat,hold,737,740,740,VT,Essex Junction,55,False,False,False,Gas


In [181]:
# Add year and month

dec_2020["Year"] = "2020"
dec_2020["Month"] = "dec"

In [182]:
# Rename columns to label the aggregates

dec_2020 = dec_2020.rename(columns={"Temperature_ctrl":"Temperature_ctrl_ave", 
                         "TemperatureExpectedCool":"TemperatureExpectedCool_ave", 
                         "TemperatureExpectedHeat":"TemperatureExpectedHeat_ave"})

In [183]:
dec_2020_ave = dec_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean()

# dec_2020_ave

In [184]:
# Export CSV file

dec_2020_ave.to_csv("data/day/VT/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [185]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/VT/dec/") if f.endswith(".csv")]

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_dec = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/VT/dec/" + file)
    VT_dec = pd.concat([VT_dec, df])
    
VT_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,028cb525d267c3428a3b3c872643302a06d19401,dec,2017,heat,auto,Thetford,671.437500,665.718750,665.718750,120.0,False,False,False
1,028cb525d267c3428a3b3c872643302a06d19401,dec,2017,heat,hold,Thetford,679.799233,672.654731,669.159847,120.0,False,False,False
2,05baf6c0241b635d98e2d73b929c8e0f81a0e025,dec,2017,heat,hold,Hartland,615.022876,650.000000,620.552288,0.0,False,False,False
3,08e9d7365bb8f03903827359fb15dc779f0d72c8,dec,2017,heat,auto,Stowe,618.785714,653.476190,649.047619,15.0,False,False,False
4,08e9d7365bb8f03903827359fb15dc779f0d72c8,dec,2017,heat,hold,Stowe,667.090129,670.388412,669.873391,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
118,fae2077e3bd726479b0d4f7381810a11ddfd6945,dec,2020,heat,hold,Concord,697.585558,700.000000,700.000000,0.0,False,False,False
119,fb25c89e80abf37ee43b1d4c3b1aa4aaf4e948fb,dec,2020,heat,hold,Underhill Center,655.586177,663.127734,660.666667,30.0,False,False,False
120,fb5559f697fdfaa37875b00d94ac7a600e0d864c,dec,2020,heat,hold,Brownsville,666.211538,670.000000,670.000000,99.0,False,False,True
121,fb5f7cfad04c7e8e661e103399b09700809a96fb,dec,2020,heat,hold,Woodstock,680.178694,688.694158,688.309278,20.0,False,False,False


In [187]:
VT_dec.to_csv("Scraper_Output/State_Month_Day/VT/VT_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [188]:
# Create variable for files in directory
files = [f for f in os.listdir("Scraper_Output/State_Month_Day/VT/") if f.endswith(".csv")]

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize an empty dataframe
VT_all = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/VT/" + file)
    VT_all = pd.concat([VT_all, df])
    
VT_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0ddcca6e74610999ee10239e72330c797274efa8,aug,2017,auto,auto,South Burlington,742.555556,754.444444,631.666667,5.0,False,False,False
1,0e51eab7b259c251d7b77f7174b1bee53fb83ff3,aug,2017,heat,auto,westfield,743.811321,780.226415,649.141509,55.0,False,False,False
2,1cc2b77c1f9dd8971c8050cb5604220405ec8404,aug,2017,auto,hold,Colchester,708.294367,708.367959,650.026606,0.0,True,False,False
3,202b20f2fdcb124df820af1fd71955548fab26ff,aug,2017,cool,hold,South Burlington,714.122807,716.153846,716.153846,0.0,True,False,False
4,27f0ecb02e0966f44199b02555ae4662c9cbf6d2,aug,2017,heat,hold,Essex,682.000000,754.000000,640.000000,50.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
248,f0da960a3fa374f1f719439150052e38c531d274,jun,2021,heat,hold,Hartland,706.868311,650.000000,640.000000,0.0,False,False,False
249,f23a83d45499bc989a9f6bf289c0ae1606b4f60c,jun,2021,heat,hold,Hartford,681.296025,662.963389,646.174686,0.0,True,False,False
250,fb25c89e80abf37ee43b1d4c3b1aa4aaf4e948fb,jun,2021,heat,hold,Underhill Center,692.355408,650.000000,620.000000,30.0,False,False,False
251,fb5559f697fdfaa37875b00d94ac7a600e0d864c,jun,2021,cool,hold,Brownsville,741.208333,745.562500,744.062500,99.0,False,False,True


In [190]:
VT_all.to_csv("Scraper_Output/State_Month_Day/VT_all_day.csv", header=True, index=False)

In [191]:
# Datacheck to mVTe sure state was selected correctly in BQ sql queries

print(f"Unique jan_2017: {jan_2017['ProvinceState'].unique()}")
print(f"Unique jan_2018: {jan_2018['ProvinceState'].unique()}")
print(f"Unique jan_2019: {jan_2019['ProvinceState'].unique()}")
print(f"Unique jan_2020: {jan_2020['ProvinceState'].unique()}")
print(f"Unique jan_2021: {jan_2021['ProvinceState'].unique()}")
print(f"Unique feb_2017: {feb_2017['ProvinceState'].unique()}")
print(f"Unique feb_2018: {feb_2018['ProvinceState'].unique()}")
print(f"Unique feb_2019: {feb_2019['ProvinceState'].unique()}")
print(f"Unique feb_2020: {feb_2020['ProvinceState'].unique()}")
print(f"Unique feb_2021: {feb_2021['ProvinceState'].unique()}")
print(f"Unique jun_2017: {jun_2017['ProvinceState'].unique()}")
print(f"Unique jun_2018: {jun_2018['ProvinceState'].unique()}")
print(f"Unique jun_2019: {jun_2019['ProvinceState'].unique()}")
print(f"Unique jun_2020: {jun_2020['ProvinceState'].unique()}")
print(f"Unique jun_2021: {jun_2021['ProvinceState'].unique()}")
print(f"Unique jul_2017: {jul_2017['ProvinceState'].unique()}")
print(f"Unique jul_2018: {jul_2018['ProvinceState'].unique()}")
print(f"Unique jul_2019: {jul_2019['ProvinceState'].unique()}")
print(f"Unique jul_2020: {jul_2020['ProvinceState'].unique()}")
print(f"Unique jul_2021: {jul_2021['ProvinceState'].unique()}")
print(f"Unique aug_2017: {aug_2017['ProvinceState'].unique()}")
print(f"Unique aug_2018: {aug_2018['ProvinceState'].unique()}")
print(f"Unique aug_2019: {aug_2019['ProvinceState'].unique()}")
print(f"Unique aug_2020: {aug_2020['ProvinceState'].unique()}")
print(f"Unique dec_2017: {dec_2017['ProvinceState'].unique()}")
print(f"Unique dec_2018: {dec_2018['ProvinceState'].unique()}")
print(f"Unique dec_2019: {dec_2019['ProvinceState'].unique()}")
print(f"Unique dec_2020: {dec_2020['ProvinceState'].unique()}")

Unique jan_2017: ['VT']
Unique jan_2018: ['UT']
Unique jan_2019: ['VT']
Unique jan_2020: ['VT']
Unique jan_2021: ['VT']
Unique feb_2017: ['VT']
Unique feb_2018: ['VT']
Unique feb_2019: ['VT']
Unique feb_2020: ['VT']
Unique feb_2021: ['VT']
Unique jun_2017: ['VT']
Unique jun_2018: ['VT']
Unique jun_2019: ['VT']
Unique jun_2020: ['VT']
Unique jun_2021: ['VT']
Unique jul_2017: ['VT']
Unique jul_2018: ['VT']
Unique jul_2019: ['VT']
Unique jul_2020: ['VT']
Unique jul_2021: ['VT']
Unique aug_2017: ['VT']
Unique aug_2018: ['VT']
Unique aug_2019: ['VT']
Unique aug_2020: ['VT']
Unique dec_2017: ['VT']
Unique dec_2018: ['VT']
Unique dec_2019: ['VT']
Unique dec_2020: ['VT']
