# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 daytime thermostat readings for Rhode Island.
jan_2017 = pd.read_csv(Path("../data_large/RI-day") / "2017-jan-day-RI.csv")

In [3]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = jan_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2017.drop(index=jan_2017[outlier_rows].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,c89cedc1970a3c7c112f0c3ef6b9d44c1580ae21,2017-01-25 13:10:00 UTC,heat,hold,678,604,604,RI,North Providence,0,False,False,False,Gas
3,7357379d344c770382ed31acdade35404877f5bc,2017-01-08 19:05:00 UTC,heat,hold,709,705,705,RI,Saunderstown,0,False,False,False,Gas
7,b8a6f71c92f065a9b83cd74befddd352daaee8e8,2017-01-15 14:15:00 UTC,heat,hold,636,703,676,RI,little compton,100,False,False,False,Gas
10,46ff6890fc030ff1235216ea5f81bc678a60ac0e,2017-01-02 12:55:00 UTC,heat,auto,658,758,658,RI,west Kingston,6,False,False,False,Gas
14,c89cedc1970a3c7c112f0c3ef6b9d44c1580ae21,2017-01-28 12:20:00 UTC,heat,auto,636,643,643,RI,North Providence,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59749,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-01-25 19:10:00 UTC,heat,auto,724,730,730,RI,Smithfield,50,False,False,False,Gas
59750,535a9c94b64b2fa126c2624a815388df3c3f8961,2017-01-03 19:40:00 UTC,heat,hold,648,740,740,RI,Providence,100,False,False,False,Gas
59751,535a9c94b64b2fa126c2624a815388df3c3f8961,2017-01-03 19:50:00 UTC,heat,hold,649,740,740,RI,Providence,100,False,False,False,Gas
59752,aeb8ef59c11451a782c9a87bd22f7c5af26d2813,2017-01-13 12:15:00 UTC,heat,auto,743,750,750,RI,Providence,80,False,False,False,Gas


In [4]:
# Tag every reading with its source period (both labels are stored as strings)
# so rows stay identifiable after the yearly files are combined.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01bf3b3095111308e495a2e3010d8476e1c64af5,Jan,2017,heat,hold,west Kingston,645.500000,650.666667,640.000000,6.0,False,False,False
0acff50a5a8097d75e4524bdcd367521bda0cf1a,Jan,2017,heat,auto,Barrington,682.434783,679.282609,678.478261,0.0,False,False,False
0acff50a5a8097d75e4524bdcd367521bda0cf1a,Jan,2017,heat,hold,Barrington,689.077199,685.493716,684.127469,0.0,False,False,False
11dd99d2a0bcb3f35a2e0525c002274f96786c98,Jan,2017,heat,auto,Providence,646.756757,657.459459,648.527027,80.0,False,False,False
11dd99d2a0bcb3f35a2e0525c002274f96786c98,Jan,2017,heat,hold,Providence,676.288591,682.429530,681.463087,80.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
d9c7063a1c71443c09fd63f8963de6421d71244a,Jan,2017,heat,auto,Smithfield,662.552601,776.028902,669.857803,50.0,False,False,False
d9c7063a1c71443c09fd63f8963de6421d71244a,Jan,2017,heat,hold,Smithfield,702.879310,707.000000,706.413793,50.0,False,False,False
df339d7cf1668af9d81a79c076c27f667f429a7a,Jan,2017,heat,hold,Rumford,704.257703,705.714286,705.714286,100.0,False,False,False
f79fcef90217289f4b16beddd98e1a6eb0f11cca,Jan,2017,heat,auto,Pawtucket,650.500000,786.000000,651.000000,120.0,False,False,False


In [7]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/jan", exist_ok=True)  # to_csv does not create missing folders
jan_2017_ave.to_csv("data/day/RI/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 daytime thermostat readings for Rhode Island.
jan_2018 = pd.read_csv(Path("../data_large/RI-day") / "2018-jan-day-RI.csv")

In [9]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = jan_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2018.drop(index=jan_2018[outlier_rows].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
3,ac0c54a9b8ef724d22170d864d06e8428609127f,2018-01-07 11:50:00 UTC,heat,hold,644,650,646,RI,Charlestown,40,False,False,False,Gas
4,f9588bea77a2c55f88af50b9a84848ae4aa67c2a,2018-01-06 19:45:00 UTC,heat,auto,630,824,626,RI,Newport,117,False,False,False,Gas
6,8fe0f9a040a97f0f6757c8aa0d609180ae49ea86,2018-01-19 10:45:00 UTC,heat,auto,636,652,645,RI,Smithfield,55,False,False,False,Gas
7,f9588bea77a2c55f88af50b9a84848ae4aa67c2a,2018-01-07 19:35:00 UTC,heat,auto,612,824,626,RI,Newport,117,False,False,False,Gas
8,d0f5ba4d97460e9e4650ebc82236b33d29ddfa14,2018-01-13 16:10:00 UTC,heat,hold,658,660,635,RI,Tiverton,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170412,d07c8c7911b4b7c928fc3f5cb8ec7c5f575f1535,2018-01-04 15:15:00 UTC,auto,hold,731,810,750,RI,South Kingstown,0,True,False,False,Gas
170413,d07c8c7911b4b7c928fc3f5cb8ec7c5f575f1535,2018-01-05 15:25:00 UTC,auto,hold,744,810,750,RI,South Kingstown,0,True,False,False,Gas
170414,d07c8c7911b4b7c928fc3f5cb8ec7c5f575f1535,2018-01-05 14:55:00 UTC,auto,hold,735,810,750,RI,South Kingstown,0,True,False,False,Gas
170415,64cd1bf9f469dafaaf35abad098e531eb49a2485,2018-01-04 10:40:00 UTC,heat,auto,697,760,760,RI,Warwick,70,False,False,False,Gas


In [10]:
# Tag every reading with its source period (both labels are stored as strings)
# so rows stay identifiable after the yearly files are combined.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/jan", exist_ok=True)  # to_csv does not create missing folders
jan_2018_ave.to_csv("data/day/RI/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 daytime thermostat readings for Rhode Island.
jan_2019 = pd.read_csv(Path("../data_large/RI-day") / "2019-jan-day-RI.csv")

In [15]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = jan_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2019.drop(index=jan_2019[outlier_rows].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,3677d45087c07eb494e9e694b7f39acd726da3f1,2019-01-06 13:45:00 UTC,heat,hold,684,694,694,RI,East Providence,80,False,False,False,Gas
2,3677d45087c07eb494e9e694b7f39acd726da3f1,2019-01-03 18:15:00 UTC,heat,hold,657,661,661,RI,East Providence,80,False,False,False,Gas
4,99e1203de45567a9a837a9b66f14f4fb1f752c76,2019-01-19 14:10:00 UTC,heat,hold,673,665,665,RI,East Greenwich,30,False,False,False,Gas
5,75c8a43d66aa1dc686e2e3aefab9d39fb4562982,2019-01-21 13:05:00 UTC,heat,auto,731,797,722,RI,North Providence,65,False,False,False,Gas
6,a427133f8f1d562e256e4d565e3e1d6e915465c0,2019-01-12 17:00:00 UTC,heat,hold,583,650,603,RI,Charlestown,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209461,b3056c664d197b3a00ec3867e86cab6a3a96177e,2019-01-12 12:15:00 UTC,heat,hold,644,760,760,RI,Providence,0,True,False,True,Electric
209462,b3056c664d197b3a00ec3867e86cab6a3a96177e,2019-01-12 13:55:00 UTC,heat,hold,628,760,760,RI,Providence,0,True,False,True,Electric
209463,b3056c664d197b3a00ec3867e86cab6a3a96177e,2019-01-12 12:55:00 UTC,heat,hold,636,760,760,RI,Providence,0,True,False,True,Electric
209464,b3056c664d197b3a00ec3867e86cab6a3a96177e,2019-01-12 13:45:00 UTC,heat,hold,630,760,760,RI,Providence,0,True,False,True,Electric


In [16]:
# Tag every reading with its source period (both labels are stored as strings)
# so rows stay identifiable after the yearly files are combined.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/jan", exist_ok=True)  # to_csv does not create missing folders
jan_2019_ave.to_csv("data/day/RI/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 daytime thermostat readings for Rhode Island.
jan_2020 = pd.read_csv(Path("../data_large/RI-day") / "2020-jan-day-RI.csv")

In [21]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = jan_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2020.drop(index=jan_2020[outlier_rows].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2020-01-22 17:50:00 UTC,heat,hold,667,669,669,RI,Rumford,70,False,False,False,Gas
1,d9196496fc544c3b087d43b8ca05c921c9b88f66,2020-01-12 13:45:00 UTC,heat,hold,713,715,715,RI,Ashaway,9,True,False,False,Gas
2,d9196496fc544c3b087d43b8ca05c921c9b88f66,2020-01-28 13:00:00 UTC,heat,hold,706,715,715,RI,Ashaway,9,True,False,False,Gas
3,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2020-01-23 15:35:00 UTC,heat,hold,645,650,649,RI,Rumford,70,False,False,False,Gas
4,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2020-01-20 18:20:00 UTC,heat,hold,647,650,649,RI,Rumford,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215026,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2020-01-03 16:05:00 UTC,auto,auto,704,810,760,RI,Cumberland,9,True,False,True,Electric
215027,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2020-01-03 15:15:00 UTC,auto,auto,690,810,760,RI,Cumberland,9,True,False,True,Electric
215028,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2020-01-03 16:15:00 UTC,auto,auto,710,810,760,RI,Cumberland,9,True,False,True,Electric
215029,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2020-01-28 19:40:00 UTC,auto,hold,705,810,760,RI,Cumberland,9,True,False,True,Electric


In [22]:
# Tag every reading with its source period (both labels are stored as strings)
# so rows stay identifiable after the yearly files are combined.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/jan", exist_ok=True)  # to_csv does not create missing folders
jan_2020_ave.to_csv("data/day/RI/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 daytime thermostat readings for Rhode Island.
jan_2021 = pd.read_csv(Path("../data_large/RI-day") / "2021-jan-day-RI.csv")

In [27]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = jan_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2021.drop(index=jan_2021[outlier_rows].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a30712b3b65b82f38b33a47059c2da1597f2d88,2021-01-24 18:25:00 UTC,heat,hold,697,702,702,RI,cranston,5,False,False,False,Gas
1,46ff6890fc030ff1235216ea5f81bc678a60ac0e,2021-01-21 16:35:00 UTC,heat,hold,687,701,701,RI,west Kingston,6,False,False,False,Gas
2,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2021-01-28 19:10:00 UTC,heat,hold,722,715,715,RI,East Greenwich,50,False,False,False,Gas
3,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2021-01-29 19:10:00 UTC,heat,hold,708,715,715,RI,East Greenwich,50,False,False,False,Gas
4,46ff6890fc030ff1235216ea5f81bc678a60ac0e,2021-01-21 17:30:00 UTC,heat,hold,702,701,701,RI,west Kingston,6,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123700,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2021-01-19 19:10:00 UTC,auto,hold,743,810,760,RI,Cumberland,9,True,False,True,Electric
123701,319d959abbee94a38cba224532f6e2c84abf3991,2021-01-07 16:30:00 UTC,heat,hold,743,760,760,RI,North Providence,65,False,False,False,Gas
123702,319d959abbee94a38cba224532f6e2c84abf3991,2021-01-10 19:40:00 UTC,heat,hold,757,760,760,RI,North Providence,65,False,False,False,Gas
123703,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2021-01-31 19:45:00 UTC,auto,hold,756,810,760,RI,Cumberland,9,True,False,True,Electric


In [28]:
# Tag every reading with its source period (both labels are stored as strings)
# so rows stay identifiable after the yearly files are combined.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/jan", exist_ok=True)  # to_csv does not create missing folders
jan_2021_ave.to_csv("data/day/RI/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the per-year average CSVs for January. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/RI/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once. Calling pd.concat inside the
# loop re-copies the accumulated frame on each pass and starts from an empty
# DataFrame, which newer pandas versions warn about.
yearly_frames = [pd.read_csv("data/day/RI/jan/" + file) for file in files]

# pd.concat raises on an empty list, so keep the original "empty frame" result
# when the folder holds no CSV files.
RI_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

RI_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01bf3b3095111308e495a2e3010d8476e1c64af5,Jan,2017,heat,hold,west Kingston,645.500000,650.666667,640.000000,6.0,False,False,False
1,0acff50a5a8097d75e4524bdcd367521bda0cf1a,Jan,2017,heat,auto,Barrington,682.434783,679.282609,678.478261,0.0,False,False,False
2,0acff50a5a8097d75e4524bdcd367521bda0cf1a,Jan,2017,heat,hold,Barrington,689.077199,685.493716,684.127469,0.0,False,False,False
3,11dd99d2a0bcb3f35a2e0525c002274f96786c98,Jan,2017,heat,auto,Providence,646.756757,657.459459,648.527027,80.0,False,False,False
4,11dd99d2a0bcb3f35a2e0525c002274f96786c98,Jan,2017,heat,hold,Providence,676.288591,682.429530,681.463087,80.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
130,ef4b276779457e8e202b48c505042f4347cf85ce,Jan,2021,heat,hold,North Providence,704.135843,721.129296,720.392799,65.0,False,False,False
131,f03399e99ea6fe83b27f31df27484274083f21d0,Jan,2021,auto,hold,South Kingstown,685.866667,760.000000,700.000000,25.0,False,False,False
132,f4e3e661c5dec7d0358f19a9d028b363998774ee,Jan,2021,auto,hold,East Greenwich,696.166942,753.516529,701.030579,50.0,False,False,False
133,f6500193235d26119ad38051383b8feb9669fe00,Jan,2021,heat,hold,Exeter,651.708824,654.744118,654.567647,30.0,True,False,False


In [34]:
# Write the combined January frame; index=False leaves out the row index, which
# repeats for each source file after the concat.
os.makedirs("Scraper_Output/State_Month_Day/RI", exist_ok=True)  # to_csv does not create missing folders
RI_jan.to_csv("Scraper_Output/State_Month_Day/RI/RI_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 daytime thermostat readings for Rhode Island.
feb_2017 = pd.read_csv(Path("../data_large/RI-day") / "2017-feb-day-RI.csv")

In [36]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = feb_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2017.drop(index=feb_2017[outlier_rows].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7357379d344c770382ed31acdade35404877f5bc,2017-02-04 19:00:00 UTC,heat,hold,714,705,705,RI,Saunderstown,0,False,False,False,Gas
6,453dd0e5c2fe540c4199e9a7aaef3b729700f34c,2017-02-05 14:45:00 UTC,heat,auto,712,782,716,RI,Cranston,9,True,False,False,Gas
11,7357379d344c770382ed31acdade35404877f5bc,2017-02-18 18:05:00 UTC,heat,hold,726,705,705,RI,Saunderstown,0,False,False,False,Gas
14,7357379d344c770382ed31acdade35404877f5bc,2017-02-04 16:30:00 UTC,heat,hold,715,705,705,RI,Saunderstown,0,False,False,False,Gas
16,7357379d344c770382ed31acdade35404877f5bc,2017-02-18 19:00:00 UTC,heat,hold,722,705,705,RI,Saunderstown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54061,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-02-17 17:30:00 UTC,heat,auto,744,750,750,RI,Smithfield,50,False,False,False,Gas
54062,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-02-17 19:55:00 UTC,heat,auto,750,750,750,RI,Smithfield,50,False,False,False,Gas
54063,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-02-17 19:30:00 UTC,heat,auto,744,750,750,RI,Smithfield,50,False,False,False,Gas
54064,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-02-17 17:55:00 UTC,heat,auto,743,750,750,RI,Smithfield,50,False,False,False,Gas


In [37]:
# Tag every reading with its source period (both labels are stored as strings).
# NOTE(review): the January cells label the month "Jan" while this one uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/feb", exist_ok=True)  # to_csv does not create missing folders
feb_2017_ave.to_csv("data/day/RI/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 daytime thermostat readings for Rhode Island.
feb_2018 = pd.read_csv(Path("../data_large/RI-day") / "2018-feb-day-RI.csv")

In [42]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = feb_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2018.drop(index=feb_2018[outlier_rows].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a427133f8f1d562e256e4d565e3e1d6e915465c0,2018-02-04 12:00:00 UTC,heat,hold,644,650,645,RI,Charlestown,45,False,False,False,Gas
2,99e1203de45567a9a837a9b66f14f4fb1f752c76,2018-02-21 12:30:00 UTC,heat,auto,678,650,673,RI,East Greenwich,30,False,False,False,Gas
4,a427133f8f1d562e256e4d565e3e1d6e915465c0,2018-02-04 12:25:00 UTC,heat,hold,648,650,645,RI,Charlestown,45,False,False,False,Gas
6,7357379d344c770382ed31acdade35404877f5bc,2018-02-17 18:35:00 UTC,heat,hold,712,680,665,RI,Saunderstown,0,False,False,False,Gas
7,99e1203de45567a9a837a9b66f14f4fb1f752c76,2018-02-14 12:55:00 UTC,heat,auto,681,650,648,RI,East Greenwich,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
155362,2a8377b8f2815b1a02fd7ffded9477808ffa2ba7,2018-02-16 18:25:00 UTC,heat,hold,747,750,750,RI,Cranston,10,False,False,False,Gas
155363,2a8377b8f2815b1a02fd7ffded9477808ffa2ba7,2018-02-02 15:45:00 UTC,heat,auto,749,750,750,RI,Cranston,10,False,False,False,Gas
155364,ef4b276779457e8e202b48c505042f4347cf85ce,2018-02-03 13:25:00 UTC,heat,hold,666,760,760,RI,North Providence,65,False,False,False,Gas
155365,ef4b276779457e8e202b48c505042f4347cf85ce,2018-02-03 12:40:00 UTC,heat,hold,659,760,760,RI,North Providence,65,False,False,False,Gas


In [43]:
# Tag every reading with its source period (both labels are stored as strings).
# NOTE(review): the January cells label the month "Jan" while this one uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/feb", exist_ok=True)  # to_csv does not create missing folders
feb_2018_ave.to_csv("data/day/RI/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 daytime thermostat readings for Rhode Island.
feb_2019 = pd.read_csv(Path("../data_large/RI-day") / "2019-feb-day-RI.csv")

In [48]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = feb_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2019.drop(index=feb_2019[outlier_rows].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2019-02-26 12:40:00 UTC,heat,hold,647,650,643,RI,Rumford,70,False,False,False,Gas
2,df123a08b42c12d70d6bc6dc12a32585f8c6de25,2019-02-25 12:00:00 UTC,heat,auto,658,705,605,RI,Cumberland,0,False,False,False,Gas
3,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2019-02-05 13:25:00 UTC,heat,hold,658,683,683,RI,Rumford,70,False,False,False,Gas
5,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2019-02-28 12:30:00 UTC,heat,hold,653,655,655,RI,Rumford,70,False,False,False,Gas
6,95cf6c6b232e359965314849d19c8c26e7cd5b6d,2019-02-15 18:40:00 UTC,heat,hold,656,665,665,RI,Coventry,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142856,b030451472675d2843724c1ea1e2df2386daf60c,2019-02-22 11:10:00 UTC,heat,hold,715,750,750,RI,Bristol,35,False,False,False,Gas
142857,b030451472675d2843724c1ea1e2df2386daf60c,2019-02-12 10:55:00 UTC,heat,hold,686,750,750,RI,Bristol,35,False,False,False,Gas
142858,b030451472675d2843724c1ea1e2df2386daf60c,2019-02-22 11:25:00 UTC,heat,hold,722,750,750,RI,Bristol,35,False,False,False,Gas
142859,b030451472675d2843724c1ea1e2df2386daf60c,2019-02-12 11:25:00 UTC,heat,hold,701,750,750,RI,Bristol,35,False,False,False,Gas


In [49]:
# Tag every reading with its source period (both labels are stored as strings).
# NOTE(review): the January cells label the month "Jan" while this one uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/feb", exist_ok=True)  # to_csv does not create missing folders
feb_2019_ave.to_csv("data/day/RI/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 daytime thermostat readings for Rhode Island.
feb_2020 = pd.read_csv(Path("../data_large/RI-day") / "2020-feb-day-RI.csv")

In [54]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = feb_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2020.drop(index=feb_2020[outlier_rows].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,196e996641e82efd4749acda6ffbc674009f4c05,2020-02-16 19:25:00 UTC,heat,auto,685,733,638,RI,Coventry,10,True,False,False,Gas
2,569360bd8c70cc4ecf6707d3325719356e9e4dd6,2020-02-02 13:30:00 UTC,heat,auto,661,665,665,RI,Tiverton,0,False,False,False,Gas
3,630cf50d8463f901f42094a4dce158644e75b639,2020-02-26 13:55:00 UTC,heat,auto,672,780,657,RI,Saunderstown,0,False,False,False,Gas
5,bb317a2284aed341541e70ba4896bc328ac7998b,2020-02-28 15:25:00 UTC,auto,hold,640,727,637,RI,Pawtucket,105,False,False,False,Gas
6,c945d17d8e98c2bbfdefa977dd6706b54fbeaa13,2020-02-04 12:15:00 UTC,heat,hold,713,716,716,RI,Providence,80,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187636,c945d17d8e98c2bbfdefa977dd6706b54fbeaa13,2020-02-18 19:35:00 UTC,heat,hold,734,760,760,RI,Providence,80,False,False,False,Gas
187637,b3056c664d197b3a00ec3867e86cab6a3a96177e,2020-02-16 16:45:00 UTC,heat,hold,758,760,760,RI,Providence,0,True,False,True,Electric
187638,b3056c664d197b3a00ec3867e86cab6a3a96177e,2020-02-15 18:50:00 UTC,heat,hold,755,760,760,RI,Providence,0,True,False,True,Electric
187639,b3056c664d197b3a00ec3867e86cab6a3a96177e,2020-02-16 15:45:00 UTC,heat,hold,756,760,760,RI,Providence,0,True,False,True,Electric


In [55]:
# Tag every reading with its source period (both labels are stored as strings).
# NOTE(review): the January cells label the month "Jan" while this one uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/feb", exist_ok=True)  # to_csv does not create missing folders
feb_2020_ave.to_csv("data/day/RI/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 daytime thermostat readings for Rhode Island.
feb_2021 = pd.read_csv(Path("../data_large/RI-day") / "2021-feb-day-RI.csv")

In [60]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool OR expected heat temperature is >= 850 or <= 600. Rows with a
# missing value in those columns are kept.
# (values look like tenths of a degree — TODO confirm units)
setpoints = feb_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2021.drop(index=feb_2021[outlier_rows].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d5c979efe0d7ff7157ecd9e13492244cc3d9032f,2021-02-17 17:00:00 UTC,heat,hold,651,652,652,RI,Warwick,30,True,False,False,Gas
2,c9a79fe796abfa29568bab2ab7fe68fb1b51f2f4,2021-02-05 17:50:00 UTC,heat,hold,687,685,685,RI,Warwick,0,False,False,False,Gas
3,6a30712b3b65b82f38b33a47059c2da1597f2d88,2021-02-14 19:05:00 UTC,heat,hold,705,703,703,RI,cranston,5,False,False,False,Gas
7,46ff6890fc030ff1235216ea5f81bc678a60ac0e,2021-02-15 13:20:00 UTC,heat,hold,710,721,721,RI,west Kingston,6,False,False,False,Gas
12,4513e2aeab4b19a34bf7dd0b96c4791593b67126,2021-02-26 18:20:00 UTC,heat,hold,659,650,649,RI,Rumford,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99235,ef2b8123ad013848cd9d49436dba64b43dd5e397,2021-02-01 17:30:00 UTC,heat,hold,684,690,690,RI,East Greenwich,35,False,False,False,Gas
99236,22dc70d7a4a4fbd3ef8242785999fe75043dc178,2021-02-01 19:55:00 UTC,heat,hold,680,690,690,RI,Little Compton,5,False,False,False,Gas
99237,22dc70d7a4a4fbd3ef8242785999fe75043dc178,2021-02-22 19:40:00 UTC,heat,hold,683,690,690,RI,Little Compton,5,False,False,False,Gas
99238,22dc70d7a4a4fbd3ef8242785999fe75043dc178,2021-02-01 19:50:00 UTC,heat,hold,683,690,690,RI,Little Compton,5,False,False,False,Gas


In [61]:
# Tag every reading with its source period (both labels are stored as strings).
# NOTE(review): the January cells label the month "Jan" while this one uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated output is
# labelled as averages.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because pandas >= 2.0 no
# longer silently drops text columns (date_time, ProvinceState, ...) and raises
# a TypeError instead.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the averaged frame; the group keys live in the index, so index=True
# writes them as the leading CSV columns.
os.makedirs("data/day/RI/feb", exist_ok=True)  # to_csv does not create missing folders
feb_2021_ave.to_csv("data/day/RI/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the per-year average CSVs for February. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/RI/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once. Calling pd.concat inside the
# loop re-copies the accumulated frame on each pass and starts from an empty
# DataFrame, which newer pandas versions warn about.
yearly_frames = [pd.read_csv("data/day/RI/feb/" + file) for file in files]

# pd.concat raises on an empty list, so keep the original "empty frame" result
# when the folder holds no CSV files.
RI_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

RI_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01bf3b3095111308e495a2e3010d8476e1c64af5,feb,2017,heat,auto,west Kingston,672.600000,730.000000,640.000000,6.0,False,False,False
1,01bf3b3095111308e495a2e3010d8476e1c64af5,feb,2017,heat,hold,west Kingston,681.068493,730.000000,640.000000,6.0,False,False,False
2,0acff50a5a8097d75e4524bdcd367521bda0cf1a,feb,2017,heat,auto,Barrington,669.786325,670.316239,669.957265,0.0,False,False,False
3,0acff50a5a8097d75e4524bdcd367521bda0cf1a,feb,2017,heat,hold,Barrington,692.867749,686.401392,686.278422,0.0,False,False,False
4,104e38634e8391e53876769e8a940326154bf776,feb,2017,heat,hold,Barrington,707.125000,706.000000,706.000000,95.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,f03399e99ea6fe83b27f31df27484274083f21d0,feb,2021,auto,hold,South Kingstown,658.652174,760.000000,660.000000,25.0,False,False,False
125,f4e3e661c5dec7d0358f19a9d028b363998774ee,feb,2021,auto,hold,East Greenwich,696.609130,751.771755,700.916548,50.0,False,False,False
126,f6500193235d26119ad38051383b8feb9669fe00,feb,2021,heat,hold,Exeter,653.862464,658.260745,658.260745,30.0,True,False,False
127,f65284597a5041dd2b09d8250972a11651305201,feb,2021,heat,hold,Warwick,668.437500,670.000000,670.000000,27.0,False,False,False


In [67]:
# Save the combined month without the row index (it restarts at 0 for every source file).
RI_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/RI/RI_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" CSV for RI into a DataFrame.
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2017-jun-day-RI.csv"
)

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2017.drop(
    jun_2017[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7357379d344c770382ed31acdade35404877f5bc,2017-06-10 17:20:00 UTC,auto,hold,755,765,685,RI,Saunderstown,0,False,False,False,Gas
1,af4f8edc11316059b444657bc8730c65197253e2,2017-06-20 08:50:00 UTC,auto,hold,765,765,685,RI,Warwick,5,False,False,False,Gas
2,af4f8edc11316059b444657bc8730c65197253e2,2017-06-20 09:05:00 UTC,auto,hold,765,765,685,RI,Warwick,5,False,False,False,Gas
3,fdc84179acf8e9c036a4b3e58bc8b11c0cac8f77,2017-06-20 18:05:00 UTC,cool,hold,748,840,840,RI,South Kingstown,0,False,False,False,Gas
4,af4f8edc11316059b444657bc8730c65197253e2,2017-06-20 09:40:00 UTC,auto,hold,765,765,685,RI,Warwick,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62101,aeb8ef59c11451a782c9a87bd22f7c5af26d2813,2017-06-30 19:15:00 UTC,cool,hold,749,760,760,RI,Providence,80,False,False,False,Gas
62102,aeb8ef59c11451a782c9a87bd22f7c5af26d2813,2017-06-30 19:35:00 UTC,cool,hold,761,760,760,RI,Providence,80,False,False,False,Gas
62103,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-06-27 10:30:00 UTC,cool,auto,714,770,760,RI,Smithfield,50,False,False,False,Gas
62104,d9c7063a1c71443c09fd63f8963de6421d71244a,2017-06-10 10:50:00 UTC,cool,auto,711,760,760,RI,Smithfield,50,False,False,False,Gas


In [70]:
# Label every row with its year and month; both are used as group keys when aggregating.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jun_2017_ave.to_csv(
    path_or_buf="data/day/RI/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the June 2018 "day" CSV for RI into a DataFrame.
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2018-jun-day-RI.csv"
)

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2018.drop(
    jun_2018[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a30712b3b65b82f38b33a47059c2da1597f2d88,2018-06-29 16:25:00 UTC,cool,hold,736,735,735,RI,cranston,5,False,False,False,Gas
3,7357379d344c770382ed31acdade35404877f5bc,2018-06-17 17:40:00 UTC,cool,hold,703,705,705,RI,Saunderstown,0,False,False,False,Gas
5,3270d6c6f56434923d63bbd951d1674120e5a83b,2018-06-17 19:40:00 UTC,cool,hold,727,722,722,RI,Narragansett,25,False,False,False,Gas
6,96efe99e0687a0d3fdc286bf2ca1f3b0cf3c9dda,2018-06-15 19:50:00 UTC,auto,hold,699,695,645,RI,Providence,15,False,False,False,Gas
7,96efe99e0687a0d3fdc286bf2ca1f3b0cf3c9dda,2018-06-26 18:45:00 UTC,cool,hold,725,714,714,RI,Providence,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132390,e9bc20103b38e9881626222e6cbac9f154f887f2,2018-06-26 18:45:00 UTC,cool,hold,749,742,760,RI,Warwick,20,True,False,False,Gas
132391,e9bc20103b38e9881626222e6cbac9f154f887f2,2018-06-27 10:35:00 UTC,cool,auto,713,720,760,RI,Warwick,20,True,False,False,Gas
132392,e9bc20103b38e9881626222e6cbac9f154f887f2,2018-06-27 15:15:00 UTC,cool,auto,723,720,760,RI,Warwick,20,True,False,False,Gas
132393,e9bc20103b38e9881626222e6cbac9f154f887f2,2018-06-28 11:50:00 UTC,cool,auto,720,720,760,RI,Warwick,20,True,False,False,Gas


In [76]:
# Label every row with its year and month; both are used as group keys when aggregating.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jun_2018_ave.to_csv(
    path_or_buf="data/day/RI/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the June 2019 "day" CSV for RI into a DataFrame.
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2019-jun-day-RI.csv"
)

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2019.drop(
    jun_2019[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5b8a9a4fb314842aff1e74d0a753fa5cea7bd8ab,2019-06-17 18:35:00 UTC,cool,auto,754,790,788,RI,cranston,100,False,False,False,Gas
2,7c5dcb38366648ba7fd224cc6c337ad54d414e7b,2019-06-02 19:00:00 UTC,cool,hold,711,707,707,RI,Warwick,60,False,False,False,Gas
3,6a30712b3b65b82f38b33a47059c2da1597f2d88,2019-06-30 17:00:00 UTC,cool,hold,736,735,735,RI,cranston,5,False,False,False,Gas
4,2878c4994062d59f3dcb46668c20332bc9288215,2019-06-30 11:35:00 UTC,cool,auto,741,748,748,RI,Cranston,55,False,False,False,Gas
5,6a30712b3b65b82f38b33a47059c2da1597f2d88,2019-06-27 16:30:00 UTC,cool,hold,723,725,725,RI,cranston,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173871,3fddfe1485efe616d3705f27ef40b09945009a4a,2019-06-23 12:40:00 UTC,cool,auto,702,700,700,RI,Pawtucket,80,False,False,False,Gas
173872,3fddfe1485efe616d3705f27ef40b09945009a4a,2019-06-23 17:25:00 UTC,cool,auto,707,700,700,RI,Pawtucket,80,False,False,False,Gas
173873,3fddfe1485efe616d3705f27ef40b09945009a4a,2019-06-23 11:20:00 UTC,cool,auto,697,700,700,RI,Pawtucket,80,False,False,False,Gas
173874,3fddfe1485efe616d3705f27ef40b09945009a4a,2019-06-23 11:55:00 UTC,cool,auto,698,700,700,RI,Pawtucket,80,False,False,False,Gas


In [82]:
# Label every row with its year and month; both are used as group keys when aggregating.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jun_2019_ave.to_csv(
    path_or_buf="data/day/RI/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the June 2020 "day" CSV for RI into a DataFrame.
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2020-jun-day-RI.csv"
)

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2020.drop(
    jun_2020[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,46ff6890fc030ff1235216ea5f81bc678a60ac0e,2020-06-21 18:55:00 UTC,cool,hold,720,710,701,RI,west Kingston,6,False,False,False,Gas
1,60ae7bd0a89c8924007ceedb7d462c0cc457d3bc,2020-06-27 18:40:00 UTC,cool,hold,754,717,717,RI,Saunderstown,30,False,False,False,Gas
3,104e38634e8391e53876769e8a940326154bf776,2020-06-06 18:30:00 UTC,cool,hold,741,732,732,RI,Barrington,95,False,False,False,Gas
4,52b4a34fac68fce2b185847feaea0498f274843d,2020-06-04 14:05:00 UTC,cool,hold,734,705,705,RI,Providence,0,True,False,True,Electric
5,6a30712b3b65b82f38b33a47059c2da1597f2d88,2020-06-11 17:10:00 UTC,cool,hold,723,725,725,RI,cranston,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
179405,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-06-05 17:40:00 UTC,cool,hold,760,760,760,RI,Warwick,20,True,False,False,Gas
179406,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-06-11 16:30:00 UTC,cool,hold,741,760,760,RI,Warwick,20,True,False,False,Gas
179407,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-06-11 18:15:00 UTC,cool,hold,748,760,760,RI,Warwick,20,True,False,False,Gas
179408,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-06-05 12:05:00 UTC,cool,hold,736,760,760,RI,Warwick,20,True,False,False,Gas


In [88]:
# Label every row with its year and month; both are used as group keys when aggregating.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jun_2020_ave.to_csv(
    path_or_buf="data/day/RI/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the June 2021 "day" CSV for RI into a DataFrame.
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2021-jun-day-RI.csv"
)

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2021.drop(
    jun_2021[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2021-06-06 15:35:00 UTC,auto,hold,730,710,655,RI,East Greenwich,50,False,False,False,Gas
1,52b4a34fac68fce2b185847feaea0498f274843d,2021-06-05 17:40:00 UTC,cool,hold,811,675,625,RI,Providence,0,True,False,True,Electric
2,52b4a34fac68fce2b185847feaea0498f274843d,2021-06-06 16:05:00 UTC,cool,hold,811,675,625,RI,Providence,0,True,False,True,Electric
3,7357379d344c770382ed31acdade35404877f5bc,2021-06-28 16:20:00 UTC,cool,hold,706,715,715,RI,Saunderstown,0,False,False,False,Gas
4,8fe5e33c7683f3cee1a182d5564f49b9519b8d5e,2021-06-19 18:30:00 UTC,cool,hold,766,704,704,RI,Warwick,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
102942,d0bdb5c173927605a63b36af466460e6197dda63,2021-06-09 12:50:00 UTC,cool,hold,759,760,760,RI,Pawtucket,65,True,False,False,Gas
102943,d0bdb5c173927605a63b36af466460e6197dda63,2021-06-30 16:25:00 UTC,cool,hold,768,760,760,RI,Pawtucket,65,True,False,False,Gas
102944,d0bdb5c173927605a63b36af466460e6197dda63,2021-06-08 15:35:00 UTC,cool,hold,759,760,760,RI,Pawtucket,65,True,False,False,Gas
102945,d0bdb5c173927605a63b36af466460e6197dda63,2021-06-10 16:50:00 UTC,cool,hold,763,760,760,RI,Pawtucket,65,True,False,False,Gas


In [94]:
# Label every row with its year and month; both are used as group keys when aggregating.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jun_2021_ave.to_csv(
    path_or_buf="data/day/RI/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Collect the per-year CSV files for this month. Sorted because os.listdir returns
# entries in arbitrary order and the combined file should come out the same every run.
files = sorted(f for f in os.listdir("data/day/RI/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then concatenate once at the end. Growing the frame
# with pd.concat inside the loop re-copies every accumulated row per iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/RI/jun/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory held no CSV files (the same result the original loop produced).
RI_jun = pd.concat(frames) if frames else pd.DataFrame()

RI_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01bf3b3095111308e495a2e3010d8476e1c64af5,jun,2017,cool,auto,west Kingston,715.541667,710.500000,640.000000,6.0,False,False,False
1,0499a1d83aac1e394606c0a1666889a43e88a731,jun,2017,cool,auto,Rumford,743.449405,743.891369,721.312500,0.0,True,False,False
2,0499a1d83aac1e394606c0a1666889a43e88a731,jun,2017,cool,hold,Rumford,747.683014,732.562201,732.544258,0.0,True,False,False
3,085d6b55257709a5e3464f47c884726839d32297,jun,2017,cool,auto,Warwick,715.446429,720.446429,650.000000,0.0,False,False,False
4,085d6b55257709a5e3464f47c884726839d32297,jun,2017,cool,hold,Warwick,721.232558,719.372093,719.372093,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
136,f6500193235d26119ad38051383b8feb9669fe00,jun,2021,heat,hold,Exeter,669.509615,660.000000,660.000000,30.0,True,False,False
137,f65284597a5041dd2b09d8250972a11651305201,jun,2021,cool,hold,Warwick,713.568016,718.213670,718.213670,27.0,False,False,False
138,f6b2a6725b65b91843c4dc49c0c43c0f503fa0b1,jun,2021,auto,hold,East Greenwich,701.833333,700.000000,640.000000,50.0,False,False,False
139,f709a3ee3134aec7acb6df637f8bb81f0d85e2fd,jun,2021,cool,hold,East Greenwich,734.897959,733.367347,733.367347,10.0,False,False,False


In [100]:
# Save the combined month without the row index (it restarts at 0 for every source file).
RI_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/RI/RI_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" CSV for RI into a DataFrame.
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2017-jul-day-RI.csv"
)

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2017.drop(
    jul_2017[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fdc84179acf8e9c036a4b3e58bc8b11c0cac8f77,2017-07-22 18:15:00 UTC,cool,hold,845,840,840,RI,South Kingstown,0,False,False,False,Gas
1,c89cedc1970a3c7c112f0c3ef6b9d44c1580ae21,2017-07-09 18:35:00 UTC,cool,hold,710,705,705,RI,North Providence,0,False,False,False,Gas
2,c89cedc1970a3c7c112f0c3ef6b9d44c1580ae21,2017-07-21 17:20:00 UTC,cool,hold,727,715,715,RI,North Providence,0,False,False,False,Gas
4,3270d6c6f56434923d63bbd951d1674120e5a83b,2017-07-17 17:05:00 UTC,cool,hold,728,725,725,RI,Narragansett,25,False,False,False,Gas
6,3270d6c6f56434923d63bbd951d1674120e5a83b,2017-07-16 14:40:00 UTC,cool,hold,717,725,725,RI,Narragansett,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114961,b8bb88cbcc0dc3ca189cc2bbfcec6859de8dd540,2017-07-14 11:15:00 UTC,cool,hold,699,760,760,RI,Little Compton,5,False,False,False,Gas
114962,b8bb88cbcc0dc3ca189cc2bbfcec6859de8dd540,2017-07-14 11:30:00 UTC,cool,hold,699,760,760,RI,Little Compton,5,False,False,False,Gas
114963,b8bb88cbcc0dc3ca189cc2bbfcec6859de8dd540,2017-07-14 12:20:00 UTC,cool,hold,702,760,760,RI,Little Compton,5,False,False,False,Gas
114964,bea5fbfc4bfa7c81ad92dfd7ce4fb94139610810,2017-07-29 11:30:00 UTC,cool,hold,755,760,760,RI,Bristol,0,False,False,False,Gas


In [103]:
# Label every row with its year and month; both are used as group keys when aggregating.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jul_2017_ave.to_csv(
    path_or_buf="data/day/RI/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the July 2018 "day" CSV for RI into a DataFrame.
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2018-jul-day-RI.csv"
)

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2018.drop(
    jul_2018[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,94a8d4525aac534159258ac90af3e45fa7819bb2,2018-07-15 19:20:00 UTC,cool,hold,755,782,782,RI,Warwick,15,False,False,False,Gas
1,6a30712b3b65b82f38b33a47059c2da1597f2d88,2018-07-11 16:50:00 UTC,cool,hold,736,735,735,RI,cranston,5,False,False,False,Gas
2,7357379d344c770382ed31acdade35404877f5bc,2018-07-27 07:35:00 UTC,cool,hold,715,715,715,RI,Saunderstown,0,False,False,False,Gas
3,6a30712b3b65b82f38b33a47059c2da1597f2d88,2018-07-29 18:10:00 UTC,cool,hold,740,735,735,RI,cranston,5,False,False,False,Gas
4,7357379d344c770382ed31acdade35404877f5bc,2018-07-27 08:20:00 UTC,cool,hold,716,715,715,RI,Saunderstown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180180,dd629493a501bc7af00f4da1aee751dd388b98bb,2018-07-25 13:00:00 UTC,cool,hold,705,760,760,RI,Warwick,27,True,False,False,Gas
180181,dd629493a501bc7af00f4da1aee751dd388b98bb,2018-07-30 11:35:00 UTC,cool,hold,714,760,760,RI,Warwick,27,True,False,False,Gas
180182,dd629493a501bc7af00f4da1aee751dd388b98bb,2018-07-22 08:50:00 UTC,cool,hold,705,760,760,RI,Warwick,27,True,False,False,Gas
180183,dd629493a501bc7af00f4da1aee751dd388b98bb,2018-07-17 10:00:00 UTC,cool,hold,683,760,760,RI,Warwick,27,True,False,False,Gas


In [109]:
# Label every row with its year and month; both are used as group keys when aggregating.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jul_2018_ave.to_csv(
    path_or_buf="data/day/RI/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the July 2019 "day" CSV for RI into a DataFrame.
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2019-jul-day-RI.csv"
)

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2019.drop(
    jul_2019[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,10aa1f10b029b57b052031927fadaf47f789e45f,2019-07-03 19:45:00 UTC,auto,auto,766,760,685,RI,Narragansett,0,False,False,False,Gas
1,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2019-07-23 15:30:00 UTC,auto,hold,696,695,645,RI,East Greenwich,50,False,False,False,Gas
2,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2019-07-21 18:15:00 UTC,auto,hold,721,715,665,RI,East Greenwich,50,False,False,False,Gas
3,6a30712b3b65b82f38b33a47059c2da1597f2d88,2019-07-06 16:55:00 UTC,cool,hold,730,725,725,RI,cranston,5,False,False,False,Gas
4,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2019-07-23 12:30:00 UTC,auto,hold,696,695,645,RI,East Greenwich,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248397,f6500193235d26119ad38051383b8feb9669fe00,2019-07-15 18:40:00 UTC,cool,auto,768,760,760,RI,Exeter,30,True,False,False,Gas
248398,f6500193235d26119ad38051383b8feb9669fe00,2019-07-18 15:45:00 UTC,cool,hold,741,760,760,RI,Exeter,30,True,False,False,Gas
248399,f6500193235d26119ad38051383b8feb9669fe00,2019-07-15 18:55:00 UTC,cool,auto,768,760,760,RI,Exeter,30,True,False,False,Gas
248400,f6500193235d26119ad38051383b8feb9669fe00,2019-07-18 16:15:00 UTC,cool,hold,741,760,760,RI,Exeter,30,True,False,False,Gas


In [115]:
# Label every row with its year and month; both are used as group keys when aggregating.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jul_2019_ave.to_csv(
    path_or_buf="data/day/RI/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the July 2020 "day" CSV for RI into a DataFrame.
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2020-jul-day-RI.csv"
)

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2020.drop(
    jul_2020[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,97163f8c2bcfd9b454062e96af33d88745ce728e,2020-07-19 18:20:00 UTC,cool,hold,717,717,717,RI,Barrington,55,False,False,False,Gas
1,38290a38cdd05b4b82031fc3e951665416534641,2020-07-31 10:10:00 UTC,cool,hold,724,717,717,RI,Coventry,15,False,False,False,Gas
2,6a30712b3b65b82f38b33a47059c2da1597f2d88,2020-07-15 15:30:00 UTC,cool,hold,720,725,725,RI,cranston,5,False,False,False,Gas
4,6a30712b3b65b82f38b33a47059c2da1597f2d88,2020-07-10 17:10:00 UTC,cool,hold,732,732,732,RI,cranston,5,False,False,False,Gas
7,3270d6c6f56434923d63bbd951d1674120e5a83b,2020-07-09 15:55:00 UTC,cool,hold,717,725,725,RI,Narragansett,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223105,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-07-02 19:25:00 UTC,cool,auto,742,740,760,RI,Warwick,20,True,False,False,Gas
223106,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-07-20 16:55:00 UTC,cool,auto,757,760,760,RI,Warwick,20,True,False,False,Gas
223107,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-07-29 16:55:00 UTC,cool,auto,710,710,760,RI,Warwick,20,True,False,False,Gas
223108,e9bc20103b38e9881626222e6cbac9f154f887f2,2020-07-20 18:35:00 UTC,cool,auto,757,760,760,RI,Warwick,20,True,False,False,Gas


In [121]:
# Label every row with its year and month; both are used as group keys when aggregating.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jul_2020_ave.to_csv(
    path_or_buf="data/day/RI/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the July 2021 "day" CSV for RI into a DataFrame.
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/RI-day/2021-jul-day-RI.csv"
)

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: drop (in place) every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2021.drop(
    jul_2021[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a30712b3b65b82f38b33a47059c2da1597f2d88,2021-07-10 12:25:00 UTC,cool,hold,715,717,717,RI,cranston,5,False,False,False,Gas
1,e2259433d9e1d90ab5604c0ad5ac50f28b965422,2021-07-26 18:25:00 UTC,auto,hold,777,767,648,RI,Middletown,20,False,False,False,Gas
2,8fe5e33c7683f3cee1a182d5564f49b9519b8d5e,2021-07-25 16:40:00 UTC,cool,hold,726,744,744,RI,Warwick,15,False,False,False,Gas
3,17be78eabd7988d37db06eca5fd2ca282c29ec9e,2021-07-21 17:35:00 UTC,auto,hold,781,780,642,RI,Middletown,20,False,False,False,Gas
4,8fe5e33c7683f3cee1a182d5564f49b9519b8d5e,2021-07-10 14:15:00 UTC,cool,hold,709,704,704,RI,Warwick,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124600,dd629493a501bc7af00f4da1aee751dd388b98bb,2021-07-29 19:50:00 UTC,cool,hold,719,760,760,RI,Warwick,27,True,False,False,Gas
124601,e9bc20103b38e9881626222e6cbac9f154f887f2,2021-07-29 19:45:00 UTC,cool,hold,740,760,760,RI,Warwick,20,True,False,False,Gas
124602,e9bc20103b38e9881626222e6cbac9f154f887f2,2021-07-29 19:30:00 UTC,cool,hold,740,760,760,RI,Warwick,20,True,False,False,Gas
124603,e9bc20103b38e9881626222e6cbac9f154f887f2,2021-07-29 19:10:00 UTC,cool,hold,739,760,760,RI,Warwick,20,True,False,False,Gas


In [127]:
# Label every row with its year and month; both are used as group keys when aggregating.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" so the aggregated output is self-describing.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps text columns such as date_time out of the mean; pandas >= 2.0
# raises TypeError on them instead of silently dropping them as older versions did.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Write the monthly averages to disk; the group keys are in the index, so the index is kept.
jul_2021_ave.to_csv(
    path_or_buf="data/day/RI/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Collect the per-year CSV files for this month. Sorted because os.listdir returns
# entries in arbitrary order and the combined file should come out the same every run.
files = sorted(f for f in os.listdir("data/day/RI/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then concatenate once at the end. Growing the frame
# with pd.concat inside the loop re-copies every accumulated row per iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/RI/jul/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when the
# directory held no CSV files (the same result the original loop produced).
RI_jul = pd.concat(frames) if frames else pd.DataFrame()

RI_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0499a1d83aac1e394606c0a1666889a43e88a731,jul,2017,cool,auto,Rumford,728.038696,729.438261,705.700000,0.0,True,False,False
1,0499a1d83aac1e394606c0a1666889a43e88a731,jul,2017,cool,hold,Rumford,721.590989,721.730565,721.717314,0.0,True,False,False
2,085d6b55257709a5e3464f47c884726839d32297,jul,2017,cool,auto,Warwick,744.843318,745.525346,666.172811,0.0,False,False,False
3,085d6b55257709a5e3464f47c884726839d32297,jul,2017,cool,hold,Warwick,731.270000,737.000000,737.000000,0.0,False,False,False
4,0acff50a5a8097d75e4524bdcd367521bda0cf1a,jul,2017,cool,auto,Barrington,743.059783,725.592391,709.070652,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,f03399e99ea6fe83b27f31df27484274083f21d0,jul,2021,auto,hold,South Kingstown,711.555556,710.166667,660.111111,25.0,False,False,False
116,f6500193235d26119ad38051383b8feb9669fe00,jul,2021,cool,hold,Exeter,709.114943,711.459770,711.459770,30.0,True,False,False
117,f65284597a5041dd2b09d8250972a11651305201,jul,2021,cool,hold,Warwick,712.941685,716.043197,715.993521,27.0,False,False,False
118,f709a3ee3134aec7acb6df637f8bb81f0d85e2fd,jul,2021,cool,hold,East Greenwich,729.277778,720.055556,719.888889,10.0,False,False,False


In [133]:
# Write the combined July frame; the per-file row index is not written.
RI_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/RI/RI_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 day readings for RI from the exported query CSV
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2017-aug-day-RI.csv")

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
aug_2017.drop(aug_2017[is_outlier].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fdc84179acf8e9c036a4b3e58bc8b11c0cac8f77,2017-08-06 16:50:00 UTC,cool,hold,798,800,800,RI,South Kingstown,0,False,False,False,Gas
1,fdc84179acf8e9c036a4b3e58bc8b11c0cac8f77,2017-08-21 13:45:00 UTC,cool,hold,753,721,721,RI,South Kingstown,0,False,False,False,Gas
3,fdc84179acf8e9c036a4b3e58bc8b11c0cac8f77,2017-08-06 10:40:00 UTC,cool,hold,767,800,800,RI,South Kingstown,0,False,False,False,Gas
5,fdc84179acf8e9c036a4b3e58bc8b11c0cac8f77,2017-08-06 13:50:00 UTC,cool,hold,779,800,800,RI,South Kingstown,0,False,False,False,Gas
7,c89cedc1970a3c7c112f0c3ef6b9d44c1580ae21,2017-08-03 17:30:00 UTC,cool,hold,718,715,715,RI,North Providence,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
135434,3baa77b16ee366f285f8ba8b4e5faf7545023ed3,2017-08-12 17:00:00 UTC,cool,hold,700,700,700,RI,Cumberland,20,True,False,False,Gas
135435,3baa77b16ee366f285f8ba8b4e5faf7545023ed3,2017-08-12 18:25:00 UTC,cool,hold,703,700,700,RI,Cumberland,20,True,False,False,Gas
135436,3baa77b16ee366f285f8ba8b4e5faf7545023ed3,2017-08-04 17:20:00 UTC,cool,auto,700,700,700,RI,Cumberland,20,True,False,False,Gas
135437,3baa77b16ee366f285f8ba8b4e5faf7545023ed3,2017-08-01 13:40:00 UTC,cool,auto,717,730,700,RI,Cumberland,20,True,False,False,Gas


In [136]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_labels)

In [138]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
aug_2017_ave = (
    aug_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [139]:
# Save the August 2017 averages; the index is written as well because it
# carries the group keys produced by the groupby.
aug_2017_ave.to_csv(path_or_buf="data/day/RI/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the August 2018 day readings for RI from the exported query CSV
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2018-aug-day-RI.csv")

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
aug_2018.drop(aug_2018[is_outlier].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,813cd40834f068eb26d0b1632151d709d04cdf52,2018-08-14 19:55:00 UTC,cool,hold,788,786,786,RI,North Kingstown,0,True,False,False,Gas
3,942fe98b0d512485bd847a6916d3f7420ca3ab5d,2018-08-07 11:40:00 UTC,auto,hold,731,730,641,RI,North Smithfield,10,False,False,True,Electric
6,46df39f6235465faf9a7dd81f7e14f9fe3de6954,2018-08-15 13:50:00 UTC,cool,hold,733,737,737,RI,Narragansett,25,False,False,False,Gas
7,6a30712b3b65b82f38b33a47059c2da1597f2d88,2018-08-09 19:05:00 UTC,cool,hold,729,725,725,RI,cranston,5,False,False,False,Gas
8,6a30712b3b65b82f38b33a47059c2da1597f2d88,2018-08-27 18:35:00 UTC,cool,hold,739,735,735,RI,cranston,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
186289,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2018-08-26 13:20:00 UTC,cool,hold,683,700,700,RI,Cranston,50,False,False,True,Electric
186290,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2018-08-04 18:45:00 UTC,cool,hold,706,700,700,RI,Cranston,50,False,False,True,Electric
186291,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2018-08-04 19:50:00 UTC,cool,hold,702,700,700,RI,Cranston,50,False,False,True,Electric
186292,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2018-08-18 17:20:00 UTC,cool,hold,704,700,700,RI,Cranston,50,False,False,True,Electric


In [142]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_labels)

In [144]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
aug_2018_ave = (
    aug_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [145]:
# Save the August 2018 averages; the index is written as well because it
# carries the group keys produced by the groupby.
aug_2018_ave.to_csv(path_or_buf="data/day/RI/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the August 2019 day readings for RI from the exported query CSV
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2019-aug-day-RI.csv")

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
aug_2019.drop(aug_2019[is_outlier].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a30712b3b65b82f38b33a47059c2da1597f2d88,2019-08-03 19:50:00 UTC,cool,hold,731,725,725,RI,cranston,5,False,False,False,Gas
1,6a30712b3b65b82f38b33a47059c2da1597f2d88,2019-08-28 17:15:00 UTC,cool,hold,724,725,725,RI,cranston,5,False,False,False,Gas
2,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2019-08-06 13:30:00 UTC,auto,hold,696,695,645,RI,East Greenwich,50,False,False,False,Gas
3,bafe7824ca9bc4e156a45cb524c69b1b2827f381,2019-08-05 13:35:00 UTC,auto,hold,699,695,645,RI,East Greenwich,50,False,False,False,Gas
4,194efb4536d086b2554cbce579b9bed2a10a80d6,2019-08-24 15:15:00 UTC,cool,hold,742,771,719,RI,Cumberland,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203740,f6500193235d26119ad38051383b8feb9669fe00,2019-08-31 15:45:00 UTC,cool,hold,743,760,760,RI,Exeter,30,True,False,False,Gas
203741,f6500193235d26119ad38051383b8feb9669fe00,2019-08-31 15:30:00 UTC,cool,hold,742,760,760,RI,Exeter,30,True,False,False,Gas
203742,f6500193235d26119ad38051383b8feb9669fe00,2019-08-31 15:25:00 UTC,cool,hold,741,760,760,RI,Exeter,30,True,False,False,Gas
203743,f6500193235d26119ad38051383b8feb9669fe00,2019-08-31 17:15:00 UTC,cool,hold,754,760,760,RI,Exeter,30,True,False,False,Gas


In [148]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_labels)

In [150]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
aug_2019_ave = (
    aug_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [151]:
# Save the August 2019 averages; the index is written as well because it
# carries the group keys produced by the groupby.
aug_2019_ave.to_csv(path_or_buf="data/day/RI/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the August 2020 day readings for RI from the exported query CSV
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2020-aug-day-RI.csv")

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
aug_2020.drop(aug_2020[is_outlier].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,194efb4536d086b2554cbce579b9bed2a10a80d6,2020-08-23 19:25:00 UTC,cool,hold,783,780,719,RI,Cumberland,0,False,False,False,Gas
1,196e996641e82efd4749acda6ffbc674009f4c05,2020-08-01 19:20:00 UTC,cool,hold,726,719,719,RI,Coventry,10,True,False,False,Gas
2,bdfc4577889b0581a73c316172c17a98c6cab878,2020-08-13 17:20:00 UTC,cool,hold,766,735,735,RI,Cranston,85,False,False,False,Gas
3,6a30712b3b65b82f38b33a47059c2da1597f2d88,2020-08-01 18:45:00 UTC,cool,hold,729,725,725,RI,cranston,5,False,False,False,Gas
4,6a30712b3b65b82f38b33a47059c2da1597f2d88,2020-08-24 16:15:00 UTC,cool,hold,728,725,725,RI,cranston,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207783,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2020-08-06 11:50:00 UTC,cool,auto,704,700,700,RI,Cranston,50,False,False,True,Electric
207784,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2020-08-30 16:00:00 UTC,cool,auto,710,700,700,RI,Cranston,50,False,False,True,Electric
207785,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2020-08-22 14:15:00 UTC,cool,auto,703,700,700,RI,Cranston,50,False,False,True,Electric
207786,3fe115f64bbb07eee3eb7b153cef47dd22805e51,2020-08-17 09:30:00 UTC,cool,auto,700,700,700,RI,Cranston,50,False,False,True,Electric


In [154]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_labels)

In [156]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
aug_2020_ave = (
    aug_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [157]:
# Save the August 2020 averages; the index is written as well because it
# carries the group keys produced by the groupby.
aug_2020_ave.to_csv(path_or_buf="data/day/RI/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export them as a single combined CSV for the month

In [158]:
# Names of the CSV files found in the August folder
# (expected to be the per-year averages exported above).
files = list(filter(lambda entry: entry.endswith(".csv"), os.listdir("data/day/RI/aug/")))

In [159]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file for the month, then concatenate once instead of
# re-copying the growing frame on each iteration. The names are sorted because
# os.listdir() returns them in arbitrary order, which would otherwise make the
# row order of the combined CSV depend on the platform.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/RI/aug/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
RI_aug = pd.concat(frames) if frames else pd.DataFrame()

RI_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01bf3b3095111308e495a2e3010d8476e1c64af5,aug,2017,cool,hold,west Kingston,718.687500,710.875000,707.750000,6.0,False,False,False
1,0499a1d83aac1e394606c0a1666889a43e88a731,aug,2017,cool,auto,Rumford,719.604074,718.382559,715.588160,0.0,True,False,False
2,0499a1d83aac1e394606c0a1666889a43e88a731,aug,2017,cool,hold,Rumford,728.862069,728.755698,728.714202,0.0,True,False,False
3,0514eecfa7daa0b89d2395df124ef11397770f6f,aug,2017,cool,auto,Richmond,729.400000,761.600000,723.400000,10.0,False,False,False
4,0514eecfa7daa0b89d2395df124ef11397770f6f,aug,2017,cool,hold,Richmond,717.643505,772.541793,739.252769,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,f709a3ee3134aec7acb6df637f8bb81f0d85e2fd,aug,2020,cool,auto,East Greenwich,698.000000,710.000000,710.000000,10.0,False,False,False
212,f709a3ee3134aec7acb6df637f8bb81f0d85e2fd,aug,2020,cool,hold,East Greenwich,693.519231,690.384615,690.384615,10.0,False,False,False
213,f9588bea77a2c55f88af50b9a84848ae4aa67c2a,aug,2020,heat,auto,Newport,728.645783,779.032530,679.934940,117.0,False,False,False
214,fe17a4476d8d563cfec5bafc834b18904ea50342,aug,2020,cool,auto,East Providence,725.373134,720.626866,621.910448,100.0,False,False,False


In [160]:
# Write the combined August frame; the per-file row index is not written.
RI_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/RI/RI_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 day readings for RI from the exported query CSV
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2017-dec-day-RI.csv")

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
dec_2017.drop(dec_2017[is_outlier].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
3,99e1203de45567a9a837a9b66f14f4fb1f752c76,2017-12-12 12:50:00 UTC,heat,hold,681,655,655,RI,East Greenwich,30,False,False,False,Gas
8,23fbaf1e2a3e91fb9c6a1323cd178f51894c69d3,2017-12-20 18:25:00 UTC,auto,hold,662,715,665,RI,Cumberland,0,False,False,False,Gas
15,23fbaf1e2a3e91fb9c6a1323cd178f51894c69d3,2017-12-21 15:50:00 UTC,auto,hold,678,715,665,RI,Cumberland,0,False,False,False,Gas
16,813cd40834f068eb26d0b1632151d709d04cdf52,2017-12-25 13:55:00 UTC,auto,hold,660,786,651,RI,North Kingstown,0,True,False,False,Gas
20,4b6d7048a18b97d34997e9f6be2e5b6937f83a8b,2017-12-10 14:25:00 UTC,heat,hold,668,653,644,RI,Barrington,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178397,6f62e4fd1f9a0d2f2a3b14371f53f5cbc3cfba5a,2017-12-30 19:40:00 UTC,cool,hold,736,760,760,RI,Providence,90,True,False,False,Gas
178398,c3700fb964f85fd8928ccd0d68588866c5e44ded,2017-12-07 17:25:00 UTC,heat,hold,708,760,760,RI,Warwick,40,False,False,False,Gas
178399,c3c790efccfe0d76647fd2836b7dcfb98a63df76,2017-12-29 17:20:00 UTC,heat,hold,726,760,760,RI,Cumberland,5,False,False,False,Gas
178400,c3c790efccfe0d76647fd2836b7dcfb98a63df76,2017-12-29 17:10:00 UTC,heat,hold,710,760,760,RI,Cumberland,5,False,False,False,Gas


In [163]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_labels)

In [165]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
dec_2017_ave = (
    dec_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [166]:
# Save the December 2017 averages; the index is written as well because it
# carries the group keys produced by the groupby.
dec_2017_ave.to_csv(path_or_buf="data/day/RI/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the December 2018 day readings for RI from the exported query CSV
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2018-dec-day-RI.csv")

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
dec_2018.drop(dec_2018[is_outlier].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,813cd40834f068eb26d0b1632151d709d04cdf52,2018-12-25 13:45:00 UTC,heat,hold,655,664,664,RI,North Kingstown,0,True,False,False,Gas
1,3677d45087c07eb494e9e694b7f39acd726da3f1,2018-12-24 15:45:00 UTC,heat,hold,666,661,661,RI,East Providence,80,False,False,False,Gas
2,d5c979efe0d7ff7157ecd9e13492244cc3d9032f,2018-12-31 19:00:00 UTC,heat,hold,720,715,707,RI,Warwick,30,True,False,False,Gas
3,3677d45087c07eb494e9e694b7f39acd726da3f1,2018-12-25 17:30:00 UTC,heat,hold,706,661,661,RI,East Providence,80,False,False,False,Gas
4,ac0c54a9b8ef724d22170d864d06e8428609127f,2018-12-01 12:55:00 UTC,heat,hold,594,650,603,RI,Charlestown,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198193,b3056c664d197b3a00ec3867e86cab6a3a96177e,2018-12-26 15:30:00 UTC,heat,hold,759,760,760,RI,Providence,0,True,False,True,Electric
198194,b3056c664d197b3a00ec3867e86cab6a3a96177e,2018-12-26 13:45:00 UTC,heat,hold,732,760,760,RI,Providence,0,True,False,True,Electric
198195,b3056c664d197b3a00ec3867e86cab6a3a96177e,2018-12-26 16:15:00 UTC,heat,hold,756,760,760,RI,Providence,0,True,False,True,Electric
198196,b3056c664d197b3a00ec3867e86cab6a3a96177e,2018-12-26 13:10:00 UTC,heat,hold,736,760,760,RI,Providence,0,True,False,True,Electric


In [169]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_labels)

In [171]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
dec_2018_ave = (
    dec_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [172]:
# Save the December 2018 averages; the index is written as well because it
# carries the group keys produced by the groupby.
dec_2018_ave.to_csv(path_or_buf="data/day/RI/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the December 2019 day readings for RI from the exported query CSV
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2019-dec-day-RI.csv")

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
dec_2019.drop(dec_2019[is_outlier].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c945d17d8e98c2bbfdefa977dd6706b54fbeaa13,2019-12-31 18:45:00 UTC,heat,hold,712,719,719,RI,Providence,80,False,False,False,Gas
1,bb317a2284aed341541e70ba4896bc328ac7998b,2019-12-06 08:10:00 UTC,auto,hold,655,717,657,RI,Pawtucket,105,False,False,False,Gas
2,bb317a2284aed341541e70ba4896bc328ac7998b,2019-12-24 16:00:00 UTC,auto,hold,656,717,657,RI,Pawtucket,105,False,False,False,Gas
3,897e4f1b6e186b2c8f1c168f918bfdfd09fd38e1,2019-12-03 17:00:00 UTC,heat,hold,684,689,689,RI,Middletown,70,False,False,False,Gas
4,bb317a2284aed341541e70ba4896bc328ac7998b,2019-12-16 07:40:00 UTC,auto,hold,663,717,667,RI,Pawtucket,105,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220066,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2019-12-29 15:20:00 UTC,auto,auto,675,800,750,RI,Cumberland,9,True,False,True,Electric
220067,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2019-12-29 17:45:00 UTC,auto,auto,725,800,750,RI,Cumberland,9,True,False,True,Electric
220068,c945d17d8e98c2bbfdefa977dd6706b54fbeaa13,2019-12-21 17:40:00 UTC,heat,hold,718,750,750,RI,Providence,80,False,False,False,Gas
220069,9d07a89fa0a4c926c307d011e029b0bcdc9d51ee,2019-12-07 16:10:00 UTC,auto,hold,654,810,760,RI,Cumberland,9,True,False,True,Electric


In [175]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_labels)

In [177]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
dec_2019_ave = (
    dec_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [178]:
# Save the December 2019 averages; the index is written as well because it
# carries the group keys produced by the groupby.
dec_2019_ave.to_csv(path_or_buf="data/day/RI/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the December 2020 day readings for RI from the exported query CSV
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/RI-day/2020-dec-day-RI.csv")

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the following cells keep working on the same frame object.
dec_2020.drop(dec_2020[is_outlier].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,10aa1f10b029b57b052031927fadaf47f789e45f,2020-12-16 13:15:00 UTC,heat,hold,700,696,696,RI,Narragansett,0,False,False,False,Gas
1,c945d17d8e98c2bbfdefa977dd6706b54fbeaa13,2020-12-21 17:45:00 UTC,heat,hold,727,729,729,RI,Providence,80,False,False,False,Gas
2,40346d5727db19d8c34d86a5898f58d323698756,2020-12-08 11:10:00 UTC,heat,auto,634,677,673,RI,Coventry,30,True,False,False,Gas
3,c945d17d8e98c2bbfdefa977dd6706b54fbeaa13,2020-12-17 13:35:00 UTC,heat,hold,712,719,719,RI,Providence,80,False,False,False,Gas
5,94a8d4525aac534159258ac90af3e45fa7819bb2,2020-12-25 10:45:00 UTC,heat,hold,677,650,622,RI,Warwick,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
200758,319d959abbee94a38cba224532f6e2c84abf3991,2020-12-18 18:05:00 UTC,heat,auto,757,760,760,RI,North Providence,65,False,False,False,Gas
200759,319d959abbee94a38cba224532f6e2c84abf3991,2020-12-14 19:30:00 UTC,heat,auto,758,760,760,RI,North Providence,65,False,False,False,Gas
200760,ef4b276779457e8e202b48c505042f4347cf85ce,2020-12-08 11:55:00 UTC,heat,hold,700,760,760,RI,North Providence,65,False,False,False,Gas
200761,319d959abbee94a38cba224532f6e2c84abf3991,2020-12-18 16:25:00 UTC,heat,auto,758,760,760,RI,North Providence,65,False,False,False,Gas


In [181]:
# Label every row with its period (both stored as strings); Year and Month
# are used as grouping keys when the averages are computed.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_labels)

In [183]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is passed explicitly: since pandas 2.0 mean() no longer
# skips text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# on its own and raises TypeError instead.
dec_2020_ave = (
    dec_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [184]:
# Save the December 2020 averages; the index is written as well because it
# carries the group keys produced by the groupby.
dec_2020_ave.to_csv(path_or_buf="data/day/RI/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export them as a single combined CSV for the month

In [185]:
# Names of the CSV files found in the December folder
# (expected to be the per-year averages exported above).
files = list(filter(lambda entry: entry.endswith(".csv"), os.listdir("data/day/RI/dec/")))

In [186]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file for the month, then concatenate once instead of
# re-copying the growing frame on each iteration. The names are sorted because
# os.listdir() returns them in arbitrary order, which would otherwise make the
# row order of the combined CSV depend on the platform.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/RI/dec/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
RI_dec = pd.concat(frames) if frames else pd.DataFrame()

RI_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0070cd978fc0580290524724554baacf937fdfde,dec,2017,heat,auto,Tiverton,633.041667,650.000000,650.000000,0.0,False,False,False
1,0070cd978fc0580290524724554baacf937fdfde,dec,2017,heat,hold,Tiverton,696.715686,700.000000,700.000000,0.0,False,False,False
2,01bf3b3095111308e495a2e3010d8476e1c64af5,dec,2017,heat,auto,west Kingston,644.000000,670.000000,670.000000,6.0,False,False,False
3,01bf3b3095111308e495a2e3010d8476e1c64af5,dec,2017,heat,hold,west Kingston,656.074074,673.666667,668.148148,6.0,False,False,False
4,04ab0545019eb9f69971a49695bf5ba5d3d1569e,dec,2017,heat,auto,West Warwick,698.213930,752.537313,680.228856,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
231,f65284597a5041dd2b09d8250972a11651305201,dec,2020,heat,auto,Warwick,689.205071,691.702402,680.436388,27.0,False,False,False
232,f65284597a5041dd2b09d8250972a11651305201,dec,2020,heat,hold,Warwick,710.241509,691.516226,691.508679,27.0,False,False,False
233,f6b2a6725b65b91843c4dc49c0c43c0f503fa0b1,dec,2020,auto,auto,East Greenwich,645.500000,730.000000,680.000000,50.0,False,False,False
234,f709a3ee3134aec7acb6df637f8bb81f0d85e2fd,dec,2020,heat,hold,East Greenwich,671.500000,681.250000,681.250000,10.0,False,False,False


In [187]:
# Write the combined December frame; the per-file row index is not written.
RI_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/RI/RI_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export them as a single combined CSV for the state

In [188]:
# Names of the CSV files found in the RI output folder
# (expected to be the combined month files exported above).
files = list(filter(lambda entry: entry.endswith(".csv"), os.listdir("Scraper_Output/State_Month_Day/RI/")))

In [189]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every combined month file for the state, then concatenate once instead
# of re-copying the growing frame on each iteration. The names are sorted
# because os.listdir() returns them in arbitrary order, which would otherwise
# make the row order of the combined CSV depend on the platform.
frames = []
for file in sorted(files):
    df = pd.read_csv("Scraper_Output/State_Month_Day/RI/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
RI_all = pd.concat(frames) if frames else pd.DataFrame()

RI_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01bf3b3095111308e495a2e3010d8476e1c64af5,aug,2017,cool,hold,west Kingston,718.687500,710.875000,707.750000,6.0,False,False,False
1,0499a1d83aac1e394606c0a1666889a43e88a731,aug,2017,cool,auto,Rumford,719.604074,718.382559,715.588160,0.0,True,False,False
2,0499a1d83aac1e394606c0a1666889a43e88a731,aug,2017,cool,hold,Rumford,728.862069,728.755698,728.714202,0.0,True,False,False
3,0514eecfa7daa0b89d2395df124ef11397770f6f,aug,2017,cool,auto,Richmond,729.400000,761.600000,723.400000,10.0,False,False,False
4,0514eecfa7daa0b89d2395df124ef11397770f6f,aug,2017,cool,hold,Richmond,717.643505,772.541793,739.252769,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
945,f6500193235d26119ad38051383b8feb9669fe00,jun,2021,heat,hold,Exeter,669.509615,660.000000,660.000000,30.0,True,False,False
946,f65284597a5041dd2b09d8250972a11651305201,jun,2021,cool,hold,Warwick,713.568016,718.213670,718.213670,27.0,False,False,False
947,f6b2a6725b65b91843c4dc49c0c43c0f503fa0b1,jun,2021,auto,hold,East Greenwich,701.833333,700.000000,640.000000,50.0,False,False,False
948,f709a3ee3134aec7acb6df637f8bb81f0d85e2fd,jun,2021,cool,hold,East Greenwich,734.897959,733.367347,733.367347,10.0,False,False,False


In [190]:
# Write the state-wide frame one level above the RI folder, so it is not
# picked up when that folder is listed; the row index is not written.
RI_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/RI_all_day.csv", index=False, header=True)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL
# queries: print the distinct ProvinceState values of every monthly frame.

# Last year loaded for each month, listed in the order the frames are reported.
last_year_by_month = {"jan": 2021, "feb": 2021, "jun": 2021, "jul": 2021, "aug": 2020, "dec": 2020}

for month, last_year in last_year_by_month.items():
    for year in range(2017, last_year + 1):
        label = f"{month}_{year}"
        # Each monthly frame is a notebook-level variable named <month>_<year>.
        frame = globals()[label]
        print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['RI']
Unique jan_2018: ['RI']
Unique jan_2019: ['RI']
Unique jan_2020: ['RI']
Unique jan_2021: ['RI']
Unique feb_2017: ['RI']
Unique feb_2018: ['RI']
Unique feb_2019: ['RI']
Unique feb_2020: ['RI']
Unique feb_2021: ['RI']
Unique jun_2017: ['RI']
Unique jun_2018: ['RI']
Unique jun_2019: ['RI']
Unique jun_2020: ['RI']
Unique jun_2021: ['RI']
Unique jul_2017: ['RI']
Unique jul_2018: ['RI']
Unique jul_2019: ['RI']
Unique jul_2020: ['RI']
Unique jul_2021: ['RI']
Unique aug_2017: ['RI']
Unique aug_2018: ['RI']
Unique aug_2019: ['RI']
Unique aug_2020: ['RI']
Unique dec_2017: ['RI']
Unique dec_2018: ['RI']
Unique dec_2019: ['RI']
Unique dec_2020: ['RI']
