# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the ME "day" CSV for January 2017 into a DataFrame
jan_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2017-jan-day-ME.csv"
)

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = jan_2017['TemperatureExpectedCool']
heat_setpoint = jan_2017['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
jan_2017 = jan_2017.loc[~is_outlier].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-01-14T16:00:00Z,heat,hold,666,660,660,ME,North Yarmouth,0,False,False,False,Gas
1,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-01-14T16:40:00Z,heat,hold,653,660,660,ME,North Yarmouth,0,False,False,False,Gas
2,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-01-26T15:50:00Z,heat,hold,676,660,660,ME,North Yarmouth,0,False,False,False,Gas
3,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-01-29T15:30:00Z,heat,auto,710,748,660,ME,North Yarmouth,0,False,False,False,Gas
4,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-01-14T17:10:00Z,heat,hold,650,660,660,ME,North Yarmouth,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11584,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-01-28T16:20:00Z,heat,hold,719,720,720,ME,Greenville,35,False,False,False,Gas
11585,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-01-28T13:55:00Z,heat,hold,707,720,720,ME,Greenville,35,False,False,False,Gas
11586,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-01-28T17:35:00Z,heat,hold,715,720,720,ME,Greenville,35,False,False,False,Gas
11587,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-01-29T12:30:00Z,heat,hold,733,720,720,ME,Greenville,35,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(mapper=ave_column_names, axis="columns")

In [6]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2017_ave = jan_2017.groupby(group_keys).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
02a6a361c70d1052cbb1d7d80490d8326ae8e684,Jan,2017,heat,auto,Naples,618.355263,664.092105,662.434211,15.0,False,False,False
02a6a361c70d1052cbb1d7d80490d8326ae8e684,Jan,2017,heat,hold,Naples,638.916462,656.786241,638.945946,15.0,False,False,False
071d40a2ebc85623a1b5da94a1ee4af028115825,Jan,2017,heat,auto,North Yarmouth,647.962963,650.0,650.0,25.0,False,False,False
071d40a2ebc85623a1b5da94a1ee4af028115825,Jan,2017,heat,hold,North Yarmouth,685.066667,669.333333,670.333333,25.0,False,False,False
239e363dc2695713ba155ce9a713fdff9c1ba533,Jan,2017,heat,auto,Glenburn,705.727273,703.909091,703.090909,40.0,False,False,False
239e363dc2695713ba155ce9a713fdff9c1ba533,Jan,2017,heat,hold,Glenburn,718.675676,718.567568,718.162162,40.0,False,False,False
4c07c4cdb374e0cf0049e8af72014378ff2ac2c8,Jan,2017,heat,hold,Portland,647.795918,651.159184,649.930612,55.0,False,False,False
6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,Jan,2017,heat,auto,North Yarmouth,675.238754,696.273356,695.0,0.0,False,False,False
6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,Jan,2017,heat,hold,North Yarmouth,674.6416,685.4056,684.2096,0.0,False,False,False
900d136751fd54281c19a951b6e6c4f605fc7d99,Jan,2017,heat,auto,Naples,658.765487,757.743363,669.079646,15.0,False,False,False


In [7]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/ME/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the ME "day" CSV for January 2018 into a DataFrame
jan_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2018-jan-day-ME.csv"
)

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = jan_2018['TemperatureExpectedCool']
heat_setpoint = jan_2018['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
jan_2018 = jan_2018.loc[~is_outlier].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-01-27T17:25:00Z,heat,hold,737,750,750,ME,Westbrook,0,True,False,False,Gas
3,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-01-27T16:55:00Z,heat,hold,730,750,750,ME,Westbrook,0,True,False,False,Gas
4,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-01-19T13:50:00Z,heat,hold,660,703,703,ME,Westbrook,0,True,False,False,Gas
5,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-01-27T16:35:00Z,heat,hold,723,750,750,ME,Westbrook,0,True,False,False,Gas
6,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-01-19T14:10:00Z,heat,hold,672,703,703,ME,Westbrook,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24704,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-01-07T13:55:00Z,heat,auto,737,740,740,ME,South Portland,75,False,False,False,Gas
24705,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-01-07T14:35:00Z,heat,auto,743,740,740,ME,South Portland,75,False,False,False,Gas
24706,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-01-14T18:25:00Z,heat,hold,708,740,740,ME,South Portland,75,False,False,False,Gas
24707,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-01-07T14:40:00Z,heat,auto,738,740,740,ME,South Portland,75,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(mapper=ave_column_names, axis="columns")

In [12]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2018_ave = jan_2018.groupby(group_keys).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/ME/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the ME "day" CSV for January 2019 into a DataFrame
jan_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2019-jan-day-ME.csv"
)

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = jan_2019['TemperatureExpectedCool']
heat_setpoint = jan_2019['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
jan_2019 = jan_2019.loc[~is_outlier].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b8befed1a37cff3660e8cf283e30c8bfa82d79b4,2019-01-27 17:55:00 UTC,heat,hold,715,718,718,ME,Portland,50,True,False,False,Gas
1,b8befed1a37cff3660e8cf283e30c8bfa82d79b4,2019-01-27 13:25:00 UTC,heat,hold,681,718,718,ME,Portland,50,True,False,False,Gas
2,eb3eccd199425d9cde1444492ebe176a77fca82d,2019-01-22 19:30:00 UTC,heat,hold,711,721,721,ME,Westbrook,0,False,False,False,Gas
3,b8befed1a37cff3660e8cf283e30c8bfa82d79b4,2019-01-27 13:15:00 UTC,heat,hold,678,718,718,ME,Portland,50,True,False,False,Gas
4,eb3eccd199425d9cde1444492ebe176a77fca82d,2019-01-22 19:25:00 UTC,heat,hold,707,721,721,ME,Westbrook,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60297,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-01-09 19:55:00 UTC,heat,hold,735,740,740,ME,Windham,20,True,False,False,Gas
60298,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-01-25 19:05:00 UTC,heat,hold,733,740,740,ME,Windham,20,True,False,False,Gas
60299,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-01-30 13:10:00 UTC,heat,hold,728,740,740,ME,Windham,20,True,False,False,Gas
60300,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-01-09 18:35:00 UTC,heat,hold,744,740,740,ME,Windham,20,True,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(mapper=ave_column_names, axis="columns")

In [18]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2019_ave = jan_2019.groupby(group_keys).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/ME/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the ME "day" CSV for January 2020 into a DataFrame
jan_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2020-jan-day-ME.csv"
)

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = jan_2020['TemperatureExpectedCool']
heat_setpoint = jan_2020['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
jan_2020 = jan_2020.loc[~is_outlier].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,44e3878da8b0ef688698fbdd28127b4468861f02,2020-01-28 19:55:00 UTC,heat,hold,685,689,689,ME,Scarborough,29,False,False,False,Gas
2,eb3eccd199425d9cde1444492ebe176a77fca82d,2020-01-16 19:15:00 UTC,heat,hold,718,721,721,ME,Westbrook,0,False,False,False,Gas
3,eb3eccd199425d9cde1444492ebe176a77fca82d,2020-01-17 17:20:00 UTC,heat,hold,704,704,704,ME,Westbrook,0,False,False,False,Gas
4,eb3eccd199425d9cde1444492ebe176a77fca82d,2020-01-16 18:30:00 UTC,heat,hold,714,721,721,ME,Westbrook,0,False,False,False,Gas
6,44e3878da8b0ef688698fbdd28127b4468861f02,2020-01-30 18:15:00 UTC,heat,hold,632,650,635,ME,Scarborough,29,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72728,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2020-01-12 17:10:00 UTC,heat,hold,699,740,740,ME,Windham,20,True,False,False,Gas
72729,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2020-01-19 13:10:00 UTC,heat,hold,698,740,740,ME,Windham,20,True,False,False,Gas
72730,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2020-01-19 17:40:00 UTC,heat,hold,731,740,740,ME,Windham,20,True,False,False,Gas
72731,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2020-01-19 13:00:00 UTC,heat,hold,695,740,740,ME,Windham,20,True,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(mapper=ave_column_names, axis="columns")

In [24]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2020_ave = jan_2020.groupby(group_keys).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/ME/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the ME "day" CSV for January 2021 into a DataFrame
jan_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2021-jan-day-ME.csv"
)

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = jan_2021['TemperatureExpectedCool']
heat_setpoint = jan_2021['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
jan_2021 = jan_2021.loc[~is_outlier].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2021-01-16 15:05:00 UTC,heat,hold,704,712,712,ME,Portland,15,False,False,False,Gas
1,eb3eccd199425d9cde1444492ebe176a77fca82d,2021-01-25 18:40:00 UTC,heat,hold,712,714,714,ME,Westbrook,0,False,False,False,Gas
2,2d3f19df9837542e28597777e3227078de997066,2021-01-10 15:55:00 UTC,heat,hold,671,679,679,ME,Windham,10,False,False,False,Gas
3,db15333c03d900e5068f914b4bbe0929609c4884,2021-01-29 17:25:00 UTC,heat,hold,703,712,712,ME,Portland,15,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2021-01-27 19:25:00 UTC,heat,hold,686,692,692,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50182,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-01-31 12:15:00 UTC,heat,hold,659,680,680,ME,Presque Isle,10,False,False,False,Gas
50183,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-01-31 12:45:00 UTC,heat,hold,666,680,680,ME,Presque Isle,10,False,False,False,Gas
50184,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-01-31 13:20:00 UTC,heat,hold,672,680,680,ME,Presque Isle,10,False,False,False,Gas
50185,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-01-10 13:20:00 UTC,heat,hold,640,680,680,ME,Presque Isle,10,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(mapper=ave_column_names, axis="columns")

In [30]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
jan_2021_ave = jan_2021.groupby(group_keys).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/ME/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the per-year CSV files written for this month. os.listdir returns
# names in arbitrary order, so sort them to keep the combined row order
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/ME/jan/") if f.endswith(".csv"))

# files

In [33]:
# Adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year CSV, then concatenate them in a single call instead of
# growing a DataFrame inside the loop (which re-copies all rows every pass).
yearly_frames = [pd.read_csv("data/day/ME/jan/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder held no CSV files.
ME_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

ME_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,02a6a361c70d1052cbb1d7d80490d8326ae8e684,Jan,2017,heat,auto,Naples,618.355263,664.092105,662.434211,15.0,False,False,False
1,02a6a361c70d1052cbb1d7d80490d8326ae8e684,Jan,2017,heat,hold,Naples,638.916462,656.786241,638.945946,15.0,False,False,False
2,071d40a2ebc85623a1b5da94a1ee4af028115825,Jan,2017,heat,auto,North Yarmouth,647.962963,650.000000,650.000000,25.0,False,False,False
3,071d40a2ebc85623a1b5da94a1ee4af028115825,Jan,2017,heat,hold,North Yarmouth,685.066667,669.333333,670.333333,25.0,False,False,False
4,239e363dc2695713ba155ce9a713fdff9c1ba533,Jan,2017,heat,auto,Glenburn,705.727273,703.909091,703.090909,40.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
48,f1fae5f41b71e0d0f847b8cbd4efab65ce7ea28e,Jan,2021,auto,hold,Wells,631.977591,790.000000,634.285714,5.0,True,False,False
49,f7c3348165779a7256fc4759ae0e980e6120b30f,Jan,2021,heat,hold,Newport,666.931034,655.655172,655.655172,15.0,False,False,False
50,fc8c8f510d6307093df9024832c3c54b27416586,Jan,2021,heat,hold,Lisbon,646.007582,650.000000,650.000000,10.0,False,False,False
51,ff455f6c827dd3c854c66a6f09c6ba0dc0bc7d7b,Jan,2021,heat,hold,Portland,681.909091,680.000000,680.000000,15.0,False,False,False


In [34]:
# Write the combined January rows for ME. to_csv does not create missing
# folders, so make sure the target exists first. index=False leaves out the
# row index, which simply restarts at 0 for each source file.
os.makedirs("Scraper_Output/State_Month_Day/ME", exist_ok=True)
ME_jan.to_csv("Scraper_Output/State_Month_Day/ME/ME_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the ME "day" CSV for February 2017 into a DataFrame
feb_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2017-feb-day-ME.csv"
)

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = feb_2017['TemperatureExpectedCool']
heat_setpoint = feb_2017['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
feb_2017 = feb_2017.loc[~is_outlier].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2017-02-04T13:00:00Z,heat,auto,680,750,676,ME,Portland,15,False,False,False,Gas
1,db15333c03d900e5068f914b4bbe0929609c4884,2017-02-04T12:40:00Z,heat,auto,681,750,676,ME,Portland,15,False,False,False,Gas
2,db15333c03d900e5068f914b4bbe0929609c4884,2017-02-04T12:55:00Z,heat,auto,681,750,676,ME,Portland,15,False,False,False,Gas
3,db15333c03d900e5068f914b4bbe0929609c4884,2017-02-04T12:45:00Z,heat,auto,681,750,676,ME,Portland,15,False,False,False,Gas
4,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-02-03T14:30:00Z,heat,auto,676,735,663,ME,North Yarmouth,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11151,95d75afc1ea185ac792938fc8be853952bc72144,2017-02-25T15:40:00Z,heat,hold,682,680,680,ME,Cooper,25,False,False,False,Gas
11152,ae94ac840d4ff83ecaab9cbde2e0f7102bdd81ae,2017-02-10T12:20:00Z,heat,hold,666,680,680,ME,Portland,55,False,False,False,Gas
11153,ae94ac840d4ff83ecaab9cbde2e0f7102bdd81ae,2017-02-10T12:15:00Z,heat,hold,663,680,680,ME,Portland,55,False,False,False,Gas
11154,ae94ac840d4ff83ecaab9cbde2e0f7102bdd81ae,2017-02-10T12:55:00Z,heat,hold,673,680,680,ME,Portland,55,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the casing expected downstream
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(mapper=ave_column_names, axis="columns")

In [39]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2017_ave = feb_2017.groupby(group_keys).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/ME/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the ME "day" CSV for February 2018 into a DataFrame
feb_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2018-feb-day-ME.csv"
)

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = feb_2018['TemperatureExpectedCool']
heat_setpoint = feb_2018['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
feb_2018 = feb_2018.loc[~is_outlier].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-02-16T17:10:00Z,heat,hold,769,663,663,ME,Westbrook,0,True,False,False,Gas
1,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-02-16T16:50:00Z,heat,hold,767,663,663,ME,Westbrook,0,True,False,False,Gas
2,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-02-16T13:50:00Z,heat,hold,706,663,663,ME,Westbrook,0,True,False,False,Gas
3,c5bc7c9568931d0b8979393514c99d545786ae72,2018-02-08T13:00:00Z,heat,hold,718,716,716,ME,Lewiston,90,False,False,False,Gas
4,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2018-02-16T18:00:00Z,heat,hold,748,663,663,ME,Westbrook,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20880,af68b7762e6793591d3284dde997c800c9f58566,2018-02-03T10:55:00Z,heat,hold,719,730,730,ME,Corinna,0,False,False,False,Gas
20881,af68b7762e6793591d3284dde997c800c9f58566,2018-02-11T13:25:00Z,heat,auto,730,730,730,ME,Corinna,0,False,False,False,Gas
20882,af68b7762e6793591d3284dde997c800c9f58566,2018-02-03T10:40:00Z,heat,hold,740,730,730,ME,Corinna,0,False,False,False,Gas
20883,af68b7762e6793591d3284dde997c800c9f58566,2018-02-11T11:45:00Z,heat,auto,741,730,730,ME,Corinna,0,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the casing expected downstream
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(mapper=ave_column_names, axis="columns")

In [45]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2018_ave = feb_2018.groupby(group_keys).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/ME/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the ME "day" CSV for February 2019 into a DataFrame
feb_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2019-feb-day-ME.csv"
)

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = feb_2019['TemperatureExpectedCool']
heat_setpoint = feb_2019['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
feb_2019 = feb_2019.loc[~is_outlier].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5c9e2ffae2d25c72f4fa1ed17fd9694fa07de261,2019-02-16 17:30:00 UTC,heat,auto,679,780,682,ME,Glenburn,40,False,False,False,Gas
1,02a6a361c70d1052cbb1d7d80490d8326ae8e684,2019-02-02 19:00:00 UTC,heat,hold,639,659,653,ME,Naples,15,False,False,False,Gas
2,b4cd14cb69ad26849775c10ec8d78686544393f3,2019-02-10 17:55:00 UTC,heat,hold,663,661,661,ME,Newport,15,False,False,False,Gas
3,c491ad48441073aa08a818dbfe01ac8ad2a83838,2019-02-18 13:00:00 UTC,heat,hold,677,770,696,ME,South Portland,75,False,False,False,Gas
4,4ca101ab97c9e2b36a6de942d8dfa00a87b9159d,2019-02-08 17:55:00 UTC,heat,auto,667,652,652,ME,Brunswick,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50413,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-02-08 15:10:00 UTC,heat,hold,739,740,740,ME,Windham,20,True,False,False,Gas
50414,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-02-01 12:10:00 UTC,heat,hold,697,740,740,ME,Windham,20,True,False,False,Gas
50415,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-02-01 12:15:00 UTC,heat,hold,699,740,740,ME,Windham,20,True,False,False,Gas
50416,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-02-03 15:15:00 UTC,heat,hold,721,740,740,ME,Windham,20,True,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the casing expected downstream
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(mapper=ave_column_names, axis="columns")

In [51]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2019_ave = feb_2019.groupby(group_keys).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/ME/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the ME "day" CSV for February 2020 into a DataFrame
feb_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2020-feb-day-ME.csv"
)

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = feb_2020['TemperatureExpectedCool']
heat_setpoint = feb_2020['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
feb_2020 = feb_2020.loc[~is_outlier].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2020-02-17 19:20:00 UTC,heat,hold,652,721,721,ME,Westbrook,0,True,False,False,Gas
1,db15333c03d900e5068f914b4bbe0929609c4884,2020-02-15 18:40:00 UTC,heat,hold,700,702,702,ME,Portland,15,False,False,False,Gas
2,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2020-02-17 17:55:00 UTC,heat,hold,658,721,721,ME,Westbrook,0,True,False,False,Gas
3,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2020-02-17 19:00:00 UTC,heat,hold,655,721,721,ME,Westbrook,0,True,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2020-02-28 19:45:00 UTC,heat,hold,698,702,702,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72833,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-02-23 13:05:00 UTC,heat,hold,672,680,680,ME,Presque Isle,10,False,False,False,Gas
72834,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-02-24 12:25:00 UTC,heat,hold,659,680,680,ME,Presque Isle,10,False,False,False,Gas
72835,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-02-23 12:35:00 UTC,heat,hold,670,680,680,ME,Presque Isle,10,False,False,False,Gas
72836,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-02-23 13:50:00 UTC,heat,hold,677,680,680,ME,Presque Isle,10,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the casing expected downstream
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(mapper=ave_column_names, axis="columns")

In [57]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2020_ave = feb_2020.groupby(group_keys).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/ME/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the ME "day" CSV for February 2021 into a DataFrame
feb_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2021-feb-day-ME.csv"
)

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Rows with a missing setpoint
# compare False on every test and are therefore kept.
cool_setpoint = feb_2021['TemperatureExpectedCool']
heat_setpoint = feb_2021['TemperatureExpectedHeat']
is_outlier = (
    (cool_setpoint >= 850)
    | (heat_setpoint >= 850)
    | (cool_setpoint <= 600)
    | (heat_setpoint <= 600)
)

# Rebind to an independent filtered copy rather than dropping in place
feb_2021 = feb_2021.loc[~is_outlier].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2021-02-07 17:10:00 UTC,heat,hold,707,712,712,ME,Portland,15,False,False,False,Gas
1,c5bc7c9568931d0b8979393514c99d545786ae72,2021-02-15 13:00:00 UTC,heat,hold,701,724,697,ME,Lewiston,90,False,False,False,Gas
2,db15333c03d900e5068f914b4bbe0929609c4884,2021-02-18 16:55:00 UTC,heat,hold,701,702,702,ME,Portland,15,False,False,False,Gas
3,db15333c03d900e5068f914b4bbe0929609c4884,2021-02-07 16:45:00 UTC,heat,hold,709,712,712,ME,Portland,15,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2021-02-12 14:25:00 UTC,heat,hold,693,692,692,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49746,36cf8dfe4915f94e009b168cf31efd7d81679e4e,2021-02-26 19:55:00 UTC,heat,hold,662,660,660,ME,Fryeburg,120,False,False,False,Gas
49747,36cf8dfe4915f94e009b168cf31efd7d81679e4e,2021-02-16 18:00:00 UTC,heat,hold,659,660,660,ME,Fryeburg,120,False,False,False,Gas
49748,36cf8dfe4915f94e009b168cf31efd7d81679e4e,2021-02-16 18:15:00 UTC,heat,hold,660,660,660,ME,Fryeburg,120,False,False,False,Gas
49749,36cf8dfe4915f94e009b168cf31efd7d81679e4e,2021-02-20 19:20:00 UTC,heat,hold,669,660,660,ME,Fryeburg,120,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings) so the
# labels can be used as grouping keys
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the casing expected downstream
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Append "_ave" to the three temperature columns so the averaged output
# is labelled as an aggregate
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(mapper=ave_column_names, axis="columns")

In [63]:
# Average the numeric columns for every combination of Identifier, Month,
# Year, HvacMode, CalendarEvent and City. numeric_only=True is explicit because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on those
# instead of silently leaving them out.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
feb_2021_ave = feb_2021.groupby(group_keys).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target exists first.
# index=True writes the group keys as the leading columns of the file.
os.makedirs("data/day/ME/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/ME/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the per-year CSV files written for this month. os.listdir returns
# names in arbitrary order, so sort them to keep the combined row order
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/ME/feb/") if f.endswith(".csv"))

# files

In [66]:
# Adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year CSV, then concatenate them in a single call instead of
# growing a DataFrame inside the loop (which re-copies all rows every pass).
yearly_frames = [pd.read_csv("data/day/ME/feb/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder held no CSV files.
ME_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

ME_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,02a6a361c70d1052cbb1d7d80490d8326ae8e684,feb,2017,heat,hold,Naples,647.626667,652.786667,641.913333,15.0,False,False,False
1,071d40a2ebc85623a1b5da94a1ee4af028115825,feb,2017,heat,hold,North Yarmouth,680.706897,674.681034,674.827586,25.0,False,False,False
2,239e363dc2695713ba155ce9a713fdff9c1ba533,feb,2017,heat,auto,Glenburn,719.812500,780.000000,720.000000,40.0,False,False,False
3,4c07c4cdb374e0cf0049e8af72014378ff2ac2c8,feb,2017,heat,hold,Portland,659.790123,659.629630,659.629630,55.0,False,False,False
4,590487e13b23b41e232cab63f588978494880281,feb,2017,heat,auto,Portland,655.823529,750.000000,654.411765,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
52,def0ec3e5e8faffc7af3af52828081d48494a02d,feb,2021,heat,hold,Presque Isle,641.557087,657.978346,645.273622,10.0,False,False,False
53,e002570d883bdc948f492a07fe24a29380bfd71d,feb,2021,heat,hold,Saco,690.641509,695.283019,695.283019,0.0,False,False,False
54,e4e047a743392addccea6117b9d1731b70406478,feb,2021,heat,hold,Fryeburg,676.223684,681.842105,681.828947,7.0,False,False,False
55,f1fae5f41b71e0d0f847b8cbd4efab65ce7ea28e,feb,2021,auto,hold,Wells,639.379032,790.000000,640.000000,5.0,True,False,False


In [67]:
# Write the combined month file; index=False leaves out the per-file row numbers.
# Create the output folder first, since to_csv will not create it.
os.makedirs("Scraper_Output/State_Month_Day/ME/", exist_ok=True)
ME_feb.to_csv("Scraper_Output/State_Month_Day/ME/ME_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" extract for ME (one row per timestamped reading)
jun_2017 = pd.read_csv("../data_large/ME-day/2017-jun-day-ME.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2017.drop(jun_2017[outlier_rows].index, inplace = True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2017-06-06 16:45:00 UTC,heat,hold,684,699,686,ME,North Yarmouth,0,False,False,False,Gas
1,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-06-23 16:45:00 UTC,heat,hold,731,766,724,ME,Greenville,35,False,False,False,Gas
1178,918b620a2e58d39ca3a3293448661334e4c0c905,2017-06-08 19:05:00 UTC,heat,auto,752,650,630,ME,Cooper,35,False,False,False,Gas
1179,918b620a2e58d39ca3a3293448661334e4c0c905,2017-06-15 15:10:00 UTC,heat,auto,689,650,630,ME,Cooper,35,False,False,False,Gas
1180,918b620a2e58d39ca3a3293448661334e4c0c905,2017-06-06 14:20:00 UTC,heat,auto,639,650,630,ME,Cooper,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10889,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-06-23 12:30:00 UTC,heat,hold,707,710,710,ME,Greenville,35,False,False,False,Gas
10890,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-06-23 11:40:00 UTC,heat,hold,699,710,710,ME,Greenville,35,False,False,False,Gas
10891,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-06-23 12:00:00 UTC,heat,hold,707,710,710,ME,Greenville,35,False,False,False,Gas
10892,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-06-23 11:25:00 UTC,heat,hold,684,710,710,ME,Greenville,35,False,False,False,Gas


In [70]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jun_2017["Year"] = "2017"
jun_2017["Month"] = "jun"

In [71]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jun_2017 = jun_2017.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [72]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jun_2017_ave = jun_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jun/", exist_ok=True)
jun_2017_ave.to_csv("data/day/ME/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the June 2018 "day" extract for ME (one row per timestamped reading)
jun_2018 = pd.read_csv("../data_large/ME-day/2018-jun-day-ME.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2018.drop(jun_2018[outlier_rows].index, inplace = True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,0de75d1de6a14bfa2a7ac33b197d9f2b68cc41c8,2018-06-12T12:25:00Z,heat,hold,678,672,662,ME,Presque Isle,10,False,False,False,Gas
2,0de75d1de6a14bfa2a7ac33b197d9f2b68cc41c8,2018-06-10T15:00:00Z,heat,hold,670,660,625,ME,Presque Isle,10,False,False,False,Gas
3,0de75d1de6a14bfa2a7ac33b197d9f2b68cc41c8,2018-06-26T12:05:00Z,heat,hold,683,664,649,ME,Presque Isle,10,False,False,False,Gas
4,0de75d1de6a14bfa2a7ac33b197d9f2b68cc41c8,2018-06-08T13:25:00Z,heat,hold,673,665,655,ME,Presque Isle,10,False,False,False,Gas
5,0de75d1de6a14bfa2a7ac33b197d9f2b68cc41c8,2018-06-15T12:40:00Z,heat,hold,671,659,651,ME,Presque Isle,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26165,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2018-06-04T14:35:00Z,heat,hold,666,680,680,ME,North Yarmouth,0,False,False,False,Gas
26166,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2018-06-04T14:40:00Z,heat,hold,669,680,680,ME,North Yarmouth,0,False,False,False,Gas
26167,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2018-06-04T14:55:00Z,heat,hold,676,680,680,ME,North Yarmouth,0,False,False,False,Gas
26168,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,2018-06-04T14:00:00Z,heat,hold,660,680,680,ME,North Yarmouth,0,False,False,False,Gas


In [76]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jun_2018["Year"] = "2018"
jun_2018["Month"] = "jun"

In [77]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jun_2018 = jun_2018.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [78]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jun_2018_ave = jun_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jun/", exist_ok=True)
jun_2018_ave.to_csv("data/day/ME/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the June 2019 "day" extract for ME (one row per timestamped reading)
jun_2019 = pd.read_csv("../data_large/ME-day/2019-jun-day-ME.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2019.drop(jun_2019[outlier_rows].index, inplace = True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2019-06-07 18:45:00 UTC,heat,hold,721,695,695,ME,Portland,15,False,False,False,Gas
1,54c88af9f2ad9dcd5c2e651aa030eb01babc196c,2019-06-17 14:40:00 UTC,heat,hold,688,728,714,ME,Yarmouth,99,True,False,False,Gas
2,db15333c03d900e5068f914b4bbe0929609c4884,2019-06-30 13:35:00 UTC,cool,auto,730,770,695,ME,Portland,15,False,False,False,Gas
3,db15333c03d900e5068f914b4bbe0929609c4884,2019-06-07 16:05:00 UTC,heat,hold,705,695,695,ME,Portland,15,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2019-06-03 19:45:00 UTC,heat,hold,682,655,655,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62140,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-06-09 17:05:00 UTC,heat,hold,728,680,680,ME,Windham,20,True,False,False,Gas
62141,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-06-09 15:35:00 UTC,heat,hold,705,680,680,ME,Windham,20,True,False,False,Gas
62142,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-06-09 19:25:00 UTC,heat,hold,753,680,680,ME,Windham,20,True,False,False,Gas
62143,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2019-06-09 16:25:00 UTC,heat,hold,718,680,680,ME,Windham,20,True,False,False,Gas


In [82]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jun_2019["Year"] = "2019"
jun_2019["Month"] = "jun"

In [83]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jun_2019 = jun_2019.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [84]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jun_2019_ave = jun_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jun/", exist_ok=True)
jun_2019_ave.to_csv("data/day/ME/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the June 2020 "day" extract for ME (one row per timestamped reading)
jun_2020 = pd.read_csv("../data_large/ME-day/2020-jun-day-ME.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2020.drop(jun_2020[outlier_rows].index, inplace = True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-06 12:50:00 UTC,heat,hold,678,680,680,ME,Presque Isle,10,False,False,False,Gas
1,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-09 10:35:00 UTC,heat,hold,675,680,680,ME,Presque Isle,10,False,False,False,Gas
2,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-09 10:25:00 UTC,heat,hold,669,680,680,ME,Presque Isle,10,False,False,False,Gas
3,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-06 12:45:00 UTC,heat,hold,678,680,680,ME,Presque Isle,10,False,False,False,Gas
4,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-07 11:10:00 UTC,heat,hold,678,680,680,ME,Presque Isle,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68630,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-06 12:30:00 UTC,heat,hold,675,680,680,ME,Presque Isle,10,False,False,False,Gas
68631,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-07 12:15:00 UTC,heat,hold,682,680,680,ME,Presque Isle,10,False,False,False,Gas
68632,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-07 11:55:00 UTC,heat,hold,674,680,680,ME,Presque Isle,10,False,False,False,Gas
68633,def0ec3e5e8faffc7af3af52828081d48494a02d,2020-06-07 11:40:00 UTC,heat,hold,677,680,680,ME,Presque Isle,10,False,False,False,Gas


In [88]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jun_2020["Year"] = "2020"
jun_2020["Month"] = "jun"

In [89]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jun_2020 = jun_2020.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [90]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jun_2020_ave = jun_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jun/", exist_ok=True)
jun_2020_ave.to_csv("data/day/ME/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the June 2021 "day" extract for ME (one row per timestamped reading)
jun_2021 = pd.read_csv("../data_large/ME-day/2021-jun-day-ME.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2021.drop(jun_2021[outlier_rows].index, inplace = True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2d3f19df9837542e28597777e3227078de997066,2021-06-04 09:05:00 UTC,cool,hold,697,719,719,ME,Windham,10,False,False,False,Gas
1,db15333c03d900e5068f914b4bbe0929609c4884,2021-06-20 19:25:00 UTC,cool,hold,742,742,742,ME,Portland,15,False,False,False,Gas
2,2d3f19df9837542e28597777e3227078de997066,2021-06-04 10:20:00 UTC,cool,hold,697,719,719,ME,Windham,10,False,False,False,Gas
3,883b41097dd9e03f592ed7428b08809727f5fb29,2021-06-03 16:25:00 UTC,cool,hold,729,792,657,ME,Raymond,39,False,False,True,Electric
4,db15333c03d900e5068f914b4bbe0929609c4884,2021-06-20 16:05:00 UTC,cool,hold,746,744,744,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38575,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-06-01 09:20:00 UTC,heat,hold,676,680,680,ME,Presque Isle,10,False,False,False,Gas
38576,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-06-01 10:00:00 UTC,heat,hold,677,680,680,ME,Presque Isle,10,False,False,False,Gas
38577,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-06-01 10:05:00 UTC,heat,hold,676,680,680,ME,Presque Isle,10,False,False,False,Gas
38578,def0ec3e5e8faffc7af3af52828081d48494a02d,2021-06-01 09:40:00 UTC,heat,hold,681,680,680,ME,Presque Isle,10,False,False,False,Gas


In [94]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jun_2021["Year"] = "2021"
jun_2021["Month"] = "jun"

In [95]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jun_2021 = jun_2021.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [96]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jun_2021_ave = jun_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jun/", exist_ok=True)
jun_2021_ave.to_csv("data/day/ME/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Create variable for files in directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined output in a stable (year) order from run to run.
files = sorted(f for f in os.listdir("data/day/ME/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Start from an empty dataframe so ME_jun is still defined when no files are found
ME_jun = pd.DataFrame()

# Read each per-year file, then concatenate once: a single concat avoids
# re-copying the accumulated rows on every pass and never mixes an empty
# frame into the dtype resolution. Sorting keeps the row order reproducible.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/ME/jun/" + file)
    frames.append(df)
if frames:
    ME_jun = pd.concat(frames)
    
ME_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,06322bf1e7bd15eb760e1cc07c01525c4fd679bc,jun,2017,heat,hold,Harpswell,697.510915,669.042821,669.042821,5.0,False,False,False
1,071d40a2ebc85623a1b5da94a1ee4af028115825,jun,2017,heat,auto,North Yarmouth,707.791667,670.000000,670.000000,25.0,False,False,False
2,67f53f5424fc9069ec1785ebc1b3f47de39dbbe7,jun,2017,heat,hold,Harpswell,649.454545,679.727273,679.272727,5.0,False,False,False
3,6ea13a6a8839c82c41cc8b0da4d0749b2070e49f,jun,2017,heat,hold,North Yarmouth,682.786325,688.965812,687.965812,0.0,False,False,False
4,918b620a2e58d39ca3a3293448661334e4c0c905,jun,2017,heat,auto,Cooper,701.156429,650.000000,630.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25,d103d7c927baf49b25a1ed7d30e0e0c795359a45,jun,2021,heat,hold,Sanford,692.378855,759.000000,629.000000,0.0,True,False,False
26,d49970b6290cf340b7bc1b1270843d53fdc2d1b9,jun,2021,heat,hold,Windham,729.262500,730.350000,730.350000,20.0,False,False,False
27,db15333c03d900e5068f914b4bbe0929609c4884,jun,2021,cool,hold,Portland,734.126214,748.524272,748.524272,15.0,False,False,False
28,def0ec3e5e8faffc7af3af52828081d48494a02d,jun,2021,heat,hold,Presque Isle,683.437500,686.020833,686.020833,10.0,False,False,False


In [100]:
# Write the combined month file; index=False leaves out the per-file row numbers.
# Create the output folder first, since to_csv will not create it.
os.makedirs("Scraper_Output/State_Month_Day/ME/", exist_ok=True)
ME_jun.to_csv("Scraper_Output/State_Month_Day/ME/ME_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" extract for ME (one row per timestamped reading)
jul_2017 = pd.read_csv("../data_large/ME-day/2017-jul-day-ME.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2017.drop(jul_2017[outlier_rows].index, inplace = True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2017-07-09T16:40:00Z,auto,auto,719,775,725,ME,Portland,15,False,False,False,Gas
1,db15333c03d900e5068f914b4bbe0929609c4884,2017-07-09T17:25:00Z,auto,auto,735,775,725,ME,Portland,15,False,False,False,Gas
2,db15333c03d900e5068f914b4bbe0929609c4884,2017-07-09T16:25:00Z,auto,hold,719,735,645,ME,Portland,15,False,False,False,Gas
3,db15333c03d900e5068f914b4bbe0929609c4884,2017-07-09T18:55:00Z,auto,auto,735,775,725,ME,Portland,15,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2017-07-09T16:10:00Z,auto,hold,718,735,645,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4533,590487e13b23b41e232cab63f588978494880281,2017-07-09T17:50:00Z,auto,auto,749,750,700,ME,Portland,15,False,False,False,Gas
4534,590487e13b23b41e232cab63f588978494880281,2017-07-09T17:55:00Z,auto,auto,749,750,700,ME,Portland,15,False,False,False,Gas
4535,590487e13b23b41e232cab63f588978494880281,2017-07-09T19:40:00Z,auto,auto,761,750,700,ME,Portland,15,False,False,False,Gas
4536,590487e13b23b41e232cab63f588978494880281,2017-07-09T19:50:00Z,auto,auto,758,750,700,ME,Portland,15,False,False,False,Gas


In [103]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jul_2017["Year"] = "2017"
jul_2017["Month"] = "jul"

In [104]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jul_2017 = jul_2017.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [105]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jul_2017_ave = jul_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jul/", exist_ok=True)
jul_2017_ave.to_csv("data/day/ME/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the July 2018 "day" extract for ME (one row per timestamped reading)
jul_2018 = pd.read_csv("../data_large/ME-day/2018-jul-day-ME.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2018.drop(jul_2018[outlier_rows].index, inplace = True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2018-07-31T16:10:00Z,auto,hold,732,725,675,ME,Portland,15,False,False,False,Gas
1,db15333c03d900e5068f914b4bbe0929609c4884,2018-07-31T17:05:00Z,auto,hold,732,725,675,ME,Portland,15,False,False,False,Gas
2,db15333c03d900e5068f914b4bbe0929609c4884,2018-07-11T17:25:00Z,auto,hold,748,745,675,ME,Portland,15,False,False,False,Gas
3,db15333c03d900e5068f914b4bbe0929609c4884,2018-07-31T15:15:00Z,auto,hold,730,725,675,ME,Portland,15,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2018-07-31T17:35:00Z,auto,hold,732,725,675,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28207,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-07-07T18:05:00Z,heat,auto,731,770,680,ME,South Portland,75,False,False,False,Gas
28208,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-07-08T18:40:00Z,heat,auto,756,770,680,ME,South Portland,75,False,False,False,Gas
28209,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-07-07T17:35:00Z,heat,auto,742,770,680,ME,South Portland,75,False,False,False,Gas
28210,c491ad48441073aa08a818dbfe01ac8ad2a83838,2018-07-07T12:15:00Z,heat,auto,729,770,680,ME,South Portland,75,False,False,False,Gas


In [109]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jul_2018["Year"] = "2018"
jul_2018["Month"] = "jul"

In [110]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jul_2018 = jul_2018.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [111]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jul_2018_ave = jul_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jul/", exist_ok=True)
jul_2018_ave.to_csv("data/day/ME/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the July 2019 "day" extract for ME (one row per timestamped reading)
jul_2019 = pd.read_csv("../data_large/ME-day/2019-jul-day-ME.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2019.drop(jul_2019[outlier_rows].index, inplace = True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,883b41097dd9e03f592ed7428b08809727f5fb29,2019-07-09 12:15:00 UTC,cool,auto,723,750,750,ME,Raymond,39,False,False,True,Electric
1,883b41097dd9e03f592ed7428b08809727f5fb29,2019-07-10 10:40:00 UTC,cool,auto,738,770,770,ME,Raymond,39,False,False,True,Electric
2,883b41097dd9e03f592ed7428b08809727f5fb29,2019-07-10 11:20:00 UTC,cool,auto,737,770,770,ME,Raymond,39,False,False,True,Electric
3,883b41097dd9e03f592ed7428b08809727f5fb29,2019-07-09 19:35:00 UTC,cool,auto,782,750,750,ME,Raymond,39,False,False,True,Electric
4,883b41097dd9e03f592ed7428b08809727f5fb29,2019-07-17 19:55:00 UTC,cool,auto,757,750,750,ME,Raymond,39,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68343,883b41097dd9e03f592ed7428b08809727f5fb29,2019-07-13 18:05:00 UTC,cool,auto,784,740,740,ME,Raymond,39,False,False,True,Electric
68344,590487e13b23b41e232cab63f588978494880281,2019-07-12 18:50:00 UTC,cool,hold,746,740,740,ME,Portland,15,False,False,False,Gas
68345,590487e13b23b41e232cab63f588978494880281,2019-07-12 18:10:00 UTC,cool,hold,736,740,740,ME,Portland,15,False,False,False,Gas
68346,db15333c03d900e5068f914b4bbe0929609c4884,2019-07-14 15:05:00 UTC,cool,hold,744,740,740,ME,Portland,15,False,False,False,Gas


In [115]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jul_2019["Year"] = "2019"
jul_2019["Month"] = "jul"

In [116]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jul_2019 = jul_2019.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [117]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jul_2019_ave = jul_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jul/", exist_ok=True)
jul_2019_ave.to_csv("data/day/ME/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the July 2020 "day" extract for ME (one row per timestamped reading)
jul_2020 = pd.read_csv("../data_large/ME-day/2020-jul-day-ME.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2020.drop(jul_2020[outlier_rows].index, inplace = True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2d3f19df9837542e28597777e3227078de997066,2020-07-22 09:45:00 UTC,cool,hold,709,750,750,ME,Windham,10,False,False,False,Gas
1,2d3f19df9837542e28597777e3227078de997066,2020-07-22 09:30:00 UTC,cool,hold,703,750,750,ME,Windham,10,False,False,False,Gas
2,db15333c03d900e5068f914b4bbe0929609c4884,2020-07-20 16:05:00 UTC,cool,hold,725,722,722,ME,Portland,15,False,False,False,Gas
3,2d3f19df9837542e28597777e3227078de997066,2020-07-22 12:40:00 UTC,cool,hold,711,750,750,ME,Windham,10,False,False,False,Gas
4,2d3f19df9837542e28597777e3227078de997066,2020-07-24 11:05:00 UTC,cool,auto,726,750,750,ME,Windham,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71198,c8c95ff0b34a5e7ba4c7d9cc3e0b7d2bd27cb9dd,2020-07-03 11:25:00 UTC,cool,hold,713,740,740,ME,Falmouth,10,True,False,False,Gas
71199,c8c95ff0b34a5e7ba4c7d9cc3e0b7d2bd27cb9dd,2020-07-19 19:40:00 UTC,cool,auto,742,740,740,ME,Falmouth,10,True,False,False,Gas
71200,c8c95ff0b34a5e7ba4c7d9cc3e0b7d2bd27cb9dd,2020-07-19 17:30:00 UTC,cool,auto,734,740,740,ME,Falmouth,10,True,False,False,Gas
71201,c8c95ff0b34a5e7ba4c7d9cc3e0b7d2bd27cb9dd,2020-07-05 13:40:00 UTC,cool,hold,718,740,740,ME,Falmouth,10,True,False,False,Gas


In [121]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jul_2020["Year"] = "2020"
jul_2020["Month"] = "jul"

In [122]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jul_2020 = jul_2020.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [123]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jul_2020_ave = jul_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jul/", exist_ok=True)
jul_2020_ave.to_csv("data/day/ME/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the July 2021 "day" extract for ME (one row per timestamped reading)
jul_2021 = pd.read_csv("../data_large/ME-day/2021-jul-day-ME.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2021.drop(jul_2021[outlier_rows].index, inplace = True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2021-07-20T18:20:00Z,cool,hold,739,742,742,ME,Portland,15,False,False,False,Gas
1,db15333c03d900e5068f914b4bbe0929609c4884,2021-07-20T16:20:00Z,cool,hold,709,742,742,ME,Portland,15,False,False,False,Gas
2,2d3f19df9837542e28597777e3227078de997066,2021-07-23T09:00:00Z,cool,hold,724,729,729,ME,Windham,10,False,False,False,Gas
3,d49970b6290cf340b7bc1b1270843d53fdc2d1b9,2021-07-08T19:30:00Z,heat,hold,767,770,770,ME,Windham,20,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2021-07-01T14:15:00Z,cool,hold,726,722,722,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31453,a98a991a67b9fd6aa265f042bb45f80797c49746,2021-07-03T12:20:00Z,heat,hold,742,740,740,ME,Scarborough,100,True,False,False,Gas
31454,a98a991a67b9fd6aa265f042bb45f80797c49746,2021-07-03T12:10:00Z,heat,hold,741,740,740,ME,Scarborough,100,True,False,False,Gas
31455,a98a991a67b9fd6aa265f042bb45f80797c49746,2021-07-03T14:15:00Z,heat,hold,749,740,740,ME,Scarborough,100,True,False,False,Gas
31456,a98a991a67b9fd6aa265f042bb45f80797c49746,2021-07-03T11:05:00Z,heat,hold,737,740,740,ME,Scarborough,100,True,False,False,Gas


In [127]:
# Tag every row with its year and month as string labels; both columns
# serve as grouping keys when the averages are computed.

jul_2021["Year"] = "2021"
jul_2021["Month"] = "jul"

In [128]:
# Suffix the three temperature columns with "_ave" so they are labelled as averages after aggregation

jul_2021 = jul_2021.rename(columns={
    name: name + "_ave"
    for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
})

In [129]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True leaves out the text columns (date_time, ProvinceState, fuel type),
# which pandas >= 2.0 would otherwise refuse to average with a TypeError.
jul_2021_ave = jul_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file (index=True writes the groupby keys as leading columns).
# to_csv does not create missing folders, so make sure the target exists first.

os.makedirs("data/day/ME/jul/", exist_ok=True)
jul_2021_ave.to_csv("data/day/ME/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Create variable for files in directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined output in a stable (year) order from run to run.
files = sorted(f for f in os.listdir("data/day/ME/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Start from an empty dataframe so ME_jul is still defined when no files are found
ME_jul = pd.DataFrame()

# Read each per-year file, then concatenate once: a single concat avoids
# re-copying the accumulated rows on every pass and never mixes an empty
# frame into the dtype resolution. Sorting keeps the row order reproducible.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/ME/jul/" + file)
    frames.append(df)
if frames:
    ME_jul = pd.concat(frames)
    
ME_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,590487e13b23b41e232cab63f588978494880281,jul,2017,auto,auto,Portland,751.950000,750.000000,700.000000,15.0,False,False,False
1,590487e13b23b41e232cab63f588978494880281,jul,2017,auto,hold,Portland,738.333333,745.000000,695.000000,15.0,False,False,False
2,a86f074cd58d2ce47d145a5ddf4492c1acca3635,jul,2017,heat,auto,South Portland,730.862745,780.000000,650.000000,85.0,False,False,False
3,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,jul,2017,heat,hold,Greenville,721.246835,663.481013,658.943038,35.0,False,False,False
4,b7c214e6715b7136e0cf0fbdb5b83337dda9032b,jul,2017,auto,auto,Eliot,717.784578,726.036720,640.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16,d103d7c927baf49b25a1ed7d30e0e0c795359a45,jul,2021,cool,hold,Sanford,719.109810,736.999024,719.554905,0.0,True,False,False
17,d103d7c927baf49b25a1ed7d30e0e0c795359a45,jul,2021,heat,hold,Sanford,708.197368,655.592105,653.881579,0.0,True,False,False
18,d49970b6290cf340b7bc1b1270843d53fdc2d1b9,jul,2021,heat,hold,Windham,758.190476,767.190476,767.190476,20.0,False,False,False
19,db15333c03d900e5068f914b4bbe0929609c4884,jul,2021,cool,hold,Portland,718.267568,720.029730,719.551351,15.0,False,False,False


In [133]:
# Write the combined July file. The row index is just a per-file counter, so it is left out.
out_path = Path("Scraper_Output/State_Month_Day/ME/ME_jul.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
ME_jul.to_csv(out_path, header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 day CSV for ME.
aug_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2017-aug-day-ME.csv",
)

In [135]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
aug_2017.drop(index=aug_2017.index[out_of_range.to_numpy()], inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c5bc7c9568931d0b8979393514c99d545786ae72,2017-08-25 15:30:00 UTC,heat,hold,713,750,750,ME,Lewiston,90,False,False,False,Gas
1,c5bc7c9568931d0b8979393514c99d545786ae72,2017-08-25 15:35:00 UTC,heat,hold,715,750,750,ME,Lewiston,90,False,False,False,Gas
2,c5bc7c9568931d0b8979393514c99d545786ae72,2017-08-25 15:40:00 UTC,heat,hold,717,750,750,ME,Lewiston,90,False,False,False,Gas
3,c5bc7c9568931d0b8979393514c99d545786ae72,2017-08-25 15:25:00 UTC,heat,hold,711,750,750,ME,Lewiston,90,False,False,False,Gas
4,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-08-31 12:35:00 UTC,heat,hold,699,655,628,ME,Greenville,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6235,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-08-03 19:00:00 UTC,heat,hold,732,650,650,ME,Greenville,35,False,False,False,Gas
6236,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-08-03 18:30:00 UTC,heat,hold,747,650,650,ME,Greenville,35,False,False,False,Gas
6237,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-08-03 12:25:00 UTC,heat,hold,715,650,650,ME,Greenville,35,False,False,False,Gas
6238,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2017-08-03 15:15:00 UTC,heat,hold,735,650,650,ME,Greenville,35,False,False,False,Gas


In [136]:
# Tag every row with its source year and month (both stored as strings).
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_labels)

In [138]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2017_ave = aug_2017.groupby(group_keys).mean(numeric_only=True)

In [139]:
# Export the August 2017 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/aug/aug_2017_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
aug_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 August Day

In [140]:
# Load the August 2018 day CSV for ME.
aug_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2018-aug-day-ME.csv",
)

In [141]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
aug_2018.drop(index=aug_2018.index[out_of_range.to_numpy()], inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b2d9a7c254d8f9954e32533676d156d263cecd5a,2018-08-18T16:35:00Z,auto,auto,692,690,640,ME,Falmouth,10,True,False,False,Gas
1,b2d9a7c254d8f9954e32533676d156d263cecd5a,2018-08-17T12:30:00Z,auto,hold,681,690,640,ME,Falmouth,10,True,False,False,Gas
2,b2d9a7c254d8f9954e32533676d156d263cecd5a,2018-08-05T19:45:00Z,auto,auto,692,690,640,ME,Falmouth,10,True,False,False,Gas
3,b2d9a7c254d8f9954e32533676d156d263cecd5a,2018-08-17T17:35:00Z,auto,hold,709,710,640,ME,Falmouth,10,True,False,False,Gas
4,b2d9a7c254d8f9954e32533676d156d263cecd5a,2018-08-30T12:35:00Z,auto,hold,694,690,640,ME,Falmouth,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23647,590487e13b23b41e232cab63f588978494880281,2018-08-06T18:40:00Z,auto,auto,803,730,660,ME,Portland,15,False,False,False,Gas
23648,590487e13b23b41e232cab63f588978494880281,2018-08-06T19:45:00Z,auto,auto,812,730,660,ME,Portland,15,False,False,False,Gas
23649,590487e13b23b41e232cab63f588978494880281,2018-08-06T19:35:00Z,auto,auto,811,730,660,ME,Portland,15,False,False,False,Gas
23650,590487e13b23b41e232cab63f588978494880281,2018-08-06T19:20:00Z,auto,auto,809,730,660,ME,Portland,15,False,False,False,Gas


In [142]:
# Tag every row with its source year and month (both stored as strings).
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_labels)

In [144]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2018_ave = aug_2018.groupby(group_keys).mean(numeric_only=True)

In [145]:
# Export the August 2018 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/aug/aug_2018_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
aug_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 August Day

In [146]:
# Load the August 2019 day CSV for ME.
aug_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2019-aug-day-ME.csv",
)

In [147]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
aug_2019.drop(index=aug_2019.index[out_of_range.to_numpy()], inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,883b41097dd9e03f592ed7428b08809727f5fb29,2019-08-11 18:30:00 UTC,cool,auto,786,800,790,ME,Raymond,39,False,False,True,Electric
1,883b41097dd9e03f592ed7428b08809727f5fb29,2019-08-12 16:50:00 UTC,cool,auto,796,745,745,ME,Raymond,39,False,False,True,Electric
2,883b41097dd9e03f592ed7428b08809727f5fb29,2019-08-09 15:00:00 UTC,cool,auto,755,750,750,ME,Raymond,39,False,False,True,Electric
3,883b41097dd9e03f592ed7428b08809727f5fb29,2019-08-06 19:10:00 UTC,cool,auto,779,750,750,ME,Raymond,39,False,False,True,Electric
4,883b41097dd9e03f592ed7428b08809727f5fb29,2019-08-13 14:50:00 UTC,cool,auto,732,780,780,ME,Raymond,39,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68813,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2019-08-16 13:05:00 UTC,heat,hold,736,740,740,ME,Greenville,35,False,False,False,Gas
68814,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2019-08-16 12:20:00 UTC,heat,hold,741,740,740,ME,Greenville,35,False,False,False,Gas
68815,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2019-08-16 12:35:00 UTC,heat,hold,739,740,740,ME,Greenville,35,False,False,False,Gas
68816,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,2019-08-16 12:50:00 UTC,heat,hold,738,740,740,ME,Greenville,35,False,False,False,Gas


In [148]:
# Tag every row with its source year and month (both stored as strings).
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_labels)

In [150]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2019_ave = aug_2019.groupby(group_keys).mean(numeric_only=True)

In [151]:
# Export the August 2019 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/aug/aug_2019_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
aug_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 August Day

In [152]:
# Load the August 2020 day CSV for ME.
aug_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2020-aug-day-ME.csv",
)

In [153]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
aug_2020.drop(index=aug_2020.index[out_of_range.to_numpy()], inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7aed90f1acc624bf0fc1c76698cf32d7e2dbf36e,2020-08-11 12:15:00 UTC,auto,hold,713,720,660,ME,York,5,False,False,False,Gas
1,7aed90f1acc624bf0fc1c76698cf32d7e2dbf36e,2020-08-08 11:30:00 UTC,auto,hold,700,720,660,ME,York,5,False,False,False,Gas
2,7aed90f1acc624bf0fc1c76698cf32d7e2dbf36e,2020-08-09 14:25:00 UTC,auto,hold,720,720,660,ME,York,5,False,False,False,Gas
3,7aed90f1acc624bf0fc1c76698cf32d7e2dbf36e,2020-08-12 11:50:00 UTC,auto,hold,713,720,660,ME,York,5,False,False,False,Gas
4,7aed90f1acc624bf0fc1c76698cf32d7e2dbf36e,2020-08-08 14:20:00 UTC,auto,hold,710,720,660,ME,York,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
70146,2d3f19df9837542e28597777e3227078de997066,2020-08-01 10:15:00 UTC,cool,auto,721,740,740,ME,Windham,10,False,False,False,Gas
70147,2d3f19df9837542e28597777e3227078de997066,2020-08-01 07:50:00 UTC,cool,auto,727,740,740,ME,Windham,10,False,False,False,Gas
70148,2d3f19df9837542e28597777e3227078de997066,2020-08-01 07:10:00 UTC,cool,auto,726,740,740,ME,Windham,10,False,False,False,Gas
70149,2d3f19df9837542e28597777e3227078de997066,2020-08-01 11:50:00 UTC,cool,auto,727,740,740,ME,Windham,10,False,False,False,Gas


In [154]:
# Tag every row with its source year and month (both stored as strings).
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_labels)

In [156]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2020_ave = aug_2020.groupby(group_keys).mean(numeric_only=True)

In [157]:
# Export the August 2020 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/aug/aug_2020_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
aug_2020_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [158]:
# Collect the per-year CSV files written to the August folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/ME/aug/") if f.endswith(".csv"))

In [159]:
# Pattern adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once. Concatenating onto an initially
# empty DataFrame inside a loop re-copies the accumulated rows on each pass and is
# deprecated on recent pandas versions.
monthly_frames = [pd.read_csv("data/day/ME/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
ME_aug = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

ME_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,a86f074cd58d2ce47d145a5ddf4492c1acca3635,aug,2017,heat,auto,South Portland,727.166667,780.000000,640.000000,85.0,False,False,False
1,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,aug,2017,heat,auto,Greenville,719.458333,730.416667,729.333333,35.0,False,False,False
2,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,aug,2017,heat,hold,Greenville,709.020612,656.490027,642.978723,35.0,False,False,False
3,c5bc7c9568931d0b8979393514c99d545786ae72,aug,2017,heat,hold,Lewiston,713.200000,750.000000,750.000000,90.0,False,False,False
4,db15333c03d900e5068f914b4bbe0929609c4884,aug,2017,cool,auto,Portland,709.057018,710.000000,650.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
20,d103d7c927baf49b25a1ed7d30e0e0c795359a45,aug,2020,cool,hold,Sanford,734.545979,747.478604,719.971472,0.0,True,False,False
21,db15333c03d900e5068f914b4bbe0929609c4884,aug,2020,cool,auto,Portland,725.932692,720.173077,719.769231,15.0,False,False,False
22,db15333c03d900e5068f914b4bbe0929609c4884,aug,2020,cool,hold,Portland,730.757009,721.205607,721.084112,15.0,False,False,False
23,e002570d883bdc948f492a07fe24a29380bfd71d,aug,2020,heat,hold,Saco,786.642857,650.000000,620.000000,0.0,False,False,False


In [160]:
# Write the combined August file. The row index is just a per-file counter, so it is left out.
out_path = Path("Scraper_Output/State_Month_Day/ME/ME_aug.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
ME_aug.to_csv(out_path, header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 day CSV for ME.
dec_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2017-dec-day-ME.csv",
)

In [162]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
dec_2017.drop(index=dec_2017.index[out_of_range.to_numpy()], inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,0b9bc48af842c50d1c433ab2aebe001d355ba6df,2017-12-26T16:10:00Z,heat,hold,714,741,713,ME,Freeport,0,False,False,False,Gas
7,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2017-12-22T14:55:00Z,heat,hold,727,736,736,ME,Westbrook,0,True,False,False,Gas
8,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2017-12-22T13:10:00Z,heat,hold,698,736,736,ME,Westbrook,0,True,False,False,Gas
9,9931e4537a3f0c9b6eb4dc811cb00abd4edde7d0,2017-12-22T16:40:00Z,heat,hold,727,736,736,ME,Westbrook,0,True,False,False,Gas
15,0b9bc48af842c50d1c433ab2aebe001d355ba6df,2017-12-08T16:00:00Z,heat,hold,703,745,705,ME,Freeport,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25112,af68b7762e6793591d3284dde997c800c9f58566,2017-12-25T19:30:00Z,heat,hold,725,740,740,ME,Corinna,0,False,False,False,Gas
25113,af68b7762e6793591d3284dde997c800c9f58566,2017-12-14T14:20:00Z,heat,auto,717,740,740,ME,Corinna,0,False,False,False,Gas
25114,af68b7762e6793591d3284dde997c800c9f58566,2017-12-25T18:55:00Z,heat,hold,707,740,740,ME,Corinna,0,False,False,False,Gas
25115,af68b7762e6793591d3284dde997c800c9f58566,2017-12-23T12:20:00Z,heat,auto,742,740,740,ME,Corinna,0,False,False,False,Gas


In [163]:
# Tag every row with its source year and month (both stored as strings).
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_labels)

In [165]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2017_ave = dec_2017.groupby(group_keys).mean(numeric_only=True)

In [166]:
# Export the December 2017 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/dec/dec_2017_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
dec_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 December Day

In [167]:
# Load the December 2018 day CSV for ME.
dec_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2018-dec-day-ME.csv",
)

In [168]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
dec_2018.drop(index=dec_2018.index[out_of_range.to_numpy()], inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,99a8ed3c56af8c5f4408cf3bb2497456705f6b4f,2018-12-09 16:20:00 UTC,heat,auto,653,650,635,ME,Madawaska,60,False,False,False,Gas
1,c5bc7c9568931d0b8979393514c99d545786ae72,2018-12-30 14:25:00 UTC,heat,hold,695,774,657,ME,Lewiston,90,False,False,False,Gas
2,99a8ed3c56af8c5f4408cf3bb2497456705f6b4f,2018-12-09 16:40:00 UTC,heat,auto,654,650,635,ME,Madawaska,60,False,False,False,Gas
3,99a8ed3c56af8c5f4408cf3bb2497456705f6b4f,2018-12-09 16:45:00 UTC,heat,auto,655,650,635,ME,Madawaska,60,False,False,False,Gas
4,99a8ed3c56af8c5f4408cf3bb2497456705f6b4f,2018-12-09 16:55:00 UTC,heat,auto,656,650,635,ME,Madawaska,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57337,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2018-12-28 14:40:00 UTC,heat,hold,749,740,740,ME,Windham,20,True,False,False,Gas
57338,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2018-12-28 13:15:00 UTC,heat,hold,728,740,740,ME,Windham,20,True,False,False,Gas
57339,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2018-12-23 13:15:00 UTC,heat,hold,745,740,740,ME,Windham,20,True,False,False,Gas
57340,bbb876616d411aa75fe3a5f3ddc0bae3e24c8f32,2018-12-28 15:00:00 UTC,heat,hold,740,740,740,ME,Windham,20,True,False,False,Gas


In [169]:
# Tag every row with its source year and month (both stored as strings).
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_labels)

In [171]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2018_ave = dec_2018.groupby(group_keys).mean(numeric_only=True)

In [172]:
# Export the December 2018 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/dec/dec_2018_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
dec_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 December Day

In [173]:
# Load the December 2019 day CSV for ME.
dec_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2019-dec-day-ME.csv",
)

In [174]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
dec_2019.drop(index=dec_2019.index[out_of_range.to_numpy()], inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,54c88af9f2ad9dcd5c2e651aa030eb01babc196c,2019-12-06 14:35:00 UTC,heat,auto,689,691,691,ME,Yarmouth,99,True,False,False,Gas
1,54c88af9f2ad9dcd5c2e651aa030eb01babc196c,2019-12-06 16:15:00 UTC,heat,auto,705,703,703,ME,Yarmouth,99,True,False,False,Gas
2,54c88af9f2ad9dcd5c2e651aa030eb01babc196c,2019-12-05 14:05:00 UTC,heat,auto,657,667,667,ME,Yarmouth,99,True,False,False,Gas
3,54c88af9f2ad9dcd5c2e651aa030eb01babc196c,2019-12-06 18:25:00 UTC,heat,auto,687,696,696,ME,Yarmouth,99,True,False,False,Gas
4,c491ad48441073aa08a818dbfe01ac8ad2a83838,2019-12-04 14:15:00 UTC,heat,auto,746,780,750,ME,South Portland,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79664,38618e0152941e18a31c699f121721d267397df7,2019-12-11 19:10:00 UTC,heat,auto,658,660,660,ME,Old Town,50,False,False,False,Gas
79665,38618e0152941e18a31c699f121721d267397df7,2019-12-01 10:10:00 UTC,heat,auto,653,660,660,ME,Old Town,50,False,False,False,Gas
79666,38618e0152941e18a31c699f121721d267397df7,2019-12-12 15:25:00 UTC,heat,auto,660,660,660,ME,Old Town,50,False,False,False,Gas
79667,38618e0152941e18a31c699f121721d267397df7,2019-12-15 11:35:00 UTC,heat,hold,663,660,660,ME,Old Town,50,False,False,False,Gas


In [175]:
# Tag every row with its source year and month (both stored as strings).
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_labels)

In [177]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2019_ave = dec_2019.groupby(group_keys).mean(numeric_only=True)

In [178]:
# Export the December 2019 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/dec/dec_2019_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
dec_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 December Day

In [179]:
# Load the December 2020 day CSV for ME.
dec_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/ME-day/2020-dec-day-ME.csv",
)

In [180]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the frame keeps its original row labels.
dec_2020.drop(index=dec_2020.index[out_of_range.to_numpy()], inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db15333c03d900e5068f914b4bbe0929609c4884,2020-12-26 19:20:00 UTC,heat,hold,714,712,712,ME,Portland,15,False,False,False,Gas
1,02a6a361c70d1052cbb1d7d80490d8326ae8e684,2020-12-06 17:00:00 UTC,heat,auto,619,730,675,ME,Naples,15,False,False,False,Gas
2,02a6a361c70d1052cbb1d7d80490d8326ae8e684,2020-12-08 14:50:00 UTC,heat,auto,604,715,665,ME,Naples,15,False,False,False,Gas
3,02a6a361c70d1052cbb1d7d80490d8326ae8e684,2020-12-06 17:30:00 UTC,heat,auto,622,730,675,ME,Naples,15,False,False,False,Gas
4,db15333c03d900e5068f914b4bbe0929609c4884,2020-12-17 15:15:00 UTC,heat,hold,693,702,702,ME,Portland,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57825,d20ccf5b4cc587cf0271ef0e24b9906469f17d26,2020-12-01 12:00:00 UTC,heat,hold,737,740,740,ME,Winthrop,0,False,False,False,Gas
57826,d20ccf5b4cc587cf0271ef0e24b9906469f17d26,2020-12-01 12:25:00 UTC,heat,hold,740,740,740,ME,Winthrop,0,False,False,False,Gas
57827,d20ccf5b4cc587cf0271ef0e24b9906469f17d26,2020-12-01 13:55:00 UTC,heat,hold,744,740,740,ME,Winthrop,0,False,False,False,Gas
57828,d20ccf5b4cc587cf0271ef0e24b9906469f17d26,2020-12-01 18:25:00 UTC,heat,hold,736,740,740,ME,Winthrop,0,False,False,False,Gas


In [181]:
# Tag every row with its source year and month (both stored as strings).
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Label the three temperature columns as averages; they hold group means
# once the frame has been aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_labels)

In [183]:
# Average the numeric columns for every thermostat / month / year / mode / event / city group.
# numeric_only=True is needed on pandas >= 2.0: text columns such as date_time are
# no longer dropped silently there and would raise a TypeError.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2020_ave = dec_2020.groupby(group_keys).mean(numeric_only=True)

In [184]:
# Export the December 2020 averages. The group keys live in the index, so index=True keeps them.
out_path = Path("data/day/ME/dec/dec_2020_ave.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
dec_2020_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [185]:
# Collect the per-year CSV files written to the December folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/ME/dec/") if f.endswith(".csv"))

In [186]:
# Pattern adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once. Concatenating onto an initially
# empty DataFrame inside a loop re-copies the accumulated rows on each pass and is
# deprecated on recent pandas versions.
monthly_frames = [pd.read_csv("data/day/ME/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
ME_dec = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

ME_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,02a6a361c70d1052cbb1d7d80490d8326ae8e684,dec,2017,heat,hold,Naples,646.778226,665.887097,663.625000,15.0,False,False,False
1,06322bf1e7bd15eb760e1cc07c01525c4fd679bc,dec,2017,heat,auto,Harpswell,691.647059,673.333333,673.333333,5.0,False,False,False
2,06322bf1e7bd15eb760e1cc07c01525c4fd679bc,dec,2017,heat,hold,Harpswell,685.994493,672.407553,672.407553,5.0,False,False,False
3,071d40a2ebc85623a1b5da94a1ee4af028115825,dec,2017,heat,hold,North Yarmouth,693.833333,680.000000,680.000000,25.0,False,False,False
4,0adbc6ab910c02e6c98c5e3b698aa65408f25408,dec,2017,heat,auto,Old Orchard,642.173203,650.000000,643.333333,97.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
79,e4e047a743392addccea6117b9d1731b70406478,dec,2020,heat,auto,Fryeburg,616.952381,650.000000,640.000000,7.0,False,False,False
80,e4e047a743392addccea6117b9d1731b70406478,dec,2020,heat,hold,Fryeburg,649.346154,651.115385,639.307692,7.0,False,False,False
81,ff455f6c827dd3c854c66a6f09c6ba0dc0bc7d7b,dec,2020,heat,auto,Portland,692.553191,690.000000,689.829787,15.0,False,False,False
82,ff455f6c827dd3c854c66a6f09c6ba0dc0bc7d7b,dec,2020,heat,hold,Portland,678.586957,680.869565,680.086957,15.0,False,False,False


In [187]:
# Write the combined December file. The row index is just a per-file counter, so it is left out.
out_path = Path("Scraper_Output/State_Month_Day/ME/ME_dec.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
ME_dec.to_csv(out_path, header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the combined month CSV files written to the ME state folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/ME/") if f.endswith(".csv"))

In [189]:
# Pattern adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every month file, then concatenate once. Concatenating onto an initially
# empty DataFrame inside a loop re-copies the accumulated rows on each pass and is
# deprecated on recent pandas versions.
state_frames = [pd.read_csv("Scraper_Output/State_Month_Day/ME/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
ME_all = pd.concat(state_frames) if state_frames else pd.DataFrame()

ME_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,a86f074cd58d2ce47d145a5ddf4492c1acca3635,aug,2017,heat,auto,South Portland,727.166667,780.000000,640.000000,85.0,False,False,False
1,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,aug,2017,heat,auto,Greenville,719.458333,730.416667,729.333333,35.0,False,False,False
2,aeb6a66ea9334f71ba2b04f324a9a0cb61776743,aug,2017,heat,hold,Greenville,709.020612,656.490027,642.978723,35.0,False,False,False
3,c5bc7c9568931d0b8979393514c99d545786ae72,aug,2017,heat,hold,Lewiston,713.200000,750.000000,750.000000,90.0,False,False,False
4,db15333c03d900e5068f914b4bbe0929609c4884,aug,2017,cool,auto,Portland,709.057018,710.000000,650.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,d103d7c927baf49b25a1ed7d30e0e0c795359a45,jun,2021,heat,hold,Sanford,692.378855,759.000000,629.000000,0.0,True,False,False
171,d49970b6290cf340b7bc1b1270843d53fdc2d1b9,jun,2021,heat,hold,Windham,729.262500,730.350000,730.350000,20.0,False,False,False
172,db15333c03d900e5068f914b4bbe0929609c4884,jun,2021,cool,hold,Portland,734.126214,748.524272,748.524272,15.0,False,False,False
173,def0ec3e5e8faffc7af3af52828081d48494a02d,jun,2021,heat,hold,Presque Isle,683.437500,686.020833,686.020833,10.0,False,False,False


In [190]:
# Write the all-months file one level above the ME folder, so the folder listing
# used to build it never picks this file up on a re-run.
out_path = Path("Scraper_Output/State_Month_Day/ME_all_day.csv")
# Create the target folder first so a fresh checkout does not fail on a missing directory.
out_path.parent.mkdir(parents=True, exist_ok=True)
ME_all.to_csv(out_path, header=True, index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery SQL queries:
# every monthly frame should report a single ProvinceState value.
state_check_frames = [
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
]

for label, frame in state_check_frames:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['ME']
Unique jan_2018: ['ME']
Unique jan_2019: ['ME']
Unique jan_2020: ['ME']
Unique jan_2021: ['ME']
Unique feb_2017: ['ME']
Unique feb_2018: ['ME']
Unique feb_2019: ['ME']
Unique feb_2020: ['ME']
Unique feb_2021: ['ME']
Unique jun_2017: ['ME']
Unique jun_2018: ['ME']
Unique jun_2019: ['ME']
Unique jun_2020: ['ME']
Unique jun_2021: ['ME']
Unique jul_2017: ['ME']
Unique jul_2018: ['ME']
Unique jul_2019: ['ME']
Unique jul_2020: ['ME']
Unique jul_2021: ['ME']
Unique aug_2017: ['ME']
Unique aug_2018: ['ME']
Unique aug_2019: ['ME']
Unique aug_2020: ['ME']
Unique dec_2017: ['ME']
Unique dec_2018: ['ME']
Unique dec_2019: ['ME']
Unique dec_2020: ['ME']
