# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 "day" extract for MT into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2017-jan-day-MT.csv")

In [3]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
jan_2017 = jan_2017.loc[
    ~(
        jan_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | jan_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5d1ff376e3fc7ee713a1138cbb661ddad9b1e257,2017-01-06T17:40:00Z,heat,hold,666,674,674,MT,Bozeman,15,True,False,False,Gas
1,23d834c190c3686c02c030ee0bf42003872bd591,2017-01-17T18:30:00Z,heat,hold,696,724,694,MT,Bozeman,25,False,False,False,Gas
2,5d1ff376e3fc7ee713a1138cbb661ddad9b1e257,2017-01-08T17:15:00Z,heat,hold,642,663,641,MT,Bozeman,15,True,False,False,Gas
3,5d1ff376e3fc7ee713a1138cbb661ddad9b1e257,2017-01-23T18:40:00Z,heat,hold,655,708,644,MT,Bozeman,15,True,False,False,Gas
4,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-01-20T18:10:00Z,heat,hold,740,745,745,MT,Bozeman,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7920,a446833ed31f9039f78e43743749d949ab71cfe0,2017-01-08T14:20:00Z,auto,hold,652,750,700,MT,Fort Benton,16,False,False,False,Gas
7921,a446833ed31f9039f78e43743749d949ab71cfe0,2017-01-06T13:55:00Z,auto,hold,657,750,700,MT,Fort Benton,16,False,False,False,Gas
7922,a446833ed31f9039f78e43743749d949ab71cfe0,2017-01-06T13:50:00Z,auto,hold,661,750,700,MT,Fort Benton,16,False,False,False,Gas
7923,a446833ed31f9039f78e43743749d949ab71cfe0,2017-01-08T13:30:00Z,auto,hold,651,750,700,MT,Fort Benton,16,False,False,False,Gas


In [4]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
jan_2017_ave = jan_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
23d834c190c3686c02c030ee0bf42003872bd591,Jan,2017,heat,hold,Bozeman,692.737226,698.912409,697.160584,25.0,False,False,False
29f5966d7808882d6c76a0a2d7091c18b3587704,Jan,2017,heat,auto,Bozeman,690.696629,693.11236,692.719101,9.0,True,False,False
29f5966d7808882d6c76a0a2d7091c18b3587704,Jan,2017,heat,hold,Bozeman,733.701031,737.85567,737.845361,9.0,True,False,False
3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,Jan,2017,auto,auto,Great Falls,719.717391,783.188406,720.0,45.0,False,False,False
3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,Jan,2017,auto,hold,Great Falls,724.25,780.681818,716.590909,45.0,False,False,False
40f358a2efb733ab89fe3bb6e2f7b0280ccb4917,Jan,2017,heat,hold,Billings,686.144385,690.454545,689.877005,70.0,False,False,False
5d1ff376e3fc7ee713a1138cbb661ddad9b1e257,Jan,2017,heat,auto,Bozeman,593.0,731.75,633.75,15.0,True,False,False
5d1ff376e3fc7ee713a1138cbb661ddad9b1e257,Jan,2017,heat,hold,Bozeman,657.972222,665.787037,657.564815,15.0,True,False,False
7258db280a1da00d548cd8ea30f21c7d7d4932e0,Jan,2017,heat,auto,Bozeman,655.606061,660.0,660.0,16.0,True,False,False
7258db280a1da00d548cd8ea30f21c7d7d4932e0,Jan,2017,heat,hold,Bozeman,666.013245,668.463576,668.430464,16.0,True,False,False


In [7]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/MT/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 "day" extract for MT into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2018-jan-day-MT.csv")

In [9]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
jan_2018 = jan_2018.loc[
    ~(
        jan_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | jan_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,666c08e73f2bee116322fafbf2fc1b2972422b81,2018-01-20T15:55:00Z,heat,hold,672,737,623,MT,Bozeman,7,False,False,False,Gas
1,0728208cff5242993896da88a882e076094b765f,2018-01-10T13:15:00Z,heat,auto,634,745,678,MT,Billings,50,False,False,False,Gas
2,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,2018-01-30T15:05:00Z,heat,hold,595,666,666,MT,Eureka,10,True,False,True,Electric
3,29f5966d7808882d6c76a0a2d7091c18b3587704,2018-01-15T19:45:00Z,auto,hold,727,805,745,MT,Bozeman,9,True,False,False,Gas
4,29f5966d7808882d6c76a0a2d7091c18b3587704,2018-01-11T18:20:00Z,auto,hold,734,785,735,MT,Bozeman,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16236,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-01-09T16:20:00Z,heat,hold,656,650,650,MT,Bozeman,10,False,False,False,Gas
16237,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-01-13T13:55:00Z,heat,hold,654,650,650,MT,Bozeman,10,False,False,False,Gas
16238,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-01-02T19:50:00Z,heat,hold,680,650,650,MT,Bozeman,10,False,False,False,Gas
16239,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-01-08T19:30:00Z,heat,hold,657,650,650,MT,Bozeman,10,False,False,False,Gas


In [10]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
jan_2018_ave = jan_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [13]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/MT/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 "day" extract for MT into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2019-jan-day-MT.csv")

In [15]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
jan_2019 = jan_2019.loc[
    ~(
        jan_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | jan_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,23d834c190c3686c02c030ee0bf42003872bd591,2019-01-05 18:05:00 UTC,heat,hold,706,775,661,MT,Bozeman,25,False,False,False,Gas
1,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2019-01-02 16:25:00 UTC,heat,hold,687,695,684,MT,Missoula,25,False,False,False,Gas
2,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2019-01-03 14:05:00 UTC,heat,hold,679,687,687,MT,Missoula,25,False,False,False,Gas
3,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2019-01-03 13:45:00 UTC,heat,hold,683,687,687,MT,Missoula,25,False,False,False,Gas
4,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2019-01-30 15:55:00 UTC,heat,hold,684,691,688,MT,Missoula,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33951,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-01-24 14:15:00 UTC,heat,hold,758,760,760,MT,Great Falls,70,False,False,False,Gas
33952,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-01-14 18:55:00 UTC,heat,hold,747,760,760,MT,Great Falls,70,False,False,False,Gas
33953,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-01-13 14:20:00 UTC,heat,hold,755,760,760,MT,Great Falls,70,False,False,False,Gas
33954,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-01-17 15:05:00 UTC,heat,hold,758,760,760,MT,Great Falls,70,False,False,False,Gas


In [16]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
jan_2019_ave = jan_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [19]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/MT/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 "day" extract for MT into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2020-jan-day-MT.csv")

In [21]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
jan_2020 = jan_2020.loc[
    ~(
        jan_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | jan_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b9a976f581bd8ee81013df76fbd5b52f926136dc,2020-01-11 18:15:00 UTC,heat,hold,700,748,677,MT,Belgrade,0,False,False,False,Gas
1,9539afa609d7ffff710c3308be33b6a4a4bbd8a8,2020-01-07 17:55:00 UTC,auto,hold,663,739,629,MT,Missoula,9,False,False,False,Gas
2,23d834c190c3686c02c030ee0bf42003872bd591,2020-01-15 19:50:00 UTC,heat,hold,694,776,662,MT,Bozeman,25,False,False,False,Gas
3,7e82f7bb0ad060bdb1e139bed8aa5d5f869cf054,2020-01-10 13:55:00 UTC,heat,hold,665,712,663,MT,East Helena,40,False,False,False,Gas
4,a6b5547e34405b7e11d145b6b81f37e2850b7cd9,2020-01-16 14:15:00 UTC,heat,auto,689,697,687,MT,Missoula,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41387,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-01-20 17:30:00 UTC,heat,hold,753,760,760,MT,Great Falls,70,False,False,False,Gas
41388,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-01-03 17:45:00 UTC,heat,hold,753,760,760,MT,Great Falls,70,False,False,False,Gas
41389,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-01-02 18:25:00 UTC,heat,hold,760,760,760,MT,Great Falls,70,False,False,False,Gas
41390,99fa8d150f6b0ed05de548014a2751d573270a6b,2020-01-06 14:40:00 UTC,heat,auto,670,760,760,MT,Corvallis,10,True,False,False,Gas


In [22]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
jan_2020_ave = jan_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [25]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/MT/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 "day" extract for MT into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2021-jan-day-MT.csv")

In [27]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
jan_2021 = jan_2021.loc[
    ~(
        jan_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | jan_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2021-01-31T19:45:00Z,heat,hold,657,650,640,MT,Livingston,15,False,False,False,Gas
1,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2021-01-31T18:25:00Z,heat,hold,661,650,640,MT,Livingston,15,False,False,False,Gas
2,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2021-01-31T19:20:00Z,heat,hold,658,650,640,MT,Livingston,15,False,False,False,Gas
3,3749c1cf564746f9904314c5d33b96db515c066b,2021-01-28T07:30:00Z,heat,hold,640,650,640,MT,Butte,118,False,False,False,Gas
4,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2021-01-31T18:45:00Z,heat,hold,660,650,640,MT,Livingston,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26929,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-01-20T14:55:00Z,heat,hold,695,700,700,MT,Billings,60,False,False,False,Gas
26930,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-01-20T14:45:00Z,heat,hold,698,700,700,MT,Billings,60,False,False,False,Gas
26931,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-01-26T19:15:00Z,heat,hold,697,700,700,MT,Billings,60,False,False,False,Gas
26932,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-01-26T19:00:00Z,heat,hold,689,700,700,MT,Billings,60,False,False,False,Gas


In [28]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
jan_2021_ave = jan_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [31]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/MT/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the per-year January CSVs in the output folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the row order of the combined file reproducible across machines and
# re-runs.
files = sorted(f for f in os.listdir("data/day/MT/jan/") if f.endswith(".csv"))

In [33]:
# Approach adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year CSV, then concatenate them in a single pd.concat call
# instead of growing the frame inside the loop (which re-copies all earlier
# rows on each iteration and seeds the result with an empty frame).
# Each file keeps its own row index, as before.
frames = []
for file in files:
    df = pd.read_csv("data/day/MT/jan/" + file)
    frames.append(df)

# With no input files fall back to an empty frame, matching the previous result.
MT_jan = pd.concat(frames) if frames else pd.DataFrame()

MT_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,23d834c190c3686c02c030ee0bf42003872bd591,Jan,2017,heat,hold,Bozeman,692.737226,698.912409,697.160584,25.0,False,False,False
1,29f5966d7808882d6c76a0a2d7091c18b3587704,Jan,2017,heat,auto,Bozeman,690.696629,693.112360,692.719101,9.0,True,False,False
2,29f5966d7808882d6c76a0a2d7091c18b3587704,Jan,2017,heat,hold,Bozeman,733.701031,737.855670,737.845361,9.0,True,False,False
3,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,Jan,2017,auto,auto,Great Falls,719.717391,783.188406,720.000000,45.0,False,False,False
4,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,Jan,2017,auto,hold,Great Falls,724.250000,780.681818,716.590909,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
40,f404081c89442a6b4699e93e43dc4bb72f9e2dc1,Jan,2021,auto,hold,Great Falls,695.554745,747.761861,697.761861,55.0,False,False,False
41,f469a99acb054dbddd08e713d682acf5035c42df,Jan,2021,heat,hold,Helena,678.380952,680.000000,680.000000,49.0,False,False,False
42,f5101dfe9a0543a0dd3d8b1adbec2dd33f3047a8,Jan,2021,heat,hold,Bozeman,692.170213,699.297872,698.765957,90.0,False,False,False
43,fd93662943beeb4603fd63aacbbbb7abb3183470,Jan,2021,heat,hold,NASHUA,695.613636,674.136364,672.954545,30.0,False,False,False


In [34]:
# Write the combined January frame without its (repeating, per-file) row index.
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("Scraper_Output/State_Month_Day/MT", exist_ok=True)
MT_jan.to_csv("Scraper_Output/State_Month_Day/MT/MT_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 "day" extract for MT into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2017-feb-day-MT.csv")

In [36]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
feb_2017 = feb_2017.loc[
    ~(
        feb_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | feb_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2017-02-28T15:20:00Z,heat,auto,699,706,694,MT,Billings,55,False,False,False,Gas
1,5d1ff376e3fc7ee713a1138cbb661ddad9b1e257,2017-02-27T18:20:00Z,heat,auto,619,741,635,MT,Bozeman,15,True,False,False,Gas
2,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-02T14:20:00Z,heat,hold,731,735,735,MT,Bozeman,9,True,False,False,Gas
3,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-02T15:55:00Z,heat,hold,734,735,735,MT,Bozeman,9,True,False,False,Gas
4,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-06T18:35:00Z,heat,hold,714,715,715,MT,Bozeman,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7222,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-08T19:20:00Z,heat,hold,735,740,740,MT,Bozeman,9,True,False,False,Gas
7223,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-08T19:25:00Z,heat,hold,740,740,740,MT,Bozeman,9,True,False,False,Gas
7224,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-08T19:15:00Z,heat,hold,731,740,740,MT,Bozeman,9,True,False,False,Gas
7225,29f5966d7808882d6c76a0a2d7091c18b3587704,2017-02-08T19:50:00Z,heat,hold,735,740,740,MT,Bozeman,9,True,False,False,Gas


In [37]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
feb_2017_ave = feb_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [40]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/MT/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 "day" extract for MT into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2018-feb-day-MT.csv")

In [42]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
feb_2018 = feb_2018.loc[
    ~(
        feb_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | feb_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a9250271635d87fc0b19dbef1fb1508e9a6f5630,2018-02-13T14:35:00Z,heat,hold,626,790,790,MT,Butte,100,False,False,False,Gas
1,a9250271635d87fc0b19dbef1fb1508e9a6f5630,2018-02-13T14:20:00Z,heat,hold,626,790,790,MT,Butte,100,False,False,False,Gas
2,a9250271635d87fc0b19dbef1fb1508e9a6f5630,2018-02-13T13:45:00Z,heat,hold,616,790,790,MT,Butte,100,False,False,False,Gas
4,5dda6449bb5575fdf2a538f8208a3556b55c447b,2018-02-15T13:55:00Z,heat,hold,682,686,686,MT,Helena,10,True,False,False,Gas
5,a9250271635d87fc0b19dbef1fb1508e9a6f5630,2018-02-13T13:30:00Z,heat,hold,609,790,790,MT,Butte,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17624,29f5966d7808882d6c76a0a2d7091c18b3587704,2018-02-05T19:55:00Z,heat,hold,751,750,750,MT,Bozeman,9,True,False,False,Gas
17625,29f5966d7808882d6c76a0a2d7091c18b3587704,2018-02-19T19:20:00Z,heat,hold,730,750,750,MT,Bozeman,9,True,False,False,Gas
17626,29f5966d7808882d6c76a0a2d7091c18b3587704,2018-02-19T19:40:00Z,heat,hold,749,750,750,MT,Bozeman,9,True,False,False,Gas
17627,29f5966d7808882d6c76a0a2d7091c18b3587704,2018-02-19T19:45:00Z,heat,hold,748,750,750,MT,Bozeman,9,True,False,False,Gas


In [43]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
feb_2018_ave = feb_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [46]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/MT/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 "day" extract for MT into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2019-feb-day-MT.csv")

In [48]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
feb_2019 = feb_2019.loc[
    ~(
        feb_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | feb_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2019-02-26T13:55:00Z,heat,auto,633,743,607,MT,Livingston,15,False,False,False,Gas
1,2013b05eece28f2e08b8cd6da888a9b04dbdc854,2019-02-05T13:05:00Z,heat,hold,670,706,676,MT,Missoula,30,True,False,False,Gas
2,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2019-02-01T14:00:00Z,heat,hold,672,677,677,MT,Missoula,25,False,False,False,Gas
3,23d834c190c3686c02c030ee0bf42003872bd591,2019-02-01T19:20:00Z,heat,hold,681,685,679,MT,Bozeman,25,False,False,False,Gas
4,2013b05eece28f2e08b8cd6da888a9b04dbdc854,2019-02-27T13:20:00Z,heat,hold,682,718,687,MT,Missoula,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23787,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-02-17T15:10:00Z,heat,hold,757,760,760,MT,Great Falls,70,False,False,False,Gas
23788,e4ed8ab8303e5e1de4b181ce40545279719e6069,2019-02-16T14:05:00Z,heat,hold,761,760,760,MT,Bozeman,10,False,False,False,Gas
23789,e4ed8ab8303e5e1de4b181ce40545279719e6069,2019-02-16T14:00:00Z,heat,hold,744,760,760,MT,Bozeman,10,False,False,False,Gas
23790,e4ed8ab8303e5e1de4b181ce40545279719e6069,2019-02-16T13:55:00Z,heat,hold,733,760,760,MT,Bozeman,10,False,False,False,Gas


In [49]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
feb_2019_ave = feb_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [52]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/MT/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 "day" extract for MT into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2020-feb-day-MT.csv")

In [54]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
feb_2020 = feb_2020.loc[
    ~(
        feb_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | feb_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2020-02-23 19:55:00 UTC,heat,auto,701,784,609,MT,Livingston,15,False,False,False,Gas
1,9539afa609d7ffff710c3308be33b6a4a4bbd8a8,2020-02-11 19:30:00 UTC,auto,hold,695,774,724,MT,Missoula,9,False,False,False,Gas
2,ca4dd6ff45035f8730f89c3b3ac556556fa37fef,2020-02-02 19:55:00 UTC,heat,auto,697,742,654,MT,Livingston,15,False,False,False,Gas
3,a6b5547e34405b7e11d145b6b81f37e2850b7cd9,2020-02-11 13:30:00 UTC,heat,hold,684,694,688,MT,Missoula,70,False,False,False,Gas
4,666c08e73f2bee116322fafbf2fc1b2972422b81,2020-02-02 15:55:00 UTC,auto,hold,705,777,701,MT,Bozeman,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35147,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-02-07 14:10:00 UTC,heat,hold,751,760,760,MT,Great Falls,70,False,False,False,Gas
35148,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-02-05 19:20:00 UTC,heat,hold,751,760,760,MT,Great Falls,70,False,False,False,Gas
35149,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-02-25 16:40:00 UTC,heat,hold,758,760,760,MT,Great Falls,70,False,False,False,Gas
35150,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-02-28 18:40:00 UTC,heat,hold,758,760,760,MT,Great Falls,70,False,False,False,Gas


In [55]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
feb_2020_ave = feb_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [58]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/MT/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 "day" extract for MT into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2021-feb-day-MT.csv")

In [60]:
# Drop set-point outliers before aggregating: a row is removed when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows for which none of these comparisons is true are kept.
feb_2021 = feb_2021.loc[
    ~(
        feb_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].ge(850).any(axis=1)
        | feb_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]].le(600).any(axis=1)
    )
].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2021-02-10T15:55:00Z,heat,hold,698,709,709,MT,Billings,55,False,False,False,Gas
1,23d834c190c3686c02c030ee0bf42003872bd591,2021-02-12T19:05:00Z,heat,hold,699,790,790,MT,Bozeman,25,False,False,False,Gas
2,3749c1cf564746f9904314c5d33b96db515c066b,2021-02-23T07:50:00Z,heat,hold,640,650,638,MT,Butte,118,False,False,False,Gas
3,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2021-02-22T17:20:00Z,heat,hold,696,702,699,MT,Missoula,25,False,False,False,Gas
4,666c08e73f2bee116322fafbf2fc1b2972422b81,2021-02-17T14:55:00Z,auto,hold,686,727,682,MT,Bozeman,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28765,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-02-17T16:35:00Z,heat,hold,698,700,700,MT,Billings,60,False,False,False,Gas
28766,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-02-20T15:55:00Z,heat,hold,695,700,700,MT,Billings,60,False,False,False,Gas
28767,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-02-19T19:55:00Z,heat,hold,696,700,700,MT,Billings,60,False,False,False,Gas
28768,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-02-10T16:05:00Z,heat,hold,699,700,700,MT,Billings,60,False,False,False,Gas


In [61]:
# Label every row with its year and month (both stored as strings); these
# columns serve as grouping keys in the aggregation step.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so their names state that
# they hold averages once the per-group mean has been taken.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged. Older pandas dropped them silently; pandas >= 2.0 raises a
# TypeError unless they are excluded.
feb_2021_ave = feb_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [64]:
# Export the aggregated frame. The grouping keys live in the index, so the
# index is written as well (index=True).
# Create the target folder first so the export does not fail when it does not
# exist yet (e.g. on a fresh checkout).
os.makedirs("data/day/MT/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/MT/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the per-year February CSVs in the output folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the row order of the combined file reproducible across machines and
# re-runs.
files = sorted(f for f in os.listdir("data/day/MT/feb/") if f.endswith(".csv"))

In [66]:
# Approach adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year CSV, then concatenate them in a single pd.concat call
# instead of growing the frame inside the loop (which re-copies all earlier
# rows on each iteration and seeds the result with an empty frame).
# Each file keeps its own row index, as before.
frames = []
for file in files:
    df = pd.read_csv("data/day/MT/feb/" + file)
    frames.append(df)

# With no input files fall back to an empty frame, matching the previous result.
MT_feb = pd.concat(frames) if frames else pd.DataFrame()

MT_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,23d834c190c3686c02c030ee0bf42003872bd591,feb,2017,heat,hold,Bozeman,694.828571,704.571429,704.571429,25.0,False,False,False
1,29f5966d7808882d6c76a0a2d7091c18b3587704,feb,2017,heat,auto,Bozeman,708.055556,711.055556,710.444444,9.0,True,False,False
2,29f5966d7808882d6c76a0a2d7091c18b3587704,feb,2017,heat,hold,Bozeman,726.107895,724.047368,724.047368,9.0,True,False,False
3,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,feb,2017,auto,auto,Great Falls,699.500000,780.000000,720.000000,45.0,False,False,False
4,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,feb,2017,auto,hold,Great Falls,720.469880,780.000000,703.012048,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
42,f404081c89442a6b4699e93e43dc4bb72f9e2dc1,feb,2021,auto,hold,Great Falls,691.392548,747.196707,697.194107,55.0,False,False,False
43,f469a99acb054dbddd08e713d682acf5035c42df,feb,2021,heat,hold,Helena,594.479167,720.000000,720.000000,49.0,False,False,False
44,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,feb,2021,heat,hold,Billings,698.000000,709.000000,709.000000,55.0,False,False,False
45,fd93662943beeb4603fd63aacbbbb7abb3183470,feb,2021,heat,hold,NASHUA,695.920000,700.680000,699.760000,30.0,False,False,False


In [67]:
# Write the combined February file; the per-file row index is left out
MT_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MT/MT_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" extract for MT from the large-data folder
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2017-jun-day-MT.csv"
)

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jun_2017 = jun_2017[~is_outlier].copy()

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2017-06-25T19:35:00Z,auto,hold,720,720,640,MT,Great Falls,50,False,False,False,Gas
1,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2017-06-25T18:30:00Z,auto,hold,733,720,640,MT,Great Falls,50,False,False,False,Gas
2,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2017-06-25T18:35:00Z,auto,hold,732,720,640,MT,Great Falls,50,False,False,False,Gas
3,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2017-06-25T19:00:00Z,auto,hold,718,745,640,MT,Great Falls,50,False,False,False,Gas
4,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2017-06-25T19:05:00Z,auto,hold,717,724,640,MT,Great Falls,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12565,0728208cff5242993896da88a882e076094b765f,2017-06-01T12:35:00Z,cool,auto,691,740,680,MT,Billings,50,False,False,False,Gas
12566,0728208cff5242993896da88a882e076094b765f,2017-06-01T12:45:00Z,cool,auto,691,740,680,MT,Billings,50,False,False,False,Gas
12567,0728208cff5242993896da88a882e076094b765f,2017-06-01T12:20:00Z,cool,auto,691,740,680,MT,Billings,50,False,False,False,Gas
12568,0728208cff5242993896da88a882e076094b765f,2017-06-01T12:30:00Z,cool,auto,691,740,680,MT,Billings,50,False,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jun_2017_ave = (
    jun_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2017_ave

In [73]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jun_2017_ave.to_csv(
    path_or_buf="data/day/MT/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the June 2018 "day" extract for MT from the large-data folder
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2018-jun-day-MT.csv"
)

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jun_2018 = jun_2018[~is_outlier].copy()

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2018-06-24T18:45:00Z,cool,auto,697,700,662,MT,Helena,5,False,False,False,Gas
1,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2018-06-23T19:20:00Z,cool,auto,703,700,662,MT,Helena,5,False,False,False,Gas
2,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2018-06-23T13:00:00Z,cool,auto,703,700,662,MT,Helena,5,False,False,False,Gas
3,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2018-06-23T14:40:00Z,cool,auto,702,700,662,MT,Helena,5,False,False,False,Gas
4,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2018-06-23T15:15:00Z,cool,auto,703,700,662,MT,Helena,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23500,b8a5d9a9f81bb0725b32a07d2ff69b1aba4bff04,2018-06-26T16:50:00Z,cool,hold,740,750,750,MT,Billings,37,False,False,False,Gas
23501,b8a5d9a9f81bb0725b32a07d2ff69b1aba4bff04,2018-06-25T17:15:00Z,cool,hold,720,750,750,MT,Billings,37,False,False,False,Gas
23502,b8a5d9a9f81bb0725b32a07d2ff69b1aba4bff04,2018-06-27T14:00:00Z,cool,hold,719,750,750,MT,Billings,37,False,False,False,Gas
23503,b8a5d9a9f81bb0725b32a07d2ff69b1aba4bff04,2018-06-27T16:20:00Z,cool,hold,721,750,750,MT,Billings,37,False,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jun_2018_ave = (
    jun_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2018_ave

In [79]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jun_2018_ave.to_csv(
    path_or_buf="data/day/MT/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the June 2019 "day" extract for MT from the large-data folder
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2019-jun-day-MT.csv"
)

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jun_2019 = jun_2019[~is_outlier].copy()

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7352a85776176198f0abb065b891160881b30270,2019-06-02 15:05:00 UTC,cool,hold,659,727,677,MT,Bozeman,15,False,False,False,Gas
1,7352a85776176198f0abb065b891160881b30270,2019-06-02 19:40:00 UTC,cool,hold,690,727,677,MT,Bozeman,15,False,False,False,Gas
2,7352a85776176198f0abb065b891160881b30270,2019-06-02 16:10:00 UTC,cool,hold,669,727,677,MT,Bozeman,15,False,False,False,Gas
3,7352a85776176198f0abb065b891160881b30270,2019-06-02 16:20:00 UTC,cool,hold,673,727,677,MT,Bozeman,15,False,False,False,Gas
4,7352a85776176198f0abb065b891160881b30270,2019-06-02 18:05:00 UTC,cool,hold,680,727,677,MT,Bozeman,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44100,1fe82232930e26efc50ba4da526c354676d03dec,2019-06-08 18:30:00 UTC,auto,auto,682,750,680,MT,Helena,38,False,False,False,Gas
44101,1fe82232930e26efc50ba4da526c354676d03dec,2019-06-09 16:20:00 UTC,auto,auto,683,750,680,MT,Helena,38,False,False,False,Gas
44102,1fe82232930e26efc50ba4da526c354676d03dec,2019-06-09 18:40:00 UTC,auto,auto,684,750,680,MT,Helena,38,False,False,False,Gas
44103,77c344f9bb93b933ebc934a7ebba997d720726d5,2019-06-27 17:05:00 UTC,heat,auto,681,680,680,MT,Butte,90,False,False,False,Gas


In [82]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jun_2019_ave = (
    jun_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2019_ave

In [85]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jun_2019_ave.to_csv(
    path_or_buf="data/day/MT/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the June 2020 "day" extract for MT from the large-data folder
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2020-jun-day-MT.csv"
)

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jun_2020 = jun_2020[~is_outlier].copy()

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2f91f344abd5823f11abdfa57e08bb514aa09564,2020-06-26 18:55:00 UTC,cool,hold,723,701,701,MT,Billings,50,True,False,False,Gas
1,e0715a2e7cf7287698dc580b2f479f4eaf35e0ea,2020-06-02 12:00:00 UTC,auto,hold,702,762,712,MT,Kalispell,40,False,False,False,Gas
2,77c344f9bb93b933ebc934a7ebba997d720726d5,2020-06-09 19:25:00 UTC,heat,hold,660,732,642,MT,Butte,90,False,False,False,Gas
3,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-06-13 15:05:00 UTC,auto,hold,717,688,638,MT,Billings,55,False,False,False,Gas
4,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-06-13 16:40:00 UTC,auto,hold,702,688,638,MT,Billings,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49047,b7a28b95c2726be560baf3656422ba13edc9398a,2020-06-06 18:10:00 UTC,auto,hold,729,750,700,MT,Kalispell,5,False,False,False,Gas
49048,b7a28b95c2726be560baf3656422ba13edc9398a,2020-06-10 12:30:00 UTC,auto,hold,695,750,700,MT,Kalispell,5,False,False,False,Gas
49049,b7a28b95c2726be560baf3656422ba13edc9398a,2020-06-11 13:25:00 UTC,auto,hold,719,750,700,MT,Kalispell,5,False,False,False,Gas
49050,b7a28b95c2726be560baf3656422ba13edc9398a,2020-06-07 16:55:00 UTC,auto,hold,702,750,700,MT,Kalispell,5,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jun_2020_ave = (
    jun_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2020_ave

In [91]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jun_2020_ave.to_csv(
    path_or_buf="data/day/MT/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the June 2021 "day" extract for MT from the large-data folder
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2021-jun-day-MT.csv"
)

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jun_2021 = jun_2021[~is_outlier].copy()

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5630eed158cf32dc803ac57a6956f5b6e46d8095,2021-06-24 15:40:00 UTC,heat,hold,696,719,679,MT,Bozeman,100,False,False,False,Gas
1,2f91f344abd5823f11abdfa57e08bb514aa09564,2021-06-25 18:25:00 UTC,cool,hold,723,730,721,MT,Billings,50,True,False,False,Gas
2,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2021-06-05 14:55:00 UTC,auto,hold,685,669,619,MT,Billings,55,False,False,False,Gas
3,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2021-06-04 16:15:00 UTC,auto,hold,663,663,613,MT,Billings,55,False,False,False,Gas
4,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2021-06-04 16:30:00 UTC,auto,hold,668,663,613,MT,Billings,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25881,4cd54fe9a866c28682055020800c8f0b5af25df5,2021-06-26 11:40:00 UTC,cool,hold,713,760,760,MT,Billings,9,True,False,False,Gas
25882,4cd54fe9a866c28682055020800c8f0b5af25df5,2021-06-08 18:30:00 UTC,cool,hold,701,760,760,MT,Billings,9,True,False,False,Gas
25883,4cd54fe9a866c28682055020800c8f0b5af25df5,2021-06-25 17:25:00 UTC,cool,hold,711,760,760,MT,Billings,9,True,False,False,Gas
25884,4cd54fe9a866c28682055020800c8f0b5af25df5,2021-06-10 17:45:00 UTC,cool,hold,726,760,760,MT,Billings,9,True,False,False,Gas


In [94]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jun_2021_ave = (
    jun_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2021_ave

In [97]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jun_2021_ave.to_csv(
    path_or_buf="data/day/MT/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Collect the CSV files in the June folder.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MT/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file, then concatenate once. Growing a DataFrame with
# pd.concat inside a loop re-copies all earlier rows on each pass, and seeding it
# with an empty DataFrame relies on concat behaviour that newer pandas deprecates.
frames = [pd.read_csv("data/day/MT/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
MT_jun = pd.concat(frames) if frames else pd.DataFrame()

MT_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0728208cff5242993896da88a882e076094b765f,jun,2017,cool,auto,Billings,690.833333,740.583333,680.000000,50.0,False,False,False
1,0bc51b95b2dc23713f611e182c689ad03dc2c300,jun,2017,cool,auto,Great Falls,698.791667,710.000000,695.000000,10.0,False,False,False
2,0bc51b95b2dc23713f611e182c689ad03dc2c300,jun,2017,cool,hold,Great Falls,693.491667,701.595833,701.595833,10.0,False,False,False
3,2323dbdb6769370962e7858668fdc12bc0eaefcf,jun,2017,heat,hold,Deer Lodge,670.834101,650.041475,650.009217,55.0,False,False,False
4,29f5966d7808882d6c76a0a2d7091c18b3587704,jun,2017,auto,auto,Bozeman,692.416667,745.000000,695.000000,9.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
45,f404081c89442a6b4699e93e43dc4bb72f9e2dc1,jun,2021,auto,hold,Great Falls,721.327862,730.421420,680.421420,55.0,False,False,False
46,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,jun,2021,auto,hold,Billings,679.107143,667.160714,617.160714,55.0,False,False,False
47,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,jun,2021,cool,hold,Billings,664.848837,655.116279,654.720930,55.0,False,False,False
48,fd93662943beeb4603fd63aacbbbb7abb3183470,jun,2021,cool,hold,NASHUA,745.318584,741.955752,740.814159,30.0,False,False,False


In [100]:
# Write the combined June file; the per-file row index is left out
MT_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MT/MT_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" extract for MT from the large-data folder
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2017-jul-day-MT.csv"
)

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jul_2017 = jul_2017[~is_outlier].copy()

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,2017-07-13T19:55:00Z,auto,hold,746,730,680,MT,Great Falls,45,False,False,False,Gas
1,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,2017-07-13T19:50:00Z,auto,hold,746,730,680,MT,Great Falls,45,False,False,False,Gas
2,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,2017-07-15T17:25:00Z,auto,hold,740,730,680,MT,Great Falls,45,False,False,False,Gas
3,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,2017-07-15T18:50:00Z,auto,hold,758,730,680,MT,Great Falls,45,False,False,False,Gas
4,3e35e2d37f16eb3b72b2cbb145e6ed964ce01ca4,2017-07-15T16:15:00Z,auto,hold,741,730,680,MT,Great Falls,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14685,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2017-07-06T18:25:00Z,auto,hold,774,740,680,MT,Great Falls,50,False,False,False,Gas
14686,bbdf7f0987b78887505d5f3fa975d0a42b1baabe,2017-07-30T17:10:00Z,cool,auto,723,720,680,MT,Scobey,35,False,False,False,Gas
14687,bbdf7f0987b78887505d5f3fa975d0a42b1baabe,2017-07-30T17:05:00Z,cool,auto,724,720,680,MT,Scobey,35,False,False,False,Gas
14688,bbdf7f0987b78887505d5f3fa975d0a42b1baabe,2017-07-30T19:50:00Z,cool,auto,722,720,680,MT,Scobey,35,False,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jul_2017_ave = (
    jul_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2017_ave

In [106]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jul_2017_ave.to_csv(
    path_or_buf="data/day/MT/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the July 2018 "day" extract for MT from the large-data folder
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2018-jul-day-MT.csv"
)

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jul_2018 = jul_2018[~is_outlier].copy()

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-07-27T13:45:00Z,heat,hold,690,650,630,MT,Bozeman,10,False,False,False,Gas
1,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-07-16T14:00:00Z,heat,hold,680,650,630,MT,Bozeman,10,False,False,False,Gas
2,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-07-30T12:40:00Z,heat,hold,692,650,630,MT,Bozeman,10,False,False,False,Gas
3,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-07-17T14:10:00Z,heat,hold,669,650,630,MT,Bozeman,10,False,False,False,Gas
4,e4ed8ab8303e5e1de4b181ce40545279719e6069,2018-07-11T15:35:00Z,heat,hold,661,650,630,MT,Bozeman,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26346,5a1fbc37826124394a837d94e7f46800280d115f,2018-07-08T15:55:00Z,cool,auto,732,760,760,MT,Great Falls,70,False,False,False,Gas
26347,5a1fbc37826124394a837d94e7f46800280d115f,2018-07-07T14:05:00Z,cool,auto,744,760,760,MT,Great Falls,70,False,False,False,Gas
26348,5a1fbc37826124394a837d94e7f46800280d115f,2018-07-08T15:20:00Z,cool,auto,732,760,760,MT,Great Falls,70,False,False,False,Gas
26349,5a1fbc37826124394a837d94e7f46800280d115f,2018-07-08T15:25:00Z,cool,auto,732,760,760,MT,Great Falls,70,False,False,False,Gas


In [109]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jul_2018_ave = (
    jul_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2018_ave

In [112]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jul_2018_ave.to_csv(
    path_or_buf="data/day/MT/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the July 2019 "day" extract for MT from the large-data folder
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2019-jul-day-MT.csv"
)

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jul_2019 = jul_2019[~is_outlier].copy()

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7352a85776176198f0abb065b891160881b30270,2019-07-13 19:25:00 UTC,cool,hold,714,707,707,MT,Bozeman,15,False,False,False,Gas
1,7352a85776176198f0abb065b891160881b30270,2019-07-14 18:00:00 UTC,cool,hold,709,697,697,MT,Bozeman,15,False,False,False,Gas
2,7352a85776176198f0abb065b891160881b30270,2019-07-14 19:55:00 UTC,cool,hold,718,697,697,MT,Bozeman,15,False,False,False,Gas
3,2f91f344abd5823f11abdfa57e08bb514aa09564,2019-07-13 16:35:00 UTC,cool,hold,730,760,749,MT,Billings,50,True,False,False,Gas
4,c9a27b6d42a3b1e3d96263a706132ded7796fb6f,2019-07-04 13:55:00 UTC,auto,auto,709,808,658,MT,Great Falls,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59306,7012732a88f8e228eeeb1ade7f2f27d367c51866,2019-07-12 13:25:00 UTC,cool,auto,711,780,680,MT,Bozeman,5,False,False,False,Gas
59307,7012732a88f8e228eeeb1ade7f2f27d367c51866,2019-07-15 18:45:00 UTC,cool,auto,730,780,680,MT,Bozeman,5,False,False,False,Gas
59308,7012732a88f8e228eeeb1ade7f2f27d367c51866,2019-07-19 17:30:00 UTC,cool,auto,684,780,680,MT,Bozeman,5,False,False,False,Gas
59309,7012732a88f8e228eeeb1ade7f2f27d367c51866,2019-07-19 18:40:00 UTC,cool,auto,685,780,680,MT,Bozeman,5,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jul_2019_ave = (
    jul_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2019_ave

In [118]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jul_2019_ave.to_csv(
    path_or_buf="data/day/MT/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the July 2020 "day" extract for MT from the large-data folder
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2020-jul-day-MT.csv"
)

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jul_2020 = jul_2020[~is_outlier].copy()

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5e65748abfa7413de4b81ab0d609cf2b3d225520,2020-07-26 19:35:00 UTC,cool,hold,735,747,747,MT,Missoula,5,False,False,False,Gas
1,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-07-24 17:25:00 UTC,cool,hold,711,697,697,MT,Billings,55,False,False,False,Gas
2,2f91f344abd5823f11abdfa57e08bb514aa09564,2020-07-02 18:35:00 UTC,cool,hold,711,701,701,MT,Billings,50,True,False,False,Gas
3,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-07-13 18:00:00 UTC,auto,hold,701,697,647,MT,Billings,55,False,False,False,Gas
4,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-07-23 19:30:00 UTC,cool,hold,711,697,697,MT,Billings,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46113,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2020-07-19 19:55:00 UTC,cool,auto,682,680,680,MT,Helena,5,False,False,False,Gas
46114,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2020-07-19 19:50:00 UTC,cool,auto,680,680,680,MT,Helena,5,False,False,False,Gas
46115,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2020-07-07 14:30:00 UTC,cool,hold,681,680,680,MT,Helena,5,False,False,False,Gas
46116,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,2020-07-26 19:20:00 UTC,cool,hold,694,680,680,MT,Helena,5,False,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jul_2020_ave = (
    jul_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2020_ave

In [124]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jul_2020_ave.to_csv(
    path_or_buf="data/day/MT/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the July 2021 "day" extract for MT from the large-data folder
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/MT-day/2021-jul-day-MT.csv"
)

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600. Missing values fail both comparisons, so such rows are kept.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)

# Filter into a fresh copy instead of dropping in place; the copy keeps later
# column assignments on this frame free of SettingWithCopyWarning.
jul_2021 = jul_2021[~is_outlier].copy()

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2f91f344abd5823f11abdfa57e08bb514aa09564,2021-07-09T19:05:00Z,cool,hold,734,730,721,MT,Billings,50,True,False,False,Gas
1,2f91f344abd5823f11abdfa57e08bb514aa09564,2021-07-21T15:50:00Z,cool,hold,738,740,721,MT,Billings,50,True,False,False,Gas
2,2f91f344abd5823f11abdfa57e08bb514aa09564,2021-07-09T19:20:00Z,cool,hold,733,730,721,MT,Billings,50,True,False,False,Gas
3,2f91f344abd5823f11abdfa57e08bb514aa09564,2021-07-21T15:00:00Z,cool,hold,734,740,721,MT,Billings,50,True,False,False,Gas
4,2f91f344abd5823f11abdfa57e08bb514aa09564,2021-07-23T18:50:00Z,cool,hold,722,730,721,MT,Billings,50,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26853,6e0c9d625f3ce2c91fb0dace7c2f92867c27badb,2021-07-15T19:40:00Z,cool,hold,788,760,760,MT,Scobey,50,False,False,False,Gas
26854,6e0c9d625f3ce2c91fb0dace7c2f92867c27badb,2021-07-15T19:55:00Z,cool,hold,776,760,760,MT,Scobey,50,False,False,False,Gas
26855,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-07-26T19:55:00Z,cool,hold,766,760,760,MT,Billings,60,False,False,False,Gas
26856,b9a68c99fa994007bfbe7dfc018ec73539555478,2021-07-26T19:30:00Z,cool,hold,766,760,760,MT,Billings,60,False,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses these two columns as keys.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric/boolean columns per Identifier, Month, Year, HvacMode, CalendarEvent and City.
# numeric_only=True is explicit because pandas >= 2.0 no longer skips text columns
# (e.g. date_time) automatically and raises a TypeError when asked to average them.
jul_2021_ave = (
    jul_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2021_ave

In [130]:
# Export CSV file
# The group keys live in the index, so the index is written together with the header.
jul_2021_ave.to_csv(
    path_or_buf="data/day/MT/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Collect the CSV files in the July folder.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MT/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file, then concatenate once. Growing a DataFrame with
# pd.concat inside a loop re-copies all earlier rows on each pass, and seeding it
# with an empty DataFrame relies on concat behaviour that newer pandas deprecates.
frames = [pd.read_csv("data/day/MT/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
MT_jul = pd.concat(frames) if frames else pd.DataFrame()

MT_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0bc51b95b2dc23713f611e182c689ad03dc2c300,jul,2017,cool,auto,Great Falls,708.000000,735.000000,740.000000,10.0,False,False,False
1,0bc51b95b2dc23713f611e182c689ad03dc2c300,jul,2017,cool,hold,Great Falls,724.242424,740.393939,740.393939,10.0,False,False,False
2,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,jul,2017,cool,auto,Helena,715.400000,713.440000,699.200000,5.0,False,False,False
3,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,jul,2017,cool,hold,Helena,704.176471,700.000000,700.000000,5.0,False,False,False
4,29f5966d7808882d6c76a0a2d7091c18b3587704,jul,2017,auto,auto,Bozeman,702.916667,700.791667,650.791667,9.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
33,e4ed8ab8303e5e1de4b181ce40545279719e6069,jul,2021,heat,hold,Bozeman,734.737113,680.000000,680.000000,10.0,False,False,False
34,f404081c89442a6b4699e93e43dc4bb72f9e2dc1,jul,2021,auto,hold,Great Falls,730.131274,730.928958,680.928958,55.0,False,False,False
35,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,jul,2021,cool,hold,Billings,659.458333,655.291667,654.833333,55.0,False,False,False
36,fd93662943beeb4603fd63aacbbbb7abb3183470,jul,2021,cool,hold,NASHUA,738.500000,769.791667,769.500000,30.0,False,False,False


In [133]:
# Save the combined July table; the row index is left out of the file.
MT_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/MT/MT_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 day export for MT
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2017-aug-day-MT.csv")

In [135]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
aug_2017.drop(index=aug_2017.index[outlier_rows], inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a91496a0efdd5e852e400a057b99fb69b67eed62,2017-08-26T17:25:00Z,cool,hold,719,692,692,MT,Bozeman,10,False,False,False,Gas
1,a91496a0efdd5e852e400a057b99fb69b67eed62,2017-08-25T17:10:00Z,cool,hold,693,701,701,MT,Bozeman,10,False,False,False,Gas
2,a91496a0efdd5e852e400a057b99fb69b67eed62,2017-08-26T17:35:00Z,cool,hold,701,692,692,MT,Bozeman,10,False,False,False,Gas
3,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,2017-08-18T15:25:00Z,cool,hold,683,725,725,MT,Eureka,10,True,False,True,Electric
4,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,2017-08-12T15:55:00Z,cool,hold,693,724,724,MT,Eureka,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19565,6e0c9d625f3ce2c91fb0dace7c2f92867c27badb,2017-08-20T19:20:00Z,cool,hold,707,760,760,MT,Scobey,50,False,False,False,Gas
19566,6e0c9d625f3ce2c91fb0dace7c2f92867c27badb,2017-08-04T14:25:00Z,cool,hold,708,760,760,MT,Scobey,50,False,False,False,Gas
19567,6e0c9d625f3ce2c91fb0dace7c2f92867c27badb,2017-08-28T14:35:00Z,cool,hold,730,760,760,MT,Scobey,50,False,False,False,Gas
19568,6e0c9d625f3ce2c91fb0dace7c2f92867c27badb,2017-08-24T17:55:00Z,cool,hold,755,760,760,MT,Scobey,50,False,False,False,Gas


In [136]:
# Stamp every row with its year and month; both are used as group keys below
aug_2017["Year"], aug_2017["Month"] = "2017", "aug"

In [137]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2017 = aug_2017.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [138]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
aug_2017_ave = (
    aug_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [139]:
# Write the August 2017 group means to the August folder; the group keys live
# in the index, so the index is written along with the header row.
aug_2017_ave.to_csv(path_or_buf="data/day/MT/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the August 2018 day export for MT
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2018-aug-day-MT.csv")

In [141]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
aug_2018.drop(index=aug_2018.index[outlier_rows], inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbc0d748de67c33d4c70a501da752924a90777ee,2018-08-10T13:35:00Z,auto,auto,747,760,700,MT,Kalispell,45,True,False,False,Gas
1,bbc0d748de67c33d4c70a501da752924a90777ee,2018-08-08T12:00:00Z,auto,hold,755,770,700,MT,Kalispell,45,True,False,False,Gas
2,bbc0d748de67c33d4c70a501da752924a90777ee,2018-08-19T19:35:00Z,auto,auto,734,750,700,MT,Kalispell,45,True,False,False,Gas
3,bbc0d748de67c33d4c70a501da752924a90777ee,2018-08-08T13:15:00Z,auto,hold,750,770,700,MT,Kalispell,45,True,False,False,Gas
4,bbc0d748de67c33d4c70a501da752924a90777ee,2018-08-19T15:45:00Z,auto,auto,736,750,700,MT,Kalispell,45,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25131,b70abf6012a52a334b0b505e90e47e1d4bc1714e,2018-08-06T13:50:00Z,auto,auto,726,800,680,MT,Bozeman,0,True,False,False,Gas
25132,4089d9705650bdd49cd283f6660b95a5ed85aba7,2018-08-19T18:50:00Z,cool,hold,743,780,680,MT,missoula,5,False,False,False,Gas
25133,b70abf6012a52a334b0b505e90e47e1d4bc1714e,2018-08-06T15:50:00Z,auto,auto,723,800,680,MT,Bozeman,0,True,False,False,Gas
25134,b70abf6012a52a334b0b505e90e47e1d4bc1714e,2018-08-06T18:00:00Z,auto,auto,724,800,680,MT,Bozeman,0,True,False,False,Gas


In [142]:
# Stamp every row with its year and month; both are used as group keys below
aug_2018["Year"], aug_2018["Month"] = "2018", "aug"

In [143]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2018 = aug_2018.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [144]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
aug_2018_ave = (
    aug_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [145]:
# Write the August 2018 group means to the August folder; the group keys live
# in the index, so the index is written along with the header row.
aug_2018_ave.to_csv(path_or_buf="data/day/MT/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the August 2019 day export for MT
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2019-aug-day-MT.csv")

In [147]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
aug_2019.drop(index=aug_2019.index[outlier_rows], inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af3e698ef64b8dd05c26eac5bee213b0661dfbc7,2019-08-04 11:40:00 UTC,cool,auto,708,790,790,MT,Helena,59,False,False,False,Gas
1,470ef66ea25b7872c4c3059e614ac71ba471cde8,2019-08-21 15:00:00 UTC,cool,hold,700,682,682,MT,Billings,9,True,False,False,Gas
2,af3e698ef64b8dd05c26eac5bee213b0661dfbc7,2019-08-05 13:45:00 UTC,cool,auto,711,800,790,MT,Helena,59,False,False,False,Gas
3,af3e698ef64b8dd05c26eac5bee213b0661dfbc7,2019-08-04 16:20:00 UTC,cool,auto,709,790,790,MT,Helena,59,False,False,False,Gas
4,2f91f344abd5823f11abdfa57e08bb514aa09564,2019-08-10 19:50:00 UTC,cool,auto,748,750,749,MT,Billings,50,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56328,bbc0d748de67c33d4c70a501da752924a90777ee,2019-08-04 14:25:00 UTC,auto,auto,733,770,700,MT,Kalispell,45,True,False,False,Gas
56329,bbc0d748de67c33d4c70a501da752924a90777ee,2019-08-04 18:30:00 UTC,auto,auto,732,770,700,MT,Kalispell,45,True,False,False,Gas
56330,bbc0d748de67c33d4c70a501da752924a90777ee,2019-08-08 11:15:00 UTC,auto,auto,756,760,700,MT,Kalispell,45,True,False,False,Gas
56331,bbc0d748de67c33d4c70a501da752924a90777ee,2019-08-04 16:30:00 UTC,auto,auto,729,770,700,MT,Kalispell,45,True,False,False,Gas


In [148]:
# Stamp every row with its year and month; both are used as group keys below
aug_2019["Year"], aug_2019["Month"] = "2019", "aug"

In [149]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2019 = aug_2019.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [150]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
aug_2019_ave = (
    aug_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [151]:
# Write the August 2019 group means to the August folder; the group keys live
# in the index, so the index is written along with the header row.
aug_2019_ave.to_csv(path_or_buf="data/day/MT/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the August 2020 day export for MT
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2020-aug-day-MT.csv")

In [153]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
aug_2020.drop(index=aug_2020.index[outlier_rows], inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5e65748abfa7413de4b81ab0d609cf2b3d225520,2020-08-10 15:45:00 UTC,cool,auto,727,750,717,MT,Missoula,5,False,False,False,Gas
1,5e65748abfa7413de4b81ab0d609cf2b3d225520,2020-08-24 18:05:00 UTC,cool,auto,751,750,717,MT,Missoula,5,False,False,False,Gas
2,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-08-17 18:05:00 UTC,cool,hold,709,687,687,MT,Billings,55,False,False,False,Gas
3,5e65748abfa7413de4b81ab0d609cf2b3d225520,2020-08-14 14:05:00 UTC,cool,auto,712,750,717,MT,Missoula,5,False,False,False,Gas
4,5e65748abfa7413de4b81ab0d609cf2b3d225520,2020-08-07 17:40:00 UTC,cool,auto,746,750,717,MT,Missoula,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47827,29f5966d7808882d6c76a0a2d7091c18b3587704,2020-08-31 16:00:00 UTC,heat,hold,755,760,760,MT,Bozeman,9,True,False,False,Gas
47828,29f5966d7808882d6c76a0a2d7091c18b3587704,2020-08-31 15:35:00 UTC,heat,hold,756,760,760,MT,Bozeman,9,True,False,False,Gas
47829,29f5966d7808882d6c76a0a2d7091c18b3587704,2020-08-31 15:40:00 UTC,heat,hold,759,760,760,MT,Bozeman,9,True,False,False,Gas
47830,29f5966d7808882d6c76a0a2d7091c18b3587704,2020-08-31 15:45:00 UTC,heat,hold,759,760,760,MT,Bozeman,9,True,False,False,Gas


In [154]:
# Stamp every row with its year and month; both are used as group keys below
aug_2020["Year"], aug_2020["Month"] = "2020", "aug"

In [155]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2020 = aug_2020.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [156]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
aug_2020_ave = (
    aug_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [157]:
# Write the August 2020 group means to the August folder; the group keys live
# in the index, so the index is written along with the header row.
aug_2020_ave.to_csv(path_or_buf="data/day/MT/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly average CSV files from the month's folder
2. Export as combined CSV

In [158]:
# List the CSV files in the August folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/MT/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once. Growing the dataframe
# inside the loop re-copies all previously loaded rows on each pass.
aug_frames = [pd.read_csv("data/day/MT/aug/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty dataframe when
# the folder held no CSV files (the result the original loop produced).
MT_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

MT_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,aug,2017,cool,auto,Eureka,698.184211,724.421053,734.736842,10.0,True,False,True
1,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,aug,2017,cool,hold,Eureka,692.443119,719.359608,719.305922,10.0,True,False,True
2,0bc51b95b2dc23713f611e182c689ad03dc2c300,aug,2017,cool,auto,Great Falls,705.645833,750.000000,680.000000,10.0,False,False,False
3,0bc51b95b2dc23713f611e182c689ad03dc2c300,aug,2017,cool,hold,Great Falls,709.226804,724.481100,720.621993,10.0,False,False,False
4,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,aug,2017,cool,hold,Helena,741.703704,740.000000,740.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,f469a99acb054dbddd08e713d682acf5035c42df,aug,2020,heat,auto,Helena,763.464509,650.000000,650.000000,49.0,False,False,False
69,f5101dfe9a0543a0dd3d8b1adbec2dd33f3047a8,aug,2020,heat,hold,Bozeman,661.529412,653.235294,641.911765,90.0,False,False,False
70,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,aug,2020,cool,auto,Billings,718.625000,680.000000,680.000000,55.0,False,False,False
71,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,aug,2020,cool,hold,Billings,697.089005,683.403141,682.429319,55.0,False,False,False


In [160]:
# Save the combined August table; the row index is left out of the file.
MT_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/MT/MT_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 day export for MT
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2017-dec-day-MT.csv")

In [162]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
dec_2017.drop(index=dec_2017.index[outlier_rows], inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,7352a85776176198f0abb065b891160881b30270,2017-12-17T19:55:00Z,auto,hold,653,745,655,MT,Bozeman,15,False,False,False,Gas
2,7352a85776176198f0abb065b891160881b30270,2017-12-17T17:40:00Z,auto,hold,657,745,655,MT,Bozeman,15,False,False,False,Gas
4,7352a85776176198f0abb065b891160881b30270,2017-12-17T16:25:00Z,auto,hold,652,745,655,MT,Bozeman,15,False,False,False,Gas
5,7352a85776176198f0abb065b891160881b30270,2017-12-17T17:25:00Z,auto,hold,655,745,655,MT,Bozeman,15,False,False,False,Gas
6,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2017-12-28T17:35:00Z,heat,hold,658,650,645,MT,Billings,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18741,0bc51b95b2dc23713f611e182c689ad03dc2c300,2017-12-30T12:05:00Z,heat,hold,696,680,680,MT,Great Falls,10,False,False,False,Gas
18742,0bc51b95b2dc23713f611e182c689ad03dc2c300,2017-12-30T13:10:00Z,heat,hold,674,680,680,MT,Great Falls,10,False,False,False,Gas
18743,0bc51b95b2dc23713f611e182c689ad03dc2c300,2017-12-30T13:55:00Z,heat,hold,628,680,680,MT,Great Falls,10,False,False,False,Gas
18744,0bc51b95b2dc23713f611e182c689ad03dc2c300,2017-12-30T14:10:00Z,heat,hold,634,680,680,MT,Great Falls,10,False,False,False,Gas


In [163]:
# Stamp every row with its year and month; both are used as group keys below
dec_2017["Year"], dec_2017["Month"] = "2017", "dec"

In [164]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2017 = dec_2017.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [165]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
dec_2017_ave = (
    dec_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [166]:
# Write the December 2017 group means to the December folder; the group keys
# live in the index, so the index is written along with the header row.
dec_2017_ave.to_csv(path_or_buf="data/day/MT/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the December 2018 day export for MT
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2018-dec-day-MT.csv")

In [168]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
dec_2018.drop(index=dec_2018.index[outlier_rows], inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2018-12-22 14:10:00 UTC,heat,hold,617,650,617,MT,Missoula,25,False,False,False,Gas
1,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2018-12-22 19:45:00 UTC,heat,hold,618,650,617,MT,Missoula,25,False,False,False,Gas
2,06941505466c4069dc9b2cd6aaf6b800a4cd5d40,2018-12-09 13:55:00 UTC,heat,auto,677,730,662,MT,Fairfield,15,False,False,False,Gas
3,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2018-12-22 13:25:00 UTC,heat,hold,618,650,617,MT,Missoula,25,False,False,False,Gas
4,34fbbdfdbc62b907f2cb4f26559df29b230273e3,2018-12-22 17:40:00 UTC,heat,hold,616,650,617,MT,Missoula,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33942,a5843cc963fec301306a65ed47b1063f1370c9c7,2018-12-09 19:15:00 UTC,heat,auto,701,700,700,MT,Billings,0,False,False,False,Gas
33943,a5843cc963fec301306a65ed47b1063f1370c9c7,2018-12-06 19:55:00 UTC,heat,hold,695,700,700,MT,Billings,0,False,False,False,Gas
33944,a5843cc963fec301306a65ed47b1063f1370c9c7,2018-12-13 13:45:00 UTC,heat,auto,703,700,700,MT,Billings,0,False,False,False,Gas
33945,bbc0d748de67c33d4c70a501da752924a90777ee,2018-12-04 12:50:00 UTC,heat,auto,697,750,700,MT,Kalispell,45,True,False,False,Gas


In [169]:
# Stamp every row with its year and month; both are used as group keys below
dec_2018["Year"], dec_2018["Month"] = "2018", "dec"

In [170]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2018 = dec_2018.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [171]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
dec_2018_ave = (
    dec_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [172]:
# Write the December 2018 group means to the December folder; the group keys
# live in the index, so the index is written along with the header row.
dec_2018_ave.to_csv(path_or_buf="data/day/MT/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the December 2019 day export for MT
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2019-dec-day-MT.csv")

In [174]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
dec_2019.drop(index=dec_2019.index[outlier_rows], inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7e82f7bb0ad060bdb1e139bed8aa5d5f869cf054,2019-12-01 17:40:00 UTC,heat,auto,677,689,689,MT,East Helena,40,False,False,False,Gas
1,2f91f344abd5823f11abdfa57e08bb514aa09564,2019-12-19 14:55:00 UTC,heat,auto,715,723,718,MT,Billings,50,True,False,False,Gas
2,666c08e73f2bee116322fafbf2fc1b2972422b81,2019-12-31 13:55:00 UTC,auto,hold,710,775,714,MT,Bozeman,7,False,False,False,Gas
3,a6b5547e34405b7e11d145b6b81f37e2850b7cd9,2019-12-14 17:05:00 UTC,heat,auto,695,713,687,MT,Missoula,70,False,False,False,Gas
5,7e82f7bb0ad060bdb1e139bed8aa5d5f869cf054,2019-12-03 13:55:00 UTC,heat,hold,679,730,678,MT,East Helena,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43164,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-12-31 18:00:00 UTC,heat,hold,765,760,760,MT,Great Falls,70,False,False,False,Gas
43165,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-12-08 16:10:00 UTC,heat,hold,746,760,760,MT,Great Falls,70,False,False,False,Gas
43166,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-12-08 16:35:00 UTC,heat,hold,754,760,760,MT,Great Falls,70,False,False,False,Gas
43167,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2019-12-05 17:40:00 UTC,heat,hold,752,760,760,MT,Great Falls,70,False,False,False,Gas


In [175]:
# Stamp every row with its year and month; both are used as group keys below
dec_2019["Year"], dec_2019["Month"] = "2019", "dec"

In [176]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2019 = dec_2019.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [177]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
dec_2019_ave = (
    dec_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [178]:
# Write the December 2019 group means to the December folder; the group keys
# live in the index, so the index is written along with the header row.
dec_2019_ave.to_csv(path_or_buf="data/day/MT/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the December 2020 day export for MT
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/MT-day/2020-dec-day-MT.csv")

In [180]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected-temperature column is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place so the later column assignments act on this same frame.
dec_2020.drop(index=dec_2020.index[outlier_rows], inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7522a70780f2d3f66a683747547beccbafc53be2,2020-12-15 17:25:00 UTC,heat,hold,714,715,715,MT,Billings,30,True,False,False,Gas
1,77c344f9bb93b933ebc934a7ebba997d720726d5,2020-12-21 19:40:00 UTC,heat,hold,639,656,639,MT,Butte,90,False,False,False,Gas
2,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-12-21 19:30:00 UTC,heat,hold,704,709,709,MT,Billings,55,False,False,False,Gas
3,7522a70780f2d3f66a683747547beccbafc53be2,2020-12-15 19:55:00 UTC,heat,hold,712,715,715,MT,Billings,30,True,False,False,Gas
4,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,2020-12-15 19:35:00 UTC,heat,hold,703,709,709,MT,Billings,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37131,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-12-25 16:55:00 UTC,heat,hold,745,750,750,MT,Great Falls,70,False,False,False,Gas
37132,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-12-25 17:45:00 UTC,heat,hold,746,750,750,MT,Great Falls,70,False,False,False,Gas
37133,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-12-30 16:00:00 UTC,heat,hold,750,750,750,MT,Great Falls,70,False,False,False,Gas
37134,98427a2f54b32505d3f1a3e9dfe4acfb78346762,2020-12-25 17:10:00 UTC,heat,hold,748,750,750,MT,Great Falls,70,False,False,False,Gas


In [181]:
# Stamp every row with its year and month; both are used as group keys below
dec_2020["Year"], dec_2020["Month"] = "2020", "dec"

In [182]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2020 = dec_2020.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [183]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True leaves out the text columns (e.g. date_time, ProvinceState)
# explicitly; pandas >= 2.0 raises a TypeError instead of dropping them silently.
dec_2020_ave = (
    dec_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [184]:
# Write the December 2020 group means to the December folder; the group keys
# live in the index, so the index is written along with the header row.
dec_2020_ave.to_csv(path_or_buf="data/day/MT/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly average CSV files from the month's folder
2. Export as combined CSV

In [185]:
# List the CSV files in the December folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("data/day/MT/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once. Growing the dataframe
# inside the loop re-copies all previously loaded rows on each pass.
dec_frames = [pd.read_csv("data/day/MT/dec/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty dataframe when
# the folder held no CSV files (the result the original loop produced).
MT_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

MT_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019764d7b48dd9d6f1f4ad95451d7d0da2669405,dec,2017,auto,auto,Billings,709.260870,740.000000,690.000000,5.0,False,False,False
1,0728208cff5242993896da88a882e076094b765f,dec,2017,heat,hold,Billings,679.140000,681.865000,681.705000,50.0,False,False,False
2,0bc51b95b2dc23713f611e182c689ad03dc2c300,dec,2017,heat,auto,Great Falls,656.906977,657.837209,646.697674,10.0,False,False,False
3,0bc51b95b2dc23713f611e182c689ad03dc2c300,dec,2017,heat,hold,Great Falls,661.723810,668.552381,668.476190,10.0,False,False,False
4,23d834c190c3686c02c030ee0bf42003872bd591,dec,2017,heat,auto,Bozeman,669.857143,683.285714,671.142857,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,fd93662943beeb4603fd63aacbbbb7abb3183470,dec,2020,heat,auto,NASHUA,690.474684,728.227848,689.636076,30.0,False,False,False
72,fd93662943beeb4603fd63aacbbbb7abb3183470,dec,2020,heat,hold,NASHUA,703.777778,690.222222,690.000000,30.0,False,False,False
73,fec2ec7ae8c5ee7ae93fe16b022bc4c1fc6f2931,dec,2020,auto,auto,Kalispell,724.106481,740.000000,720.000000,0.0,True,False,False
74,fec2ec7ae8c5ee7ae93fe16b022bc4c1fc6f2931,dec,2020,auto,hold,Kalispell,706.644195,721.093633,701.093633,0.0,True,False,False


In [187]:
# Save the combined December table; the row index is left out of the file.
MT_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/MT/MT_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# List the combined month CSV files in the MT state folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible between runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/MT/") if f.endswith(".csv"))

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once. Growing the dataframe
# inside the loop re-copies all previously loaded rows on each pass.
state_frames = [pd.read_csv("Scraper_Output/State_Month_Day/MT/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty dataframe when
# the folder held no CSV files (the result the original loop produced).
MT_all = pd.concat(state_frames) if state_frames else pd.DataFrame()

MT_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,aug,2017,cool,auto,Eureka,698.184211,724.421053,734.736842,10.0,True,False,True
1,037988cc24eaeaa08f7002dbc926d02c8f9c0f1c,aug,2017,cool,hold,Eureka,692.443119,719.359608,719.305922,10.0,True,False,True
2,0bc51b95b2dc23713f611e182c689ad03dc2c300,aug,2017,cool,auto,Great Falls,705.645833,750.000000,680.000000,10.0,False,False,False
3,0bc51b95b2dc23713f611e182c689ad03dc2c300,aug,2017,cool,hold,Great Falls,709.226804,724.481100,720.621993,10.0,False,False,False
4,1e4188c276ca852a75feef8c5e3fc27e1f409c4d,aug,2017,cool,hold,Helena,741.703704,740.000000,740.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
299,f404081c89442a6b4699e93e43dc4bb72f9e2dc1,jun,2021,auto,hold,Great Falls,721.327862,730.421420,680.421420,55.0,False,False,False
300,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,jun,2021,auto,hold,Billings,679.107143,667.160714,617.160714,55.0,False,False,False
301,f74a23b378011473f8b32b3fc2efbbb3b6a9e098,jun,2021,cool,hold,Billings,664.848837,655.116279,654.720930,55.0,False,False,False
302,fd93662943beeb4603fd63aacbbbb7abb3183470,jun,2021,cool,hold,NASHUA,745.318584,741.955752,740.814159,30.0,False,False,False


In [190]:
# Save the all-months MT table one level above the per-month files; the row
# index is left out of the file.
MT_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/MT_all_day.csv", index=False, header=True)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries

# Years checked for each month, in the order the results are printed.
# aug and dec are only checked for 2017-2020; the other months run to 2021.
checked_years = {
    "jan": range(2017, 2022),
    "feb": range(2017, 2022),
    "jun": range(2017, 2022),
    "jul": range(2017, 2022),
    "aug": range(2017, 2021),
    "dec": range(2017, 2021),
}

for check_month, check_years in checked_years.items():
    for check_year in check_years:
        # Each monthly frame is a notebook-level variable named <month>_<year>,
        # so it is looked up by name instead of repeating one print per frame.
        frame_name = f"{check_month}_{check_year}"
        print(f"Unique {frame_name}: {globals()[frame_name]['ProvinceState'].unique()}")

Unique jan_2017: ['MT']
Unique jan_2018: ['MT']
Unique jan_2019: ['MT']
Unique jan_2020: ['MT']
Unique jan_2021: ['MT']
Unique feb_2017: ['MT']
Unique feb_2018: ['MT']
Unique feb_2019: ['MT']
Unique feb_2020: ['MT']
Unique feb_2021: ['MT']
Unique jun_2017: ['MT']
Unique jun_2018: ['MT']
Unique jun_2019: ['MT']
Unique jun_2020: ['MT']
Unique jun_2021: ['MT']
Unique jul_2017: ['MT']
Unique jul_2018: ['MT']
Unique jul_2019: ['MT']
Unique jul_2020: ['MT']
Unique jul_2021: ['MT']
Unique aug_2017: ['MT']
Unique aug_2018: ['MT']
Unique aug_2019: ['MT']
Unique aug_2020: ['MT']
Unique dec_2017: ['MT']
Unique dec_2018: ['MT']
Unique dec_2019: ['MT']
Unique dec_2020: ['MT']
