# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 readings for Michigan (MI).
jan_2017_csv = "../data_large/MI-day/2017-jan-day-MI.csv"
jan_2017 = pd.read_csv(jan_2017_csv)

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2017.drop(index=jan_2017[is_outlier].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ad19ae1956e87668b749e90d543d532d34555d58,2017-01-29 19:35:00 UTC,heat,hold,737,735,735,MI,Madison Heights,65,False,False,False,Gas
1,10e61e397a5d9a68dc43147680cb834274e41872,2017-01-17 19:50:00 UTC,auto,hold,677,728,678,MI,Adrian township,0,False,False,False,Gas
2,38ad425c93bdc72f7ca081eb9f2c0fd8b6312c2f,2017-01-29 14:55:00 UTC,heat,auto,726,730,730,MI,Troy,40,False,False,False,Gas
3,432f6f06dc2e38068ad5dc6aa817192461c3738e,2017-01-13 12:10:00 UTC,heat,auto,693,780,700,MI,Grand Rapids,0,False,False,False,Gas
4,22ac047ae22605d6ead8be541dc4b5c5b1a3a161,2017-01-07 13:50:00 UTC,auto,auto,706,745,695,MI,Grand Rapids,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
494436,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-01-23 16:40:00 UTC,heat,auto,675,680,680,MI,Oshtemo,0,False,False,False,Gas
494437,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-01-18 18:15:00 UTC,heat,auto,684,680,680,MI,Oshtemo,0,False,False,False,Gas
494438,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-01-09 14:40:00 UTC,heat,auto,673,680,680,MI,Oshtemo,0,False,False,False,Gas
494439,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-01-18 17:55:00 UTC,heat,auto,683,680,680,MI,Oshtemo,0,False,False,False,Gas


In [4]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2017 = jan_2017.rename(columns=ave_labels)

In [6]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00600479ce78c1cb4bfc08723e8887e839314ecc,Jan,2017,auto,auto,hudsonville,690.139108,760.342957,692.624672,26.0,False,False,False
00600479ce78c1cb4bfc08723e8887e839314ecc,Jan,2017,auto,hold,hudsonville,688.643216,763.737018,690.457705,26.0,False,False,False
00db9748e4d744f28a4c91a2efeb5b727f1a848b,Jan,2017,heat,auto,Detroit,653.377778,651.577778,650.288889,120.0,False,False,False
02264938df7633eea3cb607151fc3ea38bdd0aa8,Jan,2017,heat,hold,Dearborn,648.150943,650.000000,610.000000,75.0,False,False,False
02aee59ee8e07df99cf4874cfb2c66bc3cc4e525,Jan,2017,heat,auto,Grand rapids,656.088083,685.440415,662.248705,110.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
ff88a1efc4955d699189eefc2c94260eb37fc06e,Jan,2017,heat,auto,Grand Rapids Twp,713.000000,752.000000,680.000000,0.0,True,False,False
ff88a1efc4955d699189eefc2c94260eb37fc06e,Jan,2017,heat,hold,Grand Rapids Twp,699.352941,698.823529,698.823529,0.0,True,False,False
ff8f480b88d8cb7a6d77522c6130c3cfeb64e417,Jan,2017,auto,hold,Walker,677.564815,731.064815,681.027778,0.0,False,False,False
ffac41481c0347b1c1f383ddbe947c7983a652b0,Jan,2017,heat,auto,Royal Oak,658.477778,650.000000,650.000000,105.0,False,False,False


In [7]:
# Export the January 2017 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
jan_2017_out = Path("data/day/MI/jan/jan_2017_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
jan_2017_out.parent.mkdir(parents=True, exist_ok=True)
jan_2017_ave.to_csv(jan_2017_out, header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 readings for Michigan (MI).
jan_2018_csv = "../data_large/MI-day/2018-jan-day-MI.csv"
jan_2018 = pd.read_csv(jan_2018_csv)

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2018.drop(index=jan_2018[is_outlier].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4606e4a908154e8498aa7be903d58da51b936fc6,2018-01-13 16:25:00 UTC,heat,auto,674,797,680,MI,Sterling Heights,27,False,False,False,Gas
1,966ffa3f6e6283bddcebf2077c219ec9a6a28432,2018-01-02 12:25:00 UTC,heat,hold,678,704,704,MI,Ypsilanti,20,True,False,False,Gas
2,1e9ccee258afdfc801a8bd57e275549c75e775fe,2018-01-12 19:00:00 UTC,heat,hold,682,689,689,MI,Orion,30,False,False,False,Gas
3,5fea367881bdaa941cc3926e1a3d0ff705133d97,2018-01-23 13:25:00 UTC,heat,hold,702,687,687,MI,Trenton,60,False,False,False,Gas
4,f41638e440a10ac8c47163020bd02b5fb4caf2f8,2018-01-27 13:55:00 UTC,heat,hold,688,685,685,MI,Chesterfield,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1403682,15b17a67ae95c23e07190d9785407308b7b02737,2018-01-20 14:00:00 UTC,heat,hold,700,765,705,MI,Grosse Pointe Woods,70,False,False,False,Gas
1403683,bf1bdb360212931ef5210614c8b3e9b05fd256e2,2018-01-13 14:50:00 UTC,auto,hold,687,765,695,MI,Grand Rapids,9,False,False,True,Electric
1403684,bf1bdb360212931ef5210614c8b3e9b05fd256e2,2018-01-09 18:20:00 UTC,auto,hold,668,765,665,MI,Grand Rapids,9,False,False,True,Electric
1403685,15b17a67ae95c23e07190d9785407308b7b02737,2018-01-13 15:10:00 UTC,auto,hold,698,765,705,MI,Grosse Pointe Woods,70,False,False,False,Gas


In [10]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2018 = jan_2018.rename(columns=ave_labels)

In [12]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Export the January 2018 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
jan_2018_out = Path("data/day/MI/jan/jan_2018_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
jan_2018_out.parent.mkdir(parents=True, exist_ok=True)
jan_2018_ave.to_csv(jan_2018_out, header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 readings for Michigan (MI).
jan_2019_csv = "../data_large/MI-day/2019-jan-day-MI.csv"
jan_2019 = pd.read_csv(jan_2019_csv)

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2019.drop(index=jan_2019[is_outlier].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,8aa219ddaf34ecf5d37c7449fecf829840ab31d5,2019-01-20 18:00:00 UTC,heat,hold,732,715,715,MI,Midland,40,False,False,False,Gas
2,27b9ce7758f3e0c12db3e0debafa32f069d5041e,2019-01-15 19:25:00 UTC,auto,hold,713,768,718,MI,Troy,50,False,False,False,Gas
4,62188f6092cf33be122b3cad17c2859f282330eb,2019-01-11 16:50:00 UTC,heat,hold,711,715,715,MI,Charlotte,88,False,False,False,Gas
5,4e34d7d9afcaa863a78dc0bec8c252c729bd21c0,2019-01-01 17:30:00 UTC,heat,hold,714,717,717,MI,Flushing,20,False,False,False,Gas
6,37d9a775d0c41d8023887046f3816808c3545ffd,2019-01-16 18:55:00 UTC,heat,hold,726,725,725,MI,Dearborn,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2587360,5e07058774553efea36ff6ab6546f886ee65e824,2019-01-25 12:35:00 UTC,heat,auto,759,760,760,MI,White Cloud,40,False,False,False,Gas
2587361,0cab4446842ac76227612e36bab1d1f8a600a562,2019-01-12 12:25:00 UTC,heat,hold,759,760,760,MI,Kentwood,58,False,False,False,Gas
2587362,28fca1e28a675e8524472af8cef8e41c1f198528,2019-01-26 18:55:00 UTC,heat,auto,684,760,690,MI,Ann Arbor,15,False,False,False,Gas
2587363,589608bc15a74964045f881baabca2acd9057866,2019-01-23 15:00:00 UTC,auto,auto,655,760,660,MI,Farmington Hills,85,False,False,False,Gas


In [16]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2019 = jan_2019.rename(columns=ave_labels)

In [18]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Export the January 2019 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
jan_2019_out = Path("data/day/MI/jan/jan_2019_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
jan_2019_out.parent.mkdir(parents=True, exist_ok=True)
jan_2019_ave.to_csv(jan_2019_out, header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 readings for Michigan (MI).
jan_2020_csv = "../data_large/MI-day/2020-jan-day-MI.csv"
jan_2020 = pd.read_csv(jan_2020_csv)

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2020.drop(index=jan_2020[is_outlier].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8a1ffc5de736f0acb228ded4071e47986d85dae2,2020-01-31 11:40:00 UTC,auto,auto,698,755,705,MI,Rochester Hills,30,False,False,False,Gas
1,23b343def9c091c340e3949efb2963816a0c6b03,2020-01-03 13:20:00 UTC,auto,hold,679,746,676,MI,Berrien Springs,10,False,False,False,Gas
2,8a970eb552215627cc000fb4eb3e8e2280292d37,2020-01-19 19:00:00 UTC,auto,hold,682,746,686,MI,Harrison Township,0,False,False,False,Gas
3,33fb7d7e2a015768424e459a001543d187578207,2020-01-31 16:10:00 UTC,heat,hold,731,727,727,MI,Flushing,20,False,False,False,Gas
4,f3741a55aa81ef51b0dfd5ec45f19052f8adaaff,2020-01-21 13:05:00 UTC,heat,hold,689,682,682,MI,romulus,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2802987,acb5160f387134a2e528d81b6a8e766c5fe2111e,2020-01-05 14:35:00 UTC,auto,auto,675,760,710,MI,Holt,0,True,False,False,Gas
2802988,717c1ad16b440abd7ee59396df3479cf7c8802e0,2020-01-01 16:25:00 UTC,auto,hold,708,760,710,MI,Madison Heights,60,False,False,False,Gas
2802989,2f9d95d1d5b9e334e29cf2c13e75708f0a79e049,2020-01-25 08:35:00 UTC,heat,hold,757,760,760,MI,Ann Arbor,59,False,False,False,Gas
2802990,bd76c90318e35fb747ab6b0f3558baccd5dc32f2,2020-01-23 18:50:00 UTC,auto,hold,690,760,690,MI,Ada,0,False,False,False,Gas


In [22]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2020 = jan_2020.rename(columns=ave_labels)

In [24]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Export the January 2020 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
jan_2020_out = Path("data/day/MI/jan/jan_2020_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
jan_2020_out.parent.mkdir(parents=True, exist_ok=True)
jan_2020_ave.to_csv(jan_2020_out, header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 readings for Michigan (MI).
jan_2021_csv = "../data_large/MI-day/2021-jan-day-MI.csv"
jan_2021 = pd.read_csv(jan_2021_csv)

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2021.drop(index=jan_2021[is_outlier].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a231991a793f6e9a0412208239f9b79033a7075b,2021-01-13 19:15:00 UTC,heat,hold,683,721,688,MI,Grand Rapids,30,False,False,False,Gas
1,0980b398034424a971e2fc1de75ca626baf31f44,2021-01-06 18:10:00 UTC,heat,hold,675,689,689,MI,Ann Arbor,10,False,False,False,Gas
2,83a6dc9509915e5c00acac3b1853a3d772916c44,2021-01-31 17:30:00 UTC,auto,hold,731,810,760,MI,Saginaw,0,False,False,False,Gas
3,137ec0a0d76ddf7654dc65c481ba0d54171d54d9,2021-01-10 19:10:00 UTC,auto,hold,729,782,732,MI,Rochester Hills,35,False,False,False,Gas
4,62e3a1f2d52900c1126892e6e1e2002d2880f7d5,2021-01-14 11:50:00 UTC,auto,hold,695,795,695,MI,Oakland,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1818543,e539613d4ea771aaa6ece7117272fccbfdb41d83,2021-01-12 17:20:00 UTC,heat,hold,757,760,760,MI,Hazel Park,60,True,False,False,Gas
1818544,a7df7c8215a6dc141973d9e4af25ce2798bb1cec,2021-01-21 13:30:00 UTC,heat,hold,754,760,760,MI,Canton,0,True,False,False,Gas
1818545,9ed487bbc9dcd06f7e6c5bdcf712354488f46f62,2021-01-01 11:05:00 UTC,auto,hold,690,760,700,MI,Linden,15,False,False,False,Gas
1818546,9bb8ba38375493b4930bd9301d35b231c46cb5b6,2021-01-28 15:50:00 UTC,auto,hold,680,760,680,MI,Grandville,37,False,False,False,Gas


In [28]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2021 = jan_2021.rename(columns=ave_labels)

In [30]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Export the January 2021 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
jan_2021_out = Path("data/day/MI/jan/jan_2021_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
jan_2021_out.parent.mkdir(parents=True, exist_ok=True)
jan_2021_ave.to_csv(jan_2021_out, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year January exports. os.listdir returns names in arbitrary
# order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/MI/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year January export, then stack them with a single concat
# (concatenating inside the loop re-copies all earlier rows on each pass).
jan_frames = [pd.read_csv("data/day/MI/jan/" + file) for file in files]

# pd.concat raises on an empty list, so keep an empty frame when no exports exist.
MI_jan = pd.concat(jan_frames) if jan_frames else pd.DataFrame()

MI_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00600479ce78c1cb4bfc08723e8887e839314ecc,Jan,2017,auto,auto,hudsonville,690.139108,760.342957,692.624672,26.0,False,False,False
1,00600479ce78c1cb4bfc08723e8887e839314ecc,Jan,2017,auto,hold,hudsonville,688.643216,763.737018,690.457705,26.0,False,False,False
2,00db9748e4d744f28a4c91a2efeb5b727f1a848b,Jan,2017,heat,auto,Detroit,653.377778,651.577778,650.288889,120.0,False,False,False
3,02264938df7633eea3cb607151fc3ea38bdd0aa8,Jan,2017,heat,hold,Dearborn,648.150943,650.000000,610.000000,75.0,False,False,False
4,02aee59ee8e07df99cf4874cfb2c66bc3cc4e525,Jan,2017,heat,auto,Grand rapids,656.088083,685.440415,662.248705,110.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2067,ff7537a785495395b799af4fb248c92ddb4fded1,Jan,2021,heat,hold,Alpena,677.269864,680.000000,680.000000,0.0,False,False,False
2068,ff8f480b88d8cb7a6d77522c6130c3cfeb64e417,Jan,2021,heat,hold,Walker,669.206897,691.000000,691.000000,0.0,False,False,False
2069,ffb40dac5e44e2063620351c2ca1b6b420a7de46,Jan,2021,heat,hold,Northville,688.626943,694.953368,694.145078,15.0,True,False,False
2070,ffcbc545479ab87baa3bbe5a2f2fa7b1dd2f89e9,Jan,2021,heat,hold,Grand Rapids,714.875000,720.000000,720.000000,60.0,False,False,False


In [34]:
# Write the combined January table; index=False leaves out the per-file row numbers.
MI_jan_out = Path("Scraper_Output/State_Month_Day/MI/MI_jan.csv")
# Create the output folder first so the export also works on a fresh checkout.
MI_jan_out.parent.mkdir(parents=True, exist_ok=True)
MI_jan.to_csv(MI_jan_out, header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 readings for Michigan (MI).
feb_2017_csv = "../data_large/MI-day/2017-feb-day-MI.csv"
feb_2017 = pd.read_csv(feb_2017_csv)

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2017.drop(index=feb_2017[is_outlier].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,226d1c799db697c5246a4b51a61e1f81e1482b1f,2017-02-20 16:10:00 UTC,heat,auto,693,680,680,MI,Mundy,40,False,False,False,Gas
1,03ef35a1aeba7059dc8dad8dc6475c3b111d97ee,2017-02-16 17:40:00 UTC,heat,auto,730,740,740,MI,Oxford,15,False,False,False,Gas
2,4cfadfb8d59c1f948387b94d6555c9ff8b926c2f,2017-02-17 11:10:00 UTC,heat,hold,630,680,630,MI,Clarkston,20,False,False,False,Gas
3,994123002a0b8bb100831933d056c7459ac35593,2017-02-07 18:00:00 UTC,heat,hold,765,760,760,MI,Midland,10,False,False,False,Gas
4,582d580817e19a836ef0177deae833ba57725ffd,2017-02-12 18:10:00 UTC,auto,auto,640,820,630,MI,Kentwood,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
475883,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-02-21 14:20:00 UTC,heat,auto,676,680,680,MI,Oshtemo,0,False,False,False,Gas
475884,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-02-18 14:10:00 UTC,heat,auto,677,680,680,MI,Oshtemo,0,False,False,False,Gas
475885,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-02-06 13:50:00 UTC,heat,auto,674,680,680,MI,Oshtemo,0,False,False,False,Gas
475886,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-02-12 19:20:00 UTC,heat,auto,685,680,680,MI,Oshtemo,0,False,False,False,Gas


In [37]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2017 = feb_2017.rename(columns=ave_labels)

In [39]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Export the February 2017 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
feb_2017_out = Path("data/day/MI/feb/feb_2017_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
feb_2017_out.parent.mkdir(parents=True, exist_ok=True)
feb_2017_ave.to_csv(feb_2017_out, header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 readings for Michigan (MI).
feb_2018_csv = "../data_large/MI-day/2018-feb-day-MI.csv"
feb_2018 = pd.read_csv(feb_2018_csv)

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2018.drop(index=feb_2018[is_outlier].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,23fc69c7e605f939dbc834ca713e70ae0e7b492e,2018-02-03 18:45:00 UTC,heat,hold,715,715,715,MI,Grand Rapids,65,False,False,False,Gas
2,2b6e0c2eafd23313406e9cde72b71a549e197bc5,2018-02-05 16:50:00 UTC,heat,hold,696,698,698,MI,saint joseph,0,False,False,False,Gas
3,5284b11f3af6ebeb331d19448109bea5bfa7c404,2018-02-19 09:10:00 UTC,heat,hold,637,640,640,MI,Brighton township,5,False,False,False,Gas
4,ffac41481c0347b1c1f383ddbe947c7983a652b0,2018-02-10 18:35:00 UTC,heat,hold,686,695,695,MI,Royal Oak,105,False,False,False,Gas
5,88e7b9d6e309a8eacd6112d54a0a646f0c3c4c88,2018-02-18 13:35:00 UTC,auto,auto,723,775,725,MI,Grand Rapids,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1308440,dde82dba9c470d84ac14addc6aec0738feec549b,2018-02-15 18:15:00 UTC,auto,hold,695,760,700,MI,Shelby Township,10,False,False,False,Gas
1308441,d790c11b0383e48db6cbe620a83ab0e5e1617773,2018-02-03 17:15:00 UTC,heat,auto,682,760,680,MI,Linden,17,True,False,False,Gas
1308442,0119e0242041f03e3fe3e12ca532b508adf1d916,2018-02-06 14:15:00 UTC,auto,hold,689,760,690,MI,Novi,10,False,False,False,Gas
1308443,f3e6e580a2311463b8cb46016824ca49905675c7,2018-02-15 14:50:00 UTC,auto,hold,690,760,690,MI,Rochester Hills,35,False,False,False,Gas


In [43]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2018 = feb_2018.rename(columns=ave_labels)

In [45]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Export the February 2018 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
feb_2018_out = Path("data/day/MI/feb/feb_2018_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
feb_2018_out.parent.mkdir(parents=True, exist_ok=True)
feb_2018_ave.to_csv(feb_2018_out, header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 readings for Michigan (MI).
feb_2019_csv = "../data_large/MI-day/2019-feb-day-MI.csv"
feb_2019 = pd.read_csv(feb_2019_csv)

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2019.drop(index=feb_2019[is_outlier].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,50d61af90c794428d45ea95725f04dfef72b0541,2019-02-10 19:35:00 UTC,heat,auto,674,702,680,MI,Rockford,20,False,False,False,Gas
2,5284b11f3af6ebeb331d19448109bea5bfa7c404,2019-02-13 10:40:00 UTC,heat,auto,620,610,610,MI,Brighton township,5,False,False,False,Gas
3,82f7001e0808b0a75484e74df34894d12cebbc54,2019-02-26 19:20:00 UTC,heat,hold,701,705,705,MI,Flushing,20,False,False,False,Gas
4,fba1c47e53560ed63bf052289a873cadee74eb92,2019-02-23 18:00:00 UTC,heat,hold,729,725,725,MI,Hartland,45,False,False,False,Gas
6,474774c1172e8a470743ddc26007c4fc5418e31f,2019-02-12 13:30:00 UTC,heat,hold,697,810,700,MI,Monroe,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1808295,b9d78de5bd7ca3ecad3e97187b04a5a067d1a6af,2019-02-21 16:40:00 UTC,heat,auto,669,760,630,MI,mason,5,False,False,False,Gas
1808296,2927a79b27a8943b4b9ee6e7a1475ba0efe45b30,2019-02-01 19:40:00 UTC,heat,hold,784,760,760,MI,Romulus,0,False,False,False,Gas
1808297,bd76c90318e35fb747ab6b0f3558baccd5dc32f2,2019-02-05 11:05:00 UTC,auto,hold,695,760,690,MI,Ada,0,False,False,False,Gas
1808298,28feed7eeb351cc773cc919611c69788558db9e6,2019-02-08 13:10:00 UTC,auto,hold,653,760,660,MI,Howell,10,True,False,False,Gas


In [49]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2019 = feb_2019.rename(columns=ave_labels)

In [51]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Export the February 2019 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
feb_2019_out = Path("data/day/MI/feb/feb_2019_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
feb_2019_out.parent.mkdir(parents=True, exist_ok=True)
feb_2019_ave.to_csv(feb_2019_out, header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 readings for Michigan (MI).
feb_2020_csv = "../data_large/MI-day/2020-feb-day-MI.csv"
feb_2020 = pd.read_csv(feb_2020_csv)

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2020.drop(index=feb_2020[is_outlier].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,00b337c132e76bbb2fc1ca41821f708d2145d03f,2020-02-16 15:20:00 UTC,heat,auto,682,687,620,MI,Grand Rapids,30,False,False,False,Gas
1,448a89d47a9fc16cf4cdb1e13b9722173efe43e5,2020-02-13 15:55:00 UTC,heat,hold,714,718,718,MI,Flat Rock,20,True,False,False,Gas
2,40332b343ba060ef151cce414397141b5ec0bf10,2020-02-10 16:00:00 UTC,heat,hold,711,716,716,MI,Wyoming,7,False,False,False,Gas
3,400b20b14b6e1c2b8e1c1fbd81435f024a26a2e2,2020-02-21 19:30:00 UTC,heat,hold,701,704,704,MI,Niles Township,15,False,False,False,Gas
4,446258689af625de2d5362c3b39b92cd279d5bdb,2020-02-05 15:25:00 UTC,heat,hold,618,651,620,MI,Jenison,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2517205,a3ffa2741850c5432fac92e7dda8d70642587e99,2020-02-11 19:20:00 UTC,auto,hold,705,760,710,MI,Delhi,0,False,False,False,Gas
2517206,e1bfdca1697089cdc1605194a7b8af4eb95e4de2,2020-02-18 12:05:00 UTC,heat,auto,615,760,620,MI,Grosse Pointe Farms,60,False,False,False,Gas
2517207,0f31638d1e6433eb413241cee286b065c1324e99,2020-02-12 15:25:00 UTC,auto,auto,709,760,710,MI,Hudsonville,18,False,False,False,Gas
2517208,dbfe70eed7403a5e03be4bde88619782f67ea191,2020-02-18 14:05:00 UTC,heat,auto,728,760,730,MI,Muskegon,0,False,False,False,Gas


In [55]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2020 = feb_2020.rename(columns=ave_labels)

In [57]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Export the February 2020 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
feb_2020_out = Path("data/day/MI/feb/feb_2020_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
feb_2020_out.parent.mkdir(parents=True, exist_ok=True)
feb_2020_ave.to_csv(feb_2020_out, header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 readings for Michigan (MI).
feb_2021_csv = "../data_large/MI-day/2021-feb-day-MI.csv"
feb_2021 = pd.read_csv(feb_2021_csv)

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating: drop every row whose
# cooling OR heating setpoint is >= 850 or <= 600 (rows with missing
# setpoints are kept, because comparisons against NaN are False).
setpoints = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2021.drop(index=feb_2021[is_outlier].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3a831a2d9ff5467fae49d822a7c82a7aed719ed9,2021-02-10 13:30:00 UTC,heat,hold,737,735,735,MI,Dexter,90,False,False,False,Gas
1,40e76f0d5ec549bb7dbb29c053ff5a97e7b87c1c,2021-02-28 11:15:00 UTC,auto,hold,667,752,672,MI,Ypsilanti,47,True,False,False,Gas
3,f445f5bc6411a49666397a60f77f5d6f56def4a5,2021-02-20 17:55:00 UTC,heat,hold,699,698,698,MI,Grand Rapids,20,False,False,False,Gas
5,210965fd4135d5c08122575de67df30f83485923,2021-02-22 12:20:00 UTC,auto,hold,691,822,702,MI,Ann Arbor,0,True,False,False,Gas
6,57f1d8b678489fac920eecaedb3c45dc550efa68,2021-02-16 13:25:00 UTC,auto,hold,705,756,706,MI,Northville,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1524897,9d64ec5bbb71e383a4b79492b62faa9d2f692046,2021-02-11 10:20:00 UTC,auto,hold,687,760,690,MI,Saint Joseph,10,False,False,False,Gas
1524898,6fcfd94fb3227e715c11ba08601a6e8d34fb5a6f,2021-02-05 12:40:00 UTC,auto,hold,686,760,700,MI,Greenville,29,True,False,False,Gas
1524899,acb5160f387134a2e528d81b6a8e766c5fe2111e,2021-02-14 18:00:00 UTC,auto,hold,680,760,680,MI,Holt,0,True,False,False,Gas
1524900,534ea2ff591d2e8bc94a6a262005f60b0a14e2cb,2021-02-07 17:45:00 UTC,auto,hold,651,760,660,MI,Grandville,69,False,False,False,Gas


In [61]:
# Stamp every row with its year and month so both survive the aggregation
# as group keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Tag the three temperature columns with an "_ave" suffix so they are
# labelled as averages once the frame is aggregated.
ave_labels = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2021 = feb_2021.rename(columns=ave_labels)

In [63]:
# Average every numeric column per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True skips the text columns (e.g.
# date_time); pandas >= 2.0 raises a TypeError on them instead of silently
# dropping them as older versions did.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Export the February 2021 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
feb_2021_out = Path("data/day/MI/feb/feb_2021_ave.csv")
# Create the output folder first so the export also works on a fresh checkout.
feb_2021_out.parent.mkdir(parents=True, exist_ok=True)
feb_2021_ave.to_csv(feb_2021_out, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year February exports. os.listdir returns names in arbitrary
# order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/MI/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year February export, then stack them with a single concat
# (concatenating inside the loop re-copies all earlier rows on each pass).
feb_frames = [pd.read_csv("data/day/MI/feb/" + file) for file in files]

# pd.concat raises on an empty list, so keep an empty frame when no exports exist.
MI_feb = pd.concat(feb_frames) if feb_frames else pd.DataFrame()

MI_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00600479ce78c1cb4bfc08723e8887e839314ecc,feb,2017,auto,auto,hudsonville,680.430723,745.978916,682.243976,26.0,False,False,False
1,00600479ce78c1cb4bfc08723e8887e839314ecc,feb,2017,auto,hold,hudsonville,685.635741,769.982565,688.016189,26.0,False,False,False
2,00600479ce78c1cb4bfc08723e8887e839314ecc,feb,2017,heat,auto,hudsonville,678.114206,676.871866,676.871866,26.0,False,False,False
3,00600479ce78c1cb4bfc08723e8887e839314ecc,feb,2017,heat,hold,hudsonville,686.146930,686.769737,686.769737,26.0,False,False,False
4,00db9748e4d744f28a4c91a2efeb5b727f1a848b,feb,2017,heat,auto,Detroit,662.707317,650.000000,617.219512,120.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1948,ff7537a785495395b799af4fb248c92ddb4fded1,feb,2021,heat,hold,Alpena,677.074725,679.980220,679.980220,0.0,False,False,False
1949,ff8f480b88d8cb7a6d77522c6130c3cfeb64e417,feb,2021,heat,hold,Walker,693.836364,700.000000,700.000000,0.0,False,False,False
1950,ffb40dac5e44e2063620351c2ca1b6b420a7de46,feb,2021,heat,hold,Northville,682.628571,688.219048,684.447619,15.0,True,False,False
1951,ffcbc545479ab87baa3bbe5a2f2fa7b1dd2f89e9,feb,2021,heat,hold,Grand Rapids,713.156812,717.722365,717.722365,60.0,False,False,False


In [67]:
# Save the combined month frame; the row index is left out of the file.
MI_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/MI/MI_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" CSV for MI into a DataFrame
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2017-jun-day-MI.csv")

In [69]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017.index[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,7e5a65810b77b9fb543bb14086f9a69ad25d3003,2017-06-03 19:45:00 UTC,auto,hold,718,715,665,MI,Ovid,110,False,False,False,Gas
2,ba12f750672118dc7abd84da561b4360919e0697,2017-06-06 11:55:00 UTC,heat,hold,777,806,752,MI,Ann Arbor,20,False,False,False,Gas
3,a4211d2e9b628fdf7cd1f79f7e8a389e5c8dd343,2017-06-06 15:30:00 UTC,auto,hold,730,770,665,MI,Rochester,20,True,False,False,Gas
4,56deda1284d4b68f6c5ac985b0844d9d1a47215d,2017-06-01 16:55:00 UTC,cool,hold,709,735,735,MI,Birmingham,69,True,False,False,Gas
5,ba12f750672118dc7abd84da561b4360919e0697,2017-06-06 15:30:00 UTC,heat,hold,762,806,752,MI,Ann Arbor,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
726403,5c647f748e60218c1c2d315e1937895feb9ea404,2017-06-24 14:30:00 UTC,cool,hold,793,760,760,MI,Grand Rapids,0,False,False,False,Gas
726404,2fbb8da18376c9e154eb4f1d2e2fe0f1b27094ef,2017-06-22 13:45:00 UTC,cool,hold,744,760,760,MI,Macomb,15,False,False,False,Gas
726405,ff1cfe6dffc630d53081b6347a085fcea22730b3,2017-06-16 17:50:00 UTC,cool,auto,720,740,760,MI,Oshtemo,0,False,False,False,Gas
726406,a34a14b392664047d17a773c35337c5d442d380e,2017-06-11 17:10:00 UTC,cool,hold,750,760,760,MI,Shelby Township,10,True,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jun_2017_ave.to_csv(path_or_buf="data/day/MI/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the June 2018 "day" CSV for MI into a DataFrame
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2018-jun-day-MI.csv")

In [75]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018.index[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4f42eb9f3a4230bae518688a5980782835eb8fdb,2018-06-01 18:35:00 UTC,auto,hold,748,745,695,MI,Ypsilanti,20,False,False,False,Gas
2,26ec4d0c3ff57617be19098866924ff3ad8e9979,2018-06-30 14:30:00 UTC,cool,auto,735,730,657,MI,Sterling Heights,0,False,False,False,Gas
3,b9d957eac2648402cffa52e275d343ed710b118a,2018-06-23 12:30:00 UTC,cool,hold,696,697,697,MI,Southfield,50,False,False,False,Gas
4,4f42eb9f3a4230bae518688a5980782835eb8fdb,2018-06-15 16:20:00 UTC,auto,hold,749,745,695,MI,Ypsilanti,20,False,False,False,Gas
5,62735de1265516d82890f61012be7f74bbf83801,2018-06-26 19:30:00 UTC,cool,hold,721,725,725,MI,Sterling Heights,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1719924,a732e356c2cc248aca472c7365936b710125e147,2018-06-23 16:25:00 UTC,cool,hold,731,760,760,MI,Rochester Hills,30,False,False,False,Gas
1719925,a068a32b052abc51abcb251e727c210deec685e8,2018-06-30 13:00:00 UTC,cool,auto,755,760,760,MI,Brighton,30,False,False,False,Gas
1719926,24c55f3755923115b0ebe4079302e10f36efd488,2018-06-12 10:35:00 UTC,cool,hold,721,760,760,MI,Comstock Park,0,False,False,False,Gas
1719927,1783f2fbc9eea49c1e38f03c6d682e3d6925e9e5,2018-06-24 16:50:00 UTC,cool,hold,757,760,760,MI,Madison Heights,59,False,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jun_2018_ave.to_csv(path_or_buf="data/day/MI/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the June 2019 "day" CSV for MI into a DataFrame
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2019-jun-day-MI.csv")

In [81]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019.index[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,40e76f0d5ec549bb7dbb29c053ff5a97e7b87c1c,2019-06-07 15:00:00 UTC,auto,hold,697,722,652,MI,Ypsilanti,47,True,False,False,Gas
1,1eb1e068d2fdf70b6d44e8a848212a09defda631,2019-06-24 11:50:00 UTC,auto,hold,709,715,625,MI,Dundee,15,True,False,False,Gas
3,e990556ad4ad23f37ec5a27e1f16aefb60ed5ae4,2019-06-30 17:00:00 UTC,cool,hold,758,760,739,MI,Allen Park,60,False,False,False,Gas
4,b9d957eac2648402cffa52e275d343ed710b118a,2019-06-02 18:30:00 UTC,cool,hold,685,712,712,MI,Southfield,50,False,False,False,Gas
6,156ca7488ad8cbdb7ec99c24344b286be8c05f5a,2019-06-18 13:30:00 UTC,auto,hold,721,762,712,MI,Grosse Pointe Woods,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2338100,3783c9b709d0120e2b422c480b97acac1e65f85e,2019-06-20 07:30:00 UTC,cool,auto,750,760,760,MI,Ypsilanti,0,False,False,False,Gas
2338101,62cbe15ef092681020627f14251edbf3627e0db2,2019-06-29 16:55:00 UTC,cool,hold,759,760,760,MI,Cannon,0,False,False,False,Gas
2338102,bb52ae269aac999a9936ef9929a3a8f5e7442c81,2019-06-22 13:15:00 UTC,cool,auto,722,760,760,MI,Dearborn,80,False,False,False,Gas
2338103,4f888e676196d9128332560f7ae445a10ffefb03,2019-06-29 16:10:00 UTC,cool,hold,765,760,760,MI,Highland Township,45,False,False,False,Gas


In [82]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jun_2019_ave.to_csv(path_or_buf="data/day/MI/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the June 2020 "day" CSV for MI into a DataFrame
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2020-jun-day-MI.csv")

In [87]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020.index[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,de48f0ebe81ed69cb7c87c5234dc8877d0403108,2020-06-11 16:20:00 UTC,cool,auto,703,700,696,MI,Northville township,15,False,False,False,Gas
3,c7ae963671cc3a65b82816cebb02c662089a2346,2020-06-30 12:35:00 UTC,cool,hold,739,740,737,MI,Grand Rapids,40,True,False,False,Gas
4,f5446a363ed73ce30bf26f9fcaffae5ff0d078fd,2020-06-23 19:55:00 UTC,cool,auto,715,728,728,MI,Jenison,9,True,False,False,Gas
5,8b1919827fe0fa16117503bf80baeea54e7897bc,2020-06-29 14:50:00 UTC,auto,hold,770,781,721,MI,Carleton,40,False,False,False,Gas
6,10e61e397a5d9a68dc43147680cb834274e41872,2020-06-29 14:20:00 UTC,cool,hold,706,699,699,MI,Adrian township,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2626119,30d457c1e3c81ccb56cb260e4952e9ffe326c02b,2020-06-30 17:20:00 UTC,cool,auto,764,760,760,MI,Northville,70,False,False,False,Gas
2626120,74471457813fe06fe3580495b2c10b4132fd2f36,2020-06-27 12:45:00 UTC,cool,hold,754,760,760,MI,warren,50,True,False,False,Gas
2626121,f4d1664b5c9a3036ec5ab79b60beb80abc803b8d,2020-06-28 15:20:00 UTC,cool,hold,712,760,760,MI,Dexter,0,False,False,False,Gas
2626122,38ad425c93bdc72f7ca081eb9f2c0fd8b6312c2f,2020-06-07 16:55:00 UTC,cool,auto,747,780,760,MI,Troy,40,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jun_2020_ave.to_csv(path_or_buf="data/day/MI/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the June 2021 "day" CSV for MI into a DataFrame
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2021-jun-day-MI.csv")

In [93]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021.index[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d06230874c226a7a28e69ab5e2855d370aa6a1f9,2021-06-09 13:10:00 UTC,cool,hold,743,745,745,MI,Wyoming,60,True,False,False,Gas
1,61a21e5e0d880f9fd2b08aef37181b8adf9ed07e,2021-06-19 14:30:00 UTC,cool,hold,772,788,788,MI,Troy,30,True,False,False,Gas
2,06cc09f5638c7cda9948d91b254a35ced3e57107,2021-06-19 12:55:00 UTC,auto,hold,715,715,655,MI,Vicksburg,40,False,False,False,Gas
4,6938a07e7d9347536a8ce830d534420c4f9cb336,2021-06-16 13:20:00 UTC,cool,hold,701,760,712,MI,Rochester Hills,0,True,False,False,Gas
5,359f508c3b635b3a57859f4d64cf15aeba02643b,2021-06-13 19:05:00 UTC,cool,hold,767,765,765,MI,Berrien Springs,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1694776,1468a9390c76481cee7c0873b33b0066d4934b1b,2021-06-09 18:20:00 UTC,cool,hold,757,760,760,MI,Ada,40,False,False,False,Gas
1694777,5fea367881bdaa941cc3926e1a3d0ff705133d97,2021-06-15 16:50:00 UTC,cool,hold,723,760,760,MI,Trenton,60,False,False,False,Gas
1694778,a6762d60a38f5b302825355eeb788b32e1b0fb99,2021-06-10 15:00:00 UTC,cool,hold,727,760,760,MI,Bloomfield Hills,50,False,False,False,Gas
1694779,b64b904a9f1f1aeb266010a30b6cb4b0bf7a9163,2021-06-25 13:50:00 UTC,cool,hold,753,760,760,MI,New Hudson,0,False,False,False,Gas


In [94]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jun_2021_ave.to_csv(path_or_buf="data/day/MI/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder for the state
2. Export as combined CSV

In [98]:
# Collect the CSV files in the month's folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined frame reproducible between runs.
files = sorted(f for f in os.listdir("data/day/MI/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and combine them with a single concat call;
# concatenating onto a growing frame inside a loop re-copies all accumulated
# rows on every pass.
monthly_frames = [pd.read_csv("data/day/MI/jun/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
MI_jun = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

MI_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00600479ce78c1cb4bfc08723e8887e839314ecc,jun,2017,auto,auto,hudsonville,702.877193,703.065789,629.802632,26.0,False,False,False
1,00600479ce78c1cb4bfc08723e8887e839314ecc,jun,2017,auto,hold,hudsonville,707.962500,717.575000,636.729167,26.0,False,False,False
2,00600479ce78c1cb4bfc08723e8887e839314ecc,jun,2017,cool,auto,hudsonville,719.682353,708.129412,653.058824,26.0,False,False,False
3,00600479ce78c1cb4bfc08723e8887e839314ecc,jun,2017,cool,hold,hudsonville,698.391304,697.847826,697.847826,26.0,False,False,False
4,00db9748e4d744f28a4c91a2efeb5b727f1a848b,jun,2017,cool,auto,Detroit,700.458333,780.000000,670.000000,120.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2616,ff88017cb252da4e0ea9e3a8e8448f633b98127d,jun,2021,auto,hold,Hudsonville,767.491803,757.131148,702.327869,5.0,False,False,False
2617,ff88017cb252da4e0ea9e3a8e8448f633b98127d,jun,2021,cool,hold,Hudsonville,777.083333,770.000000,770.000000,5.0,False,False,False
2618,ff8f480b88d8cb7a6d77522c6130c3cfeb64e417,jun,2021,cool,hold,Walker,731.517544,736.315789,736.315789,0.0,False,False,False
2619,ffb40dac5e44e2063620351c2ca1b6b420a7de46,jun,2021,cool,hold,Northville,719.800000,707.809524,701.380952,15.0,True,False,False


In [100]:
# Save the combined month frame; the row index is left out of the file.
MI_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/MI/MI_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" CSV for MI into a DataFrame
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2017-jul-day-MI.csv")

In [102]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017.index[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a4211d2e9b628fdf7cd1f79f7e8a389e5c8dd343,2017-07-28 19:25:00 UTC,auto,hold,748,788,617,MI,Rochester,20,True,False,False,Gas
1,0f87f890c5c669df4b4a04e82380734df3300185,2017-07-31 12:00:00 UTC,cool,hold,782,800,800,MI,Kalamazoo,65,False,False,False,Gas
3,a7d6dbfa4850174e618549312adfa15db2927c3e,2017-07-09 18:05:00 UTC,cool,hold,691,685,685,MI,Georgetown,0,False,False,False,Gas
4,f3741a55aa81ef51b0dfd5ec45f19052f8adaaff,2017-07-27 11:30:00 UTC,auto,hold,736,735,675,MI,romulus,40,False,False,False,Gas
5,8ddbd5841458f35ae49e5e9f695b381b9167092c,2017-07-31 15:30:00 UTC,cool,hold,703,762,762,MI,Grand Rapids,39,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
921807,1468a9390c76481cee7c0873b33b0066d4934b1b,2017-07-28 11:20:00 UTC,cool,hold,735,760,760,MI,Ada,40,False,False,False,Gas
921808,11b80dcee59194081a5969287531111a543cfec3,2017-07-01 11:05:00 UTC,cool,auto,742,740,760,MI,Ada,40,False,False,False,Gas
921809,11b80dcee59194081a5969287531111a543cfec3,2017-07-04 18:35:00 UTC,cool,hold,759,760,760,MI,Ada,40,False,False,False,Gas
921810,f7cec3db6d1969890038225e2b4d5cb14cbd14e0,2017-07-13 13:20:00 UTC,cool,hold,763,760,760,MI,Oak Park,87,False,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jul_2017_ave.to_csv(path_or_buf="data/day/MI/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the July 2018 "day" CSV for MI into a DataFrame
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2018-jul-day-MI.csv")

In [108]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018.index[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d9189597d236296343ef627b142d05cdf679a116,2018-07-14 18:15:00 UTC,cool,hold,699,695,695,MI,Lansing,60,False,False,False,Gas
2,62cbe15ef092681020627f14251edbf3627e0db2,2018-07-23 17:15:00 UTC,cool,hold,744,745,745,MI,Cannon,0,False,False,False,Gas
3,56deda1284d4b68f6c5ac985b0844d9d1a47215d,2018-07-13 18:55:00 UTC,cool,hold,701,715,715,MI,Birmingham,69,True,False,False,Gas
4,4f42eb9f3a4230bae518688a5980782835eb8fdb,2018-07-31 13:40:00 UTC,auto,hold,749,745,645,MI,Ypsilanti,20,False,False,False,Gas
6,aa5d7549b1daeabb5ea00ce2b7f630655223bcc5,2018-07-07 14:55:00 UTC,auto,hold,617,675,625,MI,Howell township,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2094962,c37ef53a70ea7f24125863e9ce131b6d5256c67e,2018-07-07 18:35:00 UTC,cool,auto,759,760,760,MI,Northville,70,False,False,False,Gas
2094963,e8dccbc90d3b2406105f0053afa8c944f1a83227,2018-07-08 11:15:00 UTC,cool,hold,722,760,760,MI,Dearborn Heights,66,True,False,False,Gas
2094964,c009654fed8938c72f180913400901dc98900ea3,2018-07-17 17:30:00 UTC,cool,auto,746,760,760,MI,Waterford,65,False,False,False,Gas
2094965,38ad425c93bdc72f7ca081eb9f2c0fd8b6312c2f,2018-07-29 17:05:00 UTC,cool,auto,764,790,760,MI,Troy,40,False,False,False,Gas


In [109]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jul_2018_ave.to_csv(path_or_buf="data/day/MI/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the July 2019 "day" CSV for MI into a DataFrame
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2019-jul-day-MI.csv")

In [114]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019.index[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,220afc4bb048df1839486e3734d214774ca5442f,2019-07-21 19:15:00 UTC,cool,auto,726,707,692,MI,Kalamazoo,5,False,False,False,Gas
1,f3741a55aa81ef51b0dfd5ec45f19052f8adaaff,2019-07-11 17:20:00 UTC,cool,hold,710,702,702,MI,romulus,40,False,False,False,Gas
2,b7784d1b7dcfba2d630a0cc2a70472b474cdfed3,2019-07-23 19:25:00 UTC,auto,hold,709,715,665,MI,Grawn,39,True,False,False,Gas
5,8025a4a9048595e3c40c6754126236ac3ae30c0c,2019-07-19 17:40:00 UTC,cool,hold,763,760,743,MI,Troy,39,True,False,False,Gas
6,77c6a6af0663014d661a100b37bd9d53f18ba61a,2019-07-03 19:20:00 UTC,cool,hold,738,757,757,MI,Grand Rapids,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3051505,0084e53f254c4a9c8c3cec4179a1cf2292643b6d,2019-07-26 13:10:00 UTC,cool,auto,732,760,760,MI,Clawson,69,True,False,False,Gas
3051506,acd7ad5210a22e5d2e851c4f6b12eb8c19d511e6,2019-07-18 15:50:00 UTC,cool,hold,756,760,760,MI,Trenton Area,40,True,False,False,Gas
3051507,fe1e1f12bdd5da20e797de74ba2b057fcb230520,2019-07-08 12:50:00 UTC,cool,auto,743,759,760,MI,Sterling Heights,20,False,False,False,Gas
3051508,8e23d5622bf822b585acefa95f9f31ac95e174df,2019-07-06 18:20:00 UTC,cool,hold,765,760,760,MI,Warren,60,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jul_2019_ave.to_csv(path_or_buf="data/day/MI/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the July 2020 "day" CSV for MI into a DataFrame
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2020-jul-day-MI.csv")

In [120]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020.index[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8480444f9cfb9e47a2b0545e57613ec21060e38e,2020-07-28 09:50:00 UTC,cool,hold,738,731,681,MI,Livonia,30,False,False,False,Gas
1,448a89d47a9fc16cf4cdb1e13b9722173efe43e5,2020-07-01 12:30:00 UTC,cool,auto,725,722,654,MI,Flat Rock,20,True,False,False,Gas
2,ff1cfe6dffc630d53081b6347a085fcea22730b3,2020-07-15 19:35:00 UTC,cool,auto,734,770,754,MI,Oshtemo,0,False,False,False,Gas
3,d96d5baea729a26b9e84ee96f27c2ecbb95b07cf,2020-07-24 13:00:00 UTC,cool,hold,720,739,739,MI,Warren,50,True,False,False,Gas
4,f7efda2a7187607d2cd6ed99aa706438677bec63,2020-07-23 11:25:00 UTC,auto,hold,751,731,681,MI,Redford Township,55,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3072932,465e9821a6b1744ac6e7af3673433c5fe3a7c67e,2020-07-30 18:30:00 UTC,cool,auto,806,830,760,MI,Wayland,5,False,False,False,Gas
3072933,9b8f7b7659cdc06623f0f8ca9efdd4e0409ecf1f,2020-07-27 14:10:00 UTC,cool,auto,736,760,760,MI,Livonia,57,False,False,False,Gas
3072934,8b503d13443d08e65b5bb10a970d6e6d50621509,2020-07-29 12:40:00 UTC,cool,hold,713,760,760,MI,Grand Rapids,9,True,False,False,Gas
3072935,33995573d71698b59e0214b3b03cbb97ece658f5,2020-07-03 17:35:00 UTC,cool,hold,758,760,760,MI,Grosse Pointe Park,90,False,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jul_2020_ave.to_csv(path_or_buf="data/day/MI/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the July 2021 "day" CSV for MI into a DataFrame
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/MI-day/2021-jul-day-MI.csv")

In [126]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021.index[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ],
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,dae3e6d6e7f732b2cef1e46fbbca7d64cd0989c4,2021-07-19 16:40:00 UTC,auto,hold,757,762,642,MI,Houghton Lake,40,False,False,False,Gas
3,210965fd4135d5c08122575de67df30f83485923,2021-07-29 14:25:00 UTC,auto,hold,682,685,635,MI,Ann Arbor,0,True,False,False,Gas
4,4765fb83b39e93472eeb91357e6bee9b94446278,2021-07-05 10:35:00 UTC,cool,hold,760,760,751,MI,Garden City,69,True,False,False,Gas
7,aec1f33983924632569a925fe8c996cd715b97bd,2021-07-12 16:10:00 UTC,cool,hold,717,738,738,MI,West Bloomfield,40,False,False,False,Gas
8,93370d47c9e26a1aaf92fe5e1dce059d15181602,2021-07-28 19:05:00 UTC,auto,hold,717,715,625,MI,Petersburg,18,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1721333,0f18b0477377db78240634a572ec161057548dee,2021-07-25 13:10:00 UTC,cool,hold,757,760,760,MI,Midland,30,False,False,False,Gas
1721334,244d9fa9aabf02063d5b62ec7cb75af767570882,2021-07-27 11:20:00 UTC,cool,hold,747,760,760,MI,Lawton,20,False,False,False,Gas
1721335,7a87a366dd788cd022edf011035c123fb89b698a,2021-07-07 17:50:00 UTC,cool,hold,764,760,760,MI,Grand Rapids,90,False,False,False,Gas
1721336,112ac04b07ee16dbf756b204f61472e91bd2bbf5,2021-07-29 16:15:00 UTC,cool,hold,755,760,760,MI,Ann Arbor,0,False,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group-by keys when the averages are computed.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values produced by the aggregation.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas 2.x raises a TypeError for them instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the averaged frame to disk; index=True keeps the group-by keys,
# which live in the index, as the leading columns of the CSV.
jul_2021_ave.to_csv(path_or_buf="data/day/MI/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder for the state
2. Export as combined CSV

In [131]:
# Collect the CSV files in the month's folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the combined frame reproducible between runs.
files = sorted(f for f in os.listdir("data/day/MI/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV listed in `files` and combine them with a single concat call;
# concatenating onto a growing frame inside a loop re-copies all accumulated
# rows on every pass.
monthly_frames = [pd.read_csv("data/day/MI/jul/" + file) for file in files]

# pd.concat rejects an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
MI_jul = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

MI_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00600479ce78c1cb4bfc08723e8887e839314ecc,jul,2017,cool,auto,hudsonville,694.842213,693.247951,694.774590,26.0,False,False,False
1,00600479ce78c1cb4bfc08723e8887e839314ecc,jul,2017,cool,hold,hudsonville,700.793333,697.866667,697.866667,26.0,False,False,False
2,00db9748e4d744f28a4c91a2efeb5b727f1a848b,jul,2017,cool,auto,Detroit,743.663366,779.564356,670.000000,120.0,False,False,False
3,00db9748e4d744f28a4c91a2efeb5b727f1a848b,jul,2017,cool,hold,Detroit,732.151515,728.040404,723.878788,120.0,False,False,False
4,02264938df7633eea3cb607151fc3ea38bdd0aa8,jul,2017,cool,hold,Dearborn,747.151515,747.340909,747.022727,75.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2193,ff88017cb252da4e0ea9e3a8e8448f633b98127d,jul,2021,auto,hold,Hudsonville,761.666667,750.000000,720.000000,5.0,False,False,False
2194,ff8f480b88d8cb7a6d77522c6130c3cfeb64e417,jul,2021,cool,hold,Walker,705.769784,701.985612,701.726619,0.0,False,False,False
2195,ffb40dac5e44e2063620351c2ca1b6b420a7de46,jul,2021,cool,hold,Northville,704.468085,692.531915,691.132979,15.0,True,False,False
2196,ffcb8d62daf5ec1d36f529d945707637f77d313b,jul,2021,auto,hold,Harbor Springs,737.536036,738.734234,678.918919,15.0,False,False,False


In [133]:
# Save the combined month without the row index (the header row is written by default).
MI_jul.to_csv("Scraper_Output/State_Month_Day/MI/MI_jul.csv", index=False)

---

## August

### 2017 August Day

In [134]:
# Load the raw August 2017 thermostat readings for MI.
aug_2017 = pd.read_csv(
    "../data_large/MI-day/2017-aug-day-MI.csv"
)

In [135]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
aug_2017.drop(
    aug_2017.index[
        (aug_2017["TemperatureExpectedCool"] >= 850)
        | (aug_2017["TemperatureExpectedHeat"] >= 850)
        | (aug_2017["TemperatureExpectedCool"] <= 600)
        | (aug_2017["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,85a33e63f22ad13072b976d9edcc0b297bfaac5c,2017-08-18 15:35:00 UTC,auto,hold,695,695,645,MI,Lyon,0,False,False,False,Gas
1,fba1c47e53560ed63bf052289a873cadee74eb92,2017-08-17 11:20:00 UTC,cool,hold,731,735,735,MI,Hartland,45,False,False,False,Gas
3,a285b2c4d7f889095661003fcfeba3b2edeec697,2017-08-02 18:00:00 UTC,auto,hold,741,775,615,MI,Waterford Township,37,False,False,False,Gas
4,78af5c64d258ffc2d3dfc53a5d4719f70ffb888a,2017-08-27 19:35:00 UTC,cool,hold,704,695,695,MI,Thomas,60,False,False,False,Gas
5,8165178c41d27d2564b34a87aa2152d48f703db1,2017-08-10 16:55:00 UTC,cool,hold,742,743,743,MI,Royal Oak,5,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917710,f445f5bc6411a49666397a60f77f5d6f56def4a5,2017-08-22 12:15:00 UTC,cool,hold,760,760,760,MI,Grand Rapids,20,False,False,False,Gas
917711,501efb738a1a5de796b1b7a04acf5d9e52a2f399,2017-08-26 14:05:00 UTC,cool,hold,705,760,760,MI,Cedar Springs,7,False,False,False,Gas
917712,0b4a5314ece6ec138cda3542410399bec551f25a,2017-08-21 12:55:00 UTC,cool,hold,725,760,760,MI,Wyoming,0,False,False,False,Gas
917713,f19031d1488425af8b3f593136d31622832206ba,2017-08-17 11:05:00 UTC,cool,hold,741,760,760,MI,Bingham,20,False,False,False,Gas


In [136]:
# Tag every reading with its year and month so both can serve as group keys.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
aug_2017 = aug_2017.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [138]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
aug_2017_ave = aug_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [139]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
aug_2017_ave.to_csv("data/day/MI/aug/aug_2017_ave.csv")

### 2018 August Day

In [140]:
# Load the raw August 2018 thermostat readings for MI.
aug_2018 = pd.read_csv(
    "../data_large/MI-day/2018-aug-day-MI.csv"
)

In [141]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
aug_2018.drop(
    aug_2018.index[
        (aug_2018["TemperatureExpectedCool"] >= 850)
        | (aug_2018["TemperatureExpectedHeat"] >= 850)
        | (aug_2018["TemperatureExpectedCool"] <= 600)
        | (aug_2018["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d9ec71c8086759c7e1fb3dab3e49bd318dff9e42,2018-08-27 12:55:00 UTC,cool,hold,691,685,685,MI,Plymouth,0,False,False,False,Gas
1,ba12f750672118dc7abd84da561b4360919e0697,2018-08-16 14:30:00 UTC,heat,hold,801,788,788,MI,Ann Arbor,20,False,False,False,Gas
4,bd76c90318e35fb747ab6b0f3558baccd5dc32f2,2018-08-18 09:50:00 UTC,cool,hold,742,765,705,MI,Ada,0,False,False,False,Gas
5,b3784461b992f775be4551c5a721a03a7ad30583,2018-08-12 12:55:00 UTC,auto,hold,720,735,655,MI,Byron Center,5,False,False,False,Gas
6,582d580817e19a836ef0177deae833ba57725ffd,2018-08-25 15:05:00 UTC,auto,hold,712,708,658,MI,Kentwood,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2177672,e990556ad4ad23f37ec5a27e1f16aefb60ed5ae4,2018-08-18 15:25:00 UTC,cool,auto,757,760,760,MI,Allen Park,60,False,False,False,Gas
2177673,f767c803b38a72361e7557fc0cb19bc8c8e00105,2018-08-01 14:15:00 UTC,cool,hold,766,760,760,MI,Elmwood,50,False,False,False,Gas
2177674,baacb6edae9fefeb4d84116b893edc9773c719e0,2018-08-12 12:35:00 UTC,cool,hold,704,760,760,MI,Hartland,30,False,False,False,Gas
2177675,b5f99434fb8f0236de1066eba0231cf094af394e,2018-08-20 19:45:00 UTC,cool,hold,757,760,760,MI,Grand Rapids,0,False,False,False,Gas


In [142]:
# Tag every reading with its year and month so both can serve as group keys.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
aug_2018 = aug_2018.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [144]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
aug_2018_ave = aug_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [145]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
aug_2018_ave.to_csv("data/day/MI/aug/aug_2018_ave.csv")

### 2019 August Day

In [146]:
# Load the raw August 2019 thermostat readings for MI.
aug_2019 = pd.read_csv(
    "../data_large/MI-day/2019-aug-day-MI.csv"
)

In [147]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
aug_2019.drop(
    aug_2019.index[
        (aug_2019["TemperatureExpectedCool"] >= 850)
        | (aug_2019["TemperatureExpectedHeat"] >= 850)
        | (aug_2019["TemperatureExpectedCool"] <= 600)
        | (aug_2019["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,75e624a731bbb495aad176d3e77e2cdf5e8b73ed,2019-08-13 15:55:00 UTC,auto,hold,767,765,675,MI,Oak Park,60,False,False,False,Gas
2,e640eb72ae373cba63cadf24f48291f799dbfd5c,2019-08-10 08:30:00 UTC,cool,hold,660,650,657,MI,Grand Rapids MI,20,False,False,False,Gas
3,fc9e0f4cdbf97e2ff322d8cdd20bfb8f5b730dfa,2019-08-04 16:25:00 UTC,cool,hold,703,721,721,MI,Grand Rapids,10,False,False,False,Gas
4,3101f961b5c67c33485a2b6e45d8b59c8ee7ee88,2019-08-31 12:55:00 UTC,cool,hold,720,742,742,MI,New Baltimore,20,False,False,False,Gas
6,8a970eb552215627cc000fb4eb3e8e2280292d37,2019-08-01 16:00:00 UTC,auto,hold,694,701,641,MI,Harrison Township,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2733327,44ad2bac629f948fc401ca9edd775c352c625f8c,2019-08-11 19:40:00 UTC,cool,hold,766,760,760,MI,Grand Rapids,10,False,False,False,Gas
2733328,a78d52e1d90bbaa71bff1a11790e19965c80f27a,2019-08-19 12:50:00 UTC,cool,hold,761,760,760,MI,Jenison,30,False,False,False,Gas
2733329,ad2a5f5374b1656db2489be7e8bd0a86e9a1ee11,2019-08-22 14:35:00 UTC,cool,hold,710,760,760,MI,Wyoming,20,False,False,False,Gas
2733330,7e6efdb1e3af44d91944cad67b45e153e99cc017,2019-08-19 15:00:00 UTC,cool,auto,755,760,760,MI,Dearborn,80,False,False,False,Gas


In [148]:
# Tag every reading with its year and month so both can serve as group keys.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
aug_2019 = aug_2019.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [150]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
aug_2019_ave = aug_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [151]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
aug_2019_ave.to_csv("data/day/MI/aug/aug_2019_ave.csv")

### 2020 August Day

In [152]:
# Load the raw August 2020 thermostat readings for MI.
aug_2020 = pd.read_csv(
    "../data_large/MI-day/2020-aug-day-MI.csv"
)

In [153]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
aug_2020.drop(
    aug_2020.index[
        (aug_2020["TemperatureExpectedCool"] >= 850)
        | (aug_2020["TemperatureExpectedHeat"] >= 850)
        | (aug_2020["TemperatureExpectedCool"] <= 600)
        | (aug_2020["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f5446a363ed73ce30bf26f9fcaffae5ff0d078fd,2020-08-06 16:50:00 UTC,cool,auto,735,754,754,MI,Jenison,9,True,False,False,Gas
1,fd202e1c334977b9f0a4415c99c1eb7fc96d966b,2020-08-21 13:05:00 UTC,cool,auto,707,800,721,MI,Ada,10,False,False,False,Gas
2,f7efda2a7187607d2cd6ed99aa706438677bec63,2020-08-09 15:55:00 UTC,cool,auto,766,780,752,MI,Redford Township,55,True,False,False,Gas
3,fd202e1c334977b9f0a4415c99c1eb7fc96d966b,2020-08-05 15:20:00 UTC,cool,auto,704,830,721,MI,Ada,10,False,False,False,Gas
4,9f358ee0e11a22350785ab6e22c690d5eef3a06f,2020-08-10 11:00:00 UTC,cool,hold,754,752,752,MI,Flushing,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2718860,81d17e4a34f0adcb4bef888be5be810e7e92d224,2020-08-22 15:45:00 UTC,cool,hold,753,760,760,MI,Wyoming,40,True,False,False,Gas
2718861,62e3a1f2d52900c1126892e6e1e2002d2880f7d5,2020-08-14 17:55:00 UTC,cool,auto,767,760,760,MI,Oakland,30,False,False,False,Gas
2718862,114832ad4b720cbc8ca541dfc2016a9d9b356272,2020-08-25 17:30:00 UTC,cool,hold,765,760,760,MI,Huntington Woods,0,False,False,False,Gas
2718863,0b4a5314ece6ec138cda3542410399bec551f25a,2020-08-30 18:20:00 UTC,cool,hold,727,760,760,MI,Wyoming,0,False,False,False,Gas


In [154]:
# Tag every reading with its year and month so both can serve as group keys.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
aug_2020 = aug_2020.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [156]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
aug_2020_ave = aug_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [157]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
aug_2020_ave.to_csv("data/day/MI/aug/aug_2020_ave.csv")

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder
2. Export as combined CSV

In [158]:
# List the per-year CSV files for the month. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/MI/aug/") if f.endswith(".csv"))

In [159]:
# Read every per-year CSV for the month and stack them with a single concat call.
# Concatenating once avoids re-copying the accumulated frame on every iteration and
# avoids concatenating onto an empty placeholder DataFrame.
year_frames = [pd.read_csv("data/day/MI/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
MI_aug = pd.concat(year_frames) if year_frames else pd.DataFrame()

MI_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00600479ce78c1cb4bfc08723e8887e839314ecc,aug,2017,auto,auto,hudsonville,708.625000,680.000000,630.000000,26.0,False,False,False
1,00600479ce78c1cb4bfc08723e8887e839314ecc,aug,2017,cool,auto,hudsonville,692.083333,670.000000,680.000000,26.0,False,False,False
2,00600479ce78c1cb4bfc08723e8887e839314ecc,aug,2017,cool,hold,hudsonville,686.888889,675.000000,675.000000,26.0,False,False,False
3,01702af22d0cb064c81f7d18afde95cff8df32b2,aug,2017,auto,auto,South Lyon,764.013605,796.176871,746.122449,5.0,False,False,False
4,01702af22d0cb064c81f7d18afde95cff8df32b2,aug,2017,auto,hold,South Lyon,746.287671,764.794521,702.691781,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3814,ffcb8d62daf5ec1d36f529d945707637f77d313b,aug,2020,auto,auto,Harbor Springs,742.481481,743.437037,690.000000,15.0,False,False,False
3815,ffcbc545479ab87baa3bbe5a2f2fa7b1dd2f89e9,aug,2020,cool,auto,Grand Rapids,746.720379,779.369668,737.545024,60.0,False,False,False
3816,ffcbc545479ab87baa3bbe5a2f2fa7b1dd2f89e9,aug,2020,cool,hold,Grand Rapids,745.985075,742.753731,742.753731,60.0,False,False,False
3817,ffd1ec21cc18ac0f56b8d450684031791e00ad9d,aug,2020,cool,auto,Canton,703.662382,700.000000,700.000000,0.0,True,False,False


In [160]:
# Save the combined month without the row index (the header row is written by default).
MI_aug.to_csv("Scraper_Output/State_Month_Day/MI/MI_aug.csv", index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw December 2017 thermostat readings for MI.
dec_2017 = pd.read_csv(
    "../data_large/MI-day/2017-dec-day-MI.csv"
)

In [162]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
dec_2017.drop(
    dec_2017.index[
        (dec_2017["TemperatureExpectedCool"] >= 850)
        | (dec_2017["TemperatureExpectedHeat"] >= 850)
        | (dec_2017["TemperatureExpectedCool"] <= 600)
        | (dec_2017["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1c4195c504c866ee319a0957de7b34cbc1664ac8,2017-12-29 19:05:00 UTC,heat,hold,693,695,695,MI,Macomb,17,False,False,False,Gas
1,c61a894a54074d077844fbcacec3b8ef8c9cb10b,2017-12-26 13:00:00 UTC,auto,hold,734,785,735,MI,Livonia,67,True,False,False,Gas
2,c61a894a54074d077844fbcacec3b8ef8c9cb10b,2017-12-06 13:35:00 UTC,auto,hold,750,775,715,MI,Livonia,67,True,False,False,Gas
3,03ef35a1aeba7059dc8dad8dc6475c3b111d97ee,2017-12-19 14:50:00 UTC,heat,auto,721,737,737,MI,Oxford,15,False,False,False,Gas
4,8fa1754b6d8043887f8fbfe02490050cb1290d16,2017-12-09 16:25:00 UTC,heat,hold,681,655,655,MI,Hudsonville,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1414971,a39d10eedf20298e10f47311c22554b509caef5a,2017-12-08 13:35:00 UTC,auto,hold,693,760,700,MI,Addison Township,30,False,False,False,Gas
1414972,f141c7f03607b77bd51da0ca53b03abfa8b4d5df,2017-12-31 15:35:00 UTC,auto,hold,658,760,650,MI,Canton,15,False,False,False,Gas
1414973,6fb25d113e810e31f287bd15e2ec76a9d7eaf27e,2017-12-06 18:55:00 UTC,heat,auto,713,760,710,MI,,0,True,False,False,Gas
1414974,6fb25d113e810e31f287bd15e2ec76a9d7eaf27e,2017-12-31 14:45:00 UTC,heat,auto,702,760,710,MI,,0,True,False,False,Gas


In [163]:
# Tag every reading with its year and month so both can serve as group keys.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
dec_2017 = dec_2017.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [165]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City (some are visible in this month's data) do not appear in the result -
# confirm that this is intended.
dec_2017_ave = dec_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [166]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
dec_2017_ave.to_csv("data/day/MI/dec/dec_2017_ave.csv")

### 2018 December Day

In [167]:
# Load the raw December 2018 thermostat readings for MI.
dec_2018 = pd.read_csv(
    "../data_large/MI-day/2018-dec-day-MI.csv"
)

In [168]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
dec_2018.drop(
    dec_2018.index[
        (dec_2018["TemperatureExpectedCool"] >= 850)
        | (dec_2018["TemperatureExpectedHeat"] >= 850)
        | (dec_2018["TemperatureExpectedCool"] <= 600)
        | (dec_2018["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,caacb22137823a48c3554c038d1614fd2587ca74,2018-12-28 13:45:00 UTC,heat,auto,637,738,644,MI,Grand Rapids,68,False,False,False,Gas
1,5284b11f3af6ebeb331d19448109bea5bfa7c404,2018-12-28 11:50:00 UTC,heat,auto,641,630,630,MI,Brighton township,5,False,False,False,Gas
2,50e54cee9aa115584bc0799fe54fddb45983f054,2018-12-21 13:30:00 UTC,heat,auto,656,840,630,MI,Novi,5,False,False,False,Gas
4,68710e0e823e795f1f2a65fc6d2902268ca19e17,2018-12-19 17:35:00 UTC,heat,hold,689,684,684,MI,Saginaw,0,True,False,False,Gas
5,fa7f070c88296c5ce801ddccb2774dcb617364da,2018-12-21 14:05:00 UTC,heat,hold,673,675,675,MI,Traverse City,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2457183,2654b57012487133083e84d9f64ae23299c7c333,2018-12-22 10:50:00 UTC,heat,auto,752,760,750,MI,Sterling Heights,0,False,False,False,Gas
2457184,e62b1be593168f06b66a0ca6cff664a226fd94c3,2018-12-14 17:30:00 UTC,auto,auto,684,760,640,MI,Riverview,0,False,False,False,Gas
2457185,d59959495532c055b437600e1ee40d7b50513053,2018-12-05 14:10:00 UTC,auto,auto,690,760,690,MI,Kalamazoo,30,False,False,False,Gas
2457186,8f332eaa5c70adb82193c0bd911a966f1c4b4911,2018-12-24 11:35:00 UTC,heat,auto,676,760,680,MI,Ypsilanti Township,45,False,False,False,Gas


In [169]:
# Tag every reading with its year and month so both can serve as group keys.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
dec_2018 = dec_2018.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [171]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
dec_2018_ave = dec_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [172]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
dec_2018_ave.to_csv("data/day/MI/dec/dec_2018_ave.csv")

### 2019 December Day

In [173]:
# Load the raw December 2019 thermostat readings for MI.
dec_2019 = pd.read_csv(
    "../data_large/MI-day/2019-dec-day-MI.csv"
)

In [174]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
dec_2019.drop(
    dec_2019.index[
        (dec_2019["TemperatureExpectedCool"] >= 850)
        | (dec_2019["TemperatureExpectedHeat"] >= 850)
        | (dec_2019["TemperatureExpectedCool"] <= 600)
        | (dec_2019["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,12e8e54b801604e1fecdaeec6ff90ddf26932f90,2019-12-02 17:35:00 UTC,auto,auto,730,840,730,MI,Grosse Pointe Woods,80,False,False,False,Gas
1,3121086142ddf80ddcb666fb3a64d242b5101c59,2019-12-29 14:55:00 UTC,heat,hold,681,685,685,MI,Lake Orion,20,False,False,False,Gas
2,c5e9cde7a4d9871bc69adf9b44bfd9e709a4d9ab,2019-12-04 17:50:00 UTC,auto,auto,698,840,700,MI,Washington,30,False,False,False,Gas
3,40332b343ba060ef151cce414397141b5ec0bf10,2019-12-03 19:20:00 UTC,heat,hold,715,716,716,MI,Wyoming,7,False,False,False,Gas
4,f3abacb55285d371704bafd9518a702abfd59076,2019-12-15 10:45:00 UTC,heat,auto,722,810,730,MI,Shelby Township,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2889597,09c1b102fde49c22cfc359b090c906e2832481ce,2019-12-13 15:00:00 UTC,auto,hold,674,760,680,MI,Southgate,60,False,False,False,Gas
2889598,5e07058774553efea36ff6ab6546f886ee65e824,2019-12-30 12:10:00 UTC,heat,hold,768,760,760,MI,White Cloud,40,False,False,False,Gas
2889599,080e61a68e9eac0d8a14d7342e6718635b15d81a,2019-12-31 19:05:00 UTC,auto,hold,708,760,710,MI,Hudsonville,0,False,False,False,Gas
2889600,b64b904a9f1f1aeb266010a30b6cb4b0bf7a9163,2019-12-06 16:45:00 UTC,heat,auto,756,760,760,MI,New Hudson,0,False,False,False,Gas


In [175]:
# Tag every reading with its year and month so both can serve as group keys.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
dec_2019 = dec_2019.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [177]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
dec_2019_ave = dec_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [178]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
dec_2019_ave.to_csv("data/day/MI/dec/dec_2019_ave.csv")

### 2020 December Day

In [179]:
# Load the raw December 2020 thermostat readings for MI.
dec_2020 = pd.read_csv(
    "../data_large/MI-day/2020-dec-day-MI.csv"
)

In [180]:
# Remove predetermined outliers before aggregating: drop every reading whose
# expected cool or expected heat setpoint is >= 850 or <= 600.
dec_2020.drop(
    dec_2020.index[
        (dec_2020["TemperatureExpectedCool"] >= 850)
        | (dec_2020["TemperatureExpectedHeat"] >= 850)
        | (dec_2020["TemperatureExpectedCool"] <= 600)
        | (dec_2020["TemperatureExpectedHeat"] <= 600)
    ],
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,210965fd4135d5c08122575de67df30f83485923,2020-12-03 19:05:00 UTC,auto,hold,691,752,692,MI,Ann Arbor,0,True,False,False,Gas
1,0cb9217b4fd591d1312abce19668626ff2538dee,2020-12-09 18:45:00 UTC,heat,hold,731,727,727,MI,New Hudson,5,False,False,False,Gas
2,7769268b73d63dcbfa447867fb0790190d0b5810,2020-12-20 18:15:00 UTC,auto,hold,724,775,725,MI,Royal Oak,79,False,False,False,Gas
3,910d57a2753bb5bd7c26ace2d78539bb1627985d,2020-12-04 13:55:00 UTC,heat,hold,670,675,675,MI,Flat Rock Area,25,False,False,False,Gas
4,62188f6092cf33be122b3cad17c2859f282330eb,2020-12-06 18:40:00 UTC,auto,hold,735,830,780,MI,Charlotte,88,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2503803,997ca47e040024abe24b42d3a7cb889c7080834c,2020-12-08 18:55:00 UTC,auto,hold,696,765,695,MI,Southgate,0,True,False,False,Gas
2503804,3704ca014cc30ac767eb11ce477ecd6c8c273abc,2020-12-16 13:45:00 UTC,auto,hold,698,765,700,MI,Saint Clair Shores,25,False,False,False,Gas
2503805,3704ca014cc30ac767eb11ce477ecd6c8c273abc,2020-12-04 17:15:00 UTC,auto,hold,717,765,715,MI,Saint Clair Shores,25,False,False,False,Gas
2503806,df5ed9b35ea434b2d40894c51ef1c21447b22056,2020-12-07 19:20:00 UTC,auto,hold,710,765,715,MI,Traverse City,30,True,False,False,Gas


In [181]:
# Tag every reading with its year and month so both can serve as group keys.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step.
dec_2020 = dec_2020.rename(
    columns={
        column: f"{column}_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [183]:
# Average the numeric readings for each thermostat / month / year / mode / event / city group.
# numeric_only=True keeps the text columns that are not group keys (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a
# TypeError on them instead of silently dropping them.
# NOTE(review): groupby excludes rows whose key is NaN by default, so readings with a
# missing City do not appear in the result - confirm that this is intended.
dec_2020_ave = dec_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [184]:
# Write the averages to disk. The group keys live in the index, so the index is
# kept in the file (to_csv writes both header and index by default).
dec_2020_ave.to_csv("data/day/MI/dec/dec_2020_ave.csv")

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder
2. Export as combined CSV

In [185]:
# List the per-year CSV files for the month. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/MI/dec/") if f.endswith(".csv"))

In [186]:
# Read every per-year CSV for the month and stack them with a single concat call.
# Concatenating once avoids re-copying the accumulated frame on every iteration and
# avoids concatenating onto an empty placeholder DataFrame.
year_frames = [pd.read_csv("data/day/MI/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
MI_dec = pd.concat(year_frames) if year_frames else pd.DataFrame()

MI_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,001d65da87242dc0b8bb7013ec8efe01c813e750,dec,2017,heat,auto,Lake Orion,670.480769,694.086538,673.990385,57.0,False,False,False
1,001d65da87242dc0b8bb7013ec8efe01c813e750,dec,2017,heat,hold,Lake Orion,682.444444,684.777778,684.000000,57.0,False,False,False
2,005ed661c2e61c794259a9ea59fa635198f73779,dec,2017,heat,hold,Gerrish Township,671.733877,676.360638,675.755004,0.0,False,False,False
3,00600479ce78c1cb4bfc08723e8887e839314ecc,dec,2017,auto,auto,hudsonville,697.177945,781.604010,697.333333,26.0,False,False,False
4,0118fe69328c51c2015c2ad424735592d918ed94,dec,2017,heat,hold,Detroit,691.731844,695.458101,694.441341,100.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3515,ffb40dac5e44e2063620351c2ca1b6b420a7de46,dec,2020,heat,hold,Northville,683.914000,689.280000,688.120000,15.0,True,False,False
3516,ffcb8d62daf5ec1d36f529d945707637f77d313b,dec,2020,heat,hold,Harbor Springs,678.926829,680.329268,679.987805,15.0,False,False,False
3517,ffcbc545479ab87baa3bbe5a2f2fa7b1dd2f89e9,dec,2020,heat,auto,Grand Rapids,703.583333,709.283333,709.283333,60.0,False,False,False
3518,ffcbc545479ab87baa3bbe5a2f2fa7b1dd2f89e9,dec,2020,heat,hold,Grand Rapids,694.827273,692.000000,692.000000,60.0,False,False,False


In [187]:
# Save the combined month without the row index (the header row is written by default).
MI_dec.to_csv("Scraper_Output/State_Month_Day/MI/MI_dec.csv", index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files from the state's folder
2. Export as combined CSV

In [188]:
# List the combined monthly CSV files for the state. os.listdir returns entries in
# arbitrary order, so the names are sorted to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/MI/") if f.endswith(".csv"))

In [189]:
# Read every combined monthly CSV for the state and stack them with a single concat call.
# Concatenating once avoids re-copying the accumulated frame on every iteration and
# avoids concatenating onto an empty placeholder DataFrame.
month_frames = [pd.read_csv("Scraper_Output/State_Month_Day/MI/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
MI_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

MI_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00600479ce78c1cb4bfc08723e8887e839314ecc,aug,2017,auto,auto,hudsonville,708.625000,680.000000,630.000000,26.0,False,False,False
1,00600479ce78c1cb4bfc08723e8887e839314ecc,aug,2017,cool,auto,hudsonville,692.083333,670.000000,680.000000,26.0,False,False,False
2,00600479ce78c1cb4bfc08723e8887e839314ecc,aug,2017,cool,hold,hudsonville,686.888889,675.000000,675.000000,26.0,False,False,False
3,01702af22d0cb064c81f7d18afde95cff8df32b2,aug,2017,auto,auto,South Lyon,764.013605,796.176871,746.122449,5.0,False,False,False
4,01702af22d0cb064c81f7d18afde95cff8df32b2,aug,2017,auto,hold,South Lyon,746.287671,764.794521,702.691781,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16490,ff88017cb252da4e0ea9e3a8e8448f633b98127d,jun,2021,auto,hold,Hudsonville,767.491803,757.131148,702.327869,5.0,False,False,False
16491,ff88017cb252da4e0ea9e3a8e8448f633b98127d,jun,2021,cool,hold,Hudsonville,777.083333,770.000000,770.000000,5.0,False,False,False
16492,ff8f480b88d8cb7a6d77522c6130c3cfeb64e417,jun,2021,cool,hold,Walker,731.517544,736.315789,736.315789,0.0,False,False,False
16493,ffb40dac5e44e2063620351c2ca1b6b420a7de46,jun,2021,cool,hold,Northville,719.800000,707.809524,701.380952,15.0,True,False,False


In [190]:
# Save the state-wide table without the row index (the header row is written by default).
MI_all.to_csv("Scraper_Output/State_Month_Day/MI_all_day.csv", index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
monthly_frames = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# One line per frame, in the same order as the mapping above.
print(
    "\n".join(
        f"Unique {label}: {frame['ProvinceState'].unique()}"
        for label, frame in monthly_frames.items()
    )
)

Unique jan_2017: ['MI']
Unique jan_2018: ['MI']
Unique jan_2019: ['MI']
Unique jan_2020: ['MI']
Unique jan_2021: ['MI']
Unique feb_2017: ['MI']
Unique feb_2018: ['MI']
Unique feb_2019: ['MI']
Unique feb_2020: ['MI']
Unique feb_2021: ['MI']
Unique jun_2017: ['MI']
Unique jun_2018: ['MI']
Unique jun_2019: ['MI']
Unique jun_2020: ['MI']
Unique jun_2021: ['MI']
Unique jul_2017: ['MI']
Unique jul_2018: ['MI']
Unique jul_2019: ['MI']
Unique jul_2020: ['MI']
Unique jul_2021: ['MI']
Unique aug_2017: ['MI']
Unique aug_2018: ['MI']
Unique aug_2019: ['MI']
Unique aug_2020: ['MI']
Unique dec_2017: ['MI']
Unique dec_2018: ['MI']
Unique dec_2019: ['MI']
Unique dec_2020: ['MI']
