# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 4 years

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 Illinois day readings from the state/month CSV export.
jan_2017_csv = "../data_large/IL-day/2017-jan-day-IL.csv"
jan_2017 = pd.read_csv(jan_2017_csv)

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = jan_2017['TemperatureExpectedCool']
heat_setpoint = jan_2017['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
jan_2017.drop(index=jan_2017.loc[too_high | too_low].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8782a219c6b750ac84a31b2113f75ed3eab5a0a4,2017-01-04 11:25:00 UTC,auto,auto,718,765,715,IL,Gurnee,35,False,False,False,Gas
1,e44bf90c04b68320279d555643d8b525918a43c1,2017-01-20 17:35:00 UTC,heat,hold,690,691,691,IL,Chicago,60,False,False,False,Gas
2,c3cd16178379c210bd60f6365499326440668e91,2017-01-02 19:10:00 UTC,heat,hold,718,709,709,IL,Algonquin,20,False,False,False,Gas
3,83468ce3fccb3920cf3e4f1c86f5310313476275,2017-01-17 15:25:00 UTC,auto,auto,733,791,741,IL,Addison,0,False,False,False,Gas
4,e44bf90c04b68320279d555643d8b525918a43c1,2017-01-23 16:50:00 UTC,heat,hold,687,691,691,IL,Chicago,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
946992,878bbd036b547c40e032c99d6ad05a0f7d56afde,2017-01-17 16:00:00 UTC,heat,auto,758,760,760,IL,Chicago,10,False,False,False,Gas
946993,e35687bfbdf0ecc9d45b26ae7bf231bdb80dc97a,2017-01-08 18:20:00 UTC,heat,auto,666,760,670,IL,Bloomington,25,False,False,False,Gas
946994,36b54cb4cb45212a4681284b03abf4570662d876,2017-01-18 10:10:00 UTC,heat,auto,750,760,760,IL,Chicago,0,False,False,False,Gas
946995,0c089cbd03ce349580a1d82ff5537dd9160b6f64,2017-01-08 17:05:00 UTC,auto,auto,691,760,690,IL,Champaign,0,False,False,False,Gas


In [4]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2017 = jan_2017.rename(columns=ave_names)

In [6]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0010ea447dc3807b33daad2477d1cc02a71571fa,Jan,2017,auto,auto,Chicago,698.972222,770.000000,700.000000,5.0,False,False,False
0010ea447dc3807b33daad2477d1cc02a71571fa,Jan,2017,auto,hold,Chicago,707.190476,770.000000,708.571429,5.0,False,False,False
00726ed8e6a1f4f9bd85c248752de358dc529426,Jan,2017,auto,auto,Chicago,637.527778,820.000000,620.000000,100.0,False,False,False
00ab82671ae4437af38aaacbab99cc76c30a8568,Jan,2017,auto,auto,Troy,698.419375,780.000000,700.000000,35.0,False,False,False
00ab82671ae4437af38aaacbab99cc76c30a8568,Jan,2017,heat,auto,Troy,689.633333,708.000000,691.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fee0c3a994497357a0cee4fa4617cda25ec1d8de,Jan,2017,auto,hold,Hoffman Estates,645.333333,730.000000,680.000000,40.0,False,False,False
ff55378debba06993749cdcb2c444de0a1de2479,Jan,2017,auto,auto,Vernon Hills,645.000000,797.000000,655.000000,30.0,False,False,False
ff7df2b69d038dfeba3de02a403664f1460aba96,Jan,2017,heat,auto,Chicago,706.642857,710.000000,710.000000,95.0,False,False,False
ff9c4b83c127c974334826057c0eca1ad0bf5448,Jan,2017,heat,auto,Hanover Park,707.271955,671.722380,671.722380,35.0,False,False,False


In [7]:
# Write the January 2017 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
jan_2017_out = "data/day/IL/jan/jan_2017_ave.csv"
jan_2017_ave.to_csv(jan_2017_out, index=True, header=True)

### 2018 January Day

In [8]:
# Load the January 2018 Illinois day readings from the state/month CSV export.
jan_2018_csv = "../data_large/IL-day/2018-jan-day-IL.csv"
jan_2018 = pd.read_csv(jan_2018_csv)

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = jan_2018['TemperatureExpectedCool']
heat_setpoint = jan_2018['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
jan_2018.drop(index=jan_2018.loc[too_high | too_low].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d89780a5c83043d64599896b93bdbf94ee31c16a,2018-01-13 19:50:00 UTC,heat,hold,713,705,705,IL,Des Plaines,20,False,False,False,Gas
1,4a959d597285199239b6d7f5427c03ab4c441933,2018-01-25 13:30:00 UTC,heat,hold,673,675,675,IL,Deerfield,0,False,False,False,Gas
2,b8407f145e5a7904fcfbaf0018c57e44b5509552,2018-01-24 18:45:00 UTC,heat,hold,730,729,729,IL,Darien,30,False,False,False,Gas
5,1bc710491d09a5af667f546e8581ac39240ef884,2018-01-20 16:35:00 UTC,auto,hold,749,840,730,IL,chicago,10,False,False,False,Gas
6,e063ba03fda205e19ddd0a9114836a586f1bee5d,2018-01-10 15:35:00 UTC,auto,hold,680,830,680,IL,Bolingbrook,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2643117,75ba8363612804cf52ba1dbff9b25bf52ad1f98b,2018-01-27 17:25:00 UTC,auto,hold,690,765,675,IL,Gurnee,35,False,False,False,Gas
2643118,75ba8363612804cf52ba1dbff9b25bf52ad1f98b,2018-01-23 14:20:00 UTC,auto,auto,711,765,715,IL,Gurnee,35,False,False,False,Gas
2643119,590f51ca3f5caf0589d1d7dc1f2dde23e3a9995b,2018-01-05 12:30:00 UTC,auto,hold,678,765,695,IL,Montgomery,5,True,False,False,Gas
2643120,9359c750ee5e504f7b014e9f88da45656eb68c12,2018-01-03 18:35:00 UTC,auto,hold,716,765,715,IL,Chicago,9,True,False,False,Gas


In [10]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2018 = jan_2018.rename(columns=ave_names)

In [12]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Write the January 2018 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
jan_2018_out = "data/day/IL/jan/jan_2018_ave.csv"
jan_2018_ave.to_csv(jan_2018_out, index=True, header=True)

### 2019 January Day

In [14]:
# Load the January 2019 Illinois day readings from the state/month CSV export.
jan_2019_csv = "../data_large/IL-day/2019-jan-day-IL.csv"
jan_2019 = pd.read_csv(jan_2019_csv)

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = jan_2019['TemperatureExpectedCool']
heat_setpoint = jan_2019['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
jan_2019.drop(index=jan_2019.loc[too_high | too_low].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,67ba845d02650285ef1f84238c754d79d850f54f,2019-01-03 19:45:00 UTC,heat,hold,641,650,640,IL,Wilmette,115,False,False,False,Gas
1,42d2e856b058ab2afa1d769ccddf26901cc0798e,2019-01-26 18:55:00 UTC,heat,auto,688,720,690,IL,Barrington,50,False,False,False,Gas
2,94a582b4f7c4a62c61c58d37dcd8cadd44cde39f,2019-01-07 12:55:00 UTC,heat,hold,687,690,690,IL,Peoria,60,False,False,False,Gas
3,8f83cd22a12c04b48296f37c01a3f8a0fbcc0feb,2019-01-14 16:35:00 UTC,auto,auto,748,810,750,IL,Naperville,20,False,False,False,Gas
4,1fe528b3c52d229ccb90f237bb3ca379a1354fa2,2019-01-18 16:05:00 UTC,heat,hold,721,720,720,IL,Lincolnwood,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4127869,43a47491018bd78a91bc5db60d8e16b0c8ed6601,2019-01-30 14:35:00 UTC,heat,auto,710,720,710,IL,Schaumburg,35,False,False,False,Gas
4127870,61e3025aaf7fec9374b2f1af1571440b32cd4867,2019-01-12 14:40:00 UTC,auto,auto,638,800,640,IL,Roscoe,10,False,False,False,Gas
4127871,c4ab7635072e027975ad164bb84ab879ab292352,2019-01-18 12:50:00 UTC,heat,auto,690,820,700,IL,Orland Park,25,True,False,False,Gas
4127872,f1a486660a1a8aa1c3a61deccf092ccd4089dfd0,2019-01-01 16:10:00 UTC,heat,auto,725,720,720,IL,Brookfield,80,False,False,False,Gas


In [16]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2019 = jan_2019.rename(columns=ave_names)

In [18]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Write the January 2019 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
jan_2019_out = "data/day/IL/jan/jan_2019_ave.csv"
jan_2019_ave.to_csv(jan_2019_out, index=True, header=True)

### 2020 January Day

In [20]:
# Load the January 2020 Illinois day readings from the state/month CSV export.
jan_2020_csv = "../data_large/IL-day/2020-jan-day-IL.csv"
jan_2020 = pd.read_csv(jan_2020_csv)

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = jan_2020['TemperatureExpectedCool']
heat_setpoint = jan_2020['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
jan_2020.drop(index=jan_2020.loc[too_high | too_low].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,df07ec72285cb3e29b4f80bc1e573c0dd4e21fda,2020-01-06 08:20:00 UTC,heat,auto,730,730,730,IL,Chicago,0,False,False,False,Gas
1,baeeb7d86514c5a0d1c8d1991861986428d2b908,2020-01-08 17:15:00 UTC,auto,hold,692,770,690,IL,Hanover Park,50,False,False,False,Gas
2,db8f6884aa79663a55414ca6c7ed43c2f1ae925c,2020-01-20 18:30:00 UTC,auto,auto,682,740,690,IL,New Lenox,70,False,False,False,Gas
3,e2fb73e174bc810e56b466b96b56e6b27c51ec29,2020-01-22 15:15:00 UTC,heat,auto,683,690,690,IL,Chicago,0,False,False,False,Gas
4,04d864a2de1141617be25acf17e45229845f86ad,2020-01-01 13:00:00 UTC,heat,hold,718,720,720,IL,Chicago,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4327169,e44ea136ea7ebe9e3e7304e5d042e2d5fb4fff2a,2020-01-30 14:40:00 UTC,heat,hold,685,700,700,IL,Bartlett,30,False,False,False,Gas
4327170,8aade8ed079757e77fc0332cbec2413eedb502bc,2020-01-05 18:30:00 UTC,heat,hold,649,650,650,IL,Parkersburg,0,False,False,False,Gas
4327171,9947f17e13ac084b4fbafe16597156d094b30a41,2020-01-05 17:55:00 UTC,heat,auto,716,760,720,IL,Chicago,0,False,False,False,Gas
4327172,467dcb359d36ae233794f820d962abf0faa3a609,2020-01-21 15:00:00 UTC,heat,hold,704,710,710,IL,Chicago,55,False,False,False,Gas


In [22]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2020 = jan_2020.rename(columns=ave_names)

In [24]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Write the January 2020 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
jan_2020_out = "data/day/IL/jan/jan_2020_ave.csv"
jan_2020_ave.to_csv(jan_2020_out, index=True, header=True)

### 2021 January Day

In [26]:
# Load the January 2021 Illinois day readings from the state/month CSV export.
jan_2021_csv = "../data_large/IL-day/2021-jan-day-IL.csv"
jan_2021 = pd.read_csv(jan_2021_csv)

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = jan_2021['TemperatureExpectedCool']
heat_setpoint = jan_2021['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
jan_2021.drop(index=jan_2021.loc[too_high | too_low].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,09980458ebb703491b71b9cdf91b17de1a2d355c,2021-01-07 17:20:00 UTC,heat,hold,695,707,707,IL,Montgomery,20,True,False,False,Gas
1,51e57900e45cc27d40d3b2a7c7889f1d4093d5bf,2021-01-16 19:25:00 UTC,heat,hold,636,640,640,IL,Rockford,70,True,False,False,Gas
2,ca4c716f1eef8fb8780713828fada9db3fb4f20e,2021-01-25 17:55:00 UTC,heat,hold,682,694,690,IL,Chicago,66,False,False,False,Gas
3,9f42f9a054e8bd0d6f49f2ef8b3e76c197227899,2021-01-31 14:50:00 UTC,auto,hold,709,797,707,IL,Chicago,117,False,False,False,Gas
4,2eab44d90abec3929e47332c78fa941873890d76,2021-01-28 08:40:00 UTC,heat,hold,702,687,687,IL,Buffalo Grove,48,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2580134,b4344a395a132a8051b62e463399faa0529cb192,2021-01-04 19:20:00 UTC,heat,hold,758,760,760,IL,Rockford,80,False,False,False,Gas
2580135,85d465aee3b3d8c6b40362d1ce57c337c8799d34,2021-01-12 18:00:00 UTC,cool,hold,602,760,760,IL,Pekin,0,False,False,False,Gas
2580136,0cd367e477c045dd8a960e6ada2e7d617248180d,2021-01-31 10:35:00 UTC,auto,hold,694,760,700,IL,Springfield,40,False,False,False,Gas
2580137,baeeb7d86514c5a0d1c8d1991861986428d2b908,2021-01-25 16:20:00 UTC,auto,hold,670,760,670,IL,Hanover Park,50,False,False,False,Gas


In [28]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
jan_2021 = jan_2021.rename(columns=ave_names)

In [30]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Write the January 2021 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
jan_2021_out = "data/day/IL/jan/jan_2021_ave.csv"
jan_2021_ave.to_csv(jan_2021_out, index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year January CSVs to combine.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible across machines and runs.
# NOTE: every *.csv in the folder is picked up, including stale files left
# over from earlier runs.
files = sorted(f for f in os.listdir("data/day/IL/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year January file first, then concatenate once. Growing the
# frame with pd.concat inside the loop re-copies all accumulated rows on each
# iteration.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/IL/jan/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame so a folder
# with no CSVs still yields a DataFrame, as the loop-based version did.
IL_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

IL_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,Jan,2017,auto,auto,Chicago,698.972222,770.000000,700.000000,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,Jan,2017,auto,hold,Chicago,707.190476,770.000000,708.571429,5.0,False,False,False
2,00726ed8e6a1f4f9bd85c248752de358dc529426,Jan,2017,auto,auto,Chicago,637.527778,820.000000,620.000000,100.0,False,False,False
3,00ab82671ae4437af38aaacbab99cc76c30a8568,Jan,2017,auto,auto,Troy,698.419375,780.000000,700.000000,35.0,False,False,False
4,00ab82671ae4437af38aaacbab99cc76c30a8568,Jan,2017,heat,auto,Troy,689.633333,708.000000,691.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2978,ff850beb55574380a7267c8dc034948fe7e03465,Jan,2021,heat,hold,Chicago,746.130841,750.785047,749.663551,0.0,False,False,False
2979,ff871c58b8da73a6dcaab352b09eccd97c74392f,Jan,2021,heat,hold,Carol Stream,743.250923,745.708487,745.708487,0.0,False,False,False
2980,ff9c4b83c127c974334826057c0eca1ad0bf5448,Jan,2021,heat,hold,Hanover Park,717.156028,702.078014,694.333333,35.0,False,False,False
2981,ffb8d298fb8a6ec102607d24292aa1212022b338,Jan,2021,heat,hold,McHenry,684.998913,690.753261,689.780435,20.0,False,False,True


In [34]:
# Save the combined January frame. index=False leaves out the row index,
# which restarts for each source file because the concat keeps original labels.
IL_jan_out = "Scraper_Output/State_Month_Day/IL/IL_jan.csv"
IL_jan.to_csv(IL_jan_out, index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 Illinois day readings from the state/month CSV export.
feb_2017_csv = "../data_large/IL-day/2017-feb-day-IL.csv"
feb_2017 = pd.read_csv(feb_2017_csv)

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = feb_2017['TemperatureExpectedCool']
heat_setpoint = feb_2017['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
feb_2017.drop(index=feb_2017.loc[too_high | too_low].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7b5f4d5749b4a549c3e67c5b7639308cc19e4588,2017-02-18 14:30:00 UTC,auto,hold,724,776,726,IL,Chicago,66,False,False,False,Gas
1,c85d41f08f6f31893c1046d3acd55649e4b663ea,2017-02-01 15:20:00 UTC,auto,hold,709,810,710,IL,Grayslake,16,False,False,False,Gas
2,a6955cb960808e9b1b50f135535540072ed8c8a7,2017-02-27 18:00:00 UTC,auto,auto,742,795,745,IL,Hanover Park,50,False,False,False,Gas
3,e372d517816adf5f07288167b1fcb4e596cc6e0b,2017-02-27 15:00:00 UTC,heat,hold,682,685,685,IL,Mount Prospect,5,False,False,False,Gas
4,676a09ab35338340c579a9487c5e3c1afee1a9e6,2017-02-03 17:15:00 UTC,heat,hold,634,640,640,IL,Leland Grove,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
816519,3bc95bd3032a3a912241fdd29321c91265ab81f9,2017-02-13 19:30:00 UTC,auto,hold,703,760,700,IL,Chicago,16,False,False,False,Gas
816520,ca3376ace2d4bf462ebb0b2f5d4c512c15174feb,2017-02-11 16:40:00 UTC,auto,hold,706,760,680,IL,Chicago,0,True,False,False,Gas
816521,ac5ab5a0eb1402b00fcacf1de7cfdf785f86cd55,2017-02-16 16:10:00 UTC,heat,auto,712,760,720,IL,Bolingbrook,50,False,False,False,Gas
816522,3bc95bd3032a3a912241fdd29321c91265ab81f9,2017-02-24 14:30:00 UTC,auto,hold,698,760,700,IL,Chicago,16,False,False,False,Gas


In [37]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
# NOTE(review): the January cells in this notebook use a capitalised "Jan"
# label, while this one is lowercase "feb" — confirm which casing downstream
# consumers expect before normalising.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2017 = feb_2017.rename(columns=ave_names)

In [39]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Write the February 2017 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
feb_2017_out = "data/day/IL/feb/feb_2017_ave.csv"
feb_2017_ave.to_csv(feb_2017_out, index=True, header=True)

### 2018 February Day

In [41]:
# Load the February 2018 Illinois day readings from the state/month CSV export.
feb_2018_csv = "../data_large/IL-day/2018-feb-day-IL.csv"
feb_2018 = pd.read_csv(feb_2018_csv)

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = feb_2018['TemperatureExpectedCool']
heat_setpoint = feb_2018['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
feb_2018.drop(index=feb_2018.loc[too_high | too_low].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0e9b5ba51dbddffa9ba7d3e4e97694e971f6916a,2018-02-21 18:25:00 UTC,auto,hold,707,775,705,IL,Roscoe,15,False,False,False,Gas
1,84519c83acbcd5d575704c83c858ea684bab090d,2018-02-10 15:50:00 UTC,auto,auto,701,765,715,IL,Elgin,25,False,False,False,Gas
3,bb79342a3fca28711c37d01efdb0bb70b0495f63,2018-02-02 15:55:00 UTC,auto,hold,686,735,685,IL,Libertyville,40,False,False,False,Gas
4,efcefe6e2b283b82224ea374b5498face71229d4,2018-02-22 16:25:00 UTC,heat,hold,674,675,675,IL,Pingree Grove,10,False,False,False,Gas
5,39d29830fc33f46ca706897acd02a6ede13d0a52,2018-02-05 13:30:00 UTC,heat,hold,734,735,735,IL,Waukegan,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2463997,e23d9c3a669a30ee628e11dfffeeb831163e886d,2018-02-03 14:55:00 UTC,auto,auto,713,760,710,IL,Shorewood,0,False,False,False,Gas
2463998,9a2314007942ee8e8e5a5d5212f841b5fa98bb03,2018-02-08 14:45:00 UTC,auto,auto,677,760,680,IL,Chicago,15,False,False,False,Gas
2463999,682026463117b13aaeb9abd843611b51f8c417ae,2018-02-13 19:10:00 UTC,auto,auto,666,760,670,IL,Mount Prospect,90,False,False,False,Gas
2464000,bfebc5dd62d38438acc89343a1fd5b5933a7ad54,2018-02-04 19:10:00 UTC,heat,hold,755,760,760,IL,Chicago,0,False,False,False,Gas


In [43]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
# NOTE(review): the January cells in this notebook use a capitalised "Jan"
# label, while this one is lowercase "feb" — confirm which casing downstream
# consumers expect before normalising.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2018 = feb_2018.rename(columns=ave_names)

In [45]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Write the February 2018 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
feb_2018_out = "data/day/IL/feb/feb_2018_ave.csv"
feb_2018_ave.to_csv(feb_2018_out, index=True, header=True)

### 2019 February Day

In [47]:
# Load the February 2019 Illinois day readings from the state/month CSV export.
feb_2019_csv = "../data_large/IL-day/2019-feb-day-IL.csv"
feb_2019 = pd.read_csv(feb_2019_csv)

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = feb_2019['TemperatureExpectedCool']
heat_setpoint = feb_2019['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
feb_2019.drop(index=feb_2019.loc[too_high | too_low].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,75ba8363612804cf52ba1dbff9b25bf52ad1f98b,2019-02-01 15:55:00 UTC,auto,auto,697,810,700,IL,Gurnee,35,False,False,False,Gas
1,5a409679afc3e037f3054904293e1d7472073bc2,2019-02-11 17:55:00 UTC,auto,hold,690,745,695,IL,Washington,80,False,False,False,Gas
2,93f6b7c0db2d184f93dc8cd721a00cd35155dc1e,2019-02-05 15:15:00 UTC,auto,hold,702,765,705,IL,Elmhurst,16,False,False,False,Gas
3,dc5ac03eff51d0a8dbc9e79424f623c2e8141155,2019-02-04 15:55:00 UTC,auto,auto,684,785,655,IL,Norridge,60,False,False,False,Gas
4,46e3367e8ba9f4d9e44304e304aae6645c2fe936,2019-02-01 16:45:00 UTC,heat,hold,710,739,739,IL,Huntley,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2840828,d89f68023c47bedb4cc02fb0c415d6ecc946df7c,2019-02-01 18:30:00 UTC,heat,auto,744,760,750,IL,Bolingbrook,5,False,False,False,Gas
2840829,2426d70bc25ac972cc3f9511f941d47e8918d4aa,2019-02-17 14:05:00 UTC,auto,hold,639,760,640,IL,Addison,60,False,False,False,Gas
2840830,f60fcbfc90116e255d4bc65f95286d5d07b4d740,2019-02-01 14:35:00 UTC,heat,auto,701,760,700,IL,Palatine,67,False,False,False,Gas
2840831,d64cb7c69a01a7576ed6eed51278235da031d306,2019-02-26 18:55:00 UTC,heat,auto,759,760,760,IL,Westmont,40,False,False,False,Gas


In [49]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
# NOTE(review): the January cells in this notebook use a capitalised "Jan"
# label, while this one is lowercase "feb" — confirm which casing downstream
# consumers expect before normalising.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2019 = feb_2019.rename(columns=ave_names)

In [51]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Write the February 2019 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
feb_2019_out = "data/day/IL/feb/feb_2019_ave.csv"
feb_2019_ave.to_csv(feb_2019_out, index=True, header=True)

### 2020 February Day

In [53]:
# Load the February 2020 Illinois day readings from the state/month CSV export.
feb_2020_csv = "../data_large/IL-day/2020-feb-day-IL.csv"
feb_2020 = pd.read_csv(feb_2020_csv)

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = feb_2020['TemperatureExpectedCool']
heat_setpoint = feb_2020['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
feb_2020.drop(index=feb_2020.loc[too_high | too_low].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,724af8c53d6ef88347211853b3a90a141303a9c0,2020-02-10 18:55:00 UTC,heat,hold,673,679,679,IL,Chicago,120,True,False,False,Gas
1,03de156cd3b88416316440eb398f4625b5252455,2020-02-11 19:45:00 UTC,heat,auto,705,700,700,IL,Glenview,0,False,False,False,Gas
2,3fa6220040e67b30fdd67c7e5470efbb1fa9bfa0,2020-02-07 13:30:00 UTC,heat,hold,706,710,710,IL,Bloomington,60,True,False,False,Gas
3,8a7167763d6360a8a24b7afcc218f47e5497b302,2020-02-23 18:40:00 UTC,heat,hold,730,730,730,IL,Chicago,5,False,False,False,Gas
4,c789e8e4425c9970148510b3bad06dfd7628bee7,2020-02-23 18:20:00 UTC,auto,hold,737,840,730,IL,Lisle,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3878336,228c9c292245be20d4c54014457f83131954d2fd,2020-02-05 12:50:00 UTC,heat,auto,671,730,670,IL,Lockport,120,False,False,False,Gas
3878337,03de156cd3b88416316440eb398f4625b5252455,2020-02-08 13:45:00 UTC,heat,auto,681,680,680,IL,Glenview,0,False,False,False,Gas
3878338,6a24a315f62d7c2489025b63f24a9c06a1485bf4,2020-02-10 18:55:00 UTC,heat,hold,705,710,710,IL,Breese,117,False,False,False,Gas
3878339,1db2844d617ac8b0a76e23eb10a82bc4bc27177a,2020-02-26 18:05:00 UTC,heat,hold,734,740,740,IL,Chicago,110,True,False,False,Gas


In [55]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
# NOTE(review): the January cells in this notebook use a capitalised "Jan"
# label, while this one is lowercase "feb" — confirm which casing downstream
# consumers expect before normalising.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2020 = feb_2020.rename(columns=ave_names)

In [57]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Write the February 2020 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
feb_2020_out = "data/day/IL/feb/feb_2020_ave.csv"
feb_2020_ave.to_csv(feb_2020_out, index=True, header=True)

### 2021 February Day

In [59]:
# Load the February 2021 Illinois day readings from the state/month CSV export.
feb_2021_csv = "../data_large/IL-day/2021-feb-day-IL.csv"
feb_2021 = pd.read_csv(feb_2021_csv)

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating: a reading is discarded
# when EITHER expected setpoint (cool or heat) is >= 850 or <= 600.
# Rows with a missing setpoint fail every comparison and are therefore kept.
# NOTE(review): values look like tenths of a degree — confirm the unit.
cool_setpoint = feb_2021['TemperatureExpectedCool']
heat_setpoint = feb_2021['TemperatureExpectedHeat']
too_high = (cool_setpoint >= 850) | (heat_setpoint >= 850)
too_low = (cool_setpoint <= 600) | (heat_setpoint <= 600)

# Drop by index label, in place, so the frame object itself is filtered.
feb_2021.drop(index=feb_2021.loc[too_high | too_low].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a8ba6197909039dab483060efaa57a1359ff3dbb,2021-02-27 07:10:00 UTC,auto,hold,664,830,620,IL,Highland Park,0,False,False,False,Gas
1,27876cc9f84ca94e3c3a7884d12637d07f596448,2021-02-20 18:55:00 UTC,auto,hold,713,767,697,IL,Chicago,30,False,False,False,Gas
2,f05a673ba4aa208709e24869752a9ad033bc42bc,2021-02-16 15:25:00 UTC,heat,hold,687,689,689,IL,Lake Barrington,45,False,False,False,Gas
3,bc93333d3041c6de9c13d8624b2f337b2fd24e28,2021-02-06 18:55:00 UTC,heat,hold,728,737,737,IL,Arlington Heights,0,False,False,False,Gas
4,c8623f45f1fe7038d40c087b0a8e27c12599eb45,2021-02-28 11:20:00 UTC,auto,hold,665,754,670,IL,McHenry,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2273678,506896c036fb33d97f03c97cb8706b9bc663de81,2021-02-08 13:50:00 UTC,auto,hold,688,760,690,IL,Savoy,10,False,False,False,Gas
2273679,7d2d533fcabb443ac54b196d416e20ab015f3ba8,2021-02-19 15:40:00 UTC,auto,hold,700,760,700,IL,Naperville,35,False,False,False,Gas
2273680,5ab1643450c4aded49fb0397ec13502b8aaf59f3,2021-02-26 13:30:00 UTC,auto,hold,712,760,710,IL,Elgin,0,False,False,False,Gas
2273681,1d24bbd88223261463efde24d37a18570e9fb998,2021-02-15 12:35:00 UTC,heat,hold,755,760,760,IL,Wood River,70,True,False,False,Gas


In [61]:
# Tag each reading with its period so Year/Month can serve as grouping keys.
# Both labels are stored as strings.
# NOTE(review): the January cells in this notebook use a capitalised "Jan"
# label, while this one is lowercase "feb" — confirm which casing downstream
# consumers expect before normalising.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output carries aggregate-style column names.
ave_names = {
    col: col + "_ave"
    for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
feb_2021 = feb_2021.rename(columns=ave_names)

In [63]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is required because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas 2.x raises a
# TypeError when asked to average them, whereas older releases dropped them
# silently. Passing the flag gives the same result on both.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Write the February 2021 averages to disk. index=True keeps the group keys
# (the index levels produced by the groupby) as leading columns in the CSV.
feb_2021_out = "data/day/IL/feb/feb_2021_ave.csv"
feb_2021_ave.to_csv(feb_2021_out, index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year February CSVs to combine.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible across machines and runs.
# NOTE: every *.csv in the folder is picked up, including stale files left
# over from earlier runs.
files = sorted(f for f in os.listdir("data/day/IL/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year February file first, then concatenate once. Growing the
# frame with pd.concat inside the loop re-copies all accumulated rows on each
# iteration.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/IL/feb/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame so a folder
# with no CSVs still yields a DataFrame, as the loop-based version did.
IL_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

IL_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,feb,2017,auto,hold,Chicago,697.833333,770.000000,700.000000,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,feb,2017,heat,hold,Chicago,722.916667,720.000000,720.000000,5.0,False,False,False
2,00726ed8e6a1f4f9bd85c248752de358dc529426,feb,2017,auto,auto,Chicago,676.666667,770.166667,679.750000,100.0,False,False,False
3,00ab82671ae4437af38aaacbab99cc76c30a8568,feb,2017,heat,auto,Troy,688.456731,700.000000,690.000000,35.0,False,False,False
4,00ab82671ae4437af38aaacbab99cc76c30a8568,feb,2017,heat,hold,Troy,699.298718,700.000000,700.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2963,ff7b7715c3868f2d70b2ab4daef692a5363e2a11,feb,2021,heat,hold,Arlington Heights,697.372163,700.429652,700.429652,35.0,True,False,False
2964,ff850beb55574380a7267c8dc034948fe7e03465,feb,2021,heat,hold,Chicago,739.106452,737.438710,737.409677,0.0,False,False,False
2965,ff871c58b8da73a6dcaab352b09eccd97c74392f,feb,2021,heat,hold,Carol Stream,743.234043,744.510638,744.510638,0.0,False,False,False
2966,ff9c4b83c127c974334826057c0eca1ad0bf5448,feb,2021,heat,hold,Hanover Park,726.622549,718.411765,718.176471,35.0,False,False,False


In [67]:
# Save the combined February frame; the row index is not written
IL_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IL/IL_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the 2017 June day CSV for IL
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2017-jun-day-IL.csv")

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,488a06da6af122e6a93372f0dec8971281d8d784,2017-06-28 19:05:00 UTC,auto,hold,728,755,705,IL,Lisle,20,False,False,False,Gas
3,488a06da6af122e6a93372f0dec8971281d8d784,2017-06-28 12:15:00 UTC,auto,hold,724,755,705,IL,Lisle,20,False,False,False,Gas
4,9632b1b5f4ebe4fe080cacaea089f450519fa864,2017-06-08 14:05:00 UTC,cool,hold,744,775,775,IL,Chicago,50,False,False,False,Gas
6,59a607c871c363c854a4f20fbe39ad921293358e,2017-06-14 16:55:00 UTC,auto,auto,730,695,645,IL,Orland Township,17,False,False,False,Gas
7,946c901a7f70d412c33179dd4048307ffb043e13,2017-06-04 09:30:00 UTC,auto,hold,664,665,615,IL,Cary,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1394756,3258e4b8a232d3e3bce631913382d45aac0791b8,2017-06-17 17:25:00 UTC,cool,auto,766,770,760,IL,Aurora,35,True,False,False,Gas
1394757,5b4533b70af9c98fdb900681c00e5508059aa36a,2017-06-15 12:00:00 UTC,cool,hold,743,760,760,IL,Clarendon Hills,116,False,False,False,Gas
1394758,0c36cb406f424ea7a7f3936276e816f7927420c0,2017-06-22 19:45:00 UTC,cool,hold,759,760,760,IL,Riverside,120,False,False,False,Gas
1394759,79bd8d43f22b117d36ba0f1640c0a5657ade10e4,2017-06-10 18:55:00 UTC,cool,hold,765,760,760,IL,Wonder Lake,30,False,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings)
jun_2017["Year"], jun_2017["Month"] = "2017", "jun"

In [71]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the June 2017 averages to CSV, keeping the header row and the index columns
jun_2017_ave.to_csv(path_or_buf="data/day/IL/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the 2018 June day CSV for IL
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2018-jun-day-IL.csv")

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c64de3d2915f81b466255b355735a8e181c4b5ee,2018-06-24 11:40:00 UTC,auto,hold,734,761,698,IL,Champaign,40,False,False,False,Gas
1,3bff97fff4aaabb7bc4dd2a3b42a89403741c247,2018-06-01 17:35:00 UTC,auto,hold,726,715,625,IL,Crete,0,False,False,False,Gas
2,05353f85a169498e7439f101f618e0afc4fc1ee0,2018-06-25 16:30:00 UTC,cool,hold,743,755,755,IL,Homer Glen,30,False,False,False,Gas
4,d13cbfc9ee5610d793a2e7f6ceada7ce9bf4be7c,2018-06-24 17:45:00 UTC,cool,auto,733,719,719,IL,Mundelein,0,False,False,False,Gas
5,a6c581e771759fcaddccb116af520ec53b2047d2,2018-06-29 14:05:00 UTC,cool,auto,776,794,608,IL,Aurora,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2971260,ed44823ff9919be60103e7d46e4a526538cef9a1,2018-06-13 11:35:00 UTC,cool,hold,787,760,760,IL,Wilmette,10,False,False,False,Gas
2971261,9b087161c3d2ffdc9dd900c1537d5551ee14245d,2018-06-06 14:25:00 UTC,cool,hold,762,760,760,IL,Chatham,38,False,False,False,Gas
2971262,4c9c41ad6fde10787873c50eb23e43becf18a5f2,2018-06-29 13:00:00 UTC,cool,auto,773,770,760,IL,Palatine,25,True,False,False,Gas
2971263,7230593c7f7503fed3bdd27bcf8ad62823a66804,2018-06-20 18:35:00 UTC,cool,hold,769,760,760,IL,Franklin Park,120,False,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings)
jun_2018["Year"], jun_2018["Month"] = "2018", "jun"

In [77]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the June 2018 averages to CSV, keeping the header row and the index columns
jun_2018_ave.to_csv(path_or_buf="data/day/IL/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the 2019 June day CSV for IL
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2019-jun-day-IL.csv")

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,87d937286bf5a598f79c342d836fd91780f1c73d,2019-06-25 10:30:00 UTC,cool,auto,695,730,730,IL,Manhattan,90,False,False,False,Gas
1,36b54cb4cb45212a4681284b03abf4570662d876,2019-06-07 18:20:00 UTC,cool,auto,756,760,760,IL,Chicago,0,False,False,False,Gas
2,ecd87073a4953d3272aa41effed80d6c469af995,2019-06-11 12:10:00 UTC,heat,auto,705,680,680,IL,Lake In The Hills,30,False,False,False,Gas
3,676a09ab35338340c579a9487c5e3c1afee1a9e6,2019-06-23 15:55:00 UTC,auto,hold,698,700,650,IL,Leland Grove,60,False,False,False,Gas
4,25714ead5aa0fa1bdf44483d9aa79ff4a8f39252,2019-06-07 13:40:00 UTC,auto,hold,738,745,695,IL,Aurora,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4028724,30fc9399410cd1da38ca119fae40541165b9081d,2019-06-04 16:10:00 UTC,auto,hold,745,763,713,IL,Streator,65,False,False,False,Gas
4028725,903a31184d5089a201b05a03fba0c848e48df876,2019-06-12 19:45:00 UTC,cool,auto,740,740,740,IL,Shiloh,40,True,False,True,Electric
4028726,fe86f5afdc00703608c29149fa6f944afc5301da,2019-06-26 17:00:00 UTC,cool,hold,724,720,720,IL,Chicago,0,False,False,False,Gas
4028727,9a9fea69a1632de1155bf5ce686b46b8667a24f1,2019-06-17 14:20:00 UTC,heat,auto,713,670,670,IL,Bradford,110,False,False,False,Gas


In [82]:
# Tag every row with its year and month (both stored as strings)
jun_2019["Year"], jun_2019["Month"] = "2019", "jun"

In [83]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the June 2019 averages to CSV, keeping the header row and the index columns
jun_2019_ave.to_csv(path_or_buf="data/day/IL/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the 2020 June day CSV for IL
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2020-jun-day-IL.csv")

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,533da33169868eef19caea73c32822ffc0cad843,2020-06-26 15:30:00 UTC,cool,auto,714,710,680,IL,West Chicago,48,False,False,False,Gas
1,b7717e0902078440d0742957aa599830466dcd24,2020-06-20 19:00:00 UTC,cool,auto,681,680,710,IL,Decatur,50,False,False,False,Gas
2,616440838e54cc16f52f3e03b9413fc94dec49d7,2020-06-01 13:25:00 UTC,cool,hold,712,740,740,IL,Joliet,29,False,False,False,Gas
3,b3eb485c35394fd0da26f2672aeafc5876200602,2020-06-25 19:25:00 UTC,cool,hold,720,719,678,IL,Chicago,19,True,False,False,Gas
4,7ef2cd3a36e7299e503523b539e48412993aab46,2020-06-09 12:15:00 UTC,cool,hold,654,650,650,IL,Chicago,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4268284,47bab0638e1b5cb2810a4dcea7f2a3462003ceaf,2020-06-09 12:00:00 UTC,cool,hold,712,710,710,IL,Champaign,0,False,False,False,Gas
4268285,0ef1c8386b204313d0252b3b5f004acb8bdaff3d,2020-06-22 13:55:00 UTC,auto,hold,698,700,640,IL,Geneva,10,False,False,False,Gas
4268286,4ab08747198793a32fb8a4c127f230ea1ee456d3,2020-06-03 19:45:00 UTC,cool,hold,760,770,770,IL,Oak Lawn,40,False,False,False,Gas
4268287,aa89c81a2c008e476c2957a8133025fbf15bf57e,2020-06-08 19:05:00 UTC,cool,hold,742,740,740,IL,Oswego,19,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings)
jun_2020["Year"], jun_2020["Month"] = "2020", "jun"

In [89]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the June 2020 averages to CSV, keeping the header row and the index columns
jun_2020_ave.to_csv(path_or_buf="data/day/IL/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the 2021 June day CSV for IL
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2021-jun-day-IL.csv")

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b9957439693eea69aeea02918706d4153a3a9e37,2021-06-14 15:40:00 UTC,cool,hold,733,729,729,IL,McHenry,20,False,False,True,Electric
1,b024a873d6858823be77d7fe70ba680be927400c,2021-06-09 18:35:00 UTC,auto,hold,742,735,635,IL,Inverness,10,False,False,False,Gas
2,6b4c11c2de2ada7d70ee43cfe6ae903d50adf1ee,2021-06-13 13:55:00 UTC,cool,hold,755,755,755,IL,chicago,20,False,False,False,Gas
3,28599c64c379906a8eda04ab0949dc8c799e458e,2021-06-14 17:35:00 UTC,cool,hold,732,727,727,IL,Chillicothe,15,False,False,False,Gas
4,65fbb905bea0c5b0d2c861b55781895bd10ccbee,2021-06-28 15:30:00 UTC,cool,hold,699,697,697,IL,South Elgin,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2624490,fd28715c6e2ab99d792d579958ee5fdc55a873bb,2021-06-27 15:25:00 UTC,cool,hold,761,760,760,IL,Naperville,0,False,False,False,Gas
2624491,b76a4b31cee5b507a84095ef4c5e5dfe04c7a069,2021-06-30 13:40:00 UTC,cool,hold,728,760,760,IL,Arlington Heights,40,False,False,False,Gas
2624492,0b5c510c4b74c0ee1b312c4233752c7056c5ec6a,2021-06-22 12:55:00 UTC,cool,hold,750,760,760,IL,Kewanee,40,False,False,False,Gas
2624493,b3b6ec2eef0b557bacf331b4fefbf0d818153ee8,2021-06-11 14:10:00 UTC,cool,hold,771,760,760,IL,Orland Park,20,False,False,False,Gas


In [94]:
# Tag every row with its year and month (both stored as strings)
jun_2021["Year"], jun_2021["Month"] = "2021", "jun"

In [95]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the June 2021 averages to CSV, keeping the header row and the index columns
jun_2021_ave.to_csv(path_or_buf="data/day/IL/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export them as a single combined CSV

In [98]:
# Collect the CSV file names in this month's folder. os.listdir() returns
# entries in arbitrary order, so sort them to keep the row order of the
# combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/IL/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each CSV once and stack them with a single concat call; concatenating
# inside the loop re-copies the accumulated frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/IL/jun/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the original
# "empty DataFrame" result when no CSV files were found.
IL_jun = pd.concat(frames) if frames else pd.DataFrame()

IL_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,jun,2017,auto,auto,Chicago,729.279412,763.705882,678.632353,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,jun,2017,auto,hold,Chicago,742.054054,750.000000,670.000000,5.0,False,False,False
2,0010ea447dc3807b33daad2477d1cc02a71571fa,jun,2017,cool,auto,Chicago,744.000000,740.000000,670.000000,5.0,False,False,False
3,0010ea447dc3807b33daad2477d1cc02a71571fa,jun,2017,cool,hold,Chicago,720.291667,720.000000,720.000000,5.0,False,False,False
4,00726ed8e6a1f4f9bd85c248752de358dc529426,jun,2017,auto,hold,Chicago,732.937500,722.812500,661.125000,100.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3686,ff871c58b8da73a6dcaab352b09eccd97c74392f,jun,2021,auto,hold,Carol Stream,751.200000,772.148148,711.081481,0.0,False,False,False
3687,ff9c4b83c127c974334826057c0eca1ad0bf5448,jun,2021,cool,hold,Hanover Park,740.541491,741.421941,741.514768,35.0,False,False,False
3688,ffb8d298fb8a6ec102607d24292aa1212022b338,jun,2021,cool,hold,McHenry,703.812749,698.549801,698.183267,20.0,False,False,True
3689,fffad25219fec06f6ad101ceda18ea4e00693b2c,jun,2021,auto,hold,Wilmette,692.341463,760.000000,670.000000,110.0,True,False,False


In [100]:
# Save the combined June frame; the row index is not written
IL_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IL/IL_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the 2017 July day CSV for IL
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2017-jul-day-IL.csv")

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cad8a2440b11b82f1607cf74a1003e57b2a0049c,2017-07-09 18:15:00 UTC,auto,hold,717,705,655,IL,Montgomery,0,False,False,False,Gas
1,b4120d4f47d6115181651daa3c4a80d96782d42e,2017-07-23 15:25:00 UTC,cool,hold,723,725,725,IL,Highland Park,20,False,False,False,Gas
2,09b5abfb7b7ac38727160cbd2d1c63096bce7136,2017-07-03 18:15:00 UTC,cool,hold,781,788,761,IL,Oak Park,10,False,False,False,Gas
3,f0f91ed02adf8bff3c833da4a49c6f0997d82d80,2017-07-23 17:55:00 UTC,auto,auto,747,745,695,IL,Palatine,20,False,False,False,Gas
4,2147db0d331e9337e5692f68ab2dc1f9c5ce0f5c,2017-07-27 15:30:00 UTC,cool,hold,724,695,695,IL,Chicago,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1802756,8631aed7daf8474e62db37c8ff95676642769624,2017-07-21 11:00:00 UTC,cool,hold,742,760,760,IL,Palatine,25,False,False,False,Gas
1802757,8b6cfe2a313871b137989449712671477cc6bb75,2017-07-24 15:15:00 UTC,cool,auto,758,760,760,IL,Chicago,0,True,False,False,Gas
1802758,83730624e9d27a50ebb3867a87dbf0287aa0cd8d,2017-07-09 15:00:00 UTC,cool,hold,713,760,760,IL,Lincolnshire,55,False,False,False,Gas
1802759,0583fedc7b77c150ff49355f10bd9717d86ee323,2017-07-22 19:00:00 UTC,cool,hold,763,760,760,IL,Berkeley,50,True,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings)
jul_2017["Year"], jul_2017["Month"] = "2017", "jul"

In [104]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the July 2017 averages to CSV, keeping the header row and the index columns
jul_2017_ave.to_csv(path_or_buf="data/day/IL/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the 2018 July day CSV for IL
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2018-jul-day-IL.csv")

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0ff224cc321499923ffb994e072b36089487d102,2018-07-13 10:30:00 UTC,auto,hold,706,705,655,IL,Chicago,100,False,False,False,Gas
1,d89780a5c83043d64599896b93bdbf94ee31c16a,2018-07-24 11:15:00 UTC,cool,hold,737,735,735,IL,Des Plaines,20,False,False,False,Gas
2,1f1af87b21d94007698db3e18216f21178da51a7,2018-07-30 14:05:00 UTC,cool,hold,716,735,735,IL,Normal,10,True,False,False,Gas
3,b606d1c5e434d57a80ccacdbb45b55cd07e396bf,2018-07-28 17:45:00 UTC,cool,hold,779,830,800,IL,Schaumburg,5,False,False,False,Gas
4,9afab41a1cd1d7b77d3ae1cfe53849a9d9db2b18,2018-07-26 07:45:00 UTC,cool,auto,764,760,725,IL,Chicago,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3554163,63ae17af36a004403ce5d201b33d98848c344f28,2018-07-01 16:10:00 UTC,cool,hold,764,760,760,IL,Thornton,40,False,False,False,Gas
3554164,c4037e8aec9cab9f3f021c3793ef294edf197a47,2018-07-15 17:45:00 UTC,cool,auto,760,760,760,IL,Chicago,50,False,False,False,Gas
3554165,0dff8ae8efa96bad8a2a6451a82d54d72870f2a6,2018-07-16 17:45:00 UTC,cool,hold,763,760,760,IL,Algonquin,0,False,False,False,Gas
3554166,22b62065617b55d128a59421606b6a21c3208568,2018-07-27 17:55:00 UTC,cool,auto,728,760,760,IL,Joliet,0,False,False,False,Gas


In [109]:
# Tag every row with its year and month (both stored as strings)
jul_2018["Year"], jul_2018["Month"] = "2018", "jul"

In [110]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the July 2018 averages to CSV, keeping the header row and the index columns
jul_2018_ave.to_csv(path_or_buf="data/day/IL/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the 2019 July day CSV for IL
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2019-jul-day-IL.csv")

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8a31883e7fac6e19a6f66c278bbfa834afc8d838,2019-07-02 19:50:00 UTC,cool,hold,746,740,740,IL,Palatine,37,True,False,False,Gas
1,11d958beb1e3395917cc0802cc099806f61865c0,2019-07-11 16:00:00 UTC,cool,hold,774,770,770,IL,Chicago,0,True,False,False,Gas
2,3bc95bd3032a3a912241fdd29321c91265ab81f9,2019-07-08 19:20:00 UTC,auto,hold,721,720,660,IL,Chicago,16,False,False,False,Gas
3,5bfa3c7e41270a4941ae42549a8073c816f64663,2019-07-01 14:15:00 UTC,cool,hold,754,750,750,IL,Chicago,18,False,False,False,Gas
4,a320bfa2cb442b0a99e7d6bc44e1fd763cbde512,2019-07-14 15:10:00 UTC,cool,auto,721,720,720,IL,Schaumburg,45,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5056440,10a920522708607d3931709f5328bbd13841499e,2019-07-19 18:55:00 UTC,cool,hold,729,730,730,IL,glen ellyn,35,True,False,False,Gas
5056441,1d5eeb493d756b8a26f785854c4b80d8591509aa,2019-07-26 13:40:00 UTC,cool,hold,694,710,710,IL,Algonquin,10,False,False,False,Gas
5056442,29118cae2610c7a3b1883abec239a6d4411fbf49,2019-07-01 18:20:00 UTC,auto,hold,748,740,690,IL,Streamwood,19,True,False,False,Gas
5056443,9d342bda0914fbba423bb9559da43d1e29777d58,2019-07-23 15:40:00 UTC,cool,hold,742,740,740,IL,Cottage Hills,79,True,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings)
jul_2019["Year"], jul_2019["Month"] = "2019", "jul"

In [116]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the July 2019 averages to CSV, keeping the header row and the index columns
jul_2019_ave.to_csv(path_or_buf="data/day/IL/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the 2020 July day CSV for IL
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2020-jul-day-IL.csv")

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f74e3658fa69b75b5af33b4ae8643de4e34fbded,2020-07-18 18:05:00 UTC,auto,auto,710,710,658,IL,Gurnee,65,False,False,False,Gas
1,ce46bb8cd7c978f97bee0b1933094e0a9fe980cb,2020-07-03 07:20:00 UTC,cool,auto,702,700,700,IL,pingree grove,5,False,False,False,Gas
2,add43d984bb7eaa099728f4d7d73243959ed08a1,2020-07-28 15:50:00 UTC,cool,hold,723,720,720,IL,Blue Island,90,False,False,False,Gas
3,c0c515a1db3adf4aeaa2477ed510bdbe52cbe12e,2020-07-05 12:20:00 UTC,cool,auto,782,780,780,IL,Decatur,69,True,False,False,Gas
4,3948d150d6223750be1b12b3ad37448ce009d5f5,2020-07-14 14:05:00 UTC,cool,auto,735,730,730,IL,Roscoe,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4827149,69d7f394a48167ce9b73b0e0670db69f57acec79,2020-07-05 16:25:00 UTC,cool,hold,762,752,752,IL,Wheaton,0,False,False,False,Gas
4827150,457d349b856dedba849573ed6d0b2970fb338f98,2020-07-28 18:25:00 UTC,cool,hold,753,750,750,IL,Pekin,0,False,False,False,Gas
4827151,13853fc5328659a5407d860d2ea771ab0b83251a,2020-07-16 11:50:00 UTC,cool,auto,755,760,732,IL,Peoria,80,True,False,False,Gas
4827152,cbed4a98b71bba3c5d5b4edd79357236c4f64952,2020-07-30 10:10:00 UTC,auto,hold,753,750,690,IL,Chicago,0,False,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings)
jul_2020["Year"], jul_2020["Month"] = "2020", "jul"

In [122]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the July 2020 averages to CSV, keeping the header row and the index columns
jul_2020_ave.to_csv(path_or_buf="data/day/IL/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the 2021 July day CSV for IL
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/IL-day/2021-jul-day-IL.csv")

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,33f0926952e3109e37799f9cdcae55c373ccb8af,2021-07-01 16:10:00 UTC,cool,hold,754,750,727,IL,Lisle,40,False,False,False,Gas
1,09c557e5d7d86bdc3748a2db492304612f71a6d3,2021-07-18 16:55:00 UTC,cool,hold,731,722,722,IL,Joliet,48,False,False,False,Gas
2,2d60c815b7c84b259bd733f4302b8e6503f6bbe9,2021-07-05 08:30:00 UTC,cool,hold,698,648,648,IL,Chicago,120,False,False,False,Gas
3,ab50520d3d990a56ffe1a04e50731bd2a3481e03,2021-07-08 12:45:00 UTC,auto,hold,738,740,610,IL,Westchester,60,False,False,False,Gas
4,c58128dabb3f5198097c2c6d5a0a641e68b929e2,2021-07-21 13:40:00 UTC,auto,hold,682,696,646,IL,Wonder Lake,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2639399,36e374fa86b178ba9cb995f04c328c758d7557f4,2021-07-16 17:30:00 UTC,cool,hold,746,760,760,IL,Joliet,10,True,False,False,Gas
2639400,6bb4770876673e200d68fcbd1f5c9a308da1a8ec,2021-07-04 19:30:00 UTC,cool,hold,758,760,760,IL,Skokie,66,False,False,False,Gas
2639401,26d62ecff6d390c29de8d3abe9c631bd5a8374c7,2021-07-17 16:05:00 UTC,cool,hold,769,760,760,IL,Wheaton,10,False,False,False,Gas
2639402,433375de751b31764e6209a5c52171290ed4bad2,2021-07-02 15:05:00 UTC,cool,hold,757,760,760,IL,Carpentersville,5,False,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings)
jul_2021["Year"], jul_2021["Month"] = "2021", "jul"

In [128]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# aggregates once the frame is averaged
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns for each (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group. numeric_only=True is passed explicitly because the
# frame still holds text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 no longer drops them silently and
# raises TypeError without it.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the July 2021 averages to CSV, keeping the header row and the index columns
jul_2021_ave.to_csv(path_or_buf="data/day/IL/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export them as a single combined CSV

In [131]:
# Collect the CSV file names in this month's folder. os.listdir() returns
# entries in arbitrary order, so sort them to keep the row order of the
# combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/IL/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each CSV once and stack them with a single concat call; concatenating
# inside the loop re-copies the accumulated frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/IL/jul/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the original
# "empty DataFrame" result when no CSV files were found.
IL_jul = pd.concat(frames) if frames else pd.DataFrame()

IL_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,jul,2017,auto,auto,Chicago,727.361111,736.666667,678.333333,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,jul,2017,cool,hold,Chicago,725.160000,720.440000,720.440000,5.0,False,False,False
2,00726ed8e6a1f4f9bd85c248752de358dc529426,jul,2017,auto,auto,Chicago,737.916667,820.000000,620.000000,100.0,False,False,False
3,00726ed8e6a1f4f9bd85c248752de358dc529426,jul,2017,auto,hold,Chicago,739.205882,739.264706,643.852941,100.0,False,False,False
4,009a4e3ad98bde4c1849be0b615965844b6aea4e,jul,2017,cool,auto,Elgin,760.482456,809.824561,699.640351,15.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3118,ff871c58b8da73a6dcaab352b09eccd97c74392f,jul,2021,auto,hold,Carol Stream,748.989796,739.255102,685.612245,0.0,False,False,False
3119,ff9c4b83c127c974334826057c0eca1ad0bf5448,jul,2021,cool,hold,Hanover Park,733.952381,732.360248,732.486542,35.0,False,False,False
3120,ffb8d298fb8a6ec102607d24292aa1212022b338,jul,2021,cool,hold,McHenry,702.745501,706.485861,706.377892,20.0,False,False,True
3121,ffb8d298fb8a6ec102607d24292aa1212022b338,jul,2021,heat,hold,McHenry,649.142857,660.000000,660.000000,20.0,False,False,True


In [133]:
# Save the combined July frame; index=False leaves the per-file row numbers out of the CSV.
IL_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IL/IL_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August "day" extract for IL
aug_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2017-aug-day-IL.csv"
)

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
aug_2017.drop(aug_2017[out_of_range].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b2c818fa031b0d49eec392fe0062575d651d442d,2017-08-22 12:05:00 UTC,cool,hold,718,716,716,IL,Chicago,0,False,False,True,Electric
1,77b317ace44adab28e9556d02050880344eb9e9b,2017-08-22 18:00:00 UTC,auto,hold,710,705,645,IL,Geneva,45,False,False,False,Gas
2,a2785d8d64bd3940f875f22e5f341b41f2258319,2017-08-18 11:30:00 UTC,cool,hold,735,735,735,IL,Plainfield,20,True,False,False,Gas
3,91d9dd24f41909477626156745b04fbcaddd7f59,2017-08-06 16:20:00 UTC,auto,auto,749,750,610,IL,Chicago,0,False,False,False,Gas
4,20a95e98c34e462bc7414dea9a586d3a871f2f3e,2017-08-15 12:25:00 UTC,cool,hold,706,705,705,IL,Arlington Heights,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1687780,0c698e5e346e41bd0b23c3517153c37789df9ca3,2017-08-13 15:00:00 UTC,cool,hold,719,760,760,IL,Wilmette,10,False,False,False,Gas
1687781,e44bf90c04b68320279d555643d8b525918a43c1,2017-08-11 19:45:00 UTC,cool,auto,737,734,760,IL,Chicago,60,False,False,False,Gas
1687782,8ca3175a5f9cb7c31df30f8ccdbfee60bd9831a7,2017-08-13 16:10:00 UTC,cool,auto,754,750,760,IL,Skokie,0,False,False,False,Gas
1687783,c17fac980b1b2772d6e328adeaa2a3676c500e61,2017-08-14 15:50:00 UTC,cool,hold,761,760,760,IL,Romeoville,0,True,False,False,Gas


In [136]:
# Tag every row with its year and month; both are used as group keys in the aggregation
aug_2017["Year"], aug_2017["Month"] = "2017", "aug"

In [137]:
# Label the three temperature columns as averages ahead of the groupby/mean step
aug_2017 = aug_2017.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [138]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the August 2017 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
aug_2017_ave.to_csv(
    path_or_buf="data/day/IL/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the 2018 August "day" extract for IL
aug_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2018-aug-day-IL.csv"
)

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
aug_2018.drop(aug_2018[out_of_range].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,764518949f19c3ad36b524ca839f143bb9981d84,2018-08-03 19:25:00 UTC,auto,hold,759,755,705,IL,Hickory Hills,50,False,False,False,Gas
1,e1cdec737c8ebbbc5c459d1d696070d72e127134,2018-08-15 19:50:00 UTC,cool,auto,774,770,745,IL,Western Springs,45,False,False,False,Gas
2,aab2535470c87a67ed55a9832da3dde6d3def03c,2018-08-04 17:00:00 UTC,cool,hold,731,725,725,IL,Springfield,96,False,False,False,Gas
3,ab7d2b6f2e0543db5ef8ea3f3a69d69f3170c5a5,2018-08-07 15:50:00 UTC,cool,hold,742,755,755,IL,Antioch,70,False,False,False,Gas
4,6e8f6b9acf0e2d5a6ccf0d5ced96ba9d5e658589,2018-08-18 18:35:00 UTC,auto,hold,803,790,701,IL,Mount Prospect,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3649392,c7046ca51f9ba7ce2a7e09411170f178e4cec544,2018-08-16 17:30:00 UTC,cool,hold,790,760,760,IL,Chicago,100,True,False,False,Gas
3649393,3a7306a6d5df091fefbe0eb4997e69e51de4f490,2018-08-25 16:40:00 UTC,cool,hold,762,760,760,IL,Ottawa,30,False,False,False,Gas
3649394,4e70fd51dee4c27919441a6db9a42d013d7255d7,2018-08-03 18:45:00 UTC,cool,hold,760,760,760,IL,Bolingbrook,10,False,False,False,Gas
3649395,467dcb359d36ae233794f820d962abf0faa3a609,2018-08-30 16:35:00 UTC,cool,hold,678,760,760,IL,Chicago,55,False,False,False,Gas


In [142]:
# Tag every row with its year and month; both are used as group keys in the aggregation
aug_2018["Year"], aug_2018["Month"] = "2018", "aug"

In [143]:
# Label the three temperature columns as averages ahead of the groupby/mean step
aug_2018 = aug_2018.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [144]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the August 2018 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
aug_2018_ave.to_csv(
    path_or_buf="data/day/IL/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the 2019 August "day" extract for IL
aug_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2019-aug-day-IL.csv"
)

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
aug_2019.drop(aug_2019[out_of_range].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3796bde8c5c1ba823e13c00473edc9122ed619af,2019-08-20 17:15:00 UTC,cool,hold,748,760,760,IL,Northbrook,46,False,False,False,Gas
1,4ab6e342b8b275c4bde17fc30a761d51312c74fb,2019-08-16 16:25:00 UTC,cool,hold,742,740,740,IL,Chicago,15,False,False,False,Gas
2,9025e5d312d9c348cc665cff5f75688e0666955e,2019-08-05 17:15:00 UTC,cool,auto,693,680,680,IL,Spring Valley,20,True,False,False,Gas
3,e512dd5247ad753e066464f468e39bc0222e41ff,2019-08-25 19:35:00 UTC,cool,hold,772,780,780,IL,Lombard,70,False,False,False,Gas
4,2f4e8bdcc371f619024ae2c2331c3c4392e16cac,2019-08-06 13:15:00 UTC,cool,auto,775,770,770,IL,Decatur,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4598304,d8117c39e204ebcdc44a2605342919c4aa2ca311,2019-08-09 17:45:00 UTC,auto,hold,716,710,660,IL,Washington,87,False,False,False,Gas
4598305,98d001d32b4f40a1b06d16b0743fd6a6841b6448,2019-08-13 18:20:00 UTC,cool,hold,750,700,700,IL,Danville,29,True,False,False,Gas
4598306,325c96c6554a7c1b3f5c561e101ecc2fd4f76978,2019-08-02 11:55:00 UTC,auto,auto,695,770,680,IL,Beach Park,30,False,False,False,Gas
4598307,ab24e5086dc1b6bf96c25da42af8075d687b796c,2019-08-07 14:55:00 UTC,cool,auto,753,752,725,IL,Vernon Hills,47,False,False,False,Gas


In [148]:
# Tag every row with its year and month; both are used as group keys in the aggregation
aug_2019["Year"], aug_2019["Month"] = "2019", "aug"

In [149]:
# Label the three temperature columns as averages ahead of the groupby/mean step
aug_2019 = aug_2019.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [150]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the August 2019 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
aug_2019_ave.to_csv(
    path_or_buf="data/day/IL/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the 2020 August "day" extract for IL
aug_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2020-aug-day-IL.csv"
)

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = aug_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
aug_2020.drop(aug_2020[out_of_range].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c341163f903b21dacdc32e7561a460766a99820c,2020-08-04 14:00:00 UTC,cool,auto,742,780,780,IL,Wood Dale,15,False,False,False,Gas
1,eaf740bbd654994840519f23147978d33b5d4668,2020-08-04 15:35:00 UTC,auto,hold,735,735,685,IL,Clarendon Hills,20,False,False,False,Gas
2,67bd5f8eab617ca046932f375c987147130a955e,2020-08-26 13:50:00 UTC,cool,hold,783,780,780,IL,DeKalb,30,False,False,False,Gas
3,9f4b3d5e3d237aec497f480d9990ae4bcbcb617f,2020-08-22 15:25:00 UTC,cool,hold,755,750,750,IL,Joliet,15,False,False,False,Gas
4,3c19e4dbb3027bbff03786aec25b68831c3dc61e,2020-08-20 15:40:00 UTC,cool,hold,742,740,740,IL,Elk Grove Village,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4471720,16ced495f6c887243ecebcd370eeda3a47de3bdc,2020-08-26 15:35:00 UTC,cool,hold,678,675,675,IL,Tinley Park,60,False,False,False,Gas
4471721,da24b040a0d591f84db269acf143765ef55513dc,2020-08-18 13:40:00 UTC,auto,hold,736,730,670,IL,Chicago,20,True,False,False,Gas
4471722,d4a12d620637ddc0ea349ae13a1d46b3ac54b043,2020-08-26 17:10:00 UTC,cool,hold,702,700,700,IL,Rolling Meadows,50,True,False,False,Gas
4471723,f349ccad78102102b810b55f26083dae266b411d,2020-08-24 09:30:00 UTC,cool,hold,746,740,740,IL,Columbia,60,False,False,False,Gas


In [154]:
# Tag every row with its year and month; both are used as group keys in the aggregation
aug_2020["Year"], aug_2020["Month"] = "2020", "aug"

In [155]:
# Label the three temperature columns as averages ahead of the groupby/mean step
aug_2020 = aug_2020.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [156]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the August 2020 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
aug_2020_ave.to_csv(
    path_or_buf="data/day/IL/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from the month folder
2. Export them as one combined CSV for the month

In [158]:
# List the per-year August CSVs. sorted() because os.listdir() returns names in arbitrary order,
# which would make the row order of the combined file depend on the filesystem.
files = sorted(f for f in os.listdir("data/day/IL/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing the frame inside a loop re-copies all
# previously accumulated rows on each pass.
month_frames = [pd.read_csv("data/day/IL/aug/" + file) for file in files]

# pd.concat() raises ValueError on an empty list, so keep the empty-frame result in that case.
IL_aug = pd.concat(month_frames) if month_frames else pd.DataFrame()

IL_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,auto,auto,Chicago,730.480000,739.880000,671.213333,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,auto,hold,Chicago,738.208333,753.333333,670.000000,5.0,False,False,False
2,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,cool,auto,Chicago,736.707547,747.169811,740.000000,5.0,False,False,False
3,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,cool,hold,Chicago,737.458333,740.000000,740.000000,5.0,False,False,False
4,002ba06a2fbc44349736a758c25eb6755d270f15,aug,2017,cool,auto,Niles,758.636364,760.466403,740.944664,60.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5818,ff9c4b83c127c974334826057c0eca1ad0bf5448,aug,2020,cool,auto,Hanover Park,698.164323,681.168764,657.410067,35.0,False,False,False
5819,ff9c4b83c127c974334826057c0eca1ad0bf5448,aug,2020,cool,hold,Hanover Park,700.621212,680.525253,679.858586,35.0,False,False,False
5820,ffb8d298fb8a6ec102607d24292aa1212022b338,aug,2020,auxHeatOnly,hold,McHenry,675.222222,680.888889,680.888889,20.0,False,False,True
5821,ffb8d298fb8a6ec102607d24292aa1212022b338,aug,2020,cool,auto,McHenry,705.118064,717.606848,712.612751,20.0,False,False,True


In [160]:
# Save the combined August frame; index=False leaves the per-file row numbers out of the CSV.
IL_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IL/IL_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December "day" extract for IL
dec_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2017-dec-day-IL.csv"
)

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
dec_2017.drop(dec_2017[out_of_range].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0567a0c2f4118e07ca9696d47facabb47f13337b,2017-12-30 15:45:00 UTC,heat,hold,693,695,695,IL,Rockford,20,False,False,False,Gas
1,8473a0678de7d36d4e92105d09ba024fb94ac633,2017-12-13 16:55:00 UTC,auto,hold,717,765,715,IL,Orland Park,0,False,False,False,Gas
2,4cea4cee6de713b0a7b8bc74b42ae2398d00ede1,2017-12-22 15:40:00 UTC,heat,hold,716,695,695,IL,Lake Zurich,0,False,False,False,Gas
3,27510028ed9939097a3a830f2de47011c44e8b6f,2017-12-19 13:30:00 UTC,heat,hold,674,676,676,IL,Des Plaines,10,False,False,False,Gas
4,69d7f394a48167ce9b73b0e0670db69f57acec79,2017-12-02 15:35:00 UTC,heat,hold,748,752,752,IL,Wheaton,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2505337,a6955cb960808e9b1b50f135535540072ed8c8a7,2017-12-06 14:35:00 UTC,auto,hold,711,760,700,IL,Hanover Park,50,False,False,False,Gas
2505338,8a762f2f1388a0eebc2c6b513b963af8a71be30b,2017-12-23 18:00:00 UTC,auto,hold,687,760,690,IL,Morris,47,False,False,False,Gas
2505339,78478f394aa7b96803e1023c519844514dfc4b03,2017-12-03 18:00:00 UTC,auto,hold,711,760,710,IL,Chicago,0,False,False,False,Gas
2505340,43bbc78f7f7484b4c2a3c626e7c8cae5391cdc3d,2017-12-18 14:05:00 UTC,heat,hold,708,760,710,IL,Des Plaines,70,False,False,False,Gas


In [163]:
# Tag every row with its year and month; both are used as group keys in the aggregation
dec_2017["Year"], dec_2017["Month"] = "2017", "dec"

In [164]:
# Label the three temperature columns as averages ahead of the groupby/mean step
dec_2017 = dec_2017.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [165]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the December 2017 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
dec_2017_ave.to_csv(
    path_or_buf="data/day/IL/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the 2018 December "day" extract for IL
dec_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2018-dec-day-IL.csv"
)

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
dec_2018.drop(dec_2018[out_of_range].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5eb9875b26c28021f661dbf57628231e4caff086,2018-12-24 18:15:00 UTC,heat,hold,654,650,630,IL,Northfield,35,False,False,False,Gas
1,f5e6edc1431555d7b6e9f29fb8d6593beb5abe7d,2018-12-30 18:10:00 UTC,heat,hold,675,680,680,IL,Rockford,50,False,False,False,Gas
2,550e655492d25b067750c3da708f93670497113f,2018-12-12 19:10:00 UTC,auto,hold,669,760,670,IL,Orland Hills,20,False,False,False,Gas
3,818340911e3a5fda8c8d416f8423a19a4c7765e7,2018-12-01 18:50:00 UTC,auto,hold,694,750,700,IL,Brookfield,70,False,False,False,Gas
4,3f554ae4e46a6c110a5e44189fb36f3b0b1fdcd0,2018-12-20 15:40:00 UTC,heat,hold,691,680,680,IL,Arlington Heights,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3878589,6c619741abed3561861163e0137c6d52cecb0438,2018-12-20 12:05:00 UTC,heat,hold,691,690,690,IL,Libertyville,50,False,False,False,Gas
3878590,1d5eeb493d756b8a26f785854c4b80d8591509aa,2018-12-26 10:25:00 UTC,heat,auto,627,650,630,IL,Algonquin,10,False,False,False,Gas
3878591,4ae9c307787b1df37cc42e2576db9b045c353c2f,2018-12-04 15:50:00 UTC,heat,auto,689,690,690,IL,Frankfort,40,False,False,False,Gas
3878592,0407217809ad4ff83e1394d43e02bb72ee9e6028,2018-12-24 19:30:00 UTC,heat,auto,697,750,700,IL,Gurnee,40,False,False,False,Gas


In [169]:
# Tag every row with its year and month; both are used as group keys in the aggregation
dec_2018["Year"], dec_2018["Month"] = "2018", "dec"

In [170]:
# Label the three temperature columns as averages ahead of the groupby/mean step
dec_2018 = dec_2018.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [171]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the December 2018 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
dec_2018_ave.to_csv(
    path_or_buf="data/day/IL/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the 2019 December "day" extract for IL
dec_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2019-dec-day-IL.csv"
)

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
dec_2019.drop(dec_2019[out_of_range].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4126f19d8b05ccd949b0fefd16ff37de7b9801f0,2019-12-15 18:35:00 UTC,heat,hold,697,700,700,IL,McHenry,20,True,False,False,Gas
1,4f5b40fb320bb44fd9f339de800bf47eb4237403,2019-12-23 16:50:00 UTC,heat,hold,713,710,710,IL,Chicago,48,True,False,False,Gas
2,fabbd3cc437507c1a82f554ea430fb9583f2c0cd,2019-12-06 18:30:00 UTC,heat,auto,712,716,690,IL,Batavia,20,False,False,False,Gas
3,4268a9c149be623fa5a8bbf1927db4a781e81bb1,2019-12-10 19:25:00 UTC,heat,hold,723,720,720,IL,Evanston,56,False,False,False,Gas
4,8b9702201f18f7c989543a0721f793904290c54f,2019-12-18 17:45:00 UTC,auto,hold,649,715,655,IL,Chicago,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4391903,705894b5b3a6171ca863fb98e8d5f115b6040d2e,2019-12-07 15:05:00 UTC,heat,auto,677,680,680,IL,Forest Park,0,True,False,False,Gas
4391904,4a93370b7b20738c72dbaad6d9470ee6108f0bac,2019-12-03 12:30:00 UTC,heat,hold,720,720,720,IL,Chicago,120,False,False,False,Gas
4391905,d4c9b202c53eac87b91cec23cdca44c037628463,2019-12-31 18:15:00 UTC,heat,hold,715,720,720,IL,Crete,49,True,False,False,Gas
4391906,265d64619e3685bd9e34bdefbb7e0cc1c4901405,2019-12-20 13:30:00 UTC,heat,hold,655,660,660,IL,Shorewood,20,True,False,False,Gas


In [175]:
# Tag every row with its year and month; both are used as group keys in the aggregation
dec_2019["Year"], dec_2019["Month"] = "2019", "dec"

In [176]:
# Label the three temperature columns as averages ahead of the groupby/mean step
dec_2019 = dec_2019.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [177]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the December 2019 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
dec_2019_ave.to_csv(
    path_or_buf="data/day/IL/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the 2020 December "day" extract for IL
dec_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/IL-day/2020-dec-day-IL.csv"
)

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either setpoint (cool or heat) is >= 850 or <= 600.
setpoints = dec_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Single in-place drop by index label, so the existing frame object is updated.
dec_2020.drop(dec_2020[out_of_range].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e634775c4128fc9b46924fe84ac8ba229846098,2020-12-20 18:20:00 UTC,heat,auto,666,820,680,IL,Chicago,60,False,False,False,Gas
1,f9d98d80cb44bee2e11d7453cf34f22a3f99ca3f,2020-12-12 15:35:00 UTC,auto,auto,754,840,750,IL,Naperville,29,False,False,False,Gas
2,ca4c716f1eef8fb8780713828fada9db3fb4f20e,2020-12-06 16:10:00 UTC,heat,auto,673,694,680,IL,Chicago,66,False,False,False,Gas
4,7e188311d65888436b8e5799264dcf26ce203492,2020-12-06 18:10:00 UTC,heat,hold,648,652,652,IL,Palatine,60,True,False,False,Gas
5,f7c229a1a4b765e9c4778954737d8d40dd920696,2020-12-11 14:45:00 UTC,heat,auto,711,722,720,IL,Wilmette,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3811196,09b6bff6520b4adc9198dfddbebd5461adc87aad,2020-12-02 16:00:00 UTC,auto,hold,667,760,670,IL,Western Springs,27,False,False,False,Gas
3811197,457d349b856dedba849573ed6d0b2970fb338f98,2020-12-18 19:40:00 UTC,cool,hold,623,760,760,IL,Pekin,0,False,False,False,Gas
3811198,06d1fff86a7f5bc7f22e3bfbb60c7ed3374a2811,2020-12-25 15:50:00 UTC,auto,hold,714,760,710,IL,Chicago,10,False,False,False,Gas
3811199,a0bd4ef76fd923d8053204d7c0125c5cb9132aea,2020-12-14 18:25:00 UTC,auto,hold,710,760,710,IL,Cary,0,False,False,False,Gas


In [181]:
# Tag every row with its year and month; both are used as group keys in the aggregation
dec_2020["Year"], dec_2020["Month"] = "2020", "dec"

In [182]:
# Label the three temperature columns as averages ahead of the groupby/mean step
dec_2020 = dec_2020.rename(
    columns={
        measure: f"{measure}_ave"
        for measure in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [183]:
# Average every numeric column per thermostat / month / year / HVAC mode / calendar event / city.
# numeric_only=True keeps the text columns that are not group keys (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on them instead of
# silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the December 2020 averages to disk.
# index=True keeps the group keys, which live in the index after groupby().
dec_2020_ave.to_csv(
    path_or_buf="data/day/IL/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from the month folder
2. Export them as one combined CSV for the month

In [185]:
# List the per-year December CSVs. sorted() because os.listdir() returns names in arbitrary
# order, which would make the row order of the combined file depend on the filesystem.
files = sorted(f for f in os.listdir("data/day/IL/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing the frame inside a loop re-copies all
# previously accumulated rows on each pass.
month_frames = [pd.read_csv("data/day/IL/dec/" + file) for file in files]

# pd.concat() raises ValueError on an empty list, so keep the empty-frame result in that case.
IL_dec = pd.concat(month_frames) if month_frames else pd.DataFrame()

IL_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,dec,2017,auto,hold,Chicago,700.243902,770.000000,700.109756,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,dec,2017,heat,auto,Chicago,697.666667,700.000000,700.000000,5.0,False,False,False
2,002ba06a2fbc44349736a758c25eb6755d270f15,dec,2017,heat,auto,Niles,731.167464,733.753589,731.540670,60.0,False,False,False
3,002ba06a2fbc44349736a758c25eb6755d270f15,dec,2017,heat,hold,Niles,732.558418,733.745891,728.924922,60.0,False,False,False
4,003564d98607c25fc8a655b397103a9f54c99bb6,dec,2017,heat,auto,Decatur,645.666667,720.000000,713.455556,10.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5351,ff871c58b8da73a6dcaab352b09eccd97c74392f,dec,2020,heat,hold,Carol Stream,731.603261,732.864130,732.864130,0.0,False,False,False
5352,ff9c4b83c127c974334826057c0eca1ad0bf5448,dec,2020,heat,auto,Hanover Park,712.993892,727.410995,686.485166,35.0,False,False,False
5353,ff9c4b83c127c974334826057c0eca1ad0bf5448,dec,2020,heat,hold,Hanover Park,691.411215,695.535826,693.323988,35.0,False,False,False
5354,ffb8d298fb8a6ec102607d24292aa1212022b338,dec,2020,heat,auto,McHenry,680.556575,684.391437,684.336391,20.0,False,False,True


In [187]:
# Save the combined December frame; index=False leaves the per-file row numbers out of the CSV.
IL_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IL/IL_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state folder
2. Export them as one combined CSV for the state

In [188]:
# List the combined month CSVs for IL. sorted() because os.listdir() returns names in arbitrary
# order, which would make the row order of the state-level file depend on the filesystem.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/IL/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every month file first and concatenate once: growing the frame inside a loop re-copies
# all previously accumulated rows on each pass.
state_frames = [pd.read_csv("Scraper_Output/State_Month_Day/IL/" + file) for file in files]

# pd.concat() raises ValueError on an empty list, so keep the empty-frame result in that case.
IL_all = pd.concat(state_frames) if state_frames else pd.DataFrame()

IL_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,auto,auto,Chicago,730.480000,739.880000,671.213333,5.0,False,False,False
1,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,auto,hold,Chicago,738.208333,753.333333,670.000000,5.0,False,False,False
2,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,cool,auto,Chicago,736.707547,747.169811,740.000000,5.0,False,False,False
3,0010ea447dc3807b33daad2477d1cc02a71571fa,aug,2017,cool,hold,Chicago,737.458333,740.000000,740.000000,5.0,False,False,False
4,002ba06a2fbc44349736a758c25eb6755d270f15,aug,2017,cool,auto,Niles,758.636364,760.466403,740.944664,60.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
25424,ff871c58b8da73a6dcaab352b09eccd97c74392f,jun,2021,auto,hold,Carol Stream,751.200000,772.148148,711.081481,0.0,False,False,False
25425,ff9c4b83c127c974334826057c0eca1ad0bf5448,jun,2021,cool,hold,Hanover Park,740.541491,741.421941,741.514768,35.0,False,False,False
25426,ffb8d298fb8a6ec102607d24292aa1212022b338,jun,2021,cool,hold,McHenry,703.812749,698.549801,698.183267,20.0,False,False,True
25427,fffad25219fec06f6ad101ceda18ea4e00693b2c,jun,2021,auto,hold,Wilmette,692.341463,760.000000,670.000000,110.0,True,False,False


In [190]:
# Save the state-level frame one directory above the month files it was built from;
# index=False leaves the per-file row numbers out of the CSV.
IL_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IL_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
frames_to_check = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# Dicts preserve insertion order, so the report lines come out in the order listed above.
for label, frame in frames_to_check.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['IL']
Unique jan_2018: ['IL']
Unique jan_2019: ['IL']
Unique jan_2020: ['IL']
Unique jan_2021: ['IL']
Unique feb_2017: ['IL']
Unique feb_2018: ['IL']
Unique feb_2019: ['IL']
Unique feb_2020: ['IL']
Unique feb_2021: ['IL']
Unique jun_2017: ['IL']
Unique jun_2018: ['IL']
Unique jun_2019: ['IL']
Unique jun_2020: ['IL']
Unique jun_2021: ['IL']
Unique jul_2017: ['IL']
Unique jul_2018: ['IL']
Unique jul_2019: ['IL']
Unique jul_2020: ['IL']
Unique jul_2021: ['IL']
Unique aug_2017: ['IL']
Unique aug_2018: ['IL']
Unique aug_2019: ['IL']
Unique aug_2020: ['IL']
Unique dec_2017: ['IL']
Unique dec_2018: ['IL']
Unique dec_2019: ['IL']
Unique dec_2020: ['IL']
