# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 "day" extract for IA into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2017-jan-day-IA.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2017.drop(jan_2017[outlier_rows].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9cdf07bf05c68999fed6b3c4735c869da3f96167,2017-01-01 12:55:00 UTC,heat,auto,692,806,661,IA,Olin,100,False,False,False,Gas
1,618a1cd58726e0ba7ae41f37ed080b89db721ddd,2017-01-15 13:55:00 UTC,heat,hold,705,711,698,IA,Polk City,5,False,False,False,Gas
2,e2cc939214361dc28cd1e2baeb1aa4fbc6aafaab,2017-01-15 19:40:00 UTC,heat,hold,668,713,630,IA,Hudson,55,False,False,False,Gas
3,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-01-07 13:35:00 UTC,heat,auto,655,686,686,IA,Marshalltown,0,False,False,False,Gas
4,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-01-17 14:35:00 UTC,heat,auto,672,684,684,IA,Marshalltown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74113,f19e2d05f34852989558aabaa00ffd2be2201bb5,2017-01-10 15:20:00 UTC,auto,hold,696,760,710,IA,West Des Moines,60,False,False,False,Gas
74114,f19e2d05f34852989558aabaa00ffd2be2201bb5,2017-01-05 17:15:00 UTC,auto,hold,704,760,710,IA,West Des Moines,60,False,False,False,Gas
74115,f19e2d05f34852989558aabaa00ffd2be2201bb5,2017-01-10 18:25:00 UTC,auto,hold,708,760,710,IA,West Des Moines,60,False,False,False,Gas
74116,f19e2d05f34852989558aabaa00ffd2be2201bb5,2017-01-10 16:10:00 UTC,auto,hold,704,760,710,IA,West Des Moines,60,False,False,False,Gas


In [4]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
jan_2017_ave = (
    jan_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,Jan,2017,heat,auto,Waverly,697.352022,706.525735,706.525735,10.0,False,False,False
0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,Jan,2017,heat,hold,Waverly,698.974736,708.679788,708.679788,10.0,False,False,False
0219b339a36deb299cb64ff76ba40ac7563cf3af,Jan,2017,auto,hold,Marion,666.954545,770.000000,669.909091,15.0,False,False,False
04748c1d6262989d882b588289f9ea9f2ea31f34,Jan,2017,heat,auto,Council Bluffs,683.478261,689.826087,686.304348,60.0,False,False,False
04748c1d6262989d882b588289f9ea9f2ea31f34,Jan,2017,heat,hold,Council Bluffs,654.029083,661.953020,656.261745,60.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fb8938c98ae20844354501d57ec0090a7f0979c8,Jan,2017,heat,auto,Merrill,687.383562,727.397260,727.082192,10.0,False,False,True
fb8938c98ae20844354501d57ec0090a7f0979c8,Jan,2017,heat,hold,Merrill,686.116564,688.668712,686.404908,10.0,False,False,True
fec7ad1565dc1d0150265f38966867f7c57496e8,Jan,2017,heat,auto,Urbandale,688.832013,689.947702,689.889065,10.0,False,False,False
fec7ad1565dc1d0150265f38966867f7c57496e8,Jan,2017,heat,hold,Urbandale,687.269895,689.580058,689.491371,10.0,False,False,False


In [7]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/jan").mkdir(parents=True, exist_ok=True)
jan_2017_ave.to_csv("data/day/IA/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the January 2018 "day" extract for IA into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2018-jan-day-IA.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2018.drop(jan_2018[outlier_rows].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,26b74f9c7029445f7320f9c318d952fe98ccb17f,2018-01-02 15:05:00 UTC,heat,hold,664,680,680,IA,Iowa City,27,False,False,False,Gas
1,05e484e2c3f0c714409b8a5c7a04cbbab3d6b4b6,2018-01-15 17:55:00 UTC,heat,auto,672,680,680,IA,Iowa City,0,False,False,False,Gas
2,26b74f9c7029445f7320f9c318d952fe98ccb17f,2018-01-02 14:35:00 UTC,heat,hold,686,680,680,IA,Iowa City,27,False,False,False,Gas
3,26b74f9c7029445f7320f9c318d952fe98ccb17f,2018-01-23 17:55:00 UTC,heat,auto,631,680,630,IA,Iowa City,27,False,False,False,Gas
4,26b74f9c7029445f7320f9c318d952fe98ccb17f,2018-01-22 18:45:00 UTC,heat,hold,664,680,630,IA,Iowa City,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185074,d98988d852ac3404849b51f3c49df701ccad3115,2018-01-17 16:00:00 UTC,heat,hold,714,720,720,IA,Clive,27,False,False,False,Gas
185075,d98988d852ac3404849b51f3c49df701ccad3115,2018-01-02 19:45:00 UTC,heat,hold,717,720,720,IA,Clive,27,False,False,False,Gas
185076,d98988d852ac3404849b51f3c49df701ccad3115,2018-01-03 13:20:00 UTC,heat,hold,718,720,720,IA,Clive,27,False,False,False,Gas
185077,d98988d852ac3404849b51f3c49df701ccad3115,2018-01-22 19:40:00 UTC,heat,hold,718,720,720,IA,Clive,27,False,False,False,Gas


In [10]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
jan_2018_ave = (
    jan_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [13]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/jan").mkdir(parents=True, exist_ok=True)
jan_2018_ave.to_csv("data/day/IA/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the January 2019 "day" extract for IA into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2019-jan-day-IA.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2019.drop(jan_2019[outlier_rows].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b9e2ef215771757d91ab7840842181cba2025493,2019-01-05 12:35:00 UTC,heat,hold,682,685,685,IA,Le Claire,0,False,False,False,Gas
1,2a85265c90ed94623c27c031d21b8288f1d5553b,2019-01-09 14:45:00 UTC,heat,hold,666,669,669,IA,Carlisle,50,True,False,False,Gas
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2019-01-07 13:40:00 UTC,heat,hold,667,663,663,IA,Oakland,40,True,False,True,Electric
3,3f968fefc702e3f14565b0d49ef25f5abed49826,2019-01-31 15:05:00 UTC,auto,auto,727,840,730,IA,Iowa Falls,65,False,False,False,Gas
4,9cdf07bf05c68999fed6b3c4735c869da3f96167,2019-01-25 11:00:00 UTC,heat,hold,694,771,699,IA,Olin,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
282443,93a0a18c24e8d84735c5c98072cfe010d6531c79,2019-01-27 19:20:00 UTC,auto,auto,660,760,650,IA,West Des Moines,0,False,False,False,Gas
282444,93a0a18c24e8d84735c5c98072cfe010d6531c79,2019-01-05 18:30:00 UTC,auto,hold,696,760,650,IA,West Des Moines,0,False,False,False,Gas
282445,93a0a18c24e8d84735c5c98072cfe010d6531c79,2019-01-27 16:25:00 UTC,auto,auto,643,760,650,IA,West Des Moines,0,False,False,False,Gas
282446,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2019-01-05 15:40:00 UTC,auto,auto,689,760,700,IA,West Des Moines,45,False,False,False,Gas


In [16]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
jan_2019_ave = (
    jan_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [19]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/jan").mkdir(parents=True, exist_ok=True)
jan_2019_ave.to_csv("data/day/IA/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the January 2020 "day" extract for IA into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2020-jan-day-IA.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2020.drop(jan_2020[outlier_rows].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2020-01-17 07:20:00 UTC,heat,hold,664,655,655,IA,Oakland,40,True,False,True,Electric
1,3e804b7c50c47f1d1577f4359e15a3b510b878ae,2020-01-24 19:20:00 UTC,heat,auto,639,640,640,IA,Marshalltown,0,True,False,False,Gas
2,b5b653bbc302fbf165ceb3506bce80292651c749,2020-01-20 11:30:00 UTC,heat,hold,702,703,703,IA,Dyersville,50,False,False,False,Gas
3,b5b653bbc302fbf165ceb3506bce80292651c749,2020-01-14 12:55:00 UTC,heat,hold,674,683,683,IA,Dyersville,50,False,False,False,Gas
4,b5b653bbc302fbf165ceb3506bce80292651c749,2020-01-22 12:50:00 UTC,heat,hold,663,673,673,IA,Dyersville,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349796,1bc2af7ffc3a1003d6cd351658e08f42be4879f1,2020-01-23 19:30:00 UTC,heat,auto,659,760,650,IA,Council Bluffs,15,True,False,False,Gas
349797,8c945733d3c9bf887b7428e38a394ca325b5703e,2020-01-31 14:45:00 UTC,auto,auto,708,760,710,IA,West Des Moines,5,False,False,False,Gas
349798,8c945733d3c9bf887b7428e38a394ca325b5703e,2020-01-31 15:10:00 UTC,auto,auto,704,760,710,IA,West Des Moines,5,False,False,False,Gas
349799,8c945733d3c9bf887b7428e38a394ca325b5703e,2020-01-31 14:35:00 UTC,auto,auto,710,760,710,IA,West Des Moines,5,False,False,False,Gas


In [22]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
jan_2020_ave = (
    jan_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [25]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/jan").mkdir(parents=True, exist_ok=True)
jan_2020_ave.to_csv("data/day/IA/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the January 2021 "day" extract for IA into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2021-jan-day-IA.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jan_2021.drop(jan_2021[outlier_rows].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,f3ad4dff0220c79819a8b985c9b92e67ffbf3c93,2021-01-12 12:55:00 UTC,heat,hold,745,749,749,IA,Minden,19,False,False,False,Gas
2,115abc163f2cee34744f73c4ef4e706404e8d6ca,2021-01-03 16:25:00 UTC,heat,hold,711,717,717,IA,Holland,120,False,False,False,Gas
3,387cc17b3a7631fab770fc7513e8ed49c51de085,2021-01-26 19:55:00 UTC,auto,hold,693,805,685,IA,Sumner,105,False,False,False,Gas
4,55060d4461379c6a0dd3503944c975a5282575cc,2021-01-03 19:25:00 UTC,auto,hold,666,688,668,IA,Blue Grass,9,True,False,False,Gas
5,55060d4461379c6a0dd3503944c975a5282575cc,2021-01-04 18:45:00 UTC,auto,hold,706,718,698,IA,Blue Grass,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205221,9752aa08304d8b3bfce5b55b9a19ea5f3888ab7a,2021-01-03 15:10:00 UTC,auto,hold,706,760,710,IA,Sioux City,10,False,False,False,Gas
205222,9752aa08304d8b3bfce5b55b9a19ea5f3888ab7a,2021-01-12 12:45:00 UTC,auto,hold,702,760,710,IA,Sioux City,10,False,False,False,Gas
205223,9752aa08304d8b3bfce5b55b9a19ea5f3888ab7a,2021-01-14 12:35:00 UTC,auto,hold,704,760,710,IA,Sioux City,10,False,False,False,Gas
205224,9752aa08304d8b3bfce5b55b9a19ea5f3888ab7a,2021-01-12 13:05:00 UTC,auto,hold,709,760,710,IA,Sioux City,10,False,False,False,Gas


In [28]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
jan_2021_ave = (
    jan_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [31]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/jan").mkdir(parents=True, exist_ok=True)
jan_2021_ave.to_csv("data/day/IA/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the CSV files in the January output folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/IA/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file, then concatenate once. Calling pd.concat inside the
# loop re-copies all previously loaded rows on every iteration and starts from
# an empty DataFrame, which recent pandas versions warn about.
frames = []
for file in files:
    df = pd.read_csv("data/day/IA/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list; keep the previous result
# (an empty DataFrame) when the folder holds no CSV files.
IA_jan = pd.concat(frames) if frames else pd.DataFrame()

IA_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,Jan,2017,heat,auto,Waverly,697.352022,706.525735,706.525735,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,Jan,2017,heat,hold,Waverly,698.974736,708.679788,708.679788,10.0,False,False,False
2,0219b339a36deb299cb64ff76ba40ac7563cf3af,Jan,2017,auto,hold,Marion,666.954545,770.000000,669.909091,15.0,False,False,False
3,04748c1d6262989d882b588289f9ea9f2ea31f34,Jan,2017,heat,auto,Council Bluffs,683.478261,689.826087,686.304348,60.0,False,False,False
4,04748c1d6262989d882b588289f9ea9f2ea31f34,Jan,2017,heat,hold,Council Bluffs,654.029083,661.953020,656.261745,60.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
281,fb41a0af656a964b21c262055f90f9f6bab23319,Jan,2021,auto,hold,Iowa City,708.936771,740.000000,710.000000,10.0,False,False,False
282,fb8938c98ae20844354501d57ec0090a7f0979c8,Jan,2021,heat,hold,Merrill,689.994652,697.379679,697.379679,10.0,False,False,True
283,fd3151094f47b028063bbc5319e34de07f7bb894,Jan,2021,auto,hold,Clarinda,694.926547,751.452285,697.408329,117.0,False,False,False
284,fdc4237ba28b6852387dd87acd303bdb70d5a981,Jan,2021,heat,hold,Johnston,678.626506,680.000000,680.000000,25.0,False,False,False


In [34]:
# Write the combined January frame; index=False omits the row index, which
# repeats per source file. Create the output folder first so the cell also
# runs on a fresh checkout.
Path("Scraper_Output/State_Month_Day/IA").mkdir(parents=True, exist_ok=True)
IA_jan.to_csv("Scraper_Output/State_Month_Day/IA/IA_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 "day" extract for IA into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2017-feb-day-IA.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2017.drop(feb_2017[outlier_rows].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-02-04 17:25:00 UTC,heat,hold,654,697,697,IA,Marshalltown,0,False,False,False,Gas
5,60908ee4cb080fe8eab56bef806f96f09237eb2d,2017-02-16 18:00:00 UTC,heat,auto,687,659,659,IA,Perry,20,True,False,False,Gas
16,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-02-04 18:35:00 UTC,heat,hold,695,697,697,IA,Marshalltown,0,False,False,False,Gas
17,618a1cd58726e0ba7ae41f37ed080b89db721ddd,2017-02-15 12:25:00 UTC,heat,hold,686,721,690,IA,Polk City,5,False,False,False,Gas
18,9cdf07bf05c68999fed6b3c4735c869da3f96167,2017-02-06 11:05:00 UTC,heat,auto,661,679,679,IA,Olin,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63050,09457d11bf174c8689162e31090972cdc42638e0,2017-02-06 12:10:00 UTC,heat,auto,717,760,720,IA,Council Bluffs,15,False,False,False,Gas
63051,09457d11bf174c8689162e31090972cdc42638e0,2017-02-06 11:55:00 UTC,heat,auto,722,760,720,IA,Council Bluffs,15,False,False,False,Gas
63052,09457d11bf174c8689162e31090972cdc42638e0,2017-02-06 12:55:00 UTC,heat,auto,724,760,720,IA,Council Bluffs,15,False,False,False,Gas
63053,09457d11bf174c8689162e31090972cdc42638e0,2017-02-06 11:35:00 UTC,heat,auto,715,760,720,IA,Council Bluffs,15,False,False,False,Gas


In [37]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
# NOTE(review): the January cells use a capitalised "Jan" while this month is
# lower-case "feb" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
feb_2017_ave = (
    feb_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [40]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/feb").mkdir(parents=True, exist_ok=True)
feb_2017_ave.to_csv("data/day/IA/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the February 2018 "day" extract for IA into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2018-feb-day-IA.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2018.drop(feb_2018[outlier_rows].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ea86d78c6516627457a46dd2f40350ffbf02904f,2018-02-05 18:45:00 UTC,auto,hold,731,767,717,IA,Calamus,10,True,False,False,Gas
1,a946396c7089e0bc0d1e7a47d9d5916508a78a91,2018-02-01 13:55:00 UTC,heat,hold,672,685,685,IA,Hiawatha,0,False,False,False,Gas
2,ea86d78c6516627457a46dd2f40350ffbf02904f,2018-02-21 07:45:00 UTC,cool,hold,733,744,744,IA,Calamus,10,True,False,False,Gas
3,ea86d78c6516627457a46dd2f40350ffbf02904f,2018-02-12 08:15:00 UTC,auto,auto,701,755,705,IA,Calamus,10,True,False,False,Gas
4,ea86d78c6516627457a46dd2f40350ffbf02904f,2018-02-05 19:55:00 UTC,auto,hold,732,767,717,IA,Calamus,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
168464,340eed7d6fe24222b37f77710f5a3f3b5cb34707,2018-02-01 17:20:00 UTC,auto,auto,702,760,710,IA,Council Bluffs,117,False,False,False,Gas
168465,340eed7d6fe24222b37f77710f5a3f3b5cb34707,2018-02-01 16:15:00 UTC,auto,auto,706,760,710,IA,Council Bluffs,117,False,False,False,Gas
168466,340eed7d6fe24222b37f77710f5a3f3b5cb34707,2018-02-01 17:25:00 UTC,auto,auto,708,760,710,IA,Council Bluffs,117,False,False,False,Gas
168467,93a0a18c24e8d84735c5c98072cfe010d6531c79,2018-02-25 15:25:00 UTC,auto,hold,651,760,650,IA,West Des Moines,0,False,False,False,Gas


In [43]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
# NOTE(review): the January cells use a capitalised "Jan" while this month is
# lower-case "feb" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
feb_2018_ave = (
    feb_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [46]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/feb").mkdir(parents=True, exist_ok=True)
feb_2018_ave.to_csv("data/day/IA/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the February 2019 "day" extract for IA into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2019-feb-day-IA.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2019.drop(feb_2019[outlier_rows].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b17e6d5535d0fe4c02e6e7a3ff609069f344ea09,2019-02-17 17:15:00 UTC,heat,auto,692,801,690,IA,Irwin,90,False,False,False,Gas
1,b17e6d5535d0fe4c02e6e7a3ff609069f344ea09,2019-02-08 15:30:00 UTC,heat,auto,684,801,690,IA,Irwin,90,False,False,False,Gas
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2019-02-14 18:50:00 UTC,heat,hold,663,668,668,IA,Oakland,40,True,False,True,Electric
3,b17e6d5535d0fe4c02e6e7a3ff609069f344ea09,2019-02-12 10:40:00 UTC,heat,auto,686,801,690,IA,Irwin,90,False,False,False,Gas
4,be340c3e85d37c9be71d1dbb6e65cccfb806e417,2019-02-13 12:10:00 UTC,heat,hold,718,712,712,IA,Danville,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218199,820aaf5cb08a64473ecc63617828766be91d80bf,2019-02-05 12:20:00 UTC,auto,hold,699,760,700,IA,West Des Moines,50,False,False,False,Gas
218200,820aaf5cb08a64473ecc63617828766be91d80bf,2019-02-02 18:25:00 UTC,auto,hold,693,760,700,IA,West Des Moines,50,False,False,False,Gas
218201,8c945733d3c9bf887b7428e38a394ca325b5703e,2019-02-08 18:15:00 UTC,auto,hold,687,760,690,IA,West Des Moines,5,False,False,False,Gas
218202,820aaf5cb08a64473ecc63617828766be91d80bf,2019-02-02 14:40:00 UTC,auto,hold,695,760,700,IA,West Des Moines,50,False,False,False,Gas


In [49]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
# NOTE(review): the January cells use a capitalised "Jan" while this month is
# lower-case "feb" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
feb_2019_ave = (
    feb_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [52]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/feb").mkdir(parents=True, exist_ok=True)
feb_2019_ave.to_csv("data/day/IA/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the February 2020 "day" extract for IA into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2020-feb-day-IA.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2020.drop(feb_2020[outlier_rows].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3e804b7c50c47f1d1577f4359e15a3b510b878ae,2020-02-01 19:15:00 UTC,heat,hold,638,640,640,IA,Marshalltown,0,True,False,False,Gas
1,ef000107b4aea93eda4a8ad7052a569124c59486,2020-02-18 19:50:00 UTC,heat,hold,656,640,640,IA,Boone,5,False,False,False,Gas
2,d36d923dd7afafed7e9b818f553b9f3515b807d2,2020-02-08 19:00:00 UTC,auto,auto,727,810,710,IA,Marshalltown,0,False,False,False,Gas
3,db3615fbc0e098d59cffc392a0120c88f521f47c,2020-02-03 18:35:00 UTC,heat,auto,707,718,690,IA,Marshalltown,0,False,False,False,Gas
4,3e804b7c50c47f1d1577f4359e15a3b510b878ae,2020-02-01 16:05:00 UTC,heat,hold,636,640,640,IA,Marshalltown,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
323282,d8745177d49c76b530d5007b49ccb8d9012fc7f7,2020-02-01 16:00:00 UTC,heat,auto,708,760,690,IA,West Des Moines,5,True,False,True,Electric
323283,d8745177d49c76b530d5007b49ccb8d9012fc7f7,2020-02-01 19:35:00 UTC,heat,auto,699,760,690,IA,West Des Moines,5,True,False,True,Electric
323284,d8745177d49c76b530d5007b49ccb8d9012fc7f7,2020-02-01 17:50:00 UTC,heat,auto,701,760,690,IA,West Des Moines,5,True,False,True,Electric
323285,d8745177d49c76b530d5007b49ccb8d9012fc7f7,2020-02-01 15:55:00 UTC,heat,auto,709,760,690,IA,West Des Moines,5,True,False,True,Electric


In [55]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
# NOTE(review): the January cells use a capitalised "Jan" while this month is
# lower-case "feb" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
feb_2020_ave = (
    feb_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [58]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/feb").mkdir(parents=True, exist_ok=True)
feb_2020_ave.to_csv("data/day/IA/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the February 2021 "day" extract for IA into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2021-feb-day-IA.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat)
# is >= 850 or <= 600. Rows with a missing setpoint compare False and stay.
setpoints = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
feb_2021.drop(feb_2021[outlier_rows].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2021-02-28 12:15:00 UTC,auxHeatOnly,hold,656,662,662,IA,Oakland,40,True,False,True,Electric
2,ea86d78c6516627457a46dd2f40350ffbf02904f,2021-02-12 17:20:00 UTC,heat,hold,726,731,731,IA,Calamus,10,True,False,False,Gas
3,ea86d78c6516627457a46dd2f40350ffbf02904f,2021-02-15 16:15:00 UTC,heat,hold,728,731,731,IA,Calamus,10,True,False,False,Gas
4,fb8938c98ae20844354501d57ec0090a7f0979c8,2021-02-11 16:55:00 UTC,heat,hold,674,675,675,IA,Merrill,10,False,False,True,Electric
5,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2021-02-27 16:10:00 UTC,auxHeatOnly,hold,661,665,665,IA,Oakland,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187276,b9f68c22df2ccec4f7c9a8cd4db33d5b019de21f,2021-02-28 19:40:00 UTC,auto,hold,688,760,690,IA,North Liberty,10,False,False,False,Gas
187277,b9f68c22df2ccec4f7c9a8cd4db33d5b019de21f,2021-02-26 15:15:00 UTC,auto,hold,684,760,690,IA,North Liberty,10,False,False,False,Gas
187278,b9f68c22df2ccec4f7c9a8cd4db33d5b019de21f,2021-02-28 16:30:00 UTC,auto,hold,685,760,690,IA,North Liberty,10,False,False,False,Gas
187279,b9f68c22df2ccec4f7c9a8cd4db33d5b019de21f,2021-02-27 15:05:00 UTC,auto,hold,687,760,690,IA,North Liberty,10,False,False,False,Gas


In [61]:
# Stamp every row with its year and month (both stored as strings) so the
# labels can serve as group keys.
# NOTE(review): the January cells use a capitalised "Jan" while this month is
# lower-case "feb" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise
# mean, so the aggregated output is labelled as averages.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Mean of every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True is explicit because pandas >= 2.0 raises TypeError on the
# text columns (e.g. date_time, ProvinceState) instead of silently skipping them.
feb_2021_ave = (
    feb_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [64]:
# Export CSV file
# Create the target folder first so the cell also runs on a fresh checkout.
# index=True writes the group keys (held in the index) as leading columns.
Path("data/day/IA/feb").mkdir(parents=True, exist_ok=True)
feb_2021_ave.to_csv("data/day/IA/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the CSV files in the February output folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/IA/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file, then concatenate once. Calling pd.concat inside the
# loop re-copies all previously loaded rows on every iteration and starts from
# an empty DataFrame, which recent pandas versions warn about.
frames = []
for file in files:
    df = pd.read_csv("data/day/IA/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list; keep the previous result
# (an empty DataFrame) when the folder holds no CSV files.
IA_feb = pd.concat(frames) if frames else pd.DataFrame()

IA_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,feb,2017,heat,auto,Waverly,702.193920,711.691824,711.131027,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,feb,2017,heat,hold,Waverly,702.865643,712.233525,712.233525,10.0,False,False,False
2,0219b339a36deb299cb64ff76ba40ac7563cf3af,feb,2017,auto,auto,Marion,656.500000,770.000000,660.000000,15.0,False,False,False
3,0219b339a36deb299cb64ff76ba40ac7563cf3af,feb,2017,auto,hold,Marion,652.939024,770.000000,639.268293,15.0,False,False,False
4,04748c1d6262989d882b588289f9ea9f2ea31f34,feb,2017,heat,hold,Council Bluffs,678.139535,680.279070,679.953488,60.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
295,fb42a67ac2b382b245d1b82048afc0fc245afcff,feb,2021,heat,hold,West Des Moines,718.756098,731.926829,731.780488,36.0,False,False,False
296,fb8938c98ae20844354501d57ec0090a7f0979c8,feb,2021,heat,hold,Merrill,673.489121,679.460400,679.422977,10.0,False,False,True
297,fd3151094f47b028063bbc5319e34de07f7bb894,feb,2021,auto,hold,Clarinda,686.300637,756.360510,688.152866,117.0,False,False,False
298,fdc4237ba28b6852387dd87acd303bdb70d5a981,feb,2021,heat,hold,Johnston,686.206897,690.000000,690.000000,25.0,False,False,False


In [67]:
# Write the combined February table. The index only holds the per-file row
# numbers from read_csv, so it is not saved.
IA_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IA/IA_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" CSV export for Iowa (IA) into a DataFrame.
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2017-jun-day-IA.csv"
)

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017[
        jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-06-08 12:00:00 UTC,cool,hold,687,699,699,IA,Marshalltown,0,False,False,False,Gas
1,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-06-06 14:05:00 UTC,cool,hold,688,699,699,IA,Marshalltown,0,False,False,False,Gas
2,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-06-28 18:30:00 UTC,cool,hold,691,689,689,IA,Marshalltown,0,False,False,False,Gas
3,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-06-09 18:35:00 UTC,cool,hold,703,699,699,IA,Marshalltown,0,False,False,False,Gas
4,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-06-06 13:35:00 UTC,cool,hold,687,699,699,IA,Marshalltown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103227,15570377c8ec7729829ad9d26b4b89b0ee77a4c2,2017-06-04 16:05:00 UTC,cool,hold,755,760,760,IA,West Des Moines,0,False,False,False,Gas
103228,3cda433b46157ff81f30ef99cc5ca29d5a40d4f6,2017-06-12 13:10:00 UTC,cool,hold,763,760,750,IA,West Des Moines,20,False,False,False,Gas
103229,3cda433b46157ff81f30ef99cc5ca29d5a40d4f6,2017-06-12 13:40:00 UTC,cool,hold,763,760,750,IA,West Des Moines,20,False,False,False,Gas
103230,3cda433b46157ff81f30ef99cc5ca29d5a40d4f6,2017-06-03 15:20:00 UTC,cool,auto,762,760,640,IA,West Des Moines,20,False,False,False,Gas


In [70]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jun_2017["Year"], jun_2017["Month"] = "2017", "jun"

In [71]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Save the June 2017 averages. The group keys live in the index, so the index
# is written out alongside the data.
jun_2017_ave.to_csv(
    path_or_buf="data/day/IA/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the June 2018 "day" CSV export for Iowa (IA) into a DataFrame.
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2018-jun-day-IA.csv"
)

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018[
        jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b0add72163cde3e6d6698700448305649cc44d66,2018-06-17 19:20:00 UTC,cool,hold,759,810,750,IA,Hawarden,50,False,False,False,Gas
1,387cc17b3a7631fab770fc7513e8ed49c51de085,2018-06-07 16:35:00 UTC,auto,hold,721,773,650,IA,Sumner,105,False,False,False,Gas
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-06-26 17:50:00 UTC,cool,hold,723,728,728,IA,Oakland,40,True,False,True,Electric
3,6a73fea06997171e07adeee7076149165a8f2171,2018-06-23 14:05:00 UTC,auto,auto,774,830,620,IA,Moville,0,True,False,True,Electric
4,b0add72163cde3e6d6698700448305649cc44d66,2018-06-23 12:40:00 UTC,cool,auto,733,840,750,IA,Hawarden,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
253565,3cda433b46157ff81f30ef99cc5ca29d5a40d4f6,2018-06-08 19:05:00 UTC,cool,auto,757,760,740,IA,West Des Moines,20,False,False,False,Gas
253566,3cda433b46157ff81f30ef99cc5ca29d5a40d4f6,2018-06-06 18:55:00 UTC,cool,auto,759,760,740,IA,West Des Moines,20,False,False,False,Gas
253567,1aaf7e53ad637b5cb0738eb355b9d9cbc860de75,2018-06-22 19:20:00 UTC,cool,hold,743,760,760,IA,West Des Moines,17,False,False,False,Gas
253568,15570377c8ec7729829ad9d26b4b89b0ee77a4c2,2018-06-17 14:10:00 UTC,cool,auto,768,760,760,IA,West Des Moines,0,False,False,False,Gas


In [76]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jun_2018["Year"], jun_2018["Month"] = "2018", "jun"

In [77]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Save the June 2018 averages. The group keys live in the index, so the index
# is written out alongside the data.
jun_2018_ave.to_csv(
    path_or_buf="data/day/IA/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the June 2019 "day" CSV export for Iowa (IA) into a DataFrame.
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2019-jun-day-IA.csv"
)

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019[
        jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a9117903d77707ab6698ac30ef7b56bfdead8bc,2019-06-16 10:00:00 UTC,cool,hold,750,810,700,IA,Shellsburg,17,False,False,False,Gas
2,bd6db4712588e6c7641b32e38005cb4a449d0528,2019-06-29 16:50:00 UTC,auto,auto,744,810,650,IA,Le Claire,0,True,False,False,Gas
6,1f9ce2164b32a4cd2427670d136fbd04b4d3eb41,2019-06-30 16:30:00 UTC,cool,hold,747,723,723,IA,Orange City,60,False,False,False,Gas
8,395dc28118cb981c3785e7b2517123ff0f127ad4,2019-06-09 14:10:00 UTC,cool,hold,662,610,610,IA,Manchester,10,True,False,False,Gas
9,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2019-06-10 12:25:00 UTC,cool,hold,667,663,663,IA,Oakland,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
365160,c3021da4802104915dbce311fdd9cad30d6e1041,2019-06-27 12:40:00 UTC,cool,hold,724,720,720,IA,Clive,30,True,False,False,Gas
365161,c3021da4802104915dbce311fdd9cad30d6e1041,2019-06-11 13:50:00 UTC,cool,hold,695,720,720,IA,Clive,30,True,False,False,Gas
365162,c3021da4802104915dbce311fdd9cad30d6e1041,2019-06-30 19:25:00 UTC,cool,auto,725,720,720,IA,Clive,30,True,False,False,Gas
365163,c3021da4802104915dbce311fdd9cad30d6e1041,2019-06-28 16:00:00 UTC,cool,hold,724,720,720,IA,Clive,30,True,False,False,Gas


In [82]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jun_2019["Year"], jun_2019["Month"] = "2019", "jun"

In [83]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Save the June 2019 averages. The group keys live in the index, so the index
# is written out alongside the data.
jun_2019_ave.to_csv(
    path_or_buf="data/day/IA/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the June 2020 "day" CSV export for Iowa (IA) into a DataFrame.
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2020-jun-day-IA.csv"
)

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020[
        jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,395dc28118cb981c3785e7b2517123ff0f127ad4,2020-06-21 14:25:00 UTC,cool,hold,680,677,677,IA,Manchester,10,True,False,False,Gas
1,ab51b7cb609f25191e89d3a6245d2cada605ead5,2020-06-28 17:20:00 UTC,cool,hold,740,723,723,IA,Marengo,55,False,False,False,Gas
2,1905a19280914630a1ba1bc0498e249f413c51da,2020-06-01 18:00:00 UTC,cool,auto,735,748,748,IA,Boone,0,False,False,False,Gas
3,722299a210bf07423b6ee8558d0f3d6c7dd77596,2020-06-05 11:45:00 UTC,cool,auto,708,694,694,IA,Woden,105,True,False,False,Gas
4,2ffe2b8c50cd1d719fd7c583ee2c580ca64078ab,2020-06-13 16:35:00 UTC,cool,hold,701,676,662,IA,Riverside,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387388,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-06-09 12:45:00 UTC,cool,hold,759,760,760,IA,West Des Moines,36,False,False,False,Gas
387389,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-06-06 14:35:00 UTC,cool,hold,759,760,760,IA,West Des Moines,36,False,False,False,Gas
387390,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-06-07 18:40:00 UTC,cool,hold,765,760,760,IA,West Des Moines,36,False,False,False,Gas
387391,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-06-29 18:40:00 UTC,cool,hold,763,760,760,IA,West Des Moines,36,False,False,False,Gas


In [88]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jun_2020["Year"], jun_2020["Month"] = "2020", "jun"

In [89]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Save the June 2020 averages. The group keys live in the index, so the index
# is written out alongside the data.
jun_2020_ave.to_csv(
    path_or_buf="data/day/IA/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the June 2021 "day" CSV export for Iowa (IA) into a DataFrame.
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2021-jun-day-IA.csv"
)

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021[
        jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ea86d78c6516627457a46dd2f40350ffbf02904f,2021-06-14 19:45:00 UTC,cool,hold,784,775,775,IA,Calamus,10,True,False,False,Gas
1,d7ef658a56f2cfa9a57606482271f4221fba02c8,2021-06-18 12:50:00 UTC,auto,hold,741,736,686,IA,Mount Vernon,25,True,False,False,Gas
2,ea86d78c6516627457a46dd2f40350ffbf02904f,2021-06-01 16:25:00 UTC,auto,hold,733,805,715,IA,Calamus,10,True,False,False,Gas
3,d7ef658a56f2cfa9a57606482271f4221fba02c8,2021-06-20 11:25:00 UTC,auto,hold,733,736,686,IA,Mount Vernon,25,True,False,False,Gas
4,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2021-06-19 18:40:00 UTC,cool,hold,706,719,719,IA,Oakland,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237827,4ea1992e70540e1aafd0111621d79a295c8a08b0,2021-06-22 18:55:00 UTC,cool,hold,729,760,760,IA,West Des Moines,10,False,False,False,Gas
237828,4469fc71de27689530494c612bca61b80577a3b7,2021-06-09 19:50:00 UTC,auto,hold,761,760,680,IA,West Des Moines,29,True,False,False,Gas
237829,b6abbaa1e5ba05dfb03ce32f653a5daf324fe7b9,2021-06-13 17:15:00 UTC,cool,hold,762,760,760,IA,West Des Moines,20,False,False,False,Gas
237830,4ea1992e70540e1aafd0111621d79a295c8a08b0,2021-06-17 18:20:00 UTC,cool,hold,719,760,760,IA,West Des Moines,10,False,False,False,Gas


In [94]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jun_2021["Year"], jun_2021["Month"] = "2021", "jun"

In [95]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Save the June 2021 averages. The group keys live in the index, so the index
# is written out alongside the data.
jun_2021_ave.to_csv(
    path_or_buf="data/day/IA/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export as combined CSV

In [98]:
# Collect the per-year CSV files for June. os.listdir returns entries in
# arbitrary order, so sort them to make the concatenation order reproducible.
files = sorted(f for f in os.listdir("data/day/IA/jun/") if f.endswith(".csv"))

In [99]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then concatenate once. Calling pd.concat inside the
# loop would re-copy every previously loaded row on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/IA/jun/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# files were found (the result the original loop produced in that case).
IA_jun = pd.concat(frames) if frames else pd.DataFrame()

IA_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,jun,2017,cool,auto,Waverly,727.405780,735.456352,727.477423,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,jun,2017,cool,hold,Waverly,726.662298,739.672321,739.582113,10.0,False,False,False
2,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,jun,2017,heat,auto,Waverly,719.372549,690.000000,690.000000,10.0,False,False,False
3,04748c1d6262989d882b588289f9ea9f2ea31f34,jun,2017,cool,hold,Council Bluffs,720.961538,720.000000,718.923077,60.0,False,False,False
4,0638155588f2185bdefa7062451edb8f490350d6,jun,2017,cool,hold,Ames,731.268817,738.709677,738.709677,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
341,fb41a0af656a964b21c262055f90f9f6bab23319,jun,2021,auto,hold,Iowa City,735.125000,740.000000,710.000000,10.0,False,False,False
342,fb8938c98ae20844354501d57ec0090a7f0979c8,jun,2021,cool,hold,Merrill,700.514286,691.600000,690.457143,10.0,False,False,True
343,fd3151094f47b028063bbc5319e34de07f7bb894,jun,2021,auto,hold,Clarinda,739.681933,743.048817,673.042778,117.0,False,False,False
344,ffb651a666a075042c233d899e286134eef0d150,jun,2021,auto,hold,Cedar Rapids,786.275862,780.000000,730.000000,0.0,False,False,False


In [100]:
# Write the combined June table. The index only holds the per-file row numbers
# from read_csv, so it is not saved.
IA_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IA/IA_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" CSV export for Iowa (IA) into a DataFrame.
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2017-jul-day-IA.csv"
)

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017[
        jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,115abc163f2cee34744f73c4ef4e706404e8d6ca,2017-07-10 11:25:00 UTC,cool,hold,721,744,605,IA,Holland,120,False,False,False,Gas
1,6a73fea06997171e07adeee7076149165a8f2171,2017-07-29 14:50:00 UTC,cool,auto,779,830,620,IA,Moville,0,True,False,True,Electric
2,e2cc939214361dc28cd1e2baeb1aa4fbc6aafaab,2017-07-21 12:25:00 UTC,cool,auto,733,775,712,IA,Hudson,55,False,False,False,Gas
3,387cc17b3a7631fab770fc7513e8ed49c51de085,2017-07-05 15:10:00 UTC,auto,hold,741,746,607,IA,Sumner,105,False,False,False,Gas
4,6a73fea06997171e07adeee7076149165a8f2171,2017-07-30 12:40:00 UTC,cool,auto,802,830,620,IA,Moville,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129306,fb42a67ac2b382b245d1b82048afc0fc245afcff,2017-07-28 17:30:00 UTC,cool,hold,762,760,760,IA,West Des Moines,36,False,False,False,Gas
129307,15570377c8ec7729829ad9d26b4b89b0ee77a4c2,2017-07-21 11:00:00 UTC,cool,auto,757,760,700,IA,West Des Moines,0,False,False,False,Gas
129308,fb42a67ac2b382b245d1b82048afc0fc245afcff,2017-07-28 16:05:00 UTC,cool,hold,763,760,760,IA,West Des Moines,36,False,False,False,Gas
129309,3cda433b46157ff81f30ef99cc5ca29d5a40d4f6,2017-07-06 12:45:00 UTC,cool,auto,764,760,640,IA,West Des Moines,20,False,False,False,Gas


In [103]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jul_2017["Year"], jul_2017["Month"] = "2017", "jul"

In [104]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Save the July 2017 averages. The group keys live in the index, so the index
# is written out alongside the data.
jul_2017_ave.to_csv(
    path_or_buf="data/day/IA/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the July 2018 "day" CSV export for Iowa (IA) into a DataFrame.
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2018-jul-day-IA.csv"
)

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018[
        jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-07-02 17:30:00 UTC,cool,hold,724,728,728,IA,Oakland,40,True,False,True,Electric
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-07-04 14:55:00 UTC,cool,hold,722,717,717,IA,Oakland,40,True,False,True,Electric
3,1905a19280914630a1ba1bc0498e249f413c51da,2018-07-21 18:50:00 UTC,cool,auto,702,716,716,IA,Boone,0,False,False,False,Gas
4,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-07-03 15:30:00 UTC,cool,hold,719,717,717,IA,Oakland,40,True,False,True,Electric
5,1905a19280914630a1ba1bc0498e249f413c51da,2018-07-10 12:50:00 UTC,cool,auto,720,723,723,IA,Boone,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258397,fb42a67ac2b382b245d1b82048afc0fc245afcff,2018-07-17 14:50:00 UTC,cool,hold,762,760,760,IA,West Des Moines,36,False,False,False,Gas
258398,fb42a67ac2b382b245d1b82048afc0fc245afcff,2018-07-17 16:45:00 UTC,cool,hold,764,760,760,IA,West Des Moines,36,False,False,False,Gas
258399,fb42a67ac2b382b245d1b82048afc0fc245afcff,2018-07-15 19:10:00 UTC,cool,hold,763,760,760,IA,West Des Moines,36,False,False,False,Gas
258400,fb42a67ac2b382b245d1b82048afc0fc245afcff,2018-07-13 12:35:00 UTC,cool,hold,762,760,760,IA,West Des Moines,36,False,False,False,Gas


In [109]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jul_2018["Year"], jul_2018["Month"] = "2018", "jul"

In [110]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Save the July 2018 averages. The group keys live in the index, so the index
# is written out alongside the data.
jul_2018_ave.to_csv(
    path_or_buf="data/day/IA/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the July 2019 "day" CSV export for Iowa (IA) into a DataFrame.
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2019-jul-day-IA.csv"
)

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019[
        jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c1ca71603f8266cc51992ff9af88152dafda31ba,2019-07-24 11:35:00 UTC,cool,auto,740,810,790,IA,Decorah,40,False,False,False,Gas
1,ade2b715a00b210de209400cc72b71eb094d9e70,2019-07-04 19:20:00 UTC,cool,hold,676,681,681,IA,Orange City,49,True,False,False,Gas
2,7a2888f9de27124a4615c24ca79c34ca35c48ca8,2019-07-15 17:55:00 UTC,cool,hold,744,759,759,IA,Grinnell,120,False,False,False,Gas
3,ade2b715a00b210de209400cc72b71eb094d9e70,2019-07-03 16:50:00 UTC,cool,hold,697,701,701,IA,Orange City,49,True,False,False,Gas
4,d36d923dd7afafed7e9b818f553b9f3515b807d2,2019-07-31 19:30:00 UTC,auto,auto,768,810,710,IA,Marshalltown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
397474,fb42a67ac2b382b245d1b82048afc0fc245afcff,2019-07-21 18:45:00 UTC,cool,hold,755,760,760,IA,West Des Moines,36,False,False,False,Gas
397475,fb42a67ac2b382b245d1b82048afc0fc245afcff,2019-07-23 11:45:00 UTC,cool,hold,742,760,760,IA,West Des Moines,36,False,False,False,Gas
397476,fb42a67ac2b382b245d1b82048afc0fc245afcff,2019-07-31 19:50:00 UTC,cool,hold,752,760,760,IA,West Des Moines,36,False,False,False,Gas
397477,fb42a67ac2b382b245d1b82048afc0fc245afcff,2019-07-21 14:00:00 UTC,cool,hold,754,760,760,IA,West Des Moines,36,False,False,False,Gas


In [115]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jul_2019["Year"], jul_2019["Month"] = "2019", "jul"

In [116]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Save the July 2019 averages. The group keys live in the index, so the index
# is written out alongside the data.
jul_2019_ave.to_csv(
    path_or_buf="data/day/IA/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the July 2020 "day" CSV export for Iowa (IA) into a DataFrame.
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2020-jul-day-IA.csv"
)

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020[
        jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,74bd56ca747cd682e3f723fcd78bf5626af1d6e8,2020-07-04 15:40:00 UTC,auto,hold,726,706,676,IA,Asbury,17,False,False,False,Gas
1,55060d4461379c6a0dd3503944c975a5282575cc,2020-07-31 19:05:00 UTC,auto,hold,707,708,688,IA,Blue Grass,9,True,False,False,Gas
2,ade2b715a00b210de209400cc72b71eb094d9e70,2020-07-29 19:15:00 UTC,cool,hold,686,701,701,IA,Orange City,49,True,False,False,Gas
3,55060d4461379c6a0dd3503944c975a5282575cc,2020-07-22 16:15:00 UTC,auto,hold,718,718,698,IA,Blue Grass,9,True,False,False,Gas
4,ea86d78c6516627457a46dd2f40350ffbf02904f,2020-07-09 14:30:00 UTC,cool,hold,782,775,775,IA,Calamus,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
407783,111557f0c206810590c3017cb3275e33f105b0c5,2020-07-21 15:50:00 UTC,cool,auto,746,760,760,IA,West Des Moines,0,True,False,False,Gas
407784,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-07-03 18:30:00 UTC,cool,hold,765,760,760,IA,West Des Moines,36,False,False,False,Gas
407785,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-07-09 19:30:00 UTC,cool,hold,763,760,760,IA,West Des Moines,36,False,False,False,Gas
407786,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-07-19 16:35:00 UTC,cool,hold,760,760,760,IA,West Des Moines,36,False,False,False,Gas


In [121]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jul_2020["Year"], jul_2020["Month"] = "2020", "jul"

In [122]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Save the July 2020 averages. The group keys live in the index, so the index
# is written out alongside the data.
jul_2020_ave.to_csv(
    path_or_buf="data/day/IA/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the July 2021 "day" CSV export for Iowa (IA) into a DataFrame.
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/IA-day/2021-jul-day-IA.csv"
)

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021[
        jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].ge(850).any(axis=1)
        | jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']].le(600).any(axis=1)
    ].index,
    inplace=True,
)

# Display the filtered frame.
jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ade2b715a00b210de209400cc72b71eb094d9e70,2021-07-19 18:30:00 UTC,cool,hold,706,721,721,IA,Orange City,49,True,False,False,Gas
1,1876018ce9951e0e7f958fb67264a635d640df80,2021-07-06 11:15:00 UTC,cool,hold,758,755,755,IA,Palo,10,False,False,False,Gas
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2021-07-14 15:20:00 UTC,cool,hold,729,729,729,IA,Oakland,40,True,False,True,Electric
3,ea86d78c6516627457a46dd2f40350ffbf02904f,2021-07-23 13:00:00 UTC,auto,hold,777,781,721,IA,Calamus,10,True,False,False,Gas
4,ade2b715a00b210de209400cc72b71eb094d9e70,2021-07-22 16:25:00 UTC,cool,hold,688,701,701,IA,Orange City,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238682,4ea1992e70540e1aafd0111621d79a295c8a08b0,2021-07-08 16:05:00 UTC,cool,hold,703,760,760,IA,West Des Moines,10,False,False,False,Gas
238683,4469fc71de27689530494c612bca61b80577a3b7,2021-07-09 17:35:00 UTC,auto,hold,763,760,680,IA,West Des Moines,29,True,False,False,Gas
238684,4ea1992e70540e1aafd0111621d79a295c8a08b0,2021-07-06 17:05:00 UTC,cool,hold,757,760,760,IA,West Des Moines,10,False,False,False,Gas
238685,4ea1992e70540e1aafd0111621d79a295c8a08b0,2021-07-09 16:45:00 UTC,cool,hold,731,760,760,IA,West Des Moines,10,False,False,False,Gas


In [127]:
# Label every row with its source year (stored as a string) and month; both
# columns are used as group-by keys when the readings are averaged.
jul_2021["Year"], jul_2021["Month"] = "2021", "jul"

In [128]:
# Suffix the three temperature columns with "_ave": once the group-by mean has
# run they hold per-group averages rather than raw readings.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises a TypeError on them instead of dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Save the July 2021 averages. The group keys live in the index, so the index
# is written out alongside the data.
jul_2021_ave.to_csv(
    path_or_buf="data/day/IA/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder for the state
2. Export as combined CSV

In [131]:
# Collect the per-year CSV files for July. os.listdir returns entries in
# arbitrary order, so sort them to make the concatenation order reproducible.
files = sorted(f for f in os.listdir("data/day/IA/jul/") if f.endswith(".csv"))

In [132]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then concatenate once. Calling pd.concat inside the
# loop would re-copy every previously loaded row on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/IA/jul/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# files were found (the result the original loop produced in that case).
IA_jul = pd.concat(frames) if frames else pd.DataFrame()

IA_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,jul,2017,cool,auto,Waverly,723.752991,720.698804,719.695285,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,jul,2017,cool,hold,Waverly,732.596561,738.698413,738.716931,10.0,False,False,False
2,04748c1d6262989d882b588289f9ea9f2ea31f34,jul,2017,cool,auto,Council Bluffs,736.447368,735.842105,640.315789,60.0,False,False,False
3,04748c1d6262989d882b588289f9ea9f2ea31f34,jul,2017,cool,hold,Council Bluffs,731.522590,730.000000,730.000000,60.0,False,False,False
4,0638155588f2185bdefa7062451edb8f490350d6,jul,2017,cool,hold,Ames,741.511905,740.000000,740.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
303,fb8938c98ae20844354501d57ec0090a7f0979c8,jul,2021,cool,hold,Merrill,688.433198,682.174089,682.024291,10.0,False,False,True
304,fd3151094f47b028063bbc5319e34de07f7bb894,jul,2021,auto,hold,Clarinda,740.602888,740.626955,671.185921,117.0,False,False,False
305,fdc4237ba28b6852387dd87acd303bdb70d5a981,jul,2021,cool,hold,Johnston,739.865169,744.213483,744.213483,25.0,False,False,False
306,ffb651a666a075042c233d899e286134eef0d150,jul,2021,cool,hold,Cedar Rapids,752.970718,752.358604,752.358604,0.0,False,False,False


In [133]:
# Write the combined month file, creating the output folder if it is missing
# (to_csv does not create directories). The index is only a per-file row counter, so it is omitted.
IA_jul_path = Path("Scraper_Output/State_Month_Day/IA/IA_jul.csv")
IA_jul_path.parent.mkdir(parents=True, exist_ok=True)
IA_jul.to_csv(IA_jul_path, header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August day readings for IA.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2017-aug-day-IA.csv")

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(index=aug_2017[is_outlier].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a73fea06997171e07adeee7076149165a8f2171,2017-08-26 17:25:00 UTC,auto,auto,788,830,620,IA,Moville,0,True,False,True,Electric
1,6a73fea06997171e07adeee7076149165a8f2171,2017-08-12 17:05:00 UTC,cool,auto,785,830,620,IA,Moville,0,True,False,True,Electric
2,6a73fea06997171e07adeee7076149165a8f2171,2017-08-05 16:00:00 UTC,cool,auto,767,830,620,IA,Moville,0,True,False,True,Electric
3,6a73fea06997171e07adeee7076149165a8f2171,2017-08-05 19:30:00 UTC,cool,auto,766,830,620,IA,Moville,0,True,False,True,Electric
4,6a73fea06997171e07adeee7076149165a8f2171,2017-08-12 16:20:00 UTC,cool,auto,782,830,620,IA,Moville,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106904,d98988d852ac3404849b51f3c49df701ccad3115,2017-08-31 13:00:00 UTC,cool,hold,694,720,720,IA,Clive,27,False,False,False,Gas
106905,f896633669af82fcff7c64f7e5265faefe6b7063,2017-08-16 19:05:00 UTC,cool,auto,724,720,680,IA,Clive,36,False,False,False,Gas
106906,f896633669af82fcff7c64f7e5265faefe6b7063,2017-08-17 16:55:00 UTC,cool,auto,722,720,680,IA,Clive,36,False,False,False,Gas
106907,f896633669af82fcff7c64f7e5265faefe6b7063,2017-08-16 13:15:00 UTC,cool,auto,721,720,680,IA,Clive,36,False,False,False,Gas


In [136]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Label the three temperature columns as averages ahead of the aggregation.
aug_2017 = aug_2017.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [138]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
aug_2017_ave = (
    aug_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [139]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
aug_2017_ave_path = Path("data/day/IA/aug/aug_2017_ave.csv")
aug_2017_ave_path.parent.mkdir(parents=True, exist_ok=True)
aug_2017_ave.to_csv(aug_2017_ave_path, header=True, index=True)

### 2018 August Day

In [140]:
# Load the 2018 August day readings for IA.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2018-aug-day-IA.csv")

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(index=aug_2018[is_outlier].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-08-21 17:40:00 UTC,cool,hold,684,687,687,IA,Oakland,40,True,False,True,Electric
1,1905a19280914630a1ba1bc0498e249f413c51da,2018-08-05 16:45:00 UTC,cool,auto,702,699,699,IA,Boone,0,False,False,False,Gas
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-08-21 16:50:00 UTC,cool,hold,684,687,687,IA,Oakland,40,True,False,True,Electric
3,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-08-03 16:30:00 UTC,cool,hold,691,687,687,IA,Oakland,40,True,False,True,Electric
4,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-08-18 12:45:00 UTC,cool,hold,695,707,707,IA,Oakland,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248529,fb42a67ac2b382b245d1b82048afc0fc245afcff,2018-08-24 18:15:00 UTC,cool,hold,741,760,760,IA,West Des Moines,36,False,False,False,Gas
248530,1aaf7e53ad637b5cb0738eb355b9d9cbc860de75,2018-08-16 15:30:00 UTC,cool,auto,763,760,760,IA,West Des Moines,17,False,False,False,Gas
248531,1aaf7e53ad637b5cb0738eb355b9d9cbc860de75,2018-08-16 12:30:00 UTC,cool,auto,760,760,760,IA,West Des Moines,17,False,False,False,Gas
248532,1aaf7e53ad637b5cb0738eb355b9d9cbc860de75,2018-08-19 13:45:00 UTC,cool,auto,759,760,760,IA,West Des Moines,17,False,False,False,Gas


In [142]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Label the three temperature columns as averages ahead of the aggregation.
aug_2018 = aug_2018.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [144]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
aug_2018_ave = (
    aug_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [145]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
aug_2018_ave_path = Path("data/day/IA/aug/aug_2018_ave.csv")
aug_2018_ave_path.parent.mkdir(parents=True, exist_ok=True)
aug_2018_ave.to_csv(aug_2018_ave_path, header=True, index=True)

### 2019 August Day

In [146]:
# Load the 2019 August day readings for IA.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2019-aug-day-IA.csv")

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(index=aug_2019[is_outlier].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2019-08-02 10:35:00 UTC,cool,hold,664,665,665,IA,Oakland,40,True,False,True,Electric
1,02d6f92c8b43e6f08da8a2b7efe50c30bab0dcc7,2019-08-02 11:45:00 UTC,cool,hold,720,810,790,IA,maquoketa,70,False,False,False,Gas
2,02d6f92c8b43e6f08da8a2b7efe50c30bab0dcc7,2019-08-11 18:55:00 UTC,cool,hold,767,810,790,IA,maquoketa,70,False,False,False,Gas
3,02d6f92c8b43e6f08da8a2b7efe50c30bab0dcc7,2019-08-15 11:50:00 UTC,cool,hold,718,810,790,IA,maquoketa,70,False,False,False,Gas
4,18b911ddf15c71764393cda00758d3a4519289be,2019-08-03 16:25:00 UTC,cool,auto,740,748,749,IA,Fairfield,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377503,fb42a67ac2b382b245d1b82048afc0fc245afcff,2019-08-18 12:40:00 UTC,cool,hold,757,760,760,IA,West Des Moines,36,False,False,False,Gas
377504,a63794e707a4eea854d8396a5cf972446d9991b0,2019-08-17 17:55:00 UTC,auto,auto,757,760,690,IA,West Des Moines,9,True,False,False,Gas
377505,fb42a67ac2b382b245d1b82048afc0fc245afcff,2019-08-08 18:45:00 UTC,cool,hold,764,760,760,IA,West Des Moines,36,False,False,False,Gas
377506,b6abbaa1e5ba05dfb03ce32f653a5daf324fe7b9,2019-08-17 17:35:00 UTC,cool,hold,760,760,760,IA,West Des Moines,20,False,False,False,Gas


In [148]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Label the three temperature columns as averages ahead of the aggregation.
aug_2019 = aug_2019.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [150]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
aug_2019_ave = (
    aug_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [151]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
aug_2019_ave_path = Path("data/day/IA/aug/aug_2019_ave.csv")
aug_2019_ave_path.parent.mkdir(parents=True, exist_ok=True)
aug_2019_ave.to_csv(aug_2019_ave_path, header=True, index=True)

### 2020 August Day

In [152]:
# Load the 2020 August day readings for IA.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2020-aug-day-IA.csv")

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(index=aug_2020[is_outlier].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1905a19280914630a1ba1bc0498e249f413c51da,2020-08-17 17:35:00 UTC,cool,auto,731,744,744,IA,Boone,0,False,False,False,Gas
1,55060d4461379c6a0dd3503944c975a5282575cc,2020-08-01 11:50:00 UTC,auto,hold,692,708,688,IA,Blue Grass,9,True,False,False,Gas
2,ea86d78c6516627457a46dd2f40350ffbf02904f,2020-08-29 15:50:00 UTC,cool,hold,804,801,790,IA,Calamus,10,True,False,False,Gas
3,ea86d78c6516627457a46dd2f40350ffbf02904f,2020-08-22 12:00:00 UTC,cool,hold,771,801,790,IA,Calamus,10,True,False,False,Gas
4,ea86d78c6516627457a46dd2f40350ffbf02904f,2020-08-13 17:50:00 UTC,cool,hold,794,781,781,IA,Calamus,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385293,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-08-09 16:10:00 UTC,cool,hold,763,760,760,IA,West Des Moines,36,False,False,False,Gas
385294,111557f0c206810590c3017cb3275e33f105b0c5,2020-08-07 15:10:00 UTC,cool,auto,742,760,690,IA,West Des Moines,0,True,False,False,Gas
385295,111557f0c206810590c3017cb3275e33f105b0c5,2020-08-07 17:25:00 UTC,cool,auto,753,760,690,IA,West Des Moines,0,True,False,False,Gas
385296,fb42a67ac2b382b245d1b82048afc0fc245afcff,2020-08-09 16:50:00 UTC,cool,hold,756,760,760,IA,West Des Moines,36,False,False,False,Gas


In [154]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Label the three temperature columns as averages ahead of the aggregation.
aug_2020 = aug_2020.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [156]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
aug_2020_ave = (
    aug_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [157]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
aug_2020_ave_path = Path("data/day/IA/aug/aug_2020_ave.csv")
aug_2020_ave_path.parent.mkdir(parents=True, exist_ok=True)
aug_2020_ave.to_csv(aug_2020_ave_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the yearly average CSV files from the month's folder
2. Export as combined CSV

In [158]:
# List the yearly average CSVs for the month. os.listdir returns names in arbitrary
# order, so sort them to keep the combined file's row order reproducible across runs.
files = sorted(f for f in os.listdir("data/day/IA/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file and stack them with a single concat. Concatenating inside the
# loop re-copied the accumulated frame on every pass and started from an empty
# DataFrame, which recent pandas versions warn about when inferring result dtypes.
frames = [pd.read_csv("data/day/IA/aug/" + file) for file in files]

# An empty folder still yields an empty frame, as before (pd.concat([]) would raise).
IA_aug = pd.concat(frames) if frames else pd.DataFrame()

IA_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,aug,2017,cool,auto,Waverly,720.893189,718.440402,720.297214,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,aug,2017,cool,hold,Waverly,724.496791,735.882353,735.879144,10.0,False,False,False
2,04748c1d6262989d882b588289f9ea9f2ea31f34,aug,2017,cool,auto,Council Bluffs,724.523810,720.809524,641.190476,60.0,False,False,False
3,04748c1d6262989d882b588289f9ea9f2ea31f34,aug,2017,cool,hold,Council Bluffs,733.857143,720.000000,720.000000,60.0,False,False,False
4,09457d11bf174c8689162e31090972cdc42638e0,aug,2017,cool,hold,Council Bluffs,741.948718,740.000000,740.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
543,fb8938c98ae20844354501d57ec0090a7f0979c8,aug,2020,cool,hold,Merrill,701.135922,688.009709,687.786408,10.0,False,False,True
544,fd3151094f47b028063bbc5319e34de07f7bb894,aug,2020,cool,auto,Clarinda,737.498845,736.352194,734.353349,117.0,False,False,False
545,fd3151094f47b028063bbc5319e34de07f7bb894,aug,2020,cool,hold,Clarinda,742.286730,741.675355,741.213270,117.0,False,False,False
546,ffb651a666a075042c233d899e286134eef0d150,aug,2020,cool,hold,Cedar Rapids,731.978142,729.682279,729.682279,0.0,False,False,False


In [160]:
# Write the combined month file, creating the output folder if it is missing
# (to_csv does not create directories). The index is only a per-file row counter, so it is omitted.
IA_aug_path = Path("Scraper_Output/State_Month_Day/IA/IA_aug.csv")
IA_aug_path.parent.mkdir(parents=True, exist_ok=True)
IA_aug.to_csv(IA_aug_path, header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December day readings for IA.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2017-dec-day-IA.csv")

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(index=dec_2017[is_outlier].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-12-03 15:25:00 UTC,heat,hold,693,697,697,IA,Marshalltown,0,False,False,False,Gas
2,6a73fea06997171e07adeee7076149165a8f2171,2017-12-23 13:00:00 UTC,auto,auto,608,830,620,IA,Moville,0,True,False,True,Electric
3,6a73fea06997171e07adeee7076149165a8f2171,2017-12-23 13:55:00 UTC,auto,auto,617,830,620,IA,Moville,0,True,False,True,Electric
4,5a24e556dd9b8f4ea9ab6efb716cd42307c346f8,2017-12-10 18:50:00 UTC,heat,hold,722,714,714,IA,Chariton,117,False,False,False,Gas
5,db3615fbc0e098d59cffc392a0120c88f521f47c,2017-12-03 19:45:00 UTC,heat,hold,695,697,697,IA,Marshalltown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162371,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2017-12-30 18:50:00 UTC,auto,auto,707,760,710,IA,West Des Moines,45,False,False,False,Gas
162372,93a0a18c24e8d84735c5c98072cfe010d6531c79,2017-12-24 15:40:00 UTC,auto,hold,682,760,680,IA,West Des Moines,0,False,False,False,Gas
162373,93a0a18c24e8d84735c5c98072cfe010d6531c79,2017-12-23 16:45:00 UTC,auto,hold,677,760,680,IA,West Des Moines,0,False,False,False,Gas
162374,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2017-12-31 13:40:00 UTC,auto,auto,709,760,710,IA,West Des Moines,45,False,False,False,Gas


In [163]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Label the three temperature columns as averages ahead of the aggregation.
dec_2017 = dec_2017.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [165]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
dec_2017_ave = (
    dec_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [166]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
dec_2017_ave_path = Path("data/day/IA/dec/dec_2017_ave.csv")
dec_2017_ave_path.parent.mkdir(parents=True, exist_ok=True)
dec_2017_ave.to_csv(dec_2017_ave_path, header=True, index=True)

### 2018 December Day

In [167]:
# Load the 2018 December day readings for IA.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2018-dec-day-IA.csv")

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(index=dec_2018[is_outlier].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be340c3e85d37c9be71d1dbb6e65cccfb806e417,2018-12-22 19:35:00 UTC,heat,hold,738,702,702,IA,Danville,0,False,False,False,Gas
1,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-12-16 14:30:00 UTC,heat,hold,667,667,667,IA,Oakland,40,True,False,True,Electric
2,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-12-30 19:15:00 UTC,heat,hold,672,676,676,IA,Oakland,40,True,False,True,Electric
3,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-12-19 15:35:00 UTC,heat,hold,672,675,675,IA,Oakland,40,True,False,True,Electric
4,5988057dfb06d76567b4a8c2f0e0cd46c6fd9f13,2018-12-16 18:10:00 UTC,heat,hold,667,667,667,IA,Oakland,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242690,820aaf5cb08a64473ecc63617828766be91d80bf,2018-12-15 17:45:00 UTC,auto,hold,700,760,700,IA,West Des Moines,50,False,False,False,Gas
242691,820aaf5cb08a64473ecc63617828766be91d80bf,2018-12-02 18:40:00 UTC,auto,hold,698,760,700,IA,West Des Moines,50,False,False,False,Gas
242692,820aaf5cb08a64473ecc63617828766be91d80bf,2018-12-13 12:20:00 UTC,auto,hold,698,760,700,IA,West Des Moines,50,False,False,False,Gas
242693,820aaf5cb08a64473ecc63617828766be91d80bf,2018-12-15 17:10:00 UTC,auto,hold,697,760,700,IA,West Des Moines,50,False,False,False,Gas


In [169]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Label the three temperature columns as averages ahead of the aggregation.
dec_2018 = dec_2018.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [171]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
dec_2018_ave = (
    dec_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [172]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
dec_2018_ave_path = Path("data/day/IA/dec/dec_2018_ave.csv")
dec_2018_ave_path.parent.mkdir(parents=True, exist_ok=True)
dec_2018_ave.to_csv(dec_2018_ave_path, header=True, index=True)

### 2019 December Day

In [173]:
# Load the 2019 December day readings for IA.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2019-dec-day-IA.csv")

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(index=dec_2019[is_outlier].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,70063feca5c0d8d1e2c4d1492551cb8ba04345d3,2019-12-09 13:05:00 UTC,heat,hold,691,699,688,IA,Pleasant Hill,20,False,False,False,Gas
1,574f8303688644873991834c232d6e6c5f6a7c80,2019-12-28 17:55:00 UTC,heat,auto,665,840,610,IA,Tiffin,5,True,False,False,Gas
2,55060d4461379c6a0dd3503944c975a5282575cc,2019-12-24 13:40:00 UTC,heat,hold,719,778,718,IA,Blue Grass,9,True,False,False,Gas
3,55060d4461379c6a0dd3503944c975a5282575cc,2019-12-21 14:35:00 UTC,heat,hold,713,778,718,IA,Blue Grass,9,True,False,False,Gas
4,55060d4461379c6a0dd3503944c975a5282575cc,2019-12-06 19:50:00 UTC,heat,hold,716,778,718,IA,Blue Grass,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
342548,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2019-12-28 18:40:00 UTC,auto,auto,704,760,710,IA,West Des Moines,45,False,False,False,Gas
342549,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2019-12-28 19:50:00 UTC,auto,auto,709,760,710,IA,West Des Moines,45,False,False,False,Gas
342550,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2019-12-28 18:05:00 UTC,auto,auto,697,760,710,IA,West Des Moines,45,False,False,False,Gas
342551,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2019-12-28 19:55:00 UTC,auto,auto,703,760,710,IA,West Des Moines,45,False,False,False,Gas


In [175]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Label the three temperature columns as averages ahead of the aggregation.
dec_2019 = dec_2019.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [177]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
dec_2019_ave = (
    dec_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [178]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
dec_2019_ave_path = Path("data/day/IA/dec/dec_2019_ave.csv")
dec_2019_ave_path.parent.mkdir(parents=True, exist_ok=True)
dec_2019_ave.to_csv(dec_2019_ave_path, header=True, index=True)

### 2020 December Day

In [179]:
# Load the 2020 December day readings for IA.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/IA-day/2020-dec-day-IA.csv")

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected setpoint (cool or heat) is >= 850 or <= 600. Rows with missing setpoints are kept.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(index=dec_2020[is_outlier].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,55060d4461379c6a0dd3503944c975a5282575cc,2020-12-25 18:30:00 UTC,auto,hold,684,688,668,IA,Blue Grass,9,True,False,False,Gas
1,3e804b7c50c47f1d1577f4359e15a3b510b878ae,2020-12-16 17:55:00 UTC,heat,auto,639,640,640,IA,Marshalltown,0,True,False,False,Gas
2,1905a19280914630a1ba1bc0498e249f413c51da,2020-12-17 14:00:00 UTC,heat,auto,679,684,684,IA,Boone,0,False,False,False,Gas
4,3e804b7c50c47f1d1577f4359e15a3b510b878ae,2020-12-09 16:45:00 UTC,heat,auto,676,640,640,IA,Marshalltown,0,True,False,False,Gas
5,1d88490fcee2a9b566b05e5ddda60e023d762515,2020-12-09 14:50:00 UTC,heat,hold,671,672,672,IA,Cherokee,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
318172,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2020-12-05 19:45:00 UTC,auto,auto,698,760,710,IA,West Des Moines,45,False,False,False,Gas
318173,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2020-12-05 13:10:00 UTC,auto,auto,706,760,710,IA,West Des Moines,45,False,False,False,Gas
318174,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2020-12-06 13:05:00 UTC,auto,auto,702,760,710,IA,West Des Moines,45,False,False,False,Gas
318175,a8a880818c48bea1a228fe52df5ad29ab280a3a0,2020-12-06 13:45:00 UTC,auto,auto,698,760,710,IA,West Des Moines,45,False,False,False,Gas


In [181]:
# Stamp every row with its period so the yearly files stay distinguishable once combined.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Label the three temperature columns as averages ahead of the aggregation.
dec_2020 = dec_2020.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [183]:
# Average the readings per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/bool columns; without it pandas >= 2.0
# raises TypeError on the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type).
dec_2020_ave = (
    dec_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [184]:
# Export CSV file
# to_csv does not create missing folders, so make sure the month folder exists first.
# The group keys live in the index, so the index is written as well.
dec_2020_ave_path = Path("data/day/IA/dec/dec_2020_ave.csv")
dec_2020_ave_path.parent.mkdir(parents=True, exist_ok=True)
dec_2020_ave.to_csv(dec_2020_ave_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the yearly average CSV files from the month's folder
2. Export as combined CSV

In [185]:
# List the yearly average CSVs for the month. os.listdir returns names in arbitrary
# order, so sort them to keep the combined file's row order reproducible across runs.
files = sorted(f for f in os.listdir("data/day/IA/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed file and stack them with a single concat. Concatenating inside the
# loop re-copied the accumulated frame on every pass and started from an empty
# DataFrame, which recent pandas versions warn about when inferring result dtypes.
frames = [pd.read_csv("data/day/IA/dec/" + file) for file in files]

# An empty folder still yields an empty frame, as before (pd.concat([]) would raise).
IA_dec = pd.concat(frames) if frames else pd.DataFrame()

IA_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,dec,2017,heat,auto,Waverly,697.080482,708.259277,708.259277,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,dec,2017,heat,hold,Waverly,704.158209,712.779104,712.779104,10.0,False,False,False
2,0219b339a36deb299cb64ff76ba40ac7563cf3af,dec,2017,heat,auto,Marion,665.406250,770.000000,670.000000,15.0,False,False,False
3,0219b339a36deb299cb64ff76ba40ac7563cf3af,dec,2017,heat,hold,Marion,635.200000,677.000000,670.000000,15.0,False,False,False
4,057c437c487a9dbc382fccddd4f043e4fc4832f7,dec,2017,heat,auto,Bondurant,710.733333,710.000000,710.000000,0.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
498,fd3151094f47b028063bbc5319e34de07f7bb894,dec,2020,auto,auto,Clarinda,690.686486,758.994595,693.075676,117.0,False,False,False
499,fd3151094f47b028063bbc5319e34de07f7bb894,dec,2020,auto,hold,Clarinda,694.157188,751.117694,696.599526,117.0,False,False,False
500,fd64ea7804b3cdde665d855dade9dbccf298664c,dec,2020,heat,hold,Ames,693.375000,700.500000,699.875000,0.0,False,False,False
501,fdc4237ba28b6852387dd87acd303bdb70d5a981,dec,2020,heat,hold,Johnston,675.539634,680.140244,677.152439,25.0,False,False,False


In [187]:
# Write the combined month file, creating the output folder if it is missing
# (to_csv does not create directories). The index is only a per-file row counter, so it is omitted.
IA_dec_path = Path("Scraper_Output/State_Month_Day/IA/IA_dec.csv")
IA_dec_path.parent.mkdir(parents=True, exist_ok=True)
IA_dec.to_csv(IA_dec_path, header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files from the state's folder
2. Export as combined CSV

In [188]:
# List the combined monthly CSVs for the state. os.listdir returns names in arbitrary
# order, so sort them to keep the combined file's row order reproducible across runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/IA/") if f.endswith(".csv"))

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each listed monthly file and stack them with a single concat. Concatenating inside
# the loop re-copied the accumulated frame on every pass and started from an empty
# DataFrame, which recent pandas versions warn about when inferring result dtypes.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/IA/" + file) for file in files]

# An empty folder still yields an empty frame, as before (pd.concat([]) would raise).
IA_all = pd.concat(frames) if frames else pd.DataFrame()

IA_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,aug,2017,cool,auto,Waverly,720.893189,718.440402,720.297214,10.0,False,False,False
1,0037b13de2c1ea596b8c97cc09d202a5e8ceeeae,aug,2017,cool,hold,Waverly,724.496791,735.882353,735.879144,10.0,False,False,False
2,04748c1d6262989d882b588289f9ea9f2ea31f34,aug,2017,cool,auto,Council Bluffs,724.523810,720.809524,641.190476,60.0,False,False,False
3,04748c1d6262989d882b588289f9ea9f2ea31f34,aug,2017,cool,hold,Council Bluffs,733.857143,720.000000,720.000000,60.0,False,False,False
4,09457d11bf174c8689162e31090972cdc42638e0,aug,2017,cool,hold,Council Bluffs,741.948718,740.000000,740.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2367,fb41a0af656a964b21c262055f90f9f6bab23319,jun,2021,auto,hold,Iowa City,735.125000,740.000000,710.000000,10.0,False,False,False
2368,fb8938c98ae20844354501d57ec0090a7f0979c8,jun,2021,cool,hold,Merrill,700.514286,691.600000,690.457143,10.0,False,False,True
2369,fd3151094f47b028063bbc5319e34de07f7bb894,jun,2021,auto,hold,Clarinda,739.681933,743.048817,673.042778,117.0,False,False,False
2370,ffb651a666a075042c233d899e286134eef0d150,jun,2021,auto,hold,Cedar Rapids,786.275862,780.000000,730.000000,0.0,False,False,False


In [190]:
# Save the all-months IA table one level above the IA/ folder that was just read.
# The header row is written by default; the repeated per-file row index is left out.
IA_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IA_all_day.csv", index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in every month/year frame.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019, "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019, "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019, "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019, "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019, "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019, "dec_2020": dec_2020,
}

for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['IA']
Unique jan_2018: ['IA']
Unique jan_2019: ['IA']
Unique jan_2020: ['IA']
Unique jan_2021: ['IA']
Unique feb_2017: ['IA']
Unique feb_2018: ['IA']
Unique feb_2019: ['IA']
Unique feb_2020: ['IA']
Unique feb_2021: ['IA']
Unique jun_2017: ['IA']
Unique jun_2018: ['IA']
Unique jun_2019: ['IA']
Unique jun_2020: ['IA']
Unique jun_2021: ['IA']
Unique jul_2017: ['IA']
Unique jul_2018: ['IA']
Unique jul_2019: ['IA']
Unique jul_2020: ['IA']
Unique jul_2021: ['IA']
Unique aug_2017: ['IA']
Unique aug_2018: ['IA']
Unique aug_2019: ['IA']
Unique aug_2020: ['IA']
Unique dec_2017: ['IA']
Unique dec_2018: ['IA']
Unique dec_2019: ['IA']
Unique dec_2020: ['IA']
