# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 thermostat readings for Indiana.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2017-jan-day-IN.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (jan_2017[setpoint] >= 850) | (jan_2017[setpoint] <= 600)
    jan_2017.drop(index=jan_2017.loc[out_of_range].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6d1a140e53b64b62531527ccae26d7b814e446fb,2017-01-30 13:05:00 UTC,heat,auto,702,710,710,IN,Fort Wayne,45,False,False,False,Gas
1,588b66a3713f196c205e3494b51823295fc2cd8c,2017-01-13 16:05:00 UTC,heat,hold,711,720,720,IN,Crown Point,5,False,False,False,Gas
2,abd40c2b49ecf23539c84b7d8d8a6f1d7fd8adfa,2017-01-07 19:25:00 UTC,heat,hold,733,760,710,IN,Eaton,5,True,False,True,Electric
3,0c702e17cf604b9107d498adaeef02ec0d83c65b,2017-01-21 11:50:00 UTC,heat,hold,694,695,695,IN,Kouts,25,False,False,False,Gas
4,f73d131b3527529d2690787322c82b9c18e14d23,2017-01-06 18:20:00 UTC,auto,hold,697,750,700,IN,Marion,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
223376,fd44300309d8a972124d7f94a7f671c75fb402c5,2017-01-07 17:50:00 UTC,heat,hold,665,710,710,IN,Muncie,0,False,False,False,Gas
223377,fd44300309d8a972124d7f94a7f671c75fb402c5,2017-01-16 14:00:00 UTC,heat,hold,679,680,680,IN,Muncie,0,False,False,False,Gas
223378,fd44300309d8a972124d7f94a7f671c75fb402c5,2017-01-11 16:25:00 UTC,heat,auto,692,710,710,IN,Muncie,0,False,False,False,Gas
223379,fd44300309d8a972124d7f94a7f671c75fb402c5,2017-01-08 11:25:00 UTC,heat,hold,679,690,690,IN,Muncie,0,False,False,False,Gas


In [4]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
jan_2017 = jan_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [6]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,auto,auto,Zionsville,695.000000,693.000000,637.000000,80.0,True,False,False
00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,auto,hold,Zionsville,688.227273,742.886364,692.886364,80.0,True,False,False
00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,cool,auto,Zionsville,652.750000,680.000000,680.000000,80.0,True,False,False
00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,heat,auto,Zionsville,700.494186,704.058140,704.058140,80.0,True,False,False
00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,heat,hold,Zionsville,679.935802,677.713580,677.540741,80.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fd44300309d8a972124d7f94a7f671c75fb402c5,Jan,2017,heat,hold,Muncie,659.355714,687.378571,664.971429,0.0,False,False,False
fd6e40ad5c0c4df2dda9c9b5c08fd28eea58763d,Jan,2017,auto,hold,Winona Lake,690.550336,744.731544,694.731544,105.0,False,False,False
fd6e40ad5c0c4df2dda9c9b5c08fd28eea58763d,Jan,2017,heat,hold,Winona Lake,696.366834,700.979899,700.979899,105.0,False,False,False
fe875d54cb0953d665c18431fdf5083f67692c31,Jan,2017,heat,auto,Noblesville,702.465438,689.732719,689.732719,0.0,False,False,False


In [7]:
# Persist the January 2017 averages; the group keys live in the index, so the index is written too.
jan_2017_ave.to_csv(path_or_buf="data/day/IN/jan/jan_2017_ave.csv", index=True, header=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 thermostat readings for Indiana.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2018-jan-day-IN.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (jan_2018[setpoint] >= 850) | (jan_2018[setpoint] <= 600)
    jan_2018.drop(index=jan_2018.loc[out_of_range].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7a7b33d0088fcfea510e5efc56783ab407983c73,2018-01-02 10:00:00 UTC,heat,hold,692,745,695,IN,Greenfield,20,True,False,False,Gas
1,3fe7a52abca0ba6732cdd7d3f57f2c46135db55a,2018-01-24 15:30:00 UTC,auto,hold,720,795,715,IN,Cicero,5,False,False,False,Gas
2,e271fa7eb616835a5b6e55eddc15455cce3059bd,2018-01-25 14:00:00 UTC,heat,hold,700,705,705,IN,Fort Wayne,20,False,False,False,Gas
3,c3cd8bf5f142e7db7c34a12b3eea9be3946644e6,2018-01-25 12:05:00 UTC,heat,hold,698,697,697,IN,Warsaw,27,False,False,False,Gas
4,3f14562c8729d637b2b92c01689858f749b8a29d,2018-01-20 13:25:00 UTC,auto,auto,693,745,695,IN,Lafayette,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653939,d98a2b83d8585cb399e7b4b9aae4fa22cef1c244,2018-01-22 09:25:00 UTC,auto,auto,713,765,715,IN,Charlestown,0,False,False,False,Gas
653940,d98a2b83d8585cb399e7b4b9aae4fa22cef1c244,2018-01-15 09:55:00 UTC,auto,auto,723,765,715,IN,Charlestown,0,False,False,False,Gas
653941,d98a2b83d8585cb399e7b4b9aae4fa22cef1c244,2018-01-21 12:10:00 UTC,auto,auto,715,765,715,IN,Charlestown,0,False,False,False,Gas
653942,6b41a3e98c8835f154f49627e9bf81f1d339a8f4,2018-01-09 18:55:00 UTC,auto,hold,709,765,715,IN,Indianapolis,40,False,False,True,Electric


In [10]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
jan_2018 = jan_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [12]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Persist the January 2018 averages; the group keys live in the index, so the index is written too.
jan_2018_ave.to_csv(path_or_buf="data/day/IN/jan/jan_2018_ave.csv", index=True, header=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 thermostat readings for Indiana.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2019-jan-day-IN.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (jan_2019[setpoint] >= 850) | (jan_2019[setpoint] <= 600)
    jan_2019.drop(index=jan_2019.loc[out_of_range].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1a17cedcee926d4e60ebea317fd40975ebe84765,2019-01-05 18:30:00 UTC,heat,hold,714,765,715,IN,Valparaiso,0,False,False,False,Gas
1,c6e347bc13b899e52433c178ca921768fefbdfe0,2019-01-19 19:10:00 UTC,auto,hold,689,735,685,IN,Indianapolis,5,False,False,False,Gas
2,345756d5638ce1939c2422b2d278d885207a4afa,2019-01-19 19:50:00 UTC,heat,hold,671,673,670,IN,Westfield,0,False,False,False,Gas
3,69c1625b494c16689260988ee40aca5653b49f93,2019-01-18 17:50:00 UTC,heat,hold,709,702,702,IN,Indianapolis,0,True,False,True,Electric
4,52a0c6c731253e8b61d668dec39e834f87a0057a,2019-01-08 17:55:00 UTC,heat,hold,686,713,713,IN,Fishers,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1132510,52ebec8b33f5b0c11b2b4f538a01f91f909b9848,2019-01-31 18:35:00 UTC,auto,auto,660,760,660,IN,Santa Claus,20,False,False,False,Gas
1132511,f9f946723c16feb32ac9ae40162fb97869d7e576,2019-01-04 16:35:00 UTC,heat,auto,708,760,710,IN,Fort Wayne,17,False,False,False,Gas
1132512,fbe9e3af2c76ad1f6a10dcd6e48d484512ccd23d,2019-01-11 11:10:00 UTC,heat,auto,715,760,740,IN,Westfield,5,False,False,False,Gas
1132513,ad8f48da6e28bcb13f89d9c1e24d3667d7f1203b,2019-01-29 16:35:00 UTC,auto,hold,696,760,700,IN,Indianapolis,27,False,False,False,Gas


In [16]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
jan_2019 = jan_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [18]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Persist the January 2019 averages; the group keys live in the index, so the index is written too.
jan_2019_ave.to_csv(path_or_buf="data/day/IN/jan/jan_2019_ave.csv", index=True, header=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 thermostat readings for Indiana.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2020-jan-day-IN.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (jan_2020[setpoint] >= 850) | (jan_2020[setpoint] <= 600)
    jan_2020.drop(index=jan_2020.loc[out_of_range].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,18a3e0998de70993963b88cce532863f1f4ec6cd,2020-01-16 14:35:00 UTC,heat,hold,715,719,719,IN,Fort Wayne,39,False,False,False,Gas
1,547130d2332efa47c7efec9291b9c1d1c2e00a7a,2020-01-20 15:15:00 UTC,heat,hold,627,602,602,IN,Indianapolis,0,False,False,False,Gas
2,7d33e54212bf05341f9f911659d3a108f39f8ce6,2020-01-11 15:10:00 UTC,heat,hold,682,685,685,IN,Zionsville,15,True,False,False,Gas
3,3cb9e8a951ddb854f3c27801e6ce71d7b320ba7b,2020-01-12 18:35:00 UTC,heat,hold,689,689,689,IN,Bloomington,10,False,False,False,Gas
4,7efe2ad8e03efca806aa6314500177082d073b98,2020-01-22 11:45:00 UTC,heat,hold,687,689,689,IN,Gas City,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1251681,e7c84f9d8b0c5d7f5381d05df5811783e3851b35,2020-01-13 17:50:00 UTC,heat,auto,746,760,750,IN,Evansville,0,True,False,True,Electric
1251682,022769b8318b36b0df4e5357105aa3929cb2bf37,2020-01-03 12:15:00 UTC,auto,auto,736,760,740,IN,West Lafayette,10,True,False,True,Electric
1251683,fd957cfed975c3c26e2e6e5c9f2f21562d62296b,2020-01-12 16:50:00 UTC,auto,auto,677,760,680,IN,Indianapolis,60,False,False,False,Gas
1251684,702457010015cf4659383a3374c4171f3e7731d1,2020-01-28 11:45:00 UTC,auto,auto,680,760,680,IN,South Bend,30,False,False,False,Gas


In [22]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
jan_2020 = jan_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [24]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Persist the January 2020 averages; the group keys live in the index, so the index is written too.
jan_2020_ave.to_csv(path_or_buf="data/day/IN/jan/jan_2020_ave.csv", index=True, header=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 thermostat readings for Indiana.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2021-jan-day-IN.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (jan_2021[setpoint] >= 850) | (jan_2021[setpoint] <= 600)
    jan_2021.drop(index=jan_2021.loc[out_of_range].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e648ed7846a674e9dfd6635bb5a3bec8b20e6c50,2021-01-19 19:20:00 UTC,heat,hold,709,716,716,IN,Pendleton,9,True,False,False,Gas
1,ba0469f10d33e15fba8056b59b3756973c555463,2021-01-17 18:50:00 UTC,heat,hold,710,711,711,IN,Fishers,5,False,False,False,Gas
2,f9f80fb05d9bc662f48285261d94b5139d76fd88,2021-01-15 13:55:00 UTC,heat,hold,732,737,737,IN,Indianapolis,90,True,False,False,Gas
3,1fafa9307f06bc5307e08236e8936a9b19a2bcb7,2021-01-12 18:00:00 UTC,auxHeatOnly,hold,617,620,620,IN,Pekin,25,True,False,True,Electric
4,e648ed7846a674e9dfd6635bb5a3bec8b20e6c50,2021-01-21 16:55:00 UTC,heat,hold,708,716,716,IN,Pendleton,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
751813,356e228eb47792c6e8ca1244dc7272011b5da3a9,2021-01-19 09:00:00 UTC,heat,hold,762,765,765,IN,Columbus,0,True,False,False,Gas
751814,468b1e32c681024fee4a0ce79715f1b068214bad,2021-01-18 18:55:00 UTC,auto,hold,696,765,695,IN,Indianapolis,50,True,False,False,Gas
751815,468b1e32c681024fee4a0ce79715f1b068214bad,2021-01-13 17:05:00 UTC,auto,hold,673,765,675,IN,Indianapolis,50,True,False,False,Gas
751816,eb8f3cce550e08715c662d7c05c358cdacd631fd,2021-01-21 16:25:00 UTC,auto,hold,703,765,695,IN,Noblesville,5,True,False,False,Gas


In [28]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
jan_2021 = jan_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [30]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Persist the January 2021 averages; the group keys live in the index, so the index is written too.
jan_2021_ave.to_csv(path_or_buf="data/day/IN/jan/jan_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the yearly CSV files for January. os.listdir returns entries in arbitrary
# order, so the names are sorted to keep the combined row order the same on every run.
files = sorted(f for f in os.listdir("data/day/IN/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly January file first and concatenate once at the end: calling
# pd.concat inside the loop re-copies the accumulated frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/IN/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# (the result the loop-based version produced) when the folder holds no CSV files.
IN_jan = pd.concat(frames) if frames else pd.DataFrame()

IN_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,auto,auto,Zionsville,695.000000,693.000000,637.000000,80.0,True,False,False
1,00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,auto,hold,Zionsville,688.227273,742.886364,692.886364,80.0,True,False,False
2,00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,cool,auto,Zionsville,652.750000,680.000000,680.000000,80.0,True,False,False
3,00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,heat,auto,Zionsville,700.494186,704.058140,704.058140,80.0,True,False,False
4,00d57801ec029dad25ea93d564eea9c33eed2183,Jan,2017,heat,hold,Zionsville,679.935802,677.713580,677.540741,80.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
909,ff723053bf8b5bbd687d4b73a2199eff41f4f03d,Jan,2021,auto,hold,Indianapolis,656.015764,724.023854,655.837793,20.0,False,False,False
910,ffe9fd927f4ddb01a78133e42f733d9cd8d7e3b7,Jan,2021,heat,hold,bloomington,671.029630,670.281481,668.003704,45.0,False,False,False
911,fff72394802492aab6d3dd869857f7649c963f97,Jan,2021,heat,hold,Winfield,673.719222,672.614831,671.648668,5.0,False,False,False
912,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,Jan,2021,heat,hold,Indianapolis,643.600000,673.000000,642.400000,50.0,False,False,False


In [34]:
# Save the combined January frame; the positional row index is left out of the file.
IN_jan.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IN/IN_jan.csv", index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 thermostat readings for Indiana.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2017-feb-day-IN.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (feb_2017[setpoint] >= 850) | (feb_2017[setpoint] <= 600)
    feb_2017.drop(index=feb_2017.loc[out_of_range].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,540f02fc30ccf5c2a4e0fdaf6b2c027614b0679d,2017-02-09 17:50:00 UTC,heat,hold,710,715,715,IN,Zionsville,15,False,False,False,Gas
1,75fc13e7c2f42bab23ed497bf1135afe102b6e2a,2017-02-04 19:10:00 UTC,auto,auto,630,750,640,IN,Greenfield,45,False,False,False,Gas
2,272a3adb8e9cba852556c80064d451ab6c468f9e,2017-02-06 13:25:00 UTC,auto,hold,689,740,690,IN,Noblesville,10,False,False,True,Electric
3,ba0469f10d33e15fba8056b59b3756973c555463,2017-02-06 19:15:00 UTC,heat,hold,659,660,650,IN,Fishers,5,False,False,False,Gas
4,0de4bebad0c007a31bd3c73db089aad86572f9bd,2017-02-19 19:25:00 UTC,heat,auto,649,772,668,IN,Indianapolis,35,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201239,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-02-25 17:30:00 UTC,heat,auto,670,660,660,IN,Jeffersonville,60,False,False,False,Gas
201240,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-02-13 18:20:00 UTC,heat,hold,681,680,680,IN,Jeffersonville,60,False,False,False,Gas
201241,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-02-28 14:10:00 UTC,heat,hold,688,680,680,IN,Jeffersonville,60,False,False,False,Gas
201242,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-02-15 15:35:00 UTC,heat,hold,684,680,680,IN,Jeffersonville,60,False,False,False,Gas


In [37]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalized) while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
feb_2017 = feb_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [39]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Persist the February 2017 averages; the group keys live in the index, so the index is written too.
feb_2017_ave.to_csv(path_or_buf="data/day/IN/feb/feb_2017_ave.csv", index=True, header=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 thermostat readings for Indiana.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2018-feb-day-IN.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (feb_2018[setpoint] >= 850) | (feb_2018[setpoint] <= 600)
    feb_2018.drop(index=feb_2018.loc[out_of_range].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dce84036a480cef989824f931c3f03e267493ef0,2018-02-09 15:05:00 UTC,heat,auto,655,660,660,IN,Greencastle,20,True,False,True,Electric
1,48aa7c1be69e6197f5925ad30f0a29901c99cd1c,2018-02-25 14:50:00 UTC,heat,auto,667,740,660,IN,Peru,90,False,False,False,Gas
2,31856d90d916835ad29a948d881f8ad5a6f0e960,2018-02-21 12:15:00 UTC,heat,hold,660,650,650,IN,Sheridan,10,False,False,False,Gas
3,b1eb80ff3179d325446c38ae7a31f1f1be2fe18d,2018-02-18 15:30:00 UTC,heat,hold,690,690,690,IN,Boonville,5,True,False,True,Electric
4,c3cd8bf5f142e7db7c34a12b3eea9be3946644e6,2018-02-04 11:40:00 UTC,heat,hold,700,697,697,IN,Warsaw,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
582376,022769b8318b36b0df4e5357105aa3929cb2bf37,2018-02-10 15:55:00 UTC,auto,hold,733,755,735,IN,West Lafayette,10,True,False,True,Electric
582377,70f67b4dc8e610138531488966c1e02d0b8f0f65,2018-02-18 17:00:00 UTC,heat,auto,735,740,740,IN,West Lafayette,5,False,False,False,Gas
582378,022769b8318b36b0df4e5357105aa3929cb2bf37,2018-02-17 14:30:00 UTC,auto,hold,735,755,735,IN,West Lafayette,10,True,False,True,Electric
582379,022769b8318b36b0df4e5357105aa3929cb2bf37,2018-02-09 13:45:00 UTC,auto,hold,733,755,735,IN,West Lafayette,10,True,False,True,Electric


In [43]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalized) while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
feb_2018 = feb_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [45]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Persist the February 2018 averages; the group keys live in the index, so the index is written too.
feb_2018_ave.to_csv(path_or_buf="data/day/IN/feb/feb_2018_ave.csv", index=True, header=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 thermostat readings for Indiana.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2019-feb-day-IN.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (feb_2019[setpoint] >= 850) | (feb_2019[setpoint] <= 600)
    feb_2019.drop(index=feb_2019.loc[out_of_range].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cb26b8f6914f1b7a1cfdbe1f79dbbfd52f691e07,2019-02-13 16:15:00 UTC,heat,hold,698,681,681,IN,Newburgh,8,False,False,False,Gas
1,1a5a5fa45ac3435cbd52f9858653367413f77c57,2019-02-22 18:55:00 UTC,auto,hold,737,840,740,IN,Fort Wayne,17,False,False,False,Gas
2,7dde514d09c312462ba4297181752939846d4a86,2019-02-25 12:15:00 UTC,heat,auto,708,768,710,IN,Syracuse,0,False,False,False,Gas
3,ea19720b2e749e457db703ea3096d4376376123e,2019-02-27 18:20:00 UTC,auto,auto,736,830,740,IN,Fort Wayne,50,False,False,False,Gas
4,5e90e48a2f36c96d7fb44b9a6c0d9132a91a2dbf,2019-02-27 17:25:00 UTC,heat,hold,720,725,725,IN,Fort Wayne,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
762959,a6a3fd8bac4c2c5ca38feaa7354c69e8ac2e2feb,2019-02-24 16:05:00 UTC,heat,auto,701,760,700,IN,Indianapolis,30,False,False,False,Gas
762960,fc8e46fa812a8e91d4118e079cbb07f7da89cc07,2019-02-03 16:50:00 UTC,auto,hold,661,760,650,IN,Jeffersonville,20,False,False,False,Gas
762961,3f62a82d59345e1d214a4156e56d5aa60f851a6f,2019-02-15 18:25:00 UTC,heat,hold,669,760,670,IN,Cicero,16,True,False,True,Electric
762962,0b7052541e2c265a9dc449514cc9de7319e527b2,2019-02-20 19:40:00 UTC,auto,hold,685,760,690,IN,Zionsville,0,True,False,False,Gas


In [49]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalized) while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
feb_2019 = feb_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [51]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Persist the February 2019 averages; the group keys live in the index, so the index is written too.
feb_2019_ave.to_csv(path_or_buf="data/day/IN/feb/feb_2019_ave.csv", index=True, header=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 thermostat readings for Indiana.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2020-feb-day-IN.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (feb_2020[setpoint] >= 850) | (feb_2020[setpoint] <= 600)
    feb_2020.drop(index=feb_2020.loc[out_of_range].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7854e0f4452da683264ee897a24c48ca9b532b62,2020-02-01 18:00:00 UTC,auto,hold,709,775,715,IN,Fort Wayne,47,False,False,False,Gas
1,6a71e848ad741f5c45cd1511de1c208b85554136,2020-02-01 19:40:00 UTC,auto,hold,699,747,697,IN,Westfield,15,False,False,False,Gas
2,982697f0fd9cbc3f813fd893426cc8d4e272d942,2020-02-10 13:40:00 UTC,auto,hold,738,788,738,IN,Indianapolis,20,False,False,False,Gas
3,3bafcb0a24910185661f80d392d8ddc98816041f,2020-02-22 19:55:00 UTC,heat,hold,688,688,688,IN,Carmel,46,False,False,False,Gas
4,982697f0fd9cbc3f813fd893426cc8d4e272d942,2020-02-25 12:50:00 UTC,auto,hold,725,778,728,IN,Indianapolis,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1107811,3f62a82d59345e1d214a4156e56d5aa60f851a6f,2020-02-28 12:15:00 UTC,heat,auto,661,760,670,IN,Cicero,16,True,False,True,Electric
1107812,702457010015cf4659383a3374c4171f3e7731d1,2020-02-04 13:20:00 UTC,auto,auto,674,760,680,IN,South Bend,30,False,False,False,Gas
1107813,7cece27315c99c92b02e92b27e1b090b69de6fb9,2020-02-08 12:50:00 UTC,heat,hold,668,760,670,IN,South Bend,90,False,False,False,Gas
1107814,d8b457b41ff29d92aa8e08a32df8540ec19b47ff,2020-02-08 18:15:00 UTC,heat,auto,679,760,680,IN,Elkhart,98,False,False,False,Gas


In [55]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalized) while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
feb_2020 = feb_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [57]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Persist the February 2020 averages; the group keys live in the index, so the index is written too.
feb_2020_ave.to_csv(path_or_buf="data/day/IN/feb/feb_2020_ave.csv", index=True, header=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 thermostat readings for Indiana.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2021-feb-day-IN.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is discarded when either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
for setpoint in ("TemperatureExpectedCool", "TemperatureExpectedHeat"):
    out_of_range = (feb_2021[setpoint] >= 850) | (feb_2021[setpoint] <= 600)
    feb_2021.drop(index=feb_2021.loc[out_of_range].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3cb9e8a951ddb854f3c27801e6ce71d7b320ba7b,2021-02-21 10:45:00 UTC,auto,hold,643,699,649,IN,Bloomington,10,False,False,False,Gas
1,c1235608a2bcf441e107b19d312390405858fd0a,2021-02-11 19:55:00 UTC,auto,hold,672,735,685,IN,Indianapolis,70,False,False,True,Electric
2,1fafa9307f06bc5307e08236e8936a9b19a2bcb7,2021-02-26 12:50:00 UTC,heat,hold,632,620,620,IN,Pekin,25,True,False,True,Electric
3,0d42221517dd092f036ee62beb102bd9b05b8f47,2021-02-15 17:55:00 UTC,auxHeatOnly,hold,708,712,712,IN,Fort Wayne,0,False,False,True,Electric
4,0a2dab0730813160586d72a9158d4b57b59d5cd2,2021-02-10 11:25:00 UTC,auto,hold,697,830,700,IN,Anderson,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
667867,03206870da9d3e8eb1804d07265573b9286b876a,2021-02-09 16:50:00 UTC,auto,hold,698,760,700,IN,Garrett,25,False,False,False,Gas
667868,3f62a82d59345e1d214a4156e56d5aa60f851a6f,2021-02-13 11:45:00 UTC,heat,hold,661,760,670,IN,Cicero,16,True,False,True,Electric
667869,98d046e68e2cec6461df951ced473432099c1e3d,2021-02-19 10:40:00 UTC,auto,hold,705,760,710,IN,South Bend,10,True,False,False,Gas
667870,b869b5d0db337e72af2d3bfe658e4da12df7da08,2021-02-27 16:05:00 UTC,auto,hold,707,760,710,IN,Warsaw,10,False,False,False,Gas


In [61]:
# Stamp every row with its year and month (both stored as text) so they can serve as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalized) while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Tag the three temperature columns with an "_ave" suffix up front so the
# aggregated output is labelled as averages.
feb_2021 = feb_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [63]:
# Average the numeric and boolean columns per thermostat / month / year / HVAC mode /
# calendar event / city. numeric_only=True is explicit because the frame still holds
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be
# averaged; pandas >= 2.0 raises TypeError rather than silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Persist the February 2021 averages; the group keys live in the index, so the index is written too.
feb_2021_ave.to_csv(path_or_buf="data/day/IN/feb/feb_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the yearly CSV files for February. os.listdir returns entries in arbitrary
# order, so the names are sorted to keep the combined row order the same on every run.
files = sorted(f for f in os.listdir("data/day/IN/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly February file first and concatenate once at the end: calling
# pd.concat inside the loop re-copies the accumulated frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/IN/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# (the result the loop-based version produced) when the folder holds no CSV files.
IN_feb = pd.concat(frames) if frames else pd.DataFrame()

IN_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01d2dea193d6f494d3d2a2a80c1b8bb11c100b94,feb,2017,heat,auto,Portage,710.589744,783.384615,649.923077,70.0,False,False,False
1,01d2dea193d6f494d3d2a2a80c1b8bb11c100b94,feb,2017,heat,hold,Portage,699.500000,723.500000,717.500000,70.0,False,False,False
2,0205a8de82aad26001281be438d7a4abf8503db4,feb,2017,heat,hold,Valparaiso,652.487179,650.000000,620.000000,15.0,False,False,False
3,02f0dce3a14264a8d2ef5ac782cfc049a6d04ca1,feb,2017,heat,auto,Elkhart,676.128065,780.689373,638.604905,55.0,False,False,False
4,02f0dce3a14264a8d2ef5ac782cfc049a6d04ca1,feb,2017,heat,hold,Elkhart,655.276786,657.035714,641.866071,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
853,ff723053bf8b5bbd687d4b73a2199eff41f4f03d,feb,2021,heat,hold,Indianapolis,657.295077,660.624680,658.715153,20.0,False,False,False
854,ffe9fd927f4ddb01a78133e42f733d9cd8d7e3b7,feb,2021,heat,hold,bloomington,677.958904,682.890411,679.356164,45.0,False,False,False
855,fff72394802492aab6d3dd869857f7649c963f97,feb,2021,heat,hold,Winfield,678.128761,677.563779,675.968712,5.0,False,False,False
856,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,feb,2021,heat,hold,Indianapolis,686.454545,691.939394,690.909091,50.0,False,False,False


In [67]:
# Write the combined February table for IN; index=False leaves out the per-file row numbers
IN_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IN/IN_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 "day" extract for IN
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2017-jun-day-IN.csv")

In [69]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2017.drop(index=jun_2017.index[outlier_rows.to_numpy()], inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9fbdcd8a249de70319327cf5a5dcb4ae9c9386ba,2017-06-17 14:10:00 UTC,cool,hold,683,680,680,IN,Nappanee,90,False,False,False,Gas
1,ca5488bc356dfc339dc77a0c5bee892e75f72fc6,2017-06-20 10:50:00 UTC,cool,hold,741,750,750,IN,Bloomington,36,False,False,False,Gas
2,bce5f6c23632b28ecf4d997cec22c36b8fc90caf,2017-06-18 13:45:00 UTC,cool,hold,716,715,715,IN,Indianapolis,70,False,False,False,Gas
3,5901b564ce611257caf4d3948d278ec7f222c633,2017-06-03 17:25:00 UTC,auto,hold,721,715,645,IN,Indianapolis,105,False,False,False,Gas
4,441d7fd4fbc554ca9f7f5545743b45596fafe4b2,2017-06-25 11:35:00 UTC,auto,hold,703,700,650,IN,La Porte,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348265,ff55b1803002bafa70710e2c5d8a255fa9b5e3fb,2017-06-30 09:35:00 UTC,auto,auto,722,720,670,IN,Decatur,0,True,False,False,Gas
348266,ff55b1803002bafa70710e2c5d8a255fa9b5e3fb,2017-06-25 09:40:00 UTC,auto,auto,710,720,670,IN,Decatur,0,True,False,False,Gas
348267,ff55b1803002bafa70710e2c5d8a255fa9b5e3fb,2017-06-25 13:35:00 UTC,auto,auto,724,720,670,IN,Decatur,0,True,False,False,Gas
348268,ff55b1803002bafa70710e2c5d8a255fa9b5e3fb,2017-06-18 16:35:00 UTC,auto,auto,717,720,670,IN,Decatur,0,True,False,False,Gas


In [70]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(ave_labels, axis="columns")

In [72]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Save the June 2017 group means; the group keys sit in the index, so the index is written too
jun_2017_ave.to_csv(
    path_or_buf="data/day/IN/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw June 2018 "day" extract for IN
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2018-jun-day-IN.csv")

In [75]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2018.drop(index=jun_2018.index[outlier_rows.to_numpy()], inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,339bbaaaae2db389d0f976af339d4268d26091ef,2018-06-15 13:35:00 UTC,cool,hold,666,657,657,IN,Greenwood,15,False,False,False,Gas
4,99a60090d18db1246c79bab42336b519366fff63,2018-06-12 10:30:00 UTC,cool,hold,720,719,719,IN,Zionsville,5,False,False,False,Gas
5,146ce6d52780e7576d68de5154e6a237e387b4e1,2018-06-30 17:40:00 UTC,cool,auto,746,750,655,IN,Indianapolis,0,False,False,False,Gas
6,71b0a5d3a50cfb7bf3a4dccc26ea84d71a9f07a5,2018-06-07 19:40:00 UTC,cool,hold,735,725,725,IN,Lawrenceburg,0,True,False,True,Electric
7,039909bdfbda1c62898ef54624a44987206967ca,2018-06-12 15:20:00 UTC,cool,hold,723,735,735,IN,Avon,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
838279,d5f0609095ca2f15ca55f43ba382997a87568316,2018-06-20 19:50:00 UTC,cool,hold,764,760,760,IN,Evansville,30,False,False,False,Gas
838280,c01ddec6d8678b9390cd735c3b3637804375e152,2018-06-07 19:45:00 UTC,cool,auto,764,760,760,IN,Bloomington,25,False,False,False,Gas
838281,09b3cf838accc845d708d7a2fe8ea9cb451e0401,2018-06-01 17:45:00 UTC,cool,auto,767,760,760,IN,Greenwood,26,False,False,False,Gas
838282,951cde9b93a5c1897830683e4c8223b7f28e316a,2018-06-18 11:45:00 UTC,cool,hold,763,760,760,IN,Rensselaer,10,False,False,False,Gas


In [76]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(ave_labels, axis="columns")

In [78]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Save the June 2018 group means; the group keys sit in the index, so the index is written too
jun_2018_ave.to_csv(
    path_or_buf="data/day/IN/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw June 2019 "day" extract for IN
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2019-jun-day-IN.csv")

In [81]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2019.drop(index=jun_2019.index[outlier_rows.to_numpy()], inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cccef510e04cf9c29c091555dfab6f7fb51c910a,2019-06-15 12:30:00 UTC,auto,auto,693,692,642,IN,Valparaiso,0,False,False,False,Gas
2,a9e707d25f7936d949063346ab4a1c6c88c4b4af,2019-06-29 13:25:00 UTC,cool,hold,725,741,741,IN,Fort Wayne,0,True,False,False,Gas
3,6fdd52afb0a82490075186ba01fefbe702d40c5d,2019-06-23 19:50:00 UTC,cool,auto,761,750,722,IN,Indianapolis,50,False,False,False,Gas
4,5632fcb6fb088f681525883bb89c93bd0f5052ac,2019-06-29 12:35:00 UTC,cool,hold,716,713,713,IN,West Lafayette,10,False,False,False,Gas
5,cb26b8f6914f1b7a1cfdbe1f79dbbfd52f691e07,2019-06-13 13:45:00 UTC,cool,hold,688,723,723,IN,Newburgh,8,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1185141,d5f0609095ca2f15ca55f43ba382997a87568316,2019-06-19 19:45:00 UTC,cool,hold,759,760,760,IN,Evansville,30,False,False,False,Gas
1185142,951cde9b93a5c1897830683e4c8223b7f28e316a,2019-06-20 15:55:00 UTC,cool,hold,716,760,760,IN,Rensselaer,10,False,False,False,Gas
1185143,817867d43f43f58d2107c0eae5f2628310dddaed,2019-06-10 13:45:00 UTC,cool,hold,719,790,760,IN,Greenwood,30,True,False,False,Gas
1185144,89453fca9e62d2f8b5999245a638365638fa1349,2019-06-15 17:30:00 UTC,cool,hold,721,760,760,IN,Valparaiso,20,False,False,False,Gas


In [82]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(ave_labels, axis="columns")

In [84]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Save the June 2019 group means; the group keys sit in the index, so the index is written too
jun_2019_ave.to_csv(
    path_or_buf="data/day/IN/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw June 2020 "day" extract for IN
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2020-jun-day-IN.csv")

In [87]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2020.drop(index=jun_2020.index[outlier_rows.to_numpy()], inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e51a84cc61f396d4e8c6d85a286a15394b33c2ab,2020-06-08 15:35:00 UTC,auto,hold,699,687,637,IN,Bremen,30,False,False,False,Gas
1,ba37d64b2a9fdb87743277736257108bcd904160,2020-06-26 14:30:00 UTC,cool,hold,729,722,722,IN,Fort Wayne,20,False,False,False,Gas
2,d0d103ff680f836ce701ec12413119259c6565a3,2020-06-19 17:20:00 UTC,cool,hold,709,704,704,IN,Fort Wayne,20,True,False,False,Gas
3,ae87071619e60dbbbb36b4bb240988d470855800,2020-06-10 13:35:00 UTC,cool,hold,727,780,738,IN,Michigan City,30,True,False,False,Gas
4,8a94c1da3d340f79b231cfb5269b365ebc5af108,2020-06-12 19:00:00 UTC,auto,hold,742,788,693,IN,Indianapolis,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1329282,9a9bbb1d6c6663a5c9a9f21de76ccf9b685ece35,2020-06-14 16:30:00 UTC,cool,hold,681,760,760,IN,Indianapolis,17,True,False,False,Gas
1329283,532338f329f9a07b70c6c2e65912221fceafd6b5,2020-06-21 16:25:00 UTC,cool,hold,755,760,760,IN,Valparaiso,45,False,False,False,Gas
1329284,a3d60736f779913e1ad00ea2749eb24a58a1bc6a,2020-06-03 17:55:00 UTC,cool,auto,762,760,760,IN,Westfield,15,False,False,False,Gas
1329285,dccf56642b0ec33289a7beb072c6c2ef3c357f1b,2020-06-28 11:40:00 UTC,cool,hold,752,760,760,IN,Valparaiso,40,False,False,False,Gas


In [88]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(ave_labels, axis="columns")

In [90]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Save the June 2020 group means; the group keys sit in the index, so the index is written too
jun_2020_ave.to_csv(
    path_or_buf="data/day/IN/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw June 2021 "day" extract for IN
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2021-jun-day-IN.csv")

In [93]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jun_2021.drop(index=jun_2021.index[outlier_rows.to_numpy()], inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,de8d75687c766438263ac61d7b30b6abcd397d81,2021-06-22 10:00:00 UTC,cool,hold,712,722,722,IN,Muncie,27,False,False,False,Gas
1,602319fd835a2c15c66be8ae71a4a6aab8750e2d,2021-06-29 18:50:00 UTC,cool,hold,735,737,737,IN,Carmel,0,False,False,False,Gas
2,5b060e3571074766c182e910fc9a18d97064e0e4,2021-06-13 18:40:00 UTC,auto,hold,737,738,688,IN,Greenwood,10,False,False,False,Gas
4,c6e1b51512487ff0f655c86b5564a0c77bc9e586,2021-06-12 09:35:00 UTC,auto,hold,690,695,645,IN,Plainfield,0,False,False,False,Gas
5,3a6a79bd5c8c2c340eab93c1e8d5b9cb889b8e88,2021-06-08 17:45:00 UTC,auto,hold,748,737,717,IN,Anderson,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
791674,5ed8fa4c9548027b00bf9369acf666855a29c80c,2021-06-08 18:35:00 UTC,cool,hold,766,760,760,IN,South Bend,67,False,False,False,Gas
791675,f00bf587c74d07e7b99633309ea6627c00e65861,2021-06-13 18:10:00 UTC,cool,hold,710,710,760,IN,Whitestown,5,True,False,False,Gas
791676,f458b2007efc45ccb383cf3ca38c68b3c0d385ac,2021-06-30 15:30:00 UTC,cool,hold,724,760,760,IN,Fishers,20,False,False,False,Gas
791677,4398ee514cc794e00274f116172b72d524829fa3,2021-06-20 19:15:00 UTC,cool,hold,760,760,760,IN,Cedar Lake,15,False,False,False,Gas


In [94]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(ave_labels, axis="columns")

In [96]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Save the June 2021 group means; the group keys sit in the index, so the index is written too
jun_2021_ave.to_csv(
    path_or_buf="data/day/IN/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# List the CSV files in the June folder (the per-year averages are exported there).
# os.listdir returns names in arbitrary order, so sort them to make the row order of
# the combined table reproducible.
files = sorted(f for f in os.listdir("data/day/IN/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing a DataFrame inside the loop
# re-copies all previously read rows on each pass.
frames = [pd.read_csv("data/day/IN/jun/" + file) for file in files]

# pd.concat rejects an empty list, so keep the empty-frame result when no files were found
IN_jun = pd.concat(frames) if frames else pd.DataFrame()

IN_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,jun,2017,cool,auto,Fort Wayne,720.996350,721.081204,719.429745,20.0,False,False,True
1,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,jun,2017,cool,hold,Fort Wayne,716.069638,717.647168,717.639740,20.0,False,False,True
2,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,jun,2017,heat,auto,Fort Wayne,719.809524,720.000000,720.000000,20.0,False,False,True
3,00d57801ec029dad25ea93d564eea9c33eed2183,jun,2017,cool,auto,Zionsville,681.722662,684.709984,680.995246,80.0,True,False,False
4,00d57801ec029dad25ea93d564eea9c33eed2183,jun,2017,cool,hold,Zionsville,681.397647,686.463529,686.463529,80.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1079,ffde7caa5dbcff7cb0ac96677bef056c0596e742,jun,2021,cool,hold,West Harrison,741.526316,740.000000,740.000000,0.0,True,False,True
1080,ffe9fd927f4ddb01a78133e42f733d9cd8d7e3b7,jun,2021,cool,hold,bloomington,758.171296,747.314815,745.902778,45.0,False,False,False
1081,fff72394802492aab6d3dd869857f7649c963f97,jun,2021,cool,hold,Winfield,700.591045,701.213134,701.213134,5.0,False,False,False
1082,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,jun,2021,cool,hold,Indianapolis,698.438903,696.541147,696.254364,50.0,False,False,False


In [100]:
# Write the combined June table for IN; index=False leaves out the per-file row numbers
IN_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/IN/IN_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 "day" extract for IN
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2017-jul-day-IN.csv")

In [102]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2017.drop(index=jul_2017.index[outlier_rows.to_numpy()], inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d11758c84e5b33d7203038acb7ce43aa4eb84b20,2017-07-23 16:00:00 UTC,cool,auto,766,760,700,IN,Noblesville,15,False,False,True,Electric
1,dbfd15e0a838aa196074a46eaf357af3b25e421a,2017-07-09 18:50:00 UTC,cool,auto,730,730,710,IN,Indianapolis,0,False,False,False,Gas
2,540f02fc30ccf5c2a4e0fdaf6b2c027614b0679d,2017-07-20 15:55:00 UTC,cool,hold,699,720,720,IN,Zionsville,15,False,False,False,Gas
3,cd0123025114dc107818cf1d946bdb1fdc7a8d60,2017-07-29 18:55:00 UTC,cool,hold,715,710,710,IN,Carmel,0,False,False,False,Gas
4,b2364550d572bf025a19829b41d00a16393b117d,2017-07-25 17:15:00 UTC,auto,hold,716,715,655,IN,Indianapolis,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
418311,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-07-05 10:30:00 UTC,cool,auto,680,680,700,IN,Jeffersonville,60,False,False,False,Gas
418312,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-07-31 17:35:00 UTC,cool,hold,726,720,720,IN,Jeffersonville,60,False,False,False,Gas
418313,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-07-04 17:40:00 UTC,cool,hold,713,700,700,IN,Jeffersonville,60,False,False,False,Gas
418314,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-07-20 19:00:00 UTC,cool,hold,704,700,700,IN,Jeffersonville,60,False,False,False,Gas


In [103]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(ave_labels, axis="columns")

In [105]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Save the July 2017 group means; the group keys sit in the index, so the index is written too
jul_2017_ave.to_csv(
    path_or_buf="data/day/IN/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw July 2018 "day" extract for IN
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2018-jul-day-IN.csv")

In [108]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2018.drop(index=jul_2018.index[outlier_rows.to_numpy()], inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,1a5a5fa45ac3435cbd52f9858653367413f77c57,2018-07-02 17:15:00 UTC,auto,auto,758,755,705,IN,Fort Wayne,17,False,False,False,Gas
2,c96f01a2b6ed5338be2c74967a4f55c0fb98f979,2018-07-16 17:00:00 UTC,cool,hold,703,699,699,IN,Fort Wayne,15,False,False,False,Gas
3,0b7052541e2c265a9dc449514cc9de7319e527b2,2018-07-22 18:40:00 UTC,cool,hold,713,707,707,IN,Zionsville,0,True,False,False,Gas
4,a9c84319a2b4c3a0efebdceef3a61d523ad43fc3,2018-07-15 15:15:00 UTC,auto,hold,733,728,651,IN,Carmel,10,False,False,False,Gas
5,c96f01a2b6ed5338be2c74967a4f55c0fb98f979,2018-07-08 11:20:00 UTC,cool,hold,707,705,705,IN,Fort Wayne,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
942310,ee78fbf53cd711c2eadef8057c5507608546ba3f,2018-07-01 17:30:00 UTC,cool,auto,767,760,760,IN,Schererville,0,False,False,False,Gas
942311,5776353788108b3f0e82b75983d9001cee85442e,2018-07-10 12:20:00 UTC,cool,hold,712,760,760,IN,Indianapolis,10,False,False,False,Gas
942312,d5f0609095ca2f15ca55f43ba382997a87568316,2018-07-18 12:40:00 UTC,cool,hold,763,760,760,IN,Evansville,30,False,False,False,Gas
942313,7e31b64d8f0df32dde070d6076883b062922185e,2018-07-05 18:05:00 UTC,cool,auto,762,760,760,IN,Fishers,27,False,False,True,Electric


In [109]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(ave_labels, axis="columns")

In [111]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Save the July 2018 group means; the group keys sit in the index, so the index is written too
jul_2018_ave.to_csv(
    path_or_buf="data/day/IN/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw July 2019 "day" extract for IN
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2019-jul-day-IN.csv")

In [114]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2019.drop(index=jul_2019.index[outlier_rows.to_numpy()], inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,033e2670927a37f3143d3c786d72c6104e003c5d,2019-07-17 12:45:00 UTC,auto,hold,719,737,651,IN,Bloomington,0,False,False,True,Electric
1,b6878fd09d04488a5cd8a0b6fef2d2d92f45bff6,2019-07-09 19:35:00 UTC,cool,hold,751,750,727,IN,Indianapolis,70,False,False,False,Gas
3,3a0d13a94e5166eb7e848d4dc3635fb9f529559d,2019-07-26 16:35:00 UTC,cool,auto,748,760,728,IN,Indianapolis,40,False,False,False,Gas
4,d0d103ff680f836ce701ec12413119259c6565a3,2019-07-24 18:20:00 UTC,cool,hold,705,701,701,IN,Fort Wayne,20,True,False,False,Gas
5,bb18d3ff9c8c733ad81f55580219b0bd0c0fe861,2019-07-20 17:40:00 UTC,cool,hold,707,702,702,IN,Nappanee,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1344760,b0f580c2cab8d749b71bd25ed8c81ed9874fffcb,2019-07-21 15:10:00 UTC,cool,auto,765,760,760,IN,Evansville,50,False,False,False,Gas
1344761,525e99addfb7fe972136a86740335857bab44c46,2019-07-11 13:30:00 UTC,cool,auto,761,760,760,IN,Indianapolis,10,True,False,True,Electric
1344762,4398ee514cc794e00274f116172b72d524829fa3,2019-07-13 13:45:00 UTC,cool,hold,768,760,760,IN,Cedar Lake,15,False,False,False,Gas
1344763,86a8704b2e4ffedd2e561ad2042efadafaa2fba5,2019-07-03 12:45:00 UTC,cool,auto,771,760,760,IN,Chesterton,15,True,False,False,Gas


In [115]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(ave_labels, axis="columns")

In [117]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Save the July 2019 group means; the group keys sit in the index, so the index is written too
jul_2019_ave.to_csv(
    path_or_buf="data/day/IN/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw July 2020 "day" extract for IN
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2020-jul-day-IN.csv")

In [120]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2020.drop(index=jul_2020.index[outlier_rows.to_numpy()], inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,07fe3f12f75c2d69db61cd0281977bf4dcc2a78e,2020-07-22 12:20:00 UTC,auto,hold,685,685,635,IN,Bluffton,89,False,False,False,Gas
1,b358c6c9667dae6ae0e7b7c5f1201a01f007d8c5,2020-07-19 14:55:00 UTC,cool,auto,735,729,729,IN,Newburgh,40,True,False,False,Gas
2,de8d75687c766438263ac61d7b30b6abcd397d81,2020-07-15 17:55:00 UTC,auto,hold,729,732,682,IN,Muncie,27,False,False,False,Gas
3,2aa03b4dab067f085eadff34dbb974d942019809,2020-07-10 15:40:00 UTC,cool,hold,695,689,689,IN,Indianapolis,20,True,False,True,Electric
4,f5cd42ba81c7adb687b238c23358b33acf9ca3b7,2020-07-29 11:30:00 UTC,cool,auto,730,742,706,IN,South Bend,77,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1459519,a865abd76e2dfa35ffa0054ed6b680a3a3108b4f,2020-07-20 10:15:00 UTC,cool,auto,744,760,760,IN,Griffith,20,False,False,False,Gas
1459520,1295a37e9fd58f0a4817e0d9d88d230fff90fbb0,2020-07-15 13:00:00 UTC,cool,auto,742,740,760,IN,Granger,45,True,False,False,Gas
1459521,ebaa26bfbf25013750cac6aaff94acafcf9ca467,2020-07-10 18:25:00 UTC,cool,hold,759,760,760,IN,Indianapolis,0,False,False,False,Gas
1459522,0a62461ac15b45ccc6a2c25edbf56b83398d80fc,2020-07-01 15:05:00 UTC,cool,hold,763,760,760,IN,Granger,50,False,False,False,Gas


In [121]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(ave_labels, axis="columns")

In [123]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Save the July 2020 group means; the group keys sit in the index, so the index is written too
jul_2020_ave.to_csv(
    path_or_buf="data/day/IN/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw July 2021 "day" extract for IN
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2021-jul-day-IN.csv")

In [126]:
# Remove predetermined outliers before aggregating: discard every row where either
# expected-temperature column (cool or heat) is >= 850 or <= 600.
expected_temps = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (expected_temps.ge(850) | expected_temps.le(600)).any(axis=1)
jul_2021.drop(index=jul_2021.index[outlier_rows.to_numpy()], inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,34ffbc76cbe7cdfb090fdc7230332e11ee929055,2021-07-12 12:05:00 UTC,cool,hold,719,729,729,IN,Evansville,30,False,False,False,Gas
1,98fd6984d84855a1cf074fc93fd7450fa1fb655e,2021-07-07 15:25:00 UTC,auto,hold,727,722,682,IN,Evansville,30,True,False,True,Electric
2,c9f2efe86b5ecf65199956058dcb865973406812,2021-07-24 19:30:00 UTC,cool,hold,776,752,752,IN,Goshen,60,False,False,False,Gas
3,5632fcb6fb088f681525883bb89c93bd0f5052ac,2021-07-09 11:20:00 UTC,auto,hold,711,713,633,IN,West Lafayette,10,False,False,False,Gas
4,1f0db5a99d59b77296c38e5a019923125db90477,2021-07-01 13:55:00 UTC,cool,hold,721,718,718,IN,South Bend,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
773787,77d004c029f97876e63bcff76e91202f11d6abac,2021-07-05 17:25:00 UTC,cool,hold,762,760,760,IN,Fort Wayne,10,False,False,False,Gas
773788,076bee46685981455a52d6eed3b25d862393a6ed,2021-07-10 13:10:00 UTC,cool,hold,756,760,760,IN,Fort Wayne,40,False,False,False,Gas
773789,4398ee514cc794e00274f116172b72d524829fa3,2021-07-06 17:25:00 UTC,cool,hold,766,760,760,IN,Cedar Lake,15,False,False,False,Gas
773790,0a62461ac15b45ccc6a2c25edbf56b83398d80fc,2021-07-28 19:55:00 UTC,cool,hold,753,760,760,IN,Granger,50,False,False,False,Gas


In [127]:
# Tag each reading with its source year and month; both labels are group keys in the aggregation below
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" to label the aggregates
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(ave_labels, axis="columns")

In [129]:
# Average the numeric/boolean columns per Identifier, month, year, HVAC mode,
# calendar event and city. numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type):
# pandas >= 2.0 raises TypeError when asked to average those, while older
# versions silently dropped them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Save the July 2021 group means; the group keys sit in the index, so the index is written too
jul_2021_ave.to_csv(
    path_or_buf="data/day/IN/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# List the CSV files in the July folder (the per-year averages are exported there).
# os.listdir returns names in arbitrary order, so sort them to make the row order of
# the combined table reproducible.
files = sorted(f for f in os.listdir("data/day/IN/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: growing a DataFrame inside the loop
# re-copies all previously read rows on each pass.
frames = [pd.read_csv("data/day/IN/jul/" + file) for file in files]

# pd.concat rejects an empty list, so keep the empty-frame result when no files were found
IN_jul = pd.concat(frames) if frames else pd.DataFrame()

IN_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004a8ed2edb250a5a1c876d076227bbb8a2a2135,jul,2017,auto,auto,Fort Wayne,750.439850,750.437343,690.000000,30.0,False,False,False
1,004a8ed2edb250a5a1c876d076227bbb8a2a2135,jul,2017,auto,hold,Fort Wayne,759.377193,760.000000,710.000000,30.0,False,False,False
2,004a8ed2edb250a5a1c876d076227bbb8a2a2135,jul,2017,cool,hold,Fort Wayne,735.894737,735.000000,735.000000,30.0,False,False,False
3,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,jul,2017,cool,auto,Fort Wayne,723.294297,722.699593,722.301426,20.0,False,False,True
4,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,jul,2017,cool,hold,Fort Wayne,729.221500,729.632500,729.632500,20.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
930,ff723053bf8b5bbd687d4b73a2199eff41f4f03d,jul,2021,cool,hold,Indianapolis,665.237152,663.163365,663.163365,20.0,False,False,False
931,ffe9fd927f4ddb01a78133e42f733d9cd8d7e3b7,jul,2021,cool,hold,bloomington,772.516129,761.543011,759.634409,45.0,False,False,False
932,fff72394802492aab6d3dd869857f7649c963f97,jul,2021,cool,hold,Winfield,696.629612,697.874358,697.874358,5.0,False,False,False
933,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,jul,2021,cool,hold,Indianapolis,698.644231,697.500000,697.500000,50.0,False,False,False


In [133]:
# Write the combined July table without the row index.
IN_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IN/IN_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 day-level export for state IN.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2017-aug-day-IN.csv")

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
aug_2017.drop(index=aug_2017.index[outlier_rows.to_numpy()], inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,013a339f52d32f0442774a796606a51a78855d21,2017-08-09 13:45:00 UTC,cool,hold,716,770,770,IN,Indianapolis,5,True,False,True,Electric
1,eb7497429713bcfe2964a7d75612d63291c0163a,2017-08-03 18:20:00 UTC,cool,auto,723,700,700,IN,Huntertown,120,False,False,False,Gas
2,a6aa8f13fc4dd78ff40d6adfa6ff0211fca977b4,2017-08-27 12:00:00 UTC,auto,hold,723,740,660,IN,Munster,25,False,False,False,Gas
3,27d9fd567f2e9e019dea4d5904eab54587d18281,2017-08-05 16:15:00 UTC,auto,hold,729,730,680,IN,Evansville,0,False,False,False,Gas
4,77519dfb5295142216ec80c32c8e7a962c78bf84,2017-08-31 17:45:00 UTC,auto,auto,731,730,670,IN,Carmel,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425521,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-08-14 13:20:00 UTC,cool,auto,678,680,700,IN,Jeffersonville,60,False,False,False,Gas
425522,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-08-25 13:20:00 UTC,cool,hold,690,720,720,IN,Jeffersonville,60,False,False,False,Gas
425523,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-08-14 18:20:00 UTC,cool,auto,707,700,700,IN,Jeffersonville,60,False,False,False,Gas
425524,fd422daa7e07f1177631835ac89d465e48b7b1b3,2017-08-27 18:00:00 UTC,cool,hold,711,680,680,IN,Jeffersonville,60,False,False,False,Gas


In [136]:
# Tag every row with its year and month (both stored as strings).
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
aug_2017_ave.to_csv(path_or_buf="data/day/IN/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the August 2018 day-level export for state IN.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2018-aug-day-IN.csv")

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
aug_2018.drop(index=aug_2018.index[outlier_rows.to_numpy()], inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a9e707d25f7936d949063346ab4a1c6c88c4b4af,2018-08-22 09:20:00 UTC,cool,hold,737,735,735,IN,Fort Wayne,0,True,False,False,Gas
1,5b060e3571074766c182e910fc9a18d97064e0e4,2018-08-28 16:30:00 UTC,cool,auto,745,740,685,IN,Greenwood,10,False,False,False,Gas
2,1ed64d648f1a0afc78e0fde4d7809896fef727b0,2018-08-20 18:25:00 UTC,cool,auto,749,762,691,IN,Whitestown,5,False,False,False,Gas
4,9ed4ba7e7f8cbbb8af3aa5efc46c8369ccf3b3d0,2018-08-16 17:35:00 UTC,cool,hold,742,758,738,IN,fort wayne,30,False,False,False,Gas
6,8a94c1da3d340f79b231cfb5269b365ebc5af108,2018-08-28 11:50:00 UTC,auto,hold,801,788,716,IN,Indianapolis,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
953352,8f76fe1984bdfcd5ead368b9128c25b71ccbcf92,2018-08-13 12:55:00 UTC,cool,hold,732,760,760,IN,Richmond,40,False,False,False,Gas
953353,9287ff2907bac49c1cd99facbbee7892c04fecb9,2018-08-17 12:55:00 UTC,cool,hold,760,760,760,IN,Battle Ground,0,False,False,False,Gas
953354,9287ff2907bac49c1cd99facbbee7892c04fecb9,2018-08-22 08:30:00 UTC,cool,hold,760,760,760,IN,Battle Ground,0,False,False,False,Gas
953355,7e31b64d8f0df32dde070d6076883b062922185e,2018-08-14 16:15:00 UTC,cool,auto,765,760,760,IN,Fishers,27,False,False,True,Electric


In [142]:
# Tag every row with its year and month (both stored as strings).
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
aug_2018_ave.to_csv(path_or_buf="data/day/IN/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the August 2019 day-level export for state IN.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2019-aug-day-IN.csv")

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
aug_2019.drop(index=aug_2019.index[outlier_rows.to_numpy()], inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9288faf711271278efc173cd12711ff561a0126e,2019-08-11 19:00:00 UTC,cool,hold,731,725,725,IN,Westfield,30,False,False,False,Gas
2,bb18d3ff9c8c733ad81f55580219b0bd0c0fe861,2019-08-05 19:05:00 UTC,cool,hold,725,722,722,IN,Nappanee,9,False,False,False,Gas
3,4676a8329e1bb47ea14de03b4bef19f9bec12211,2019-08-24 18:15:00 UTC,auto,hold,676,675,655,IN,Sellersburg,0,False,False,False,Gas
4,9288faf711271278efc173cd12711ff561a0126e,2019-08-10 10:35:00 UTC,cool,hold,720,725,725,IN,Westfield,30,False,False,False,Gas
5,c4bb7d7fe1f718e1404e19fc5cc8cd0dde686460,2019-08-15 11:45:00 UTC,cool,hold,698,721,721,IN,South Bend,90,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1274928,d66550ff14fe131e654791a886585aa4c7ce9052,2019-08-19 07:55:00 UTC,cool,hold,747,760,760,IN,Indianapolis,0,False,False,False,Gas
1274929,ee78fbf53cd711c2eadef8057c5507608546ba3f,2019-08-18 15:10:00 UTC,cool,hold,750,760,760,IN,Schererville,0,False,False,False,Gas
1274930,8f76fe1984bdfcd5ead368b9128c25b71ccbcf92,2019-08-30 16:25:00 UTC,cool,hold,735,760,760,IN,Richmond,40,False,False,False,Gas
1274931,39315322f9c3719caeb90c7472c16ae7310d4219,2019-08-22 10:55:00 UTC,cool,hold,760,760,760,IN,Bremen,20,False,False,False,Gas


In [148]:
# Tag every row with its year and month (both stored as strings).
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
aug_2019_ave.to_csv(path_or_buf="data/day/IN/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the August 2020 day-level export for state IN.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2020-aug-day-IN.csv")

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
aug_2020.drop(index=aug_2020.index[outlier_rows.to_numpy()], inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f99f0e7de56500195ff75112a07ee66b48ef8b57,2020-08-20 11:10:00 UTC,cool,auto,682,712,712,IN,Indianapolis,0,True,False,True,Electric
1,f1669867b84648ebb1088894424711e7c74daad1,2020-08-16 19:55:00 UTC,cool,hold,685,679,679,IN,Richmond,90,False,False,False,Gas
2,bb18d3ff9c8c733ad81f55580219b0bd0c0fe861,2020-08-27 12:15:00 UTC,cool,hold,703,702,702,IN,Nappanee,9,False,False,False,Gas
3,e84ffffb1b5bc1a349afcf0f127f2d5202748f73,2020-08-11 10:40:00 UTC,cool,hold,775,800,800,IN,Indianapolis,0,False,False,False,Gas
4,2d6fc1a8b26dfabde018e43186ddd277622ce9b0,2020-08-24 13:00:00 UTC,auto,hold,743,737,667,IN,Grabill,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1358593,ec7382a3d0917b6ac7507f5290726a69832186cc,2020-08-11 17:45:00 UTC,cool,hold,759,760,760,IN,Leo-Cedarville,10,False,False,False,Gas
1358594,3fd55d464a3dcc0a275a0a3d28a92a272c5fba0c,2020-08-29 10:10:00 UTC,cool,hold,764,760,760,IN,Goshen,10,False,False,False,Gas
1358595,532338f329f9a07b70c6c2e65912221fceafd6b5,2020-08-25 16:05:00 UTC,cool,hold,765,760,760,IN,Valparaiso,45,False,False,False,Gas
1358596,5f39fac1e94e2f5224668c03d02058a6b2560b22,2020-08-19 14:40:00 UTC,cool,hold,751,760,760,IN,Albany,48,True,False,True,Electric


In [154]:
# Tag every row with its year and month (both stored as strings).
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
aug_2020_ave.to_csv(path_or_buf="data/day/IN/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly aggregate CSV files from the month's folder
2. Export them as one combined CSV for the month

In [158]:
# List the CSV exports in the month folder. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/IN/aug/") if f.endswith(".csv"))

In [159]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# recopies the accumulated frame on each iteration, and seeding it with an empty
# DataFrame relies on concat behaviour that recent pandas versions deprecate.
year_frames = []
for file in files:
    df = pd.read_csv("data/day/IN/aug/" + file)
    year_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
IN_aug = pd.concat(year_frames) if year_frames else pd.DataFrame()

IN_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004a8ed2edb250a5a1c876d076227bbb8a2a2135,aug,2017,auto,auto,Fort Wayne,761.965779,769.365019,690.000000,30.0,False,False,False
1,004a8ed2edb250a5a1c876d076227bbb8a2a2135,aug,2017,auto,hold,Fort Wayne,740.676423,740.736585,682.585366,30.0,False,False,False
2,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,aug,2017,cool,auto,Fort Wayne,726.444151,728.952507,726.135444,20.0,False,False,True
3,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,aug,2017,cool,hold,Fort Wayne,724.605003,726.361838,726.357766,20.0,False,False,True
4,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,aug,2017,heat,auto,Fort Wayne,717.776119,730.000000,730.000000,20.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1683,fff72394802492aab6d3dd869857f7649c963f97,aug,2020,cool,auto,Winfield,699.788489,699.398921,699.398921,5.0,False,False,False
1684,fff72394802492aab6d3dd869857f7649c963f97,aug,2020,cool,hold,Winfield,695.465452,694.909548,694.909548,5.0,False,False,False
1685,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,aug,2020,cool,auto,Indianapolis,695.460000,690.120000,689.960000,50.0,False,False,False
1686,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,aug,2020,cool,hold,Indianapolis,705.229167,705.796875,705.307292,50.0,False,False,False


In [160]:
# Write the combined August table without the row index.
IN_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IN/IN_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 day-level export for state IN.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2017-dec-day-IN.csv")

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
dec_2017.drop(index=dec_2017.index[outlier_rows.to_numpy()], inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,951cde9b93a5c1897830683e4c8223b7f28e316a,2017-12-27 17:55:00 UTC,heat,hold,675,680,680,IN,Rensselaer,10,False,False,False,Gas
1,3dbaae1b62f1d5bbc8f2ff88bdf429ada4e4ce03,2017-12-09 14:20:00 UTC,auto,auto,693,745,695,IN,Indianapolis,20,False,False,False,Gas
2,f458b2007efc45ccb383cf3ca38c68b3c0d385ac,2017-12-07 15:25:00 UTC,heat,hold,699,720,700,IN,Fishers,20,False,False,False,Gas
3,6f31d3d883b605fcf03a38d6073deba6c7d45e08,2017-12-02 12:20:00 UTC,heat,auto,699,700,700,IN,Bloomington,85,False,False,False,Gas
4,34ffbc76cbe7cdfb090fdc7230332e11ee929055,2017-12-23 16:40:00 UTC,heat,hold,698,700,700,IN,Evansville,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
608492,f41dd2c4c83940672dd763a7ebde385ce436d379,2017-12-01 16:35:00 UTC,auto,hold,727,750,700,IN,New Albany,20,False,False,True,Electric
608493,f41dd2c4c83940672dd763a7ebde385ce436d379,2017-12-23 13:25:00 UTC,heat,auto,705,710,710,IN,New Albany,20,False,False,True,Electric
608494,f41dd2c4c83940672dd763a7ebde385ce436d379,2017-12-02 07:20:00 UTC,auto,hold,697,750,700,IN,New Albany,20,False,False,True,Electric
608495,f41dd2c4c83940672dd763a7ebde385ce436d379,2017-12-30 19:15:00 UTC,heat,auto,714,710,710,IN,New Albany,20,False,False,True,Electric


In [163]:
# Tag every row with its year and month (both stored as strings).
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
dec_2017_ave.to_csv(path_or_buf="data/day/IN/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the December 2018 day-level export for state IN.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2018-dec-day-IN.csv")

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
dec_2018.drop(index=dec_2018.index[outlier_rows.to_numpy()], inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cc5d22eefe728524e137017c824bc1dab3d87ec4,2018-12-20 14:20:00 UTC,auto,hold,743,795,745,IN,Fort Wayne,0,False,False,False,Gas
1,abd40c2b49ecf23539c84b7d8d8a6f1d7fd8adfa,2018-12-14 17:35:00 UTC,heat,hold,740,758,711,IN,Eaton,5,True,False,True,Electric
2,b30f06a288ed4858674728875661c1b116fd84dd,2018-12-01 18:40:00 UTC,auto,hold,737,840,740,IN,Fishers,5,False,False,False,Gas
3,982697f0fd9cbc3f813fd893426cc8d4e272d942,2018-12-20 12:55:00 UTC,auto,auto,715,810,720,IN,Indianapolis,20,False,False,False,Gas
4,7dde514d09c312462ba4297181752939846d4a86,2018-12-15 15:55:00 UTC,heat,auto,707,779,720,IN,Syracuse,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1070594,c6e1b51512487ff0f655c86b5564a0c77bc9e586,2018-12-02 13:05:00 UTC,auto,hold,691,760,690,IN,Plainfield,0,False,False,False,Gas
1070595,52ebec8b33f5b0c11b2b4f538a01f91f909b9848,2018-12-15 19:45:00 UTC,heat,auto,660,760,660,IN,Santa Claus,20,False,False,False,Gas
1070596,ad8f48da6e28bcb13f89d9c1e24d3667d7f1203b,2018-12-11 13:55:00 UTC,auto,hold,710,760,710,IN,Indianapolis,27,False,False,False,Gas
1070597,2cac9c3596fd15a7fd44309d7120452ec2f34b49,2018-12-11 13:00:00 UTC,auto,hold,683,760,670,IN,Indianapolis,15,False,False,False,Gas


In [169]:
# Tag every row with its year and month (both stored as strings).
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
dec_2018_ave.to_csv(path_or_buf="data/day/IN/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the December 2019 day-level export for state IN.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2019-dec-day-IN.csv")

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
dec_2019.drop(index=dec_2019.index[outlier_rows.to_numpy()], inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3c6b5a0dd7e877061d4eeef21f178a0233b1ad30,2019-12-12 13:25:00 UTC,heat,hold,711,721,721,IN,Lewisville,90,True,False,False,Gas
1,27d0e3fbc30cea007899ef996d1dda524256e64f,2019-12-07 11:40:00 UTC,auto,auto,666,840,670,IN,Greenwood,5,True,False,False,Gas
2,0f1ec5236db8da5190e65242f8bd75d7a2e02d31,2019-12-08 13:10:00 UTC,heat,auto,626,630,630,IN,Fishers,0,False,False,False,Gas
3,50d2fb8de9e6f913dcc3608dd01091ab9c114c59,2019-12-02 13:20:00 UTC,heat,hold,702,702,702,IN,Newburgh,26,False,False,False,Gas
4,84eb73ba4f76934cf8119a959be7f91386cba9e2,2019-12-29 15:00:00 UTC,auto,hold,703,755,705,IN,Brownsburg,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1263137,77f071e99632300bdf0377df73a0101b60731677,2019-12-15 17:35:00 UTC,heat,auto,738,765,740,IN,Beech Grove,55,False,False,False,Gas
1263138,77f071e99632300bdf0377df73a0101b60731677,2019-12-25 16:30:00 UTC,heat,auto,704,765,670,IN,Beech Grove,55,False,False,False,Gas
1263139,b3718b7641dc0b52a7cc002ac519e2963062301b,2019-12-23 18:30:00 UTC,heat,hold,768,765,765,IN,Anderson,0,False,False,True,Electric
1263140,483a841cb8a6c5d8b9bd711e0d178aeca0ab6d33,2019-12-25 12:40:00 UTC,heat,auto,717,765,740,IN,Clayton,0,False,False,True,Electric


In [175]:
# Tag every row with its year and month (both stored as strings).
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
dec_2019_ave.to_csv(path_or_buf="data/day/IN/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the December 2020 day-level export for state IN.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/IN-day/2020-dec-day-IN.csv")

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped when either
# expected temperature (cool or heat) is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label, so the frame object itself is modified.
dec_2020.drop(index=dec_2020.index[outlier_rows.to_numpy()], inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,03206870da9d3e8eb1804d07265573b9286b876a,2020-12-01 19:50:00 UTC,auto,hold,707,785,695,IN,Garrett,25,False,False,False,Gas
1,7fc58d00ffb499a75dba0bddcdf223dead763576,2020-12-07 12:45:00 UTC,heat,hold,687,679,670,IN,Indpls,60,False,False,False,Gas
2,bb18d3ff9c8c733ad81f55580219b0bd0c0fe861,2020-12-03 19:45:00 UTC,auto,hold,661,712,662,IN,Nappanee,9,False,False,False,Gas
3,5a464284ea40b14beaeba7f1fa602f8774fd5214,2020-12-08 19:35:00 UTC,heat,hold,665,665,665,IN,Noblesville,90,False,False,False,Gas
4,b4ca5e681111ecef34223e04df4685b3e033c36a,2020-12-31 18:35:00 UTC,auto,hold,677,820,680,IN,Angola,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1116972,468b1e32c681024fee4a0ce79715f1b068214bad,2020-12-14 12:10:00 UTC,auto,hold,680,765,685,IN,Indianapolis,50,True,False,False,Gas
1116973,468b1e32c681024fee4a0ce79715f1b068214bad,2020-12-21 19:40:00 UTC,auto,hold,689,765,695,IN,Indianapolis,50,True,False,False,Gas
1116974,d8e6f50f514de8a9e2657f0a88dec6ade2d0452d,2020-12-25 18:05:00 UTC,auto,hold,717,765,715,IN,Noblesville,10,False,False,True,Electric
1116975,468b1e32c681024fee4a0ce79715f1b068214bad,2020-12-29 13:10:00 UTC,auto,hold,698,765,705,IN,Indianapolis,50,True,False,False,Gas


In [181]:
# Tag every row with its year and month (both stored as strings).
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the readings per thermostat, month, year, HVAC mode, calendar event and city.
# numeric_only=True restricts the mean to numeric/boolean columns: text columns such as
# date_time cannot be averaged, and pandas >= 2.0 raises a TypeError instead of dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Save the grouped averages; the group keys live in the index, so the index is written too.
dec_2020_ave.to_csv(path_or_buf="data/day/IN/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly aggregate CSV files from the month's folder
2. Export them as one combined CSV for the month

In [185]:
# List the CSV exports in the month folder. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/IN/dec/") if f.endswith(".csv"))

In [186]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# recopies the accumulated frame on each iteration, and seeding it with an empty
# DataFrame relies on concat behaviour that recent pandas versions deprecate.
year_frames = []
for file in files:
    df = pd.read_csv("data/day/IN/dec/" + file)
    year_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
IN_dec = pd.concat(year_frames) if year_frames else pd.DataFrame()

IN_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004a8ed2edb250a5a1c876d076227bbb8a2a2135,dec,2017,auto,auto,Fort Wayne,721.715596,782.119266,716.577982,30.0,False,False,False
1,004a8ed2edb250a5a1c876d076227bbb8a2a2135,dec,2017,auto,hold,Fort Wayne,723.000000,770.000000,699.000000,30.0,False,False,False
2,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,dec,2017,heat,hold,Fort Wayne,705.299020,725.000000,725.000000,20.0,False,False,True
3,00d57801ec029dad25ea93d564eea9c33eed2183,dec,2017,auto,hold,Zionsville,642.675000,695.000000,645.000000,80.0,True,False,False
4,00d57801ec029dad25ea93d564eea9c33eed2183,dec,2017,cool,auto,Zionsville,668.120000,686.760000,691.000000,80.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1660,ffe9fd927f4ddb01a78133e42f733d9cd8d7e3b7,dec,2020,heat,hold,bloomington,683.185654,685.303797,683.575949,45.0,False,False,False
1661,fff72394802492aab6d3dd869857f7649c963f97,dec,2020,heat,auto,Winfield,678.232704,675.311321,675.311321,5.0,False,False,False
1662,fff72394802492aab6d3dd869857f7649c963f97,dec,2020,heat,hold,Winfield,687.229134,682.914961,682.914961,5.0,False,False,False
1663,fffe8c5282c544d600624426acd2335d1588b903,dec,2020,auto,auto,Plainfield,692.230769,754.692308,699.000000,0.0,False,False,False


In [187]:
# Write the combined December table without the row index.
IN_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IN/IN_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export them as one combined CSV for the state

In [188]:
# List the combined month CSVs in the state folder. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/IN/") if f.endswith(".csv"))

In [189]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# recopies the accumulated frame on each iteration, and seeding it with an empty
# DataFrame relies on concat behaviour that recent pandas versions deprecate.
month_frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/IN/" + file)
    month_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found.
IN_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

IN_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004a8ed2edb250a5a1c876d076227bbb8a2a2135,aug,2017,auto,auto,Fort Wayne,761.965779,769.365019,690.000000,30.0,False,False,False
1,004a8ed2edb250a5a1c876d076227bbb8a2a2135,aug,2017,auto,hold,Fort Wayne,740.676423,740.736585,682.585366,30.0,False,False,False
2,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,aug,2017,cool,auto,Fort Wayne,726.444151,728.952507,726.135444,20.0,False,False,True
3,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,aug,2017,cool,hold,Fort Wayne,724.605003,726.361838,726.357766,20.0,False,False,True
4,00a57ff87df6c0a6293c5d88b27fcf5f48a0ad28,aug,2017,heat,auto,Fort Wayne,717.776119,730.000000,730.000000,20.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6920,ffde7caa5dbcff7cb0ac96677bef056c0596e742,jun,2021,cool,hold,West Harrison,741.526316,740.000000,740.000000,0.0,True,False,True
6921,ffe9fd927f4ddb01a78133e42f733d9cd8d7e3b7,jun,2021,cool,hold,bloomington,758.171296,747.314815,745.902778,45.0,False,False,False
6922,fff72394802492aab6d3dd869857f7649c963f97,jun,2021,cool,hold,Winfield,700.591045,701.213134,701.213134,5.0,False,False,False
6923,fffc5ebbd2df6f94be7f5d7d05cc8420c0bce101,jun,2021,cool,hold,Indianapolis,698.438903,696.541147,696.254364,50.0,False,False,False


In [190]:
# Write the combined state table without the row index.
IN_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/IN_all_day.csv", index=False, header=True)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each month/year frame.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# One line per frame, in the same order as listed above.
for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['IN']
Unique jan_2018: ['IN']
Unique jan_2019: ['IN']
Unique jan_2020: ['IN']
Unique jan_2021: ['IN']
Unique feb_2017: ['IN']
Unique feb_2018: ['IN']
Unique feb_2019: ['IN']
Unique feb_2020: ['IN']
Unique feb_2021: ['IN']
Unique jun_2017: ['IN']
Unique jun_2018: ['IN']
Unique jun_2019: ['IN']
Unique jun_2020: ['IN']
Unique jun_2021: ['IN']
Unique jul_2017: ['IN']
Unique jul_2018: ['IN']
Unique jul_2019: ['IN']
Unique jul_2020: ['IN']
Unique jul_2021: ['IN']
Unique aug_2017: ['IN']
Unique aug_2018: ['IN']
Unique aug_2019: ['IN']
Unique aug_2020: ['IN']
Unique dec_2017: ['IN']
Unique dec_2018: ['IN']
Unique dec_2019: ['IN']
Unique dec_2020: ['IN']
