# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 NE day file into a DataFrame.
jan_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2017-jan-day-NE.csv",
)

In [3]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
jan_2017.drop(
    index=jan_2017.index[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,d78e9843002347d8166cbb8d4dd6b7907605bf29,2017-01-20 19:10:00 UTC,heat,hold,712,737,709,NE,Lincoln,25,False,False,False,Gas
2,46bf2a77cde604c97c4aa1f291d453ed4178676e,2017-01-07 19:35:00 UTC,auto,auto,709,762,692,NE,Lincoln,10,False,False,True,Electric
3,484f2ca2f908af5160afd910c8349433da3afc81,2017-01-08 16:30:00 UTC,heat,hold,648,655,652,NE,Hickman,0,True,False,True,Electric
5,5182408f94362ec1153ac7567db8bdc5cb2f1aea,2017-01-28 16:30:00 UTC,heat,hold,673,673,663,NE,Kearney,35,False,False,False,Gas
6,4675e2b2038d8f9f67bb9217ca824d0727b629e1,2017-01-24 18:30:00 UTC,auxHeatOnly,hold,709,719,719,NE,Omaha,56,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39093,89d1333ab2a46ce0b691b9b804c9dd61e64748e1,2017-01-27 14:50:00 UTC,auto,auto,701,760,700,NE,Lincoln,35,False,False,False,Gas
39094,89d1333ab2a46ce0b691b9b804c9dd61e64748e1,2017-01-21 18:50:00 UTC,auto,auto,714,760,700,NE,Lincoln,35,False,False,False,Gas
39095,89d1333ab2a46ce0b691b9b804c9dd61e64748e1,2017-01-27 16:50:00 UTC,auto,auto,697,760,700,NE,Lincoln,35,False,False,False,Gas
39096,c1c075cfc90c4712fe95f5afb28d980c6c427631,2017-01-10 17:00:00 UTC,heat,auto,712,760,710,NE,Lincoln,35,False,False,False,Gas


In [4]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
05320d509a18b2d4022029409e7a267736e78e0b,Jan,2017,heat,auto,Lincoln,702.837209,696.116279,694.720930,90.0,False,False,False
05320d509a18b2d4022029409e7a267736e78e0b,Jan,2017,heat,hold,Lincoln,681.258824,656.300000,652.817647,90.0,False,False,False
0d779ed5bbc93a32506824e72f1b051e9980a0cf,Jan,2017,heat,auto,Blair,697.016129,708.387097,707.508065,35.0,False,False,False
0d779ed5bbc93a32506824e72f1b051e9980a0cf,Jan,2017,heat,hold,Blair,697.666667,703.833333,703.833333,35.0,False,False,False
139fad3ed64a1a20e1def36f6af543b56e705d97,Jan,2017,heat,hold,Papillion,659.891892,660.312500,659.981419,5.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fa41ecba2896263b0616f05220abba8af9cd378d,Jan,2017,heat,auto,Geneva,717.517241,720.000000,720.000000,100.0,False,False,False
fa41ecba2896263b0616f05220abba8af9cd378d,Jan,2017,heat,hold,Geneva,709.848315,712.758427,711.342697,100.0,False,False,False
faa85f28edb3a9a38ac6130d63b1c3f892466036,Jan,2017,heat,hold,Omaha,697.808824,700.000000,690.000000,55.0,False,False,False
fc9679ea396d94ad5b502ffedbca140b30ec008d,Jan,2017,heat,auto,Bennington,715.714286,719.857143,719.357143,10.0,False,False,False


In [7]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/NE/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 NE day file into a DataFrame.
jan_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2018-jan-day-NE.csv",
)

In [9]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
jan_2018.drop(
    index=jan_2018.index[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0f5333f7a040e9e6da0179fda8302f89aefcbcbd,2018-01-20 15:05:00 UTC,heat,auto,677,787,642,NE,Lincoln,50,False,False,False,Gas
1,1985472cdc3f8d3bf064a68f444737c05c724ea8,2018-01-05 19:10:00 UTC,auto,hold,710,785,715,NE,Omaha,10,False,False,False,Gas
3,1985472cdc3f8d3bf064a68f444737c05c724ea8,2018-01-11 17:35:00 UTC,auto,hold,700,785,705,NE,Omaha,10,False,False,False,Gas
4,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-01-06 14:20:00 UTC,heat,hold,664,683,683,NE,Lincoln,25,False,False,True,Electric
5,1985472cdc3f8d3bf064a68f444737c05c724ea8,2018-01-09 16:50:00 UTC,auto,hold,722,775,725,NE,Omaha,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123067,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-01-20 12:45:00 UTC,heat,hold,718,760,760,NE,Omaha,0,True,False,True,Electric
123068,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-01-10 16:50:00 UTC,heat,hold,755,760,760,NE,Omaha,0,True,False,True,Electric
123069,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-01-10 19:00:00 UTC,heat,hold,758,760,760,NE,Omaha,0,True,False,True,Electric
123070,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-01-10 16:35:00 UTC,heat,hold,755,760,760,NE,Omaha,0,True,False,True,Electric


In [10]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/NE/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 NE day file into a DataFrame.
jan_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2019-jan-day-NE.csv",
)

In [15]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
jan_2019.drop(
    index=jan_2019.index[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,02674609e156f6807ee78e1c9ac2851c16854804,2019-01-10 19:20:00 UTC,heat,hold,675,685,685,NE,Omaha,50,False,False,True,Electric
1,d78e9843002347d8166cbb8d4dd6b7907605bf29,2019-01-30 19:40:00 UTC,heat,hold,719,721,721,NE,Lincoln,25,False,False,False,Gas
2,02674609e156f6807ee78e1c9ac2851c16854804,2019-01-09 17:25:00 UTC,heat,hold,684,685,685,NE,Omaha,50,False,False,True,Electric
3,c039f7f15fbd5d98b80efba2631acb48bef7d066,2019-01-05 14:45:00 UTC,auto,hold,712,775,715,NE,Bennington,0,False,False,False,Gas
4,3b46790ec167583ec99f9fcc4d1aa248ecd4fa77,2019-01-17 14:20:00 UTC,heat,auto,716,727,720,NE,Gretna,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205298,f15f623c199dc4f547c63da94db6260301a8cc81,2019-01-03 19:45:00 UTC,heat,auto,686,760,690,NE,Springfield,0,False,False,True,Electric
205299,f15f623c199dc4f547c63da94db6260301a8cc81,2019-01-03 19:55:00 UTC,heat,auto,684,760,690,NE,Springfield,0,False,False,True,Electric
205300,f15f623c199dc4f547c63da94db6260301a8cc81,2019-01-06 16:50:00 UTC,heat,auto,688,760,690,NE,Springfield,0,False,False,True,Electric
205301,f15f623c199dc4f547c63da94db6260301a8cc81,2019-01-04 19:45:00 UTC,heat,auto,686,760,690,NE,Springfield,0,False,False,True,Electric


In [16]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/NE/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 NE day file into a DataFrame.
jan_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2020-jan-day-NE.csv",
)

In [21]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
jan_2020.drop(
    index=jan_2020.index[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c039f7f15fbd5d98b80efba2631acb48bef7d066,2020-01-08 13:35:00 UTC,auto,hold,699,765,705,NE,Bennington,0,False,False,False,Gas
1,197f6257e48f1d14dffab296917ddf2c3090264b,2020-01-13 16:20:00 UTC,auto,hold,702,765,715,NE,Omaha,15,False,False,False,Gas
2,c039f7f15fbd5d98b80efba2631acb48bef7d066,2020-01-02 18:25:00 UTC,auto,hold,704,765,705,NE,Bennington,0,False,False,False,Gas
3,670cb331c3d05d1b746e7f968c11986ee3a15a93,2020-01-18 10:35:00 UTC,heat,hold,717,719,719,NE,Omaha,40,True,False,False,Gas
4,124aefa02526f136375928b0c92df70f356d26b9,2020-01-18 13:35:00 UTC,heat,hold,663,675,675,NE,Omaha,50,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217318,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2020-01-01 17:45:00 UTC,auto,hold,698,760,700,NE,Lincoln,27,False,False,False,Gas
217319,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2020-01-01 13:55:00 UTC,auto,hold,698,760,700,NE,Lincoln,27,False,False,False,Gas
217320,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2020-01-02 15:10:00 UTC,auto,hold,700,760,700,NE,Lincoln,27,False,False,False,Gas
217321,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2020-01-07 16:05:00 UTC,auto,hold,694,760,700,NE,Lincoln,27,False,False,False,Gas


In [22]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/NE/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 NE day file into a DataFrame.
jan_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2021-jan-day-NE.csv",
)

In [27]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
jan_2021.drop(
    index=jan_2021.index[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4b62cface5b5ca77d4a85a0ce65e7a0af360e9d1,2021-01-04 19:25:00 UTC,heat,hold,711,722,722,NE,Omaha,0,False,False,False,Gas
1,a6f5c07aac86f2cc5e78dc0165a33a5edec68476,2021-01-11 17:15:00 UTC,heat,hold,705,743,700,NE,Omaha,10,False,False,False,Gas
2,ae207699741703a86828265b6556382c300adb71,2021-01-27 16:15:00 UTC,auto,hold,718,773,723,NE,Lincoln,95,False,False,False,Gas
3,2504df671d8f2abfb4e209289ba629963f2db33b,2021-01-17 18:35:00 UTC,heat,hold,753,752,752,NE,Lincoln,0,False,False,False,Gas
4,6a4878bb967916a664ed4d25a33d2d8c92342a59,2021-01-30 13:30:00 UTC,heat,hold,689,696,696,NE,HAMPTON,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
136171,b89130febe4af29347ca1be608faf2350540a600,2021-01-16 15:20:00 UTC,heat,hold,757,760,760,NE,North Platte,15,True,False,False,Gas
136172,b89130febe4af29347ca1be608faf2350540a600,2021-01-16 19:10:00 UTC,heat,hold,761,760,760,NE,North Platte,15,True,False,False,Gas
136173,b89130febe4af29347ca1be608faf2350540a600,2021-01-16 19:40:00 UTC,heat,hold,760,760,760,NE,North Platte,15,True,False,False,Gas
136174,b89130febe4af29347ca1be608faf2350540a600,2021-01-16 17:15:00 UTC,heat,hold,757,760,760,NE,North Platte,15,True,False,False,Gas


In [28]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/NE/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the per-year CSV exports for this month.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NE/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year January CSV, then stack them with a single concat.
# Collecting the frames first avoids re-copying the growing result on each
# iteration and avoids concatenating onto an empty DataFrame, which newer
# pandas releases warn about.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/NE/jan/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# CSV files were found.
NE_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

NE_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,05320d509a18b2d4022029409e7a267736e78e0b,Jan,2017,heat,auto,Lincoln,702.837209,696.116279,694.720930,90.0,False,False,False
1,05320d509a18b2d4022029409e7a267736e78e0b,Jan,2017,heat,hold,Lincoln,681.258824,656.300000,652.817647,90.0,False,False,False
2,0d779ed5bbc93a32506824e72f1b051e9980a0cf,Jan,2017,heat,auto,Blair,697.016129,708.387097,707.508065,35.0,False,False,False
3,0d779ed5bbc93a32506824e72f1b051e9980a0cf,Jan,2017,heat,hold,Blair,697.666667,703.833333,703.833333,35.0,False,False,False
4,139fad3ed64a1a20e1def36f6af543b56e705d97,Jan,2017,heat,hold,Papillion,659.891892,660.312500,659.981419,5.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,fa41ecba2896263b0616f05220abba8af9cd378d,Jan,2021,heat,hold,Geneva,704.378238,709.984456,709.253886,100.0,False,False,False
199,faa85f28edb3a9a38ac6130d63b1c3f892466036,Jan,2021,heat,hold,Omaha,704.473267,712.470297,705.557426,55.0,False,False,False
200,faa8c435b84d052676a86ba7d2d6a783d26e6b19,Jan,2021,heat,hold,Omaha,690.762376,693.009901,692.514851,20.0,True,False,False
201,faf3b76e8b0193bcf0256358efb4e593b669a1c7,Jan,2021,heat,hold,Hickman,704.577465,710.084507,709.971831,0.0,False,False,True


In [34]:
# Write the combined January frame. to_csv does not create missing folders,
# so make sure the output directory exists first. index=False leaves out the
# row index, which restarts for every source file after the concat.
os.makedirs("Scraper_Output/State_Month_Day/NE", exist_ok=True)
NE_jan.to_csv("Scraper_Output/State_Month_Day/NE/NE_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 NE day file into a DataFrame.
feb_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2017-feb-day-NE.csv",
)

In [36]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
feb_2017.drop(
    index=feb_2017.index[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbcd2609414c9fbb9e754b8f0f1939cb2a3c47aa,2017-02-10 18:20:00 UTC,auto,hold,737,765,735,NE,Lincoln,10,True,False,True,Electric
1,bbcd2609414c9fbb9e754b8f0f1939cb2a3c47aa,2017-02-10 14:45:00 UTC,auto,hold,722,765,735,NE,Lincoln,10,True,False,True,Electric
2,7cfa84db6a98d9989c00390bf61914b066cab4b8,2017-02-19 13:45:00 UTC,heat,hold,700,685,685,NE,Omaha,50,False,False,False,Gas
3,bbcd2609414c9fbb9e754b8f0f1939cb2a3c47aa,2017-02-14 14:10:00 UTC,auto,hold,755,775,755,NE,Lincoln,10,True,False,True,Electric
4,bbcd2609414c9fbb9e754b8f0f1939cb2a3c47aa,2017-02-10 14:15:00 UTC,auto,hold,721,765,735,NE,Lincoln,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43111,89d1333ab2a46ce0b691b9b804c9dd61e64748e1,2017-02-09 17:55:00 UTC,auto,auto,696,760,700,NE,Lincoln,35,False,False,False,Gas
43112,89d1333ab2a46ce0b691b9b804c9dd61e64748e1,2017-02-11 14:35:00 UTC,auto,auto,697,760,700,NE,Lincoln,35,False,False,False,Gas
43113,8da5c11cefc4000111b5a018878a8c11a229f5f3,2017-02-06 17:35:00 UTC,auto,hold,687,760,690,NE,Omaha,30,False,False,True,Electric
43114,9e191515cec0f18d94d1c2fdeadb8f02695f27e7,2017-02-19 15:20:00 UTC,auto,hold,689,760,670,NE,Omaha,0,False,False,False,Gas


In [37]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one writes lower-case "feb" -- confirm the casing is intentional.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/NE/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 NE day file into a DataFrame.
feb_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2018-feb-day-NE.csv",
)

In [42]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
feb_2018.drop(
    index=feb_2018.index[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-02-13 15:15:00 UTC,heat,hold,661,667,667,NE,Lincoln,25,False,False,True,Electric
1,d0142ffe7ec8913522cd169fed649d6122f3c49c,2018-02-27 18:10:00 UTC,auto,auto,707,765,715,NE,Lincoln,35,False,False,False,Gas
2,deb5a406ef9cf79c602c1597ec8090cbf2e11913,2018-02-23 16:25:00 UTC,heat,hold,712,718,708,NE,Lincoln,20,False,False,False,Gas
3,2504df671d8f2abfb4e209289ba629963f2db33b,2018-02-14 15:10:00 UTC,auto,hold,736,830,740,NE,Lincoln,0,False,False,False,Gas
5,d0040049f0105d39f159341c12d1f054719fb53f,2018-02-19 18:20:00 UTC,heat,hold,675,744,674,NE,Elkhorn,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120959,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-02-13 17:20:00 UTC,heat,hold,759,760,760,NE,Omaha,0,True,False,True,Electric
120960,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-02-13 19:40:00 UTC,heat,hold,759,760,760,NE,Omaha,0,True,False,True,Electric
120961,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-02-13 14:05:00 UTC,heat,hold,742,760,760,NE,Omaha,0,True,False,True,Electric
120962,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-02-13 14:50:00 UTC,heat,hold,759,760,760,NE,Omaha,0,True,False,True,Electric


In [43]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one writes lower-case "feb" -- confirm the casing is intentional.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/NE/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 NE day file into a DataFrame.
feb_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2019-feb-day-NE.csv",
)

In [48]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
feb_2019.drop(
    index=feb_2019.index[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c039f7f15fbd5d98b80efba2631acb48bef7d066,2019-02-16 13:55:00 UTC,auto,hold,721,775,725,NE,Bennington,0,False,False,False,Gas
3,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2019-02-10 17:05:00 UTC,heat,hold,653,659,659,NE,Lincoln,25,False,False,True,Electric
4,a26d25cdd2e63d4ce7c8754f7b9479ca92e16561,2019-02-02 18:35:00 UTC,heat,auto,662,672,668,NE,Lincoln,15,False,False,False,Gas
5,3b46790ec167583ec99f9fcc4d1aa248ecd4fa77,2019-02-13 14:00:00 UTC,heat,hold,718,717,717,NE,Gretna,0,False,False,False,Gas
7,3b46790ec167583ec99f9fcc4d1aa248ecd4fa77,2019-02-04 14:25:00 UTC,heat,hold,714,717,717,NE,Gretna,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
144587,a879115cd52da5e02d3f452a5dd21eea4b5dafe9,2019-02-20 18:35:00 UTC,heat,auto,759,760,760,NE,Minatare,20,False,False,False,Gas
144588,a879115cd52da5e02d3f452a5dd21eea4b5dafe9,2019-02-26 14:35:00 UTC,heat,auto,758,760,760,NE,Minatare,20,False,False,False,Gas
144589,a879115cd52da5e02d3f452a5dd21eea4b5dafe9,2019-02-26 18:25:00 UTC,heat,auto,760,760,760,NE,Minatare,20,False,False,False,Gas
144590,a879115cd52da5e02d3f452a5dd21eea4b5dafe9,2019-02-26 14:50:00 UTC,heat,auto,752,760,760,NE,Minatare,20,False,False,False,Gas


In [49]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one writes lower-case "feb" -- confirm the casing is intentional.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/NE/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 NE day file into a DataFrame.
feb_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2020-feb-day-NE.csv",
)

In [54]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
feb_2020.drop(
    index=feb_2020.index[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8148e47f455d15aa74eb84f220afc14d7f182b10,2020-02-22 16:25:00 UTC,heat,hold,690,685,685,NE,Omaha,5,False,False,False,Gas
1,124aefa02526f136375928b0c92df70f356d26b9,2020-02-22 19:20:00 UTC,heat,hold,659,655,655,NE,Omaha,50,True,False,True,Electric
2,2504df671d8f2abfb4e209289ba629963f2db33b,2020-02-04 14:35:00 UTC,heat,auto,750,775,750,NE,Lincoln,0,False,False,False,Gas
3,c039f7f15fbd5d98b80efba2631acb48bef7d066,2020-02-09 11:30:00 UTC,auto,hold,717,775,725,NE,Bennington,0,False,False,False,Gas
4,0eb988a29c001ee5efd375446de17a6888c90e4c,2020-02-15 18:45:00 UTC,heat,hold,661,640,640,NE,Omaha,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199262,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2020-02-14 19:50:00 UTC,auto,hold,673,760,670,NE,Bennington,5,False,False,False,Gas
199263,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2020-02-21 18:20:00 UTC,auto,auto,693,760,670,NE,Bennington,5,False,False,False,Gas
199264,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2020-02-21 19:00:00 UTC,auto,auto,692,760,670,NE,Bennington,5,False,False,False,Gas
199265,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2020-02-01 14:30:00 UTC,auto,hold,681,760,680,NE,Bennington,5,False,False,False,Gas


In [55]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one writes lower-case "feb" -- confirm the casing is intentional.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/NE/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 NE day file into a DataFrame.
feb_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/NE-day/2021-feb-day-NE.csv",
)

In [60]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or
# <= 600, so only rows with both values strictly between 600 and 850 remain.
feb_2021.drop(
    index=feb_2021.index[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,ae207699741703a86828265b6556382c300adb71,2021-02-03 16:00:00 UTC,auto,hold,721,776,726,NE,Lincoln,95,False,False,False,Gas
2,d3ad6d46c43f95c6fbf6e250eeb4696d6a95200d,2021-02-28 17:00:00 UTC,heat,hold,653,664,640,NE,North Platte,60,False,False,False,Gas
3,e0b9a0d802f823a1cf6a2106c4ac75c05d08b31e,2021-02-21 17:05:00 UTC,auto,hold,674,742,672,NE,Lincoln,40,False,False,True,Electric
4,2504df671d8f2abfb4e209289ba629963f2db33b,2021-02-07 17:50:00 UTC,heat,hold,738,743,743,NE,Lincoln,0,False,False,False,Gas
5,2504df671d8f2abfb4e209289ba629963f2db33b,2021-02-04 16:40:00 UTC,heat,hold,748,743,743,NE,Lincoln,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130162,86c502d93e7747d5ce109c6610f9e71a7e2a7b1b,2021-02-03 17:55:00 UTC,heat,hold,756,760,760,NE,Bellevue,58,False,False,False,Gas
130163,86c502d93e7747d5ce109c6610f9e71a7e2a7b1b,2021-02-03 15:55:00 UTC,heat,hold,760,760,760,NE,Bellevue,58,False,False,False,Gas
130164,86c502d93e7747d5ce109c6610f9e71a7e2a7b1b,2021-02-03 18:00:00 UTC,heat,hold,759,760,760,NE,Bellevue,58,False,False,False,Gas
130165,86c502d93e7747d5ce109c6610f9e71a7e2a7b1b,2021-02-03 16:15:00 UTC,heat,hold,754,760,760,NE,Bellevue,58,False,False,False,Gas


In [61]:
# Tag every row with constant Year / Month labels; both are later used as
# grouping keys.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one writes lower-case "feb" -- confirm the casing is intentional.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the column names
# describe the averaged values produced by the aggregation that follows.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average every numeric column for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) combination.
# numeric_only=True leaves out the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export CSV file. to_csv does not create missing folders, so make sure the
# target directory exists first (a no-op when it is already there).
# index=True writes the group keys (the frame's index) out as columns.
os.makedirs("data/day/NE/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/NE/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the per-year CSV exports for this month.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NE/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year February CSV, then stack them with a single concat.
# Collecting the frames first avoids re-copying the growing result on each
# iteration and avoids concatenating onto an empty DataFrame, which newer
# pandas releases warn about.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/NE/feb/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# CSV files were found.
NE_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

NE_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,05320d509a18b2d4022029409e7a267736e78e0b,feb,2017,heat,auto,Lincoln,706.320000,721.960000,718.640000,90.0,False,False,False
1,05320d509a18b2d4022029409e7a267736e78e0b,feb,2017,heat,hold,Lincoln,707.606195,690.053097,686.526549,90.0,False,False,False
2,0d779ed5bbc93a32506824e72f1b051e9980a0cf,feb,2017,heat,auto,Blair,708.043478,714.782609,714.782609,35.0,False,False,False
3,10484576cdf6fa0ce377a1110f267866bcfea53d,feb,2017,heat,hold,Omaha,704.510204,677.448980,677.448980,0.0,False,False,False
4,1368c1b32f1bf0e6653e05e1c3cc7a00b8ac0384,feb,2017,cool,hold,Papillion,650.524590,650.065574,630.065574,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
208,f9efefb7392498029db8a74ea5f6a2f93d20fa47,feb,2021,heat,hold,Bellevue,682.800000,691.200000,689.240000,25.0,False,False,False
209,faa85f28edb3a9a38ac6130d63b1c3f892466036,feb,2021,heat,hold,Omaha,701.371681,711.000000,707.723451,55.0,False,False,False
210,faa8c435b84d052676a86ba7d2d6a783d26e6b19,feb,2021,heat,hold,Omaha,692.013514,693.756757,693.391892,20.0,True,False,False
211,faf3b76e8b0193bcf0256358efb4e593b669a1c7,feb,2021,auxHeatOnly,hold,Hickman,706.114286,710.114286,709.971429,0.0,False,False,True


In [67]:
# Save the combined February frame; the row index is left out of the file.
NE_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NE/NE_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 CSV for NE.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2017-jun-day-NE.csv")

In [69]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2017.drop(
    jun_2017[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ec27fedc2a3af90b5453dc4df107b5f19ab4cc9c,2017-06-01 15:30:00 UTC,cool,hold,729,731,731,NE,Syracuse,55,False,False,True,Electric
6,e98b28d1f1c335a8494b1c1e7367c8d02464a2eb,2017-06-02 12:25:00 UTC,auto,auto,744,774,694,NE,Lincoln,0,False,False,False,Gas
7,56b6390b43d3384d08426091f5f5d9e4726167d0,2017-06-16 11:25:00 UTC,auto,auto,775,781,680,NE,Blair,25,False,False,False,Gas
8,56b6390b43d3384d08426091f5f5d9e4726167d0,2017-06-11 11:30:00 UTC,auto,auto,773,781,671,NE,Blair,25,False,False,False,Gas
9,1985472cdc3f8d3bf064a68f444737c05c724ea8,2017-06-08 18:00:00 UTC,auto,hold,748,775,695,NE,Omaha,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84167,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2017-06-05 18:40:00 UTC,cool,hold,724,760,760,NE,Lincoln,77,False,False,False,Gas
84168,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2017-06-05 18:45:00 UTC,cool,hold,724,760,760,NE,Lincoln,77,False,False,False,Gas
84169,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2017-06-05 18:15:00 UTC,cool,hold,722,760,760,NE,Lincoln,77,False,False,False,Gas
84170,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2017-06-05 19:15:00 UTC,cool,hold,728,760,760,NE,Lincoln,77,False,False,False,Gas


In [70]:
# Stamp each row with the year and month this file covers.
jun_2017.loc[:, "Year"] = "2017"
jun_2017.loc[:, "Month"] = "jun"

In [71]:
# Label the three temperature columns as averages ahead of the aggregation.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the June 2017 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jun_2017_ave.to_csv(
    path_or_buf="data/day/NE/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw June 2018 CSV for NE.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2018-jun-day-NE.csv")

In [75]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2018.drop(
    jun_2018[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1cde6ae2ccdf910bb7fd2bd26dac007a23614191,2018-06-17 13:30:00 UTC,auto,hold,731,732,676,NE,Lincoln,10,False,False,False,Gas
1,50f777071abb7b1664d734531f57de6ce9007794,2018-06-23 19:15:00 UTC,cool,hold,728,729,729,NE,Omaha,30,False,False,False,Gas
3,a152c3918557f196745694cf50f718c79cef91f8,2018-06-12 14:05:00 UTC,auto,auto,736,733,683,NE,Grand Island,40,False,False,False,Gas
4,3fbd860c5b0aa1d896ef522174171b57b8c4c44b,2018-06-10 16:25:00 UTC,cool,hold,697,697,697,NE,Lincoln,70,False,False,False,Gas
5,a152c3918557f196745694cf50f718c79cef91f8,2018-06-16 15:30:00 UTC,auto,auto,735,733,683,NE,Grand Island,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
185700,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-06-17 18:40:00 UTC,cool,hold,749,760,760,NE,Omaha,0,True,False,True,Electric
185701,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-06-17 17:30:00 UTC,cool,hold,741,760,760,NE,Omaha,0,True,False,True,Electric
185702,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-06-16 18:30:00 UTC,cool,hold,743,760,760,NE,Omaha,0,True,False,True,Electric
185703,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-06-17 19:05:00 UTC,cool,hold,755,760,760,NE,Omaha,0,True,False,True,Electric


In [76]:
# Stamp each row with the year and month this file covers.
jun_2018.loc[:, "Year"] = "2018"
jun_2018.loc[:, "Month"] = "jun"

In [77]:
# Label the three temperature columns as averages ahead of the aggregation.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the June 2018 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jun_2018_ave.to_csv(
    path_or_buf="data/day/NE/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw June 2019 CSV for NE.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2019-jun-day-NE.csv")

In [81]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2019.drop(
    jun_2019[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a8f8fef9a85bf3d53a6f15cbf33ded8fc0130f27,2019-06-08 18:35:00 UTC,auto,auto,721,726,676,NE,Omaha,90,True,False,True,Electric
1,50f777071abb7b1664d734531f57de6ce9007794,2019-06-10 15:00:00 UTC,cool,hold,733,732,732,NE,Omaha,30,False,False,False,Gas
2,0a7b8a3fc74f19b5d4a368c58557be1f3ca053f7,2019-06-12 14:10:00 UTC,cool,auto,680,830,792,NE,Lincoln,69,True,False,False,Gas
3,50f777071abb7b1664d734531f57de6ce9007794,2019-06-11 19:05:00 UTC,cool,hold,731,732,732,NE,Omaha,30,False,False,False,Gas
4,50f777071abb7b1664d734531f57de6ce9007794,2019-06-23 16:10:00 UTC,cool,hold,713,706,706,NE,Omaha,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235789,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2019-06-08 12:40:00 UTC,auto,auto,703,730,620,NE,Plattsmouth,10,True,False,False,Gas
235790,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2019-06-08 11:40:00 UTC,auto,auto,704,730,620,NE,Plattsmouth,10,True,False,False,Gas
235791,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2019-06-08 12:00:00 UTC,auto,auto,702,730,620,NE,Plattsmouth,10,True,False,False,Gas
235792,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2019-06-08 13:50:00 UTC,auto,auto,702,730,620,NE,Plattsmouth,10,True,False,False,Gas


In [82]:
# Stamp each row with the year and month this file covers.
jun_2019.loc[:, "Year"] = "2019"
jun_2019.loc[:, "Month"] = "jun"

In [83]:
# Label the three temperature columns as averages ahead of the aggregation.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the June 2019 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jun_2019_ave.to_csv(
    path_or_buf="data/day/NE/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw June 2020 CSV for NE.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2020-jun-day-NE.csv")

In [87]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2020.drop(
    jun_2020[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2020-06-28 15:35:00 UTC,cool,auto,669,670,670,NE,Plattsmouth,10,True,False,False,Gas
1,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2020-06-08 19:35:00 UTC,cool,auto,671,670,670,NE,Plattsmouth,10,True,False,False,Gas
2,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2020-06-08 14:40:00 UTC,cool,auto,676,670,670,NE,Plattsmouth,10,True,False,False,Gas
3,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2020-06-20 13:15:00 UTC,cool,auto,660,670,670,NE,Plattsmouth,10,True,False,False,Gas
4,6c2daa77787fdb0d7171782aeefd25a0b112e5f0,2020-06-20 19:35:00 UTC,cool,auto,673,670,670,NE,Plattsmouth,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246185,6d43e20c2c1846a3448c6d954942cdbf6b9fc622,2020-06-21 09:45:00 UTC,cool,hold,730,730,730,NE,Papillion,20,True,False,False,Gas
246186,6d43e20c2c1846a3448c6d954942cdbf6b9fc622,2020-06-22 09:10:00 UTC,cool,hold,732,730,730,NE,Papillion,20,True,False,False,Gas
246187,6d43e20c2c1846a3448c6d954942cdbf6b9fc622,2020-06-03 18:55:00 UTC,auto,auto,736,730,610,NE,Papillion,20,True,False,False,Gas
246188,6d43e20c2c1846a3448c6d954942cdbf6b9fc622,2020-06-03 19:10:00 UTC,auto,auto,731,730,610,NE,Papillion,20,True,False,False,Gas


In [88]:
# Stamp each row with the year and month this file covers.
jun_2020.loc[:, "Year"] = "2020"
jun_2020.loc[:, "Month"] = "jun"

In [89]:
# Label the three temperature columns as averages ahead of the aggregation.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the June 2020 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jun_2020_ave.to_csv(
    path_or_buf="data/day/NE/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw June 2021 CSV for NE.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2021-jun-day-NE.csv")

In [93]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2021.drop(
    jun_2021[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,399c9c9f011895083e1e3063640a4b8313daad01,2021-06-27 18:25:00 UTC,auto,hold,744,744,664,NE,Fremont,10,False,False,True,Electric
1,770532d678d67213ddc1339014f8df35b27aa31a,2021-06-14 19:45:00 UTC,auto,hold,689,685,635,NE,Plattsmouth,10,True,False,False,Gas
2,e1c436fad40c0745964d68bfa66687da305d5b4f,2021-06-05 14:45:00 UTC,cool,hold,702,696,696,NE,Omaha,10,False,False,False,Gas
3,e1c436fad40c0745964d68bfa66687da305d5b4f,2021-06-20 19:45:00 UTC,cool,hold,708,706,706,NE,Omaha,10,False,False,False,Gas
4,2504df671d8f2abfb4e209289ba629963f2db33b,2021-06-14 16:35:00 UTC,cool,hold,770,788,788,NE,Lincoln,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
141243,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2021-06-13 14:50:00 UTC,cool,hold,747,760,760,NE,Lincoln,77,False,False,False,Gas
141244,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2021-06-12 15:30:00 UTC,cool,hold,738,760,760,NE,Lincoln,77,False,False,False,Gas
141245,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2021-06-12 17:20:00 UTC,cool,hold,741,760,760,NE,Lincoln,77,False,False,False,Gas
141246,dcd591783d60cc5ebb4cab197cde6801d1d2b70f,2021-06-13 14:05:00 UTC,cool,hold,745,760,760,NE,Lincoln,77,False,False,False,Gas


In [94]:
# Stamp each row with the year and month this file covers.
jun_2021.loc[:, "Year"] = "2021"
jun_2021.loc[:, "Month"] = "jun"

In [95]:
# Label the three temperature columns as averages ahead of the aggregation.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the June 2021 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jun_2021_ave.to_csv(
    path_or_buf="data/day/NE/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files
1. Read in the per-year CSV files from the month's folder for the state
2. Export them as a single combined CSV

In [98]:
# List the CSV files in the June folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/NE/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file and concatenate them in one call. Growing the frame
# inside a loop re-copies all earlier rows on each pass, and concatenating onto
# an empty DataFrame is deprecated in recent pandas.
NE_jun = (
    pd.concat([pd.read_csv("data/day/NE/jun/" + file) for file in files])
    if files
    else pd.DataFrame()  # no input files: same empty frame the loop produced
)

NE_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0087ac1d7540ff5efa6ae20213627943a2b07fe8,jun,2017,auto,hold,Omaha,736.500000,719.500000,693.500000,35.0,False,False,False
1,0087ac1d7540ff5efa6ae20213627943a2b07fe8,jun,2017,cool,hold,Omaha,753.615385,706.923077,706.923077,35.0,False,False,False
2,033afd975854035a4b0ab320768f094d2b716859,jun,2017,auto,hold,La Vista,704.320000,748.000000,650.000000,0.0,False,False,False
3,05320d509a18b2d4022029409e7a267736e78e0b,jun,2017,cool,auto,Lincoln,732.890625,718.718750,700.531250,90.0,False,False,False
4,05320d509a18b2d4022029409e7a267736e78e0b,jun,2017,cool,hold,Lincoln,732.180180,736.938739,736.282883,90.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
219,faa85f28edb3a9a38ac6130d63b1c3f892466036,jun,2021,cool,hold,Omaha,727.871901,741.438017,712.714876,55.0,False,False,False
220,faa8c435b84d052676a86ba7d2d6a783d26e6b19,jun,2021,auto,hold,Omaha,737.666667,700.000000,680.000000,20.0,True,False,False
221,faa8c435b84d052676a86ba7d2d6a783d26e6b19,jun,2021,cool,hold,Omaha,705.419355,701.072581,701.072581,20.0,True,False,False
222,faf3b76e8b0193bcf0256358efb4e593b669a1c7,jun,2021,auto,hold,Hickman,746.884615,716.000000,666.000000,0.0,False,False,True


In [100]:
# Save the combined June frame; the row index is left out of the file.
NE_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NE/NE_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 CSV for NE.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2017-jul-day-NE.csv")

In [102]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2017.drop(
    jul_2017[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,e6c7a7449be199e76934eb1df4e97a561d4ad874,2017-07-30 14:15:00 UTC,cool,hold,785,840,790,NE,Nebraska City,85,False,False,False,Gas
2,399c9c9f011895083e1e3063640a4b8313daad01,2017-07-15 18:00:00 UTC,auto,hold,715,713,663,NE,Fremont,10,False,False,True,Electric
3,e5020a56db9e45e942254216808f1607f90aac53,2017-07-02 18:20:00 UTC,auto,hold,704,718,670,NE,Bennington,15,False,False,False,Gas
4,e6c7a7449be199e76934eb1df4e97a561d4ad874,2017-07-01 14:30:00 UTC,cool,hold,747,830,790,NE,Nebraska City,85,False,False,False,Gas
5,1985472cdc3f8d3bf064a68f444737c05c724ea8,2017-07-05 17:15:00 UTC,auto,hold,751,765,715,NE,Omaha,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99169,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-07-15 18:50:00 UTC,auto,hold,760,760,630,NE,Bennington,5,False,False,False,Gas
99170,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-07-12 18:50:00 UTC,auto,hold,745,760,630,NE,Bennington,5,False,False,False,Gas
99171,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-07-13 12:30:00 UTC,auto,hold,736,760,630,NE,Bennington,5,False,False,False,Gas
99172,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-07-14 12:45:00 UTC,auto,hold,747,760,630,NE,Bennington,5,False,False,False,Gas


In [103]:
# Stamp each row with the year and month this file covers.
jul_2017.loc[:, "Year"] = "2017"
jul_2017.loc[:, "Month"] = "jul"

In [104]:
# Label the three temperature columns as averages ahead of the aggregation.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the July 2017 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jul_2017_ave.to_csv(
    path_or_buf="data/day/NE/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw July 2018 CSV for NE.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2018-jul-day-NE.csv")

In [108]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2018.drop(
    jul_2018[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-07-25 14:35:00 UTC,cool,hold,688,687,687,NE,Lincoln,25,False,False,True,Electric
1,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-07-30 13:50:00 UTC,cool,hold,684,687,687,NE,Lincoln,25,False,False,True,Electric
2,a152c3918557f196745694cf50f718c79cef91f8,2018-07-08 19:10:00 UTC,auto,hold,732,733,683,NE,Grand Island,40,False,False,False,Gas
5,24557ca6d177aa3b5a71f9110b182dc2dc436af0,2018-07-31 19:25:00 UTC,cool,hold,766,775,775,NE,Omaha,10,False,False,True,Electric
6,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-07-29 17:20:00 UTC,cool,hold,686,685,685,NE,Lincoln,25,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208588,c665a959dfb6cabd5665004bf9a1fc6afef9282e,2018-07-29 16:55:00 UTC,cool,hold,737,760,760,NE,Bellevue,30,False,False,False,Gas
208589,c665a959dfb6cabd5665004bf9a1fc6afef9282e,2018-07-29 16:10:00 UTC,cool,hold,733,760,760,NE,Bellevue,30,False,False,False,Gas
208590,c665a959dfb6cabd5665004bf9a1fc6afef9282e,2018-07-29 15:10:00 UTC,cool,hold,730,760,760,NE,Bellevue,30,False,False,False,Gas
208591,c665a959dfb6cabd5665004bf9a1fc6afef9282e,2018-07-29 16:20:00 UTC,cool,hold,733,760,760,NE,Bellevue,30,False,False,False,Gas


In [109]:
# Stamp each row with the year and month this file covers.
jul_2018.loc[:, "Year"] = "2018"
jul_2018.loc[:, "Month"] = "jul"

In [110]:
# Label the three temperature columns as averages ahead of the aggregation.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the July 2018 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jul_2018_ave.to_csv(
    path_or_buf="data/day/NE/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw July 2019 CSV for NE.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2019-jul-day-NE.csv")

In [114]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2019.drop(
    jul_2019[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,50f777071abb7b1664d734531f57de6ce9007794,2019-07-15 12:35:00 UTC,cool,hold,680,742,742,NE,Omaha,30,False,False,False,Gas
1,2fdc63565f7136c15970a1012e3042ff5cef182f,2019-07-29 15:20:00 UTC,auto,auto,741,737,687,NE,Omaha,0,False,False,False,Gas
2,01dbf65be10c41aa3c116d6c0a0a9a8e63658bab,2019-07-01 19:40:00 UTC,auto,auto,742,741,640,NE,Omaha,0,False,False,True,Electric
3,2504df671d8f2abfb4e209289ba629963f2db33b,2019-07-04 17:15:00 UTC,cool,hold,730,728,728,NE,Lincoln,0,False,False,False,Gas
4,2504df671d8f2abfb4e209289ba629963f2db33b,2019-07-04 18:05:00 UTC,cool,hold,730,728,728,NE,Lincoln,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246820,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-07-22 14:40:00 UTC,cool,auto,705,760,760,NE,Omaha,0,True,False,True,Electric
246821,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-07-30 17:00:00 UTC,cool,hold,750,760,760,NE,Omaha,0,True,False,True,Electric
246822,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-07-22 16:15:00 UTC,cool,auto,708,760,760,NE,Omaha,0,True,False,True,Electric
246823,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-07-30 19:55:00 UTC,cool,hold,765,760,760,NE,Omaha,0,True,False,True,Electric


In [115]:
# Stamp each row with the year and month this file covers.
jul_2019.loc[:, "Year"] = "2019"
jul_2019.loc[:, "Month"] = "jul"

In [116]:
# Label the three temperature columns as averages ahead of the aggregation.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the July 2019 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jul_2019_ave.to_csv(
    path_or_buf="data/day/NE/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw July 2020 CSV for NE.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2020-jul-day-NE.csv")

In [120]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2020.drop(
    jul_2020[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ae207699741703a86828265b6556382c300adb71,2020-07-08 18:45:00 UTC,cool,hold,738,736,736,NE,Lincoln,95,False,False,False,Gas
1,c0893a724ad626c64e22f059c4a1e81afa8396fc,2020-07-14 11:40:00 UTC,auto,auto,713,714,664,NE,Omaha,80,False,False,False,Gas
2,24557ca6d177aa3b5a71f9110b182dc2dc436af0,2020-07-12 11:50:00 UTC,cool,hold,776,795,790,NE,Omaha,10,False,False,True,Electric
3,24557ca6d177aa3b5a71f9110b182dc2dc436af0,2020-07-15 11:30:00 UTC,cool,hold,755,795,790,NE,Omaha,10,False,False,True,Electric
4,c0893a724ad626c64e22f059c4a1e81afa8396fc,2020-07-21 16:30:00 UTC,auto,auto,720,714,664,NE,Omaha,80,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245558,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-07-14 09:55:00 UTC,cool,hold,761,760,760,NE,Omaha,0,True,False,True,Electric
245559,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-07-30 18:55:00 UTC,cool,hold,750,760,760,NE,Omaha,0,True,False,True,Electric
245560,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-07-14 12:50:00 UTC,cool,hold,761,760,760,NE,Omaha,0,True,False,True,Electric
245561,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-07-30 18:15:00 UTC,cool,hold,746,760,760,NE,Omaha,0,True,False,True,Electric


In [121]:
# Stamp each row with the year and month this file covers.
jul_2020.loc[:, "Year"] = "2020"
jul_2020.loc[:, "Month"] = "jul"

In [122]:
# Label the three temperature columns as averages ahead of the aggregation.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the July 2020 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jul_2020_ave.to_csv(
    path_or_buf="data/day/NE/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw July 2021 CSV for NE.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2021-jul-day-NE.csv")

In [126]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2021.drop(
    jul_2021[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cf6446120e29e89c9aa494dff46a008db0ee948b,2021-07-06 12:35:00 UTC,cool,hold,751,752,752,NE,Bellevue,55,False,False,False,Gas
1,d0040049f0105d39f159341c12d1f054719fb53f,2021-07-25 11:50:00 UTC,cool,hold,664,665,665,NE,Elkhorn,0,False,False,False,Gas
2,c039f7f15fbd5d98b80efba2631acb48bef7d066,2021-07-14 11:15:00 UTC,auto,hold,702,703,653,NE,Bennington,0,False,False,False,Gas
3,faf3b76e8b0193bcf0256358efb4e593b669a1c7,2021-07-14 11:45:00 UTC,cool,hold,780,776,730,NE,Hickman,0,False,False,True,Electric
4,faf3b76e8b0193bcf0256358efb4e593b669a1c7,2021-07-27 16:50:00 UTC,cool,hold,746,746,730,NE,Hickman,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145926,c1c075cfc90c4712fe95f5afb28d980c6c427631,2021-07-05 08:10:00 UTC,cool,hold,782,760,760,NE,Lincoln,35,False,False,False,Gas
145927,c1c075cfc90c4712fe95f5afb28d980c6c427631,2021-07-04 17:50:00 UTC,cool,hold,787,760,760,NE,Lincoln,35,False,False,False,Gas
145928,c1c075cfc90c4712fe95f5afb28d980c6c427631,2021-07-04 19:45:00 UTC,cool,hold,772,760,760,NE,Lincoln,35,False,False,False,Gas
145929,c1c075cfc90c4712fe95f5afb28d980c6c427631,2021-07-03 17:15:00 UTC,cool,hold,775,760,760,NE,Lincoln,35,False,False,False,Gas


In [127]:
# Stamp each row with the year and month this file covers.
jul_2021.loc[:, "Year"] = "2021"
jul_2021.loc[:, "Month"] = "jul"

In [128]:
# Label the three temperature columns as averages ahead of the aggregation.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the remaining columns within each
# Identifier/Month/Year/HvacMode/CalendarEvent/City group.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises a TypeError
# on them otherwise.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the July 2021 group averages to CSV; index=True keeps the group-key
# index levels in the file.
jul_2021_ave.to_csv(
    path_or_buf="data/day/NE/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files
1. Read in the per-year CSV files from the month's folder for the state
2. Export them as a single combined CSV

In [131]:
# List the CSV files in the July folder. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/NE/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file and concatenate them in one call. Growing the frame
# inside a loop re-copies all earlier rows on each pass, and concatenating onto
# an empty DataFrame is deprecated in recent pandas.
NE_jul = (
    pd.concat([pd.read_csv("data/day/NE/jul/" + file) for file in files])
    if files
    else pd.DataFrame()  # no input files: same empty frame the loop produced
)

NE_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,033afd975854035a4b0ab320768f094d2b716859,jul,2017,auto,hold,La Vista,673.833333,670.041667,620.000000,0.0,False,False,False
1,05320d509a18b2d4022029409e7a267736e78e0b,jul,2017,cool,auto,Lincoln,734.333333,755.000000,692.000000,90.0,False,False,False
2,05320d509a18b2d4022029409e7a267736e78e0b,jul,2017,cool,hold,Lincoln,729.119512,734.965854,734.034146,90.0,False,False,False
3,0ad90268dcfa7e7aa1d92768a5d17e1841ceaf17,jul,2017,cool,hold,McCool Junction,759.255230,768.619247,768.573222,15.0,True,False,True
4,0d779ed5bbc93a32506824e72f1b051e9980a0cf,jul,2017,cool,auto,Blair,769.466667,760.000000,690.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
193,fa41ecba2896263b0616f05220abba8af9cd378d,jul,2021,cool,hold,Geneva,744.916667,740.375000,738.583333,100.0,False,False,False
194,faa85f28edb3a9a38ac6130d63b1c3f892466036,jul,2021,cool,hold,Omaha,717.452055,687.735160,685.063927,55.0,False,False,False
195,faa8c435b84d052676a86ba7d2d6a783d26e6b19,jul,2021,cool,hold,Omaha,695.957746,691.957746,691.943662,20.0,True,False,False
196,faf3b76e8b0193bcf0256358efb4e593b669a1c7,jul,2021,cool,hold,Hickman,762.708908,763.499373,729.982434,0.0,False,False,True


In [133]:
# Write the combined July rows without the positional index
NE_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NE/NE_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the raw 2017 August "day" CSV for NE
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2017-aug-day-NE.csv")

In [135]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
aug_2017.drop(
    index=aug_2017[
        (aug_2017['TemperatureExpectedCool'] >= 850)
        | (aug_2017['TemperatureExpectedHeat'] >= 850)
        | (aug_2017['TemperatureExpectedCool'] <= 600)
        | (aug_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,50f777071abb7b1664d734531f57de6ce9007794,2017-08-31 19:15:00 UTC,cool,hold,760,752,752,NE,Omaha,30,False,False,False,Gas
1,50f777071abb7b1664d734531f57de6ce9007794,2017-08-29 18:10:00 UTC,cool,hold,757,752,752,NE,Omaha,30,False,False,False,Gas
4,3bc5a1ea03c7caced8b4b1fa3d54e9ec8c3f951b,2017-08-12 13:10:00 UTC,cool,auto,740,840,700,NE,North Platte,30,False,False,False,Gas
5,50f777071abb7b1664d734531f57de6ce9007794,2017-08-31 15:20:00 UTC,cool,hold,741,742,742,NE,Omaha,30,False,False,False,Gas
7,50f777071abb7b1664d734531f57de6ce9007794,2017-08-15 16:10:00 UTC,cool,hold,730,731,731,NE,Omaha,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
84442,c11897bdaace2de8866bb11023b1f476b267fa27,2017-08-31 15:50:00 UTC,auto,hold,756,760,700,NE,Omaha,15,False,False,False,Gas
84443,c11897bdaace2de8866bb11023b1f476b267fa27,2017-08-30 18:35:00 UTC,auto,hold,764,760,700,NE,Omaha,15,False,False,False,Gas
84444,c11897bdaace2de8866bb11023b1f476b267fa27,2017-08-12 18:00:00 UTC,auto,hold,761,760,700,NE,Omaha,15,False,False,False,Gas
84445,c11897bdaace2de8866bb11023b1f476b267fa27,2017-08-11 17:25:00 UTC,auto,hold,764,760,700,NE,Omaha,15,False,False,False,Gas


In [136]:
# Tag every row with its year and month (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
aug_2017 = aug_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Save the August 2017 averages; index=True writes the groupby keys out as columns
aug_2017_ave.to_csv(path_or_buf="data/day/NE/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the raw 2018 August "day" CSV for NE
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2018-aug-day-NE.csv")

In [141]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
aug_2018.drop(
    index=aug_2018[
        (aug_2018['TemperatureExpectedCool'] >= 850)
        | (aug_2018['TemperatureExpectedHeat'] >= 850)
        | (aug_2018['TemperatureExpectedCool'] <= 600)
        | (aug_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,197f6257e48f1d14dffab296917ddf2c3090264b,2018-08-25 16:25:00 UTC,cool,hold,736,732,720,NE,Omaha,15,False,False,False,Gas
1,24557ca6d177aa3b5a71f9110b182dc2dc436af0,2018-08-02 13:25:00 UTC,cool,hold,765,775,775,NE,Omaha,10,False,False,True,Electric
2,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-08-06 17:05:00 UTC,cool,hold,687,687,687,NE,Lincoln,25,False,False,True,Electric
3,6c71018df5d7d3ec67c41d961ba0a0bbdd3a9e19,2018-08-02 13:55:00 UTC,cool,hold,683,687,687,NE,Lincoln,25,False,False,True,Electric
4,50f777071abb7b1664d734531f57de6ce9007794,2018-08-13 16:05:00 UTC,cool,hold,725,719,719,NE,Omaha,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
190345,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-08-26 13:05:00 UTC,cool,hold,732,760,760,NE,Omaha,0,True,False,True,Electric
190346,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-08-26 18:45:00 UTC,cool,hold,763,760,760,NE,Omaha,0,True,False,True,Electric
190347,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-08-26 15:35:00 UTC,cool,hold,744,760,760,NE,Omaha,0,True,False,True,Electric
190348,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2018-08-26 16:45:00 UTC,cool,hold,748,760,760,NE,Omaha,0,True,False,True,Electric


In [142]:
# Tag every row with its year and month (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
aug_2018 = aug_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Save the August 2018 averages; index=True writes the groupby keys out as columns
aug_2018_ave.to_csv(path_or_buf="data/day/NE/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the raw 2019 August "day" CSV for NE
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2019-aug-day-NE.csv")

In [147]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
aug_2019.drop(
    index=aug_2019[
        (aug_2019['TemperatureExpectedCool'] >= 850)
        | (aug_2019['TemperatureExpectedHeat'] >= 850)
        | (aug_2019['TemperatureExpectedCool'] <= 600)
        | (aug_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,64468e2b4ce43bb003d5ebb297af917298d85ddf,2019-08-27 12:25:00 UTC,auto,hold,751,765,715,NE,Lincoln,39,False,False,False,Gas
1,e6c7a7449be199e76934eb1df4e97a561d4ad874,2019-08-02 11:10:00 UTC,cool,hold,765,830,790,NE,Nebraska City,85,False,False,False,Gas
3,bb17dd6b682b51daaffefc2585ef323aab7828e3,2019-08-04 18:05:00 UTC,cool,hold,725,721,721,NE,Omaha,50,False,False,True,Electric
4,e6c7a7449be199e76934eb1df4e97a561d4ad874,2019-08-31 14:15:00 UTC,cool,hold,753,830,790,NE,Nebraska City,85,False,False,False,Gas
5,7b9c42484b70b04b51eca1379ebd7dbca68ac432,2019-08-06 08:35:00 UTC,auto,hold,721,716,646,NE,Papillion,5,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
246991,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-08-06 08:35:00 UTC,cool,hold,764,760,760,NE,Omaha,0,True,False,True,Electric
246992,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-08-05 16:15:00 UTC,cool,hold,765,760,760,NE,Omaha,0,True,False,True,Electric
246993,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-08-06 11:40:00 UTC,cool,hold,754,760,760,NE,Omaha,0,True,False,True,Electric
246994,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2019-08-19 13:00:00 UTC,cool,hold,699,760,760,NE,Omaha,0,True,False,True,Electric


In [148]:
# Tag every row with its year and month (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
aug_2019 = aug_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Save the August 2019 averages; index=True writes the groupby keys out as columns
aug_2019_ave.to_csv(path_or_buf="data/day/NE/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the raw 2020 August "day" CSV for NE
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2020-aug-day-NE.csv")

In [153]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
aug_2020.drop(
    index=aug_2020[
        (aug_2020['TemperatureExpectedCool'] >= 850)
        | (aug_2020['TemperatureExpectedHeat'] >= 850)
        | (aug_2020['TemperatureExpectedCool'] <= 600)
        | (aug_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6e5cf30b657c43e09ec50c7717cf699cfcf35178,2020-08-16 15:30:00 UTC,cool,auto,731,728,728,NE,Elkhorn,0,False,False,False,Gas
1,e5020a56db9e45e942254216808f1607f90aac53,2020-08-08 19:55:00 UTC,auto,hold,691,683,613,NE,Bennington,15,False,False,False,Gas
2,2fdc63565f7136c15970a1012e3042ff5cef182f,2020-08-01 17:50:00 UTC,auto,auto,733,830,680,NE,Omaha,0,False,False,False,Gas
3,9f26605658880b3798b118e33b72a6c686383954,2020-08-22 15:30:00 UTC,auto,hold,690,707,657,NE,Papillion,20,False,False,False,Gas
4,2bd35be62bb091959037e6ffb751504af73c21da,2020-08-24 17:40:00 UTC,cool,hold,818,840,790,NE,Omaha,40,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238575,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-08-11 17:15:00 UTC,cool,hold,744,760,760,NE,Omaha,0,True,False,True,Electric
238576,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-08-11 18:35:00 UTC,cool,hold,752,760,760,NE,Omaha,0,True,False,True,Electric
238577,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-08-04 19:40:00 UTC,cool,hold,760,760,760,NE,Omaha,0,True,False,True,Electric
238578,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,2020-08-01 17:15:00 UTC,cool,auto,735,760,760,NE,Omaha,0,True,False,True,Electric


In [154]:
# Tag every row with its year and month (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
aug_2020 = aug_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Save the August 2020 averages; index=True writes the groupby keys out as columns
aug_2020_ave.to_csv(path_or_buf="data/day/NE/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export as combined CSV

In [158]:
# List the per-year CSV files in the August folder. Sorted because os.listdir
# returns entries in arbitrary order, which would make the row order of the
# combined file vary between runs/machines.
files = sorted(f for f in os.listdir("data/day/NE/aug/") if f.endswith(".csv"))

In [159]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call: growing the frame
# inside a loop re-copies all rows on each pass and relies on concatenating
# with an empty DataFrame, which newer pandas versions deprecate.
NE_aug_frames = [pd.read_csv("data/day/NE/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (same result the loop version produced).
NE_aug = pd.concat(NE_aug_frames) if NE_aug_frames else pd.DataFrame()

NE_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0087ac1d7540ff5efa6ae20213627943a2b07fe8,aug,2017,auto,hold,Omaha,741.040000,740.000000,680.000000,35.0,False,False,False
1,03fb21c8de5320f353144ac52422f977cfb769f1,aug,2017,cool,hold,York,735.913706,734.137056,706.218274,10.0,True,False,True
2,05320d509a18b2d4022029409e7a267736e78e0b,aug,2017,cool,auto,Lincoln,719.923077,715.726496,700.000000,90.0,False,False,False
3,05320d509a18b2d4022029409e7a267736e78e0b,aug,2017,cool,hold,Lincoln,721.363946,726.908163,726.353741,90.0,False,False,False
4,0ad90268dcfa7e7aa1d92768a5d17e1841ceaf17,aug,2017,cool,hold,McCool Junction,756.729167,775.466667,775.245833,15.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
375,faa8c435b84d052676a86ba7d2d6a783d26e6b19,aug,2020,cool,auto,Omaha,709.611111,709.805556,709.430556,20.0,True,False,False
376,faa8c435b84d052676a86ba7d2d6a783d26e6b19,aug,2020,cool,hold,Omaha,708.000000,710.000000,710.000000,20.0,True,False,False
377,fe6909cb9f272a9b66f3c1b365252ff7168d5760,aug,2020,auto,hold,omaha,710.076923,682.384615,615.282051,0.0,False,False,False
378,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,aug,2020,cool,auto,Omaha,727.692643,732.138404,732.138404,0.0,True,False,True


In [160]:
# Write the combined August rows without the positional index
NE_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NE/NE_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw 2017 December "day" CSV for NE
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2017-dec-day-NE.csv")

In [162]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
dec_2017.drop(
    index=dec_2017[
        (dec_2017['TemperatureExpectedCool'] >= 850)
        | (dec_2017['TemperatureExpectedHeat'] >= 850)
        | (dec_2017['TemperatureExpectedCool'] <= 600)
        | (dec_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1985472cdc3f8d3bf064a68f444737c05c724ea8,2017-12-16 18:05:00 UTC,auto,hold,728,785,705,NE,Omaha,10,False,False,False,Gas
1,1985472cdc3f8d3bf064a68f444737c05c724ea8,2017-12-30 17:50:00 UTC,auto,hold,722,775,725,NE,Omaha,10,False,False,False,Gas
2,40fc9b58f9fc26baac1b9d96ce344d71518f6e49,2017-12-17 13:20:00 UTC,heat,hold,738,788,720,NE,Bellevue,0,True,False,False,Gas
4,1985472cdc3f8d3bf064a68f444737c05c724ea8,2017-12-25 15:35:00 UTC,auto,hold,716,765,715,NE,Omaha,10,False,False,False,Gas
6,4f608a05003feda5fb362f497866f36d31acd324,2017-12-03 15:50:00 UTC,auto,hold,719,840,710,NE,Lincoln,115,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
104707,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-12-20 12:30:00 UTC,auto,hold,671,760,670,NE,Bennington,5,False,False,False,Gas
104708,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-12-19 12:35:00 UTC,auto,hold,689,760,680,NE,Bennington,5,False,False,False,Gas
104709,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-12-20 18:15:00 UTC,auto,hold,714,760,700,NE,Bennington,5,False,False,False,Gas
104710,dc8b24b927b1c859f3d316b2c07c14a69a26181b,2017-12-19 16:55:00 UTC,auto,hold,704,760,700,NE,Bennington,5,False,False,False,Gas


In [163]:
# Tag every row with its year and month (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
dec_2017 = dec_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Save the December 2017 averages; index=True writes the groupby keys out as columns
dec_2017_ave.to_csv(path_or_buf="data/day/NE/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the raw 2018 December "day" CSV for NE
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2018-dec-day-NE.csv")

In [168]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
dec_2018.drop(
    index=dec_2018[
        (dec_2018['TemperatureExpectedCool'] >= 850)
        | (dec_2018['TemperatureExpectedHeat'] >= 850)
        | (dec_2018['TemperatureExpectedCool'] <= 600)
        | (dec_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,908220d248d3d0de476e4ced851377b2c920ba94,2018-12-16 13:20:00 UTC,heat,auto,711,791,680,NE,Papillion,0,True,False,False,Gas
1,7b9c42484b70b04b51eca1379ebd7dbca68ac432,2018-12-02 12:55:00 UTC,auto,hold,734,782,732,NE,Papillion,5,True,False,False,Gas
2,1cde6ae2ccdf910bb7fd2bd26dac007a23614191,2018-12-11 14:45:00 UTC,auto,hold,712,765,715,NE,Lincoln,10,False,False,False,Gas
3,249107bc009e59a0fd5f8dff7cf865f4bde925c3,2018-12-29 19:35:00 UTC,heat,hold,680,685,685,NE,Lincoln,0,False,False,False,Gas
4,02674609e156f6807ee78e1c9ac2851c16854804,2018-12-08 18:30:00 UTC,heat,hold,648,655,655,NE,Omaha,50,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199801,f15f623c199dc4f547c63da94db6260301a8cc81,2018-12-07 14:40:00 UTC,heat,auto,686,760,690,NE,Springfield,0,False,False,True,Electric
199802,f15f623c199dc4f547c63da94db6260301a8cc81,2018-12-16 12:55:00 UTC,heat,auto,688,760,690,NE,Springfield,0,False,False,True,Electric
199803,f15f623c199dc4f547c63da94db6260301a8cc81,2018-12-27 18:25:00 UTC,heat,auto,694,760,690,NE,Springfield,0,False,False,True,Electric
199804,f15f623c199dc4f547c63da94db6260301a8cc81,2018-12-16 14:55:00 UTC,heat,auto,688,760,690,NE,Springfield,0,False,False,True,Electric


In [169]:
# Tag every row with its year and month (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
dec_2018 = dec_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Save the December 2018 averages; index=True writes the groupby keys out as columns
dec_2018_ave.to_csv(path_or_buf="data/day/NE/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the raw 2019 December "day" CSV for NE
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2019-dec-day-NE.csv")

In [174]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
dec_2019.drop(
    index=dec_2019[
        (dec_2019['TemperatureExpectedCool'] >= 850)
        | (dec_2019['TemperatureExpectedHeat'] >= 850)
        | (dec_2019['TemperatureExpectedCool'] <= 600)
        | (dec_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,124aefa02526f136375928b0c92df70f356d26b9,2019-12-28 12:35:00 UTC,heat,hold,673,675,675,NE,Omaha,50,True,False,True,Electric
1,124aefa02526f136375928b0c92df70f356d26b9,2019-12-13 16:30:00 UTC,heat,hold,682,685,685,NE,Omaha,50,True,False,True,Electric
2,c039f7f15fbd5d98b80efba2631acb48bef7d066,2019-12-08 14:10:00 UTC,auto,hold,704,765,705,NE,Bennington,0,False,False,False,Gas
3,6a4be48209bd11b657a04f995d5afec116bbb937,2019-12-17 13:15:00 UTC,heat,hold,711,728,704,NE,Omaha,20,False,False,False,Gas
4,6a5f3cda47d1f1ebb68eba316cdb8b6e88c5b112,2019-12-08 14:30:00 UTC,auto,hold,713,756,716,NE,Hastings,40,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217586,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2019-12-11 14:25:00 UTC,auto,hold,695,760,700,NE,Lincoln,27,False,False,False,Gas
217587,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2019-12-18 16:35:00 UTC,auto,hold,699,760,700,NE,Lincoln,27,False,False,False,Gas
217588,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2019-12-09 19:10:00 UTC,auto,hold,677,760,680,NE,Lincoln,27,False,False,False,Gas
217589,e50f2e1393d5251e0586c8b339bca1f26e3430e6,2019-12-08 18:45:00 UTC,auto,hold,698,760,700,NE,Lincoln,27,False,False,False,Gas


In [175]:
# Tag every row with its year and month (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
dec_2019 = dec_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Save the December 2019 averages; index=True writes the groupby keys out as columns
dec_2019_ave.to_csv(path_or_buf="data/day/NE/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the raw 2020 December "day" CSV for NE
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/NE-day/2020-dec-day-NE.csv")

In [180]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat value is >= 850 or <= 600.
# All four checks are combined into one mask and dropped in a single pass.
dec_2020.drop(
    index=dec_2020[
        (dec_2020['TemperatureExpectedCool'] >= 850)
        | (dec_2020['TemperatureExpectedHeat'] >= 850)
        | (dec_2020['TemperatureExpectedCool'] <= 600)
        | (dec_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a4878bb967916a664ed4d25a33d2d8c92342a59,2020-12-10 15:15:00 UTC,heat,hold,702,706,706,NE,HAMPTON,10,False,False,True,Electric
1,2504df671d8f2abfb4e209289ba629963f2db33b,2020-12-20 17:35:00 UTC,heat,hold,743,743,743,NE,Lincoln,0,False,False,False,Gas
2,6a4878bb967916a664ed4d25a33d2d8c92342a59,2020-12-12 12:55:00 UTC,heat,hold,705,706,706,NE,HAMPTON,10,False,False,True,Electric
3,6a4878bb967916a664ed4d25a33d2d8c92342a59,2020-12-19 19:45:00 UTC,heat,hold,701,706,706,NE,HAMPTON,10,False,False,True,Electric
4,5182408f94362ec1153ac7567db8bdc5cb2f1aea,2020-12-16 13:35:00 UTC,heat,hold,677,732,680,NE,Kearney,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198113,ea8146a43e83f42479f4cc045bfee4fa30d0f6eb,2020-12-24 14:55:00 UTC,heat,hold,759,760,760,NE,Lemoyne,0,False,False,False,Gas
198114,ea8146a43e83f42479f4cc045bfee4fa30d0f6eb,2020-12-24 14:25:00 UTC,heat,hold,761,760,760,NE,Lemoyne,0,False,False,False,Gas
198115,ea8146a43e83f42479f4cc045bfee4fa30d0f6eb,2020-12-24 14:35:00 UTC,heat,hold,756,760,760,NE,Lemoyne,0,False,False,False,Gas
198116,ea8146a43e83f42479f4cc045bfee4fa30d0f6eb,2020-12-24 14:20:00 UTC,heat,hold,760,760,760,NE,Lemoyne,0,False,False,False,Gas


In [181]:
# Tag every row with its year and month (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Give the three temperature columns their "_ave" names ahead of the groupby mean
dec_2020 = dec_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True skips the text columns
# (e.g. date_time, ProvinceState), which pandas >= 2.0 would otherwise raise a
# TypeError on instead of silently dropping.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Save the December 2020 averages; index=True writes the groupby keys out as columns
dec_2020_ave.to_csv(path_or_buf="data/day/NE/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export as combined CSV

In [185]:
# List the per-year CSV files in the December folder. Sorted because os.listdir
# returns entries in arbitrary order, which would make the row order of the
# combined file vary between runs/machines.
files = sorted(f for f in os.listdir("data/day/NE/dec/") if f.endswith(".csv"))

In [186]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call: growing the frame
# inside a loop re-copies all rows on each pass and relies on concatenating
# with an empty DataFrame, which newer pandas versions deprecate.
NE_dec_frames = [pd.read_csv("data/day/NE/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (same result the loop version produced).
NE_dec = pd.concat(NE_dec_frames) if NE_dec_frames else pd.DataFrame()

NE_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,05320d509a18b2d4022029409e7a267736e78e0b,dec,2017,heat,auto,Lincoln,706.447368,728.157895,727.947368,90.0,False,False,False
1,05320d509a18b2d4022029409e7a267736e78e0b,dec,2017,heat,hold,Lincoln,722.527473,731.934066,731.846154,90.0,False,False,False
2,07ab14a1e62abe24dbdc3c8c819ab2ba7d93c8bd,dec,2017,heat,auto,Elkhorn,681.927798,696.025271,677.830325,20.0,False,False,False
3,07ab14a1e62abe24dbdc3c8c819ab2ba7d93c8bd,dec,2017,heat,hold,Elkhorn,687.792169,688.494578,688.494578,20.0,False,False,False
4,0888869bed67808acc6deba6f1d8c0ebc69e5d79,dec,2017,heat,hold,Omaha,695.849315,696.712329,696.712329,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,faf3b76e8b0193bcf0256358efb4e593b669a1c7,dec,2020,auxHeatOnly,hold,Hickman,704.368421,711.789474,709.105263,0.0,False,False,True
359,faf3b76e8b0193bcf0256358efb4e593b669a1c7,dec,2020,heat,auto,Hickman,696.130435,700.521739,699.869565,0.0,False,False,True
360,faf3b76e8b0193bcf0256358efb4e593b669a1c7,dec,2020,heat,hold,Hickman,698.282609,702.489130,702.146739,0.0,False,False,True
361,ff8d6679e9ce5c1228cf640311c0110b3aafb5bd,dec,2020,heat,auto,Omaha,718.434537,721.476298,721.476298,0.0,True,False,True


In [187]:
# Write the combined December rows without the positional index
NE_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NE/NE_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# List the combined month CSV files in the NE folder. Sorted because os.listdir
# returns entries in arbitrary order, which would make the row order of the
# combined file vary between runs/machines.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/NE/") if f.endswith(".csv"))

In [189]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every month file once and concatenate in a single call: growing the
# frame inside a loop re-copies all rows on each pass and relies on
# concatenating with an empty DataFrame, which newer pandas versions deprecate.
NE_all_frames = [pd.read_csv("Scraper_Output/State_Month_Day/NE/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (same result the loop version produced).
NE_all = pd.concat(NE_all_frames) if NE_all_frames else pd.DataFrame()

NE_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0087ac1d7540ff5efa6ae20213627943a2b07fe8,aug,2017,auto,hold,Omaha,741.040000,740.000000,680.000000,35.0,False,False,False
1,03fb21c8de5320f353144ac52422f977cfb769f1,aug,2017,cool,hold,York,735.913706,734.137056,706.218274,10.0,True,False,True
2,05320d509a18b2d4022029409e7a267736e78e0b,aug,2017,cool,auto,Lincoln,719.923077,715.726496,700.000000,90.0,False,False,False
3,05320d509a18b2d4022029409e7a267736e78e0b,aug,2017,cool,hold,Lincoln,721.363946,726.908163,726.353741,90.0,False,False,False
4,0ad90268dcfa7e7aa1d92768a5d17e1841ceaf17,aug,2017,cool,hold,McCool Junction,756.729167,775.466667,775.245833,15.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1616,faa85f28edb3a9a38ac6130d63b1c3f892466036,jun,2021,cool,hold,Omaha,727.871901,741.438017,712.714876,55.0,False,False,False
1617,faa8c435b84d052676a86ba7d2d6a783d26e6b19,jun,2021,auto,hold,Omaha,737.666667,700.000000,680.000000,20.0,True,False,False
1618,faa8c435b84d052676a86ba7d2d6a783d26e6b19,jun,2021,cool,hold,Omaha,705.419355,701.072581,701.072581,20.0,True,False,False
1619,faf3b76e8b0193bcf0256358efb4e593b669a1c7,jun,2021,auto,hold,Hickman,746.884615,716.000000,666.000000,0.0,False,False,True


In [190]:
# Write the combined all-month rows for NE without the positional index
NE_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NE_all_day.csv", index=False, header=True)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in each monthly frame.
raw_frames = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# Dicts keep insertion order, so the lines print in the same order as before
for label, frame in raw_frames.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['NE']
Unique jan_2018: ['NE']
Unique jan_2019: ['NE']
Unique jan_2020: ['NE']
Unique jan_2021: ['NE']
Unique feb_2017: ['NE']
Unique feb_2018: ['NE']
Unique feb_2019: ['NE']
Unique feb_2020: ['NE']
Unique feb_2021: ['NE']
Unique jun_2017: ['NE']
Unique jun_2018: ['NE']
Unique jun_2019: ['NE']
Unique jun_2020: ['NE']
Unique jun_2021: ['NE']
Unique jul_2017: ['NE']
Unique jul_2018: ['NE']
Unique jul_2019: ['NE']
Unique jul_2020: ['NE']
Unique jul_2021: ['NE']
Unique aug_2017: ['NE']
Unique aug_2018: ['NE']
Unique aug_2019: ['NE']
Unique aug_2020: ['NE']
Unique dec_2017: ['NE']
Unique dec_2018: ['NE']
Unique dec_2019: ['NE']
Unique dec_2020: ['NE']
