# DYD Thermostat Data 

## Preprocess

1. Generate CSV files from BigQuery queries

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 day CSV for MO into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2017-jan-day-MO.csv")

In [3]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = jan_2017['TemperatureExpectedCool']
expected_heat = jan_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jan_2017.drop(index=jan_2017[is_outlier].index, inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,484bc203099c57d37cee198cebd175e5041c2c3c,2017-01-14 19:25:00 UTC,heat,auto,689,690,690,MO,Raymore,40,False,False,True,Electric
1,c60809be3e7743503c5f095898b71e36e7dbc7b7,2017-01-10 17:55:00 UTC,auxHeatOnly,hold,749,750,750,MO,Kansas City,15,False,False,True,Electric
2,7fdf0c7424fcfff60445e03a92696aafdf024072,2017-01-10 18:30:00 UTC,heat,auto,685,820,640,MO,Chesterfield,30,False,False,False,Gas
3,005b44ffe2812f645179dccec3443da1ec58a68d,2017-01-05 13:05:00 UTC,heat,hold,689,696,696,MO,Kirkwood,0,False,False,False,Gas
4,d57608c5200328a33ad0ca2273da7b37a2b747c8,2017-01-22 11:10:00 UTC,heat,hold,687,690,690,MO,Innsbrook,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240750,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-01-26 18:55:00 UTC,heat,hold,694,700,700,MO,Kansas city,0,False,False,False,Gas
240751,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-01-19 19:40:00 UTC,heat,hold,677,680,680,MO,Kansas city,0,False,False,False,Gas
240752,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-01-03 15:30:00 UTC,heat,auto,674,680,680,MO,Kansas city,0,False,False,False,Gas
240753,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-01-19 17:35:00 UTC,heat,hold,681,680,680,MO,Kansas city,0,False,False,False,Gas


In [4]:
# Tag every row with its source year and month label (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0056cebd6cd72164f185e551f894f028d3d2e319,Jan,2017,auto,hold,Clarkson Valley,702.539823,770.000000,704.247788,45.0,True,False,True
005b44ffe2812f645179dccec3443da1ec58a68d,Jan,2017,heat,hold,Kirkwood,691.520879,694.665934,694.656044,0.0,False,False,False
008c4f598020b4161f9e0ee6c31c8f02b87b8a9c,Jan,2017,heat,hold,Springfield,686.693333,689.800000,688.440000,25.0,False,False,False
022ca80b9cb18dbbc868c84e90c30a7eb88e6e56,Jan,2017,heat,auto,Wildwood,692.142857,660.000000,660.000000,5.0,False,False,False
022ca80b9cb18dbbc868c84e90c30a7eb88e6e56,Jan,2017,heat,hold,Wildwood,695.768827,698.297723,698.234676,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fe795f29d57b91cbc0fd0b9396348ed6a692377e,Jan,2017,heat,hold,Troy,689.924298,688.500611,688.500611,5.0,False,False,False
ffab02b11c20f10ac2c4ca23edd3d249b627d274,Jan,2017,heat,auto,Wildwood,670.000000,680.000000,650.000000,16.0,False,False,False
ffab02b11c20f10ac2c4ca23edd3d249b627d274,Jan,2017,heat,hold,Wildwood,679.513158,684.710526,679.144737,16.0,False,False,False
ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,Jan,2017,heat,auto,Kansas city,677.244444,680.000000,680.000000,0.0,False,False,False


In [7]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/MO/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the January 2018 day CSV for MO into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2018-jan-day-MO.csv")

In [9]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = jan_2018['TemperatureExpectedCool']
expected_heat = jan_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jan_2018.drop(index=jan_2018[is_outlier].index, inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4200cfb02c1eee524375975dc93bc7919b40f077,2018-01-21 17:05:00 UTC,heat,hold,727,729,729,MO,Hazelwood,40,False,False,False,Gas
1,6366f01b4e1556199b4ed169da2830be6d56f3be,2018-01-05 16:05:00 UTC,heat,hold,695,705,705,MO,Kearney,20,False,False,False,Gas
2,1013248fcd20a0ebc7c8bfb5efd1ede35a3bdeff,2018-01-25 15:35:00 UTC,heat,auto,625,620,620,MO,Saint Charles,15,True,False,False,Gas
3,8305166890e5d70cd773ae01a703c561f0901f4d,2018-01-30 10:25:00 UTC,heat,hold,683,687,687,MO,Kirkwood,0,False,False,False,Gas
4,005b44ffe2812f645179dccec3443da1ec58a68d,2018-01-30 15:20:00 UTC,heat,hold,688,687,687,MO,Kirkwood,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696368,cb768ce19732da6c62a924332554d1c531a62c7c,2018-01-29 12:50:00 UTC,heat,auto,721,765,715,MO,Wentzville,5,False,False,False,Gas
696369,915655e9c69458252ef0d9dc59e0a5b4b5720393,2018-01-21 13:25:00 UTC,auto,hold,680,765,685,MO,Fenton,40,False,False,False,Gas
696370,915655e9c69458252ef0d9dc59e0a5b4b5720393,2018-01-05 15:15:00 UTC,auto,auto,713,765,715,MO,Fenton,40,False,False,False,Gas
696371,0a82bb392c66c106d6d350c9d720798510c3cc42,2018-01-25 10:15:00 UTC,auto,hold,711,765,715,MO,Saint Peters,27,False,False,False,Gas


In [10]:
# Tag every row with its source year and month label (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/MO/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the January 2019 day CSV for MO into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2019-jan-day-MO.csv")

In [15]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = jan_2019['TemperatureExpectedCool']
expected_heat = jan_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jan_2019.drop(index=jan_2019[is_outlier].index, inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2cc75bee9696f34db7e6ce1fb6b71b2e94f6caf0,2019-01-23 17:30:00 UTC,auto,hold,701,755,715,MO,Kansas City,0,True,False,True,Electric
1,a46b408d067b2dccd565c42c0a3a496e403f5683,2019-01-13 13:05:00 UTC,heat,auto,602,675,602,MO,Augusta,120,True,False,False,Gas
4,4eb1fe283c524ae50433639cf7287995588a664a,2019-01-12 19:35:00 UTC,auto,hold,720,785,725,MO,St. Louis,17,False,False,False,Gas
5,60b33113a4d390369bbea7fe77cc043f4b03b895,2019-01-08 18:35:00 UTC,heat,auto,719,736,700,MO,Linn Creek,40,False,False,False,Gas
7,f1924ec3085bc348925e811fe7a3e9c8fc74d573,2019-01-18 16:40:00 UTC,heat,hold,705,705,705,MO,Arnold,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1028090,5558e869955fcccddfad1e9dc2da4b078155c542,2019-01-31 15:15:00 UTC,auto,hold,699,760,710,MO,St Louis,107,False,False,False,Gas
1028091,d3ce9433680f9567dbf7dd007913d083b581659f,2019-01-01 14:15:00 UTC,auto,hold,649,760,650,MO,Saint Louis,0,False,False,False,Gas
1028092,0d97c16b509114c0035518bbe88f8b9d0f159357,2019-01-26 17:00:00 UTC,auto,hold,693,760,700,MO,Webb City,20,False,False,False,Gas
1028093,94997115037b8062e7357e43828f218779e1f121,2019-01-19 19:10:00 UTC,heat,hold,752,760,760,MO,Kansas City,35,False,False,False,Gas


In [16]:
# Tag every row with its source year and month label (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/MO/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the January 2020 day CSV for MO into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2020-jan-day-MO.csv")

In [21]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = jan_2020['TemperatureExpectedCool']
expected_heat = jan_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jan_2020.drop(index=jan_2020[is_outlier].index, inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,84a628a15a2c7fdbb6d6e7524a546d856827e23b,2020-01-09 17:30:00 UTC,auto,hold,672,676,646,MO,Warrenton,10,False,False,False,Gas
1,0e0c18ad370aa3d5acf2278a33d87a59761a3972,2020-01-23 19:45:00 UTC,heat,hold,725,712,712,MO,Sugar Creek,0,False,False,False,Gas
2,718b10529607a536252617f2073549f1e1a73f00,2020-01-01 13:15:00 UTC,auto,auto,787,840,790,MO,Montgomery City,9,False,False,False,Gas
3,50a830ee9ee237599e4d0828a29bf1a04aab25e0,2020-01-25 16:10:00 UTC,heat,hold,724,725,725,MO,Dardenne Prairie,10,False,False,False,Gas
4,12e503eab18c9a4e58b8bd9df873f724a43b712d,2020-01-14 15:50:00 UTC,auxHeatOnly,auto,686,742,700,MO,Valley Park,26,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1152912,c2f0bff662114306ede9bf1530d6689d0774504c,2020-01-26 10:40:00 UTC,heat,auto,762,760,760,MO,Kansas City,10,True,False,False,Gas
1152913,a075e8ef25c98153e35432e06111578f4404ec6c,2020-01-19 15:40:00 UTC,auto,hold,685,760,690,MO,Raytown,55,True,False,False,Gas
1152914,61bbe75287838f184ac77adbc7e4454b65d7edf1,2020-01-01 15:40:00 UTC,auto,hold,718,760,710,MO,Defiance,30,True,False,True,Electric
1152915,93bdcdf6d2581c39710fcc106be7b2b85c6d72c5,2020-01-19 15:35:00 UTC,auto,auto,664,760,670,MO,Fenton,58,False,False,False,Gas


In [22]:
# Tag every row with its source year and month label (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/MO/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the January 2021 day CSV for MO into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2021-jan-day-MO.csv")

In [27]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = jan_2021['TemperatureExpectedCool']
expected_heat = jan_2021['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jan_2021.drop(index=jan_2021[is_outlier].index, inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c8490b30fac5c86ef930bc17242b03c6331d3cec,2021-01-07 17:05:00 UTC,auto,hold,690,717,687,MO,St. Louis,40,True,False,False,Gas
2,1c640559d11205bfa1a1e54784a4bcba181414cc,2021-01-28 17:05:00 UTC,heat,hold,663,665,665,MO,St. Charles,0,False,False,False,Gas
3,ce08afd7223a5cea5ab62820bda19b6fa68e8c00,2021-01-25 17:20:00 UTC,heat,hold,706,705,705,MO,Jefferson City,5,False,False,False,Gas
4,ab176fa2faeb5b7ac02f20f5601dbf9f4da8d7b9,2021-01-14 14:10:00 UTC,heat,hold,697,681,660,MO,O Fallon,0,False,False,False,Gas
5,5d8a01a71ac7fdfd73c887039265770d86a1f5e0,2021-01-31 19:30:00 UTC,heat,hold,715,716,716,MO,Poplar Bluff,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
722571,eff2d7b8380db66c84b477e78032b6b8e0fe4ded,2021-01-23 14:35:00 UTC,auto,hold,654,760,660,MO,O'Fallon,25,False,False,False,Gas
722572,fe795f29d57b91cbc0fd0b9396348ed6a692377e,2021-01-02 15:30:00 UTC,auto,hold,716,760,690,MO,Troy,5,False,False,False,Gas
722573,61bbe75287838f184ac77adbc7e4454b65d7edf1,2021-01-14 14:10:00 UTC,auto,hold,718,760,720,MO,Defiance,30,True,False,True,Electric
722574,ea2ab5101661c4f18c5ecb74ddcf825e28bf6e63,2021-01-31 14:30:00 UTC,auto,hold,699,760,700,MO,Moberly,40,False,False,True,Electric


In [28]:
# Tag every row with its source year and month label (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/MO/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the CSV files in the January folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the row order of the combined frame reproducible across machines.
files = sorted(f for f in os.listdir("data/day/MO/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then concatenate once at the end.
# Concatenating onto a growing (initially empty) DataFrame inside the loop
# re-copies all earlier rows on each pass, and concatenating with an empty
# frame is deprecated in recent pandas releases.
frames = []
for file in files:
    df = pd.read_csv("data/day/MO/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the original
# "empty DataFrame" result when no CSV files were found.
MO_jan = pd.concat(frames) if frames else pd.DataFrame()

MO_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0056cebd6cd72164f185e551f894f028d3d2e319,Jan,2017,auto,hold,Clarkson Valley,702.539823,770.000000,704.247788,45.0,True,False,True
1,005b44ffe2812f645179dccec3443da1ec58a68d,Jan,2017,heat,hold,Kirkwood,691.520879,694.665934,694.656044,0.0,False,False,False
2,008c4f598020b4161f9e0ee6c31c8f02b87b8a9c,Jan,2017,heat,hold,Springfield,686.693333,689.800000,688.440000,25.0,False,False,False
3,022ca80b9cb18dbbc868c84e90c30a7eb88e6e56,Jan,2017,heat,auto,Wildwood,692.142857,660.000000,660.000000,5.0,False,False,False
4,022ca80b9cb18dbbc868c84e90c30a7eb88e6e56,Jan,2017,heat,hold,Wildwood,695.768827,698.297723,698.234676,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
846,ff66188bc7006896b45d5f1734510d8818936bb8,Jan,2021,heat,hold,Independence,672.987879,682.121212,678.763636,0.0,False,False,False
847,ff75acaf48abf1899c2069e4f5ab8e7836ad9242,Jan,2021,heat,hold,ballwin,671.168809,671.542416,671.542416,10.0,False,False,False
848,ffd2af4ae17ba4fc7304cbad80f57105d2451543,Jan,2021,heat,hold,Columbia,696.644676,699.363958,699.363958,5.0,False,False,False
849,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,Jan,2021,heat,hold,Kansas city,687.974138,690.000000,690.000000,0.0,False,False,False


In [34]:
# Write the combined January frame without its (repeating, per-file) index.
# The target folder is created first so the export does not fail on a
# checkout where it does not exist yet.
os.makedirs("Scraper_Output/State_Month_Day/MO", exist_ok=True)
MO_jan.to_csv("Scraper_Output/State_Month_Day/MO/MO_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 day CSV for MO into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2017-feb-day-MO.csv")

In [36]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = feb_2017['TemperatureExpectedCool']
expected_heat = feb_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
feb_2017.drop(index=feb_2017[is_outlier].index, inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fe795f29d57b91cbc0fd0b9396348ed6a692377e,2017-02-19 11:50:00 UTC,heat,hold,685,690,690,MO,Troy,5,False,False,False,Gas
1,1deceb626091ecee6d44b2eb41d87eb913c5d0be,2017-02-19 15:05:00 UTC,auto,auto,690,746,652,MO,Kansas city,100,False,False,False,Gas
2,fa7fa0ede3fe5079773bf87979f4a1167d1f45c6,2017-02-06 15:10:00 UTC,auto,hold,710,740,720,MO,St. Louis,30,False,False,False,Gas
3,e6da641910f01596e32323965b88028dec5e06e4,2017-02-19 16:35:00 UTC,heat,auto,664,660,660,MO,Arnold,0,False,False,False,Gas
4,5bfac229398bc031538463dc5768275f2ff3118d,2017-02-16 18:55:00 UTC,heat,hold,743,715,715,MO,Chesterfield,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
208803,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-02-08 19:55:00 UTC,heat,hold,675,680,680,MO,Kansas city,0,False,False,False,Gas
208804,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-02-22 19:25:00 UTC,cool,hold,668,690,690,MO,Kansas city,0,False,False,False,Gas
208805,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-02-26 18:15:00 UTC,heat,hold,685,690,690,MO,Kansas city,0,False,False,False,Gas
208806,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-02-27 13:20:00 UTC,heat,hold,663,670,670,MO,Kansas city,0,False,False,False,Gas


In [37]:
# Tag every row with its source year and month label (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January cells of this
# notebook use "Jan" -- confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/MO/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the February 2018 day CSV for MO into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2018-feb-day-MO.csv")

In [42]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = feb_2018['TemperatureExpectedCool']
expected_heat = feb_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
feb_2018.drop(index=feb_2018[is_outlier].index, inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,6366f01b4e1556199b4ed169da2830be6d56f3be,2018-02-13 19:25:00 UTC,heat,hold,691,695,695,MO,Kearney,20,False,False,False,Gas
2,67520692dc5bc29b9de9e93700ecb7d36ceb0ce1,2018-02-24 16:00:00 UTC,heat,hold,680,678,678,MO,O Fallon,0,False,False,False,Gas
3,2f46bdf42773e224b063d89b9cdcf04f608feb7d,2018-02-19 14:35:00 UTC,heat,hold,663,665,665,MO,Wentzville,0,False,False,False,Gas
4,43eb51b2e34d40c644edf6ed9cf7af2f0e9547d4,2018-02-07 17:20:00 UTC,heat,hold,691,697,697,MO,Fulton,9,True,False,False,Gas
5,c71ee76e237561147e8d0f05da3486752baa811f,2018-02-19 15:45:00 UTC,heat,hold,727,715,715,MO,Fulton,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
645495,155675cd796141a45177caabf01766c277efbaa6,2018-02-08 16:30:00 UTC,auto,hold,707,760,710,MO,Springfield,5,False,False,True,Electric
645496,4b9d3fb7941febf923bcd8d7909e2a94f0893340,2018-02-06 13:35:00 UTC,heat,auto,675,760,680,MO,Weatherby Lake,55,False,False,False,Gas
645497,71f0d3d13ea4495fd98f531ee83b85a38fabc9a6,2018-02-19 13:15:00 UTC,heat,auto,632,760,640,MO,Battlefield,37,False,False,False,Gas
645498,63a6cbd34e5994a70604a4c080960a76ab175b51,2018-02-04 17:00:00 UTC,auto,auto,720,760,720,MO,Wright City,0,False,False,False,Gas


In [43]:
# Tag every row with its source year and month label (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January cells of this
# notebook use "Jan" -- confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/MO/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the February 2019 day CSV for MO into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2019-feb-day-MO.csv")

In [48]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = feb_2019['TemperatureExpectedCool']
expected_heat = feb_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
feb_2019.drop(index=feb_2019[is_outlier].index, inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,4521f0d63e481dd40f31264f02961e02804477e7,2019-02-27 14:10:00 UTC,auto,hold,710,766,716,MO,Fenton,30,False,False,False,Gas
2,06f37d1143d483a0c01db1c5aded53cebdee0a4c,2019-02-02 07:20:00 UTC,auto,hold,670,725,675,MO,KANSAS CITY,0,False,False,False,Gas
3,f1924ec3085bc348925e811fe7a3e9c8fc74d573,2019-02-27 17:40:00 UTC,heat,hold,711,715,715,MO,Arnold,0,False,False,False,Gas
4,4521f0d63e481dd40f31264f02961e02804477e7,2019-02-01 15:30:00 UTC,auto,hold,714,766,716,MO,Fenton,30,False,False,False,Gas
5,9b7a00b2b4d03feae75649d19783b8f9f74138ea,2019-02-21 14:05:00 UTC,heat,hold,654,655,655,MO,Nixa,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
692625,94997115037b8062e7357e43828f218779e1f121,2019-02-03 18:30:00 UTC,heat,hold,761,760,760,MO,Kansas City,35,False,False,False,Gas
692626,94997115037b8062e7357e43828f218779e1f121,2019-02-01 12:55:00 UTC,heat,hold,753,760,760,MO,Kansas City,35,False,False,False,Gas
692627,94997115037b8062e7357e43828f218779e1f121,2019-02-01 18:40:00 UTC,heat,hold,758,760,760,MO,Kansas City,35,False,False,False,Gas
692628,94997115037b8062e7357e43828f218779e1f121,2019-02-21 16:05:00 UTC,heat,hold,759,760,760,MO,Kansas City,35,False,False,False,Gas


In [49]:
# Tag every row with its source year and month label (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January cells of this
# notebook use "Jan" -- confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/MO/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the February 2020 day CSV for MO into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2020-feb-day-MO.csv")

In [54]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = feb_2020['TemperatureExpectedCool']
expected_heat = feb_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
feb_2020.drop(index=feb_2020[is_outlier].index, inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5551dc1a8f181a5f18e42b58165040f93cbab48a,2020-02-22 13:15:00 UTC,heat,hold,621,701,620,MO,Higginsville,50,False,False,False,Gas
1,74eca1000b9a3aacb0a04e143803064ebe9776b2,2020-02-04 17:30:00 UTC,heat,hold,735,735,735,MO,Ozark,16,False,False,False,Gas
2,b724e5eae2cfd24e17e63dabc6b2f859d61eb21d,2020-02-07 15:35:00 UTC,heat,auto,779,723,780,MO,Saint Louis,60,False,False,False,Gas
3,761522e10ab2c29e6d28902451295dce7f8f1ab2,2020-02-01 17:50:00 UTC,heat,auto,606,830,620,MO,Osage Beach,20,False,False,False,Gas
4,2dbd070371892b87806790a4cfc4b3ea2b5d2cb9,2020-02-01 19:30:00 UTC,auto,hold,656,840,650,MO,Wentzville,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1037285,c8cea4e45797277a906096a81342d065bc2ec1e3,2020-02-16 15:45:00 UTC,heat,hold,761,760,760,MO,Springfield,120,False,False,False,Gas
1037286,37a63d9a5bde59ef7a305fa83522d5e3f64c09c7,2020-02-08 14:10:00 UTC,auto,hold,704,760,700,MO,Pacific,30,True,False,False,Gas
1037287,a075e8ef25c98153e35432e06111578f4404ec6c,2020-02-23 15:40:00 UTC,auto,hold,680,760,680,MO,Raytown,55,True,False,False,Gas
1037288,c2f0bff662114306ede9bf1530d6689d0774504c,2020-02-25 14:45:00 UTC,heat,hold,761,760,760,MO,Kansas City,10,True,False,False,Gas


In [55]:
# Tag every row with its source year and month label (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January cells of this
# notebook use "Jan" -- confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/MO/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the February 2021 day CSV for MO into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2021-feb-day-MO.csv")

In [60]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. Missing values satisfy
# neither comparison, so rows with NaN in these columns are kept.
expected_cool = feb_2021['TemperatureExpectedCool']
expected_heat = feb_2021['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
feb_2021.drop(index=feb_2021[is_outlier].index, inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,634e12c36fcb8f00153777417ce71eb8504f6ca6,2021-02-09 17:00:00 UTC,heat,hold,690,689,689,MO,Wentzville,0,False,False,False,Gas
1,b347bb82049b13dfeb71fbb54cff7bca328b45f0,2021-02-07 13:25:00 UTC,heat,hold,701,739,682,MO,Manchester,0,False,False,False,Gas
2,0d985082afefda3355038c6de8261b1d84ef73de,2021-02-27 14:00:00 UTC,auto,hold,729,791,731,MO,Kansas City,0,False,False,False,Gas
3,8768b412890d36db2600b708822ee1ce7a19cd9e,2021-02-03 13:15:00 UTC,auto,hold,727,845,725,MO,Lamar,0,False,False,False,Gas
4,cc1d32cce7affed2c2818d56f1cde993ef018445,2021-02-27 17:30:00 UTC,auto,hold,730,775,705,MO,Leslie,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
630662,0366cd7a2609ec463aafc754bb3a11b2d20f72a6,2021-02-10 09:10:00 UTC,auto,hold,712,760,710,MO,Poplar Bluff,0,True,False,False,Gas
630663,eb59edd6a23a1372f69f19bf31bb7869b626ee6e,2021-02-12 16:40:00 UTC,auto,hold,708,760,710,MO,Clayton,9,True,False,True,Electric
630664,eb59edd6a23a1372f69f19bf31bb7869b626ee6e,2021-02-17 17:30:00 UTC,auto,hold,715,760,710,MO,Clayton,9,True,False,True,Electric
630665,f3da65babe6fafc5e328bc41bacf4008cbbeba3e,2021-02-05 14:25:00 UTC,auto,hold,708,760,710,MO,Ellisville,0,False,False,False,Gas


In [61]:
# Tag every row with its source year and month label (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January cells of this
# notebook use "Jan" -- confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so that the values
# produced by the following groupby/mean cell are labelled as averages.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric/boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is needed on pandas >= 2.0: the frame still holds text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and mean()
# no longer drops them silently -- it raises TypeError instead.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the aggregated frame. The group keys live in the index, so the
# index is written too. The target folder is created first so the export
# does not fail on a checkout where it does not exist yet.
os.makedirs("data/day/MO/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/MO/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the CSV files in the February folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the row order of the combined frame reproducible across machines.
files = sorted(f for f in os.listdir("data/day/MO/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then concatenate once at the end.
# Concatenating onto a growing (initially empty) DataFrame inside the loop
# re-copies all earlier rows on each pass, and concatenating with an empty
# frame is deprecated in recent pandas releases.
frames = []
for file in files:
    df = pd.read_csv("data/day/MO/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the original
# "empty DataFrame" result when no CSV files were found.
MO_feb = pd.concat(frames) if frames else pd.DataFrame()

MO_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,008c4f598020b4161f9e0ee6c31c8f02b87b8a9c,feb,2017,auto,hold,Springfield,687.259615,725.317308,674.307692,25.0,False,False,False
1,008c4f598020b4161f9e0ee6c31c8f02b87b8a9c,feb,2017,heat,auto,Springfield,660.478261,673.652174,673.391304,25.0,False,False,False
2,008c4f598020b4161f9e0ee6c31c8f02b87b8a9c,feb,2017,heat,hold,Springfield,701.000000,677.000000,659.444444,25.0,False,False,False
3,032256f0c3f84afabb439e80af499e07c3b0890f,feb,2017,heat,auto,-kansas city,675.714286,687.714286,687.714286,25.0,False,False,False
4,032256f0c3f84afabb439e80af499e07c3b0890f,feb,2017,heat,hold,-kansas city,690.638095,666.790476,656.123810,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
906,ff75acaf48abf1899c2069e4f5ab8e7836ad9242,feb,2021,heat,hold,ballwin,674.066409,674.452510,674.452510,10.0,False,False,False
907,ffab02b11c20f10ac2c4ca23edd3d249b627d274,feb,2021,heat,hold,Wildwood,704.238095,736.000000,710.000000,16.0,False,False,False
908,ffd2af4ae17ba4fc7304cbad80f57105d2451543,feb,2021,heat,hold,Columbia,683.214017,682.815385,682.815385,5.0,False,False,False
909,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,feb,2021,heat,hold,Kansas city,697.333333,700.030612,700.030612,0.0,False,False,False


In [67]:
# Save the combined February frame; the row index is not written.
MO_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MO/MO_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 "day" extract for MO.
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2017-jun-day-MO.csv"
)

In [69]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jun_2017.drop(
    index=jun_2017.loc[
        (jun_2017["TemperatureExpectedCool"] >= 850)
        | (jun_2017["TemperatureExpectedHeat"] >= 850)
        | (jun_2017["TemperatureExpectedCool"] <= 600)
        | (jun_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,50a830ee9ee237599e4d0828a29bf1a04aab25e0,2017-06-15 16:50:00 UTC,cool,hold,745,690,690,MO,Dardenne Prairie,10,False,False,False,Gas
1,432ab055d66c9bea554247d46afec361c0f3e79a,2017-06-02 16:45:00 UTC,cool,hold,730,710,710,MO,Nixa,35,False,False,False,Gas
2,484bc203099c57d37cee198cebd175e5041c2c3c,2017-06-18 17:40:00 UTC,cool,auto,697,700,660,MO,Raymore,40,False,False,True,Electric
3,484bc203099c57d37cee198cebd175e5041c2c3c,2017-06-11 18:25:00 UTC,cool,hold,713,750,750,MO,Raymore,40,False,False,True,Electric
4,8fc5b7b257fa8ae890ffd395a05b1b75fe26c5ef,2017-06-22 17:50:00 UTC,cool,hold,743,740,740,MO,Centralia,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
413354,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-06-15 18:40:00 UTC,cool,hold,686,680,680,MO,Kansas city,0,False,False,False,Gas
413355,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-06-22 15:35:00 UTC,cool,hold,683,680,680,MO,Kansas city,0,False,False,False,Gas
413356,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-06-12 17:10:00 UTC,cool,hold,683,680,680,MO,Kansas city,0,False,False,False,Gas
413357,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-06-20 15:30:00 UTC,cool,hold,683,680,680,MO,Kansas city,0,False,False,False,Gas


In [70]:
# Tag every row with its year and month so both can serve as group-by keys.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jun_2017_ave = jun_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [73]:
# Write the June 2017 group means to disk; the group keys live in the index,
# so the index is written as well.
jun_2017_ave.to_csv(
    path_or_buf="data/day/MO/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw June 2018 "day" extract for MO.
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2018-jun-day-MO.csv"
)

In [75]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jun_2018.drop(
    index=jun_2018.loc[
        (jun_2018["TemperatureExpectedCool"] >= 850)
        | (jun_2018["TemperatureExpectedHeat"] >= 850)
        | (jun_2018["TemperatureExpectedCool"] <= 600)
        | (jun_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,158c57bee90e5c46331cd9959b0c43287263230e,2018-06-22 10:35:00 UTC,cool,auto,685,690,662,MO,Wildwood,35,False,False,True,Electric
1,f1f51512c382092f1d73f84cb462d8b0b34c9e5b,2018-06-06 13:55:00 UTC,auto,auto,756,750,675,MO,St. Louis,60,True,False,False,Gas
2,cc5297a29df024013c7e347cef303c71d77134e0,2018-06-07 17:05:00 UTC,cool,hold,776,755,755,MO,St. Louis,107,False,False,True,Electric
3,158c57bee90e5c46331cd9959b0c43287263230e,2018-06-06 19:15:00 UTC,cool,auto,717,690,662,MO,Wildwood,35,False,False,True,Electric
4,1e5d65747e7f91a9fe04e210038f07b4b687cda5,2018-06-16 15:20:00 UTC,cool,hold,751,765,765,MO,O Fallon,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
913302,ab332b55f431448a164f539c26cb16723add0fbc,2018-06-25 09:15:00 UTC,cool,hold,761,760,760,MO,St Louis,45,False,False,False,Gas
913303,880bc7f5bbdc607d0d6306da4cc25bff80fb4e4e,2018-06-03 13:20:00 UTC,cool,auto,701,760,760,MO,Columbia,6,True,False,True,Electric
913304,ab332b55f431448a164f539c26cb16723add0fbc,2018-06-29 17:25:00 UTC,cool,hold,764,760,760,MO,St Louis,45,False,False,False,Gas
913305,85ca5335bc3c3ad772def296758968c4aacee52f,2018-06-23 15:30:00 UTC,cool,hold,757,760,760,MO,St. Louis,115,False,False,False,Gas


In [76]:
# Tag every row with its year and month so both can serve as group-by keys.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jun_2018_ave = jun_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [79]:
# Write the June 2018 group means to disk; the group keys live in the index,
# so the index is written as well.
jun_2018_ave.to_csv(
    path_or_buf="data/day/MO/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw June 2019 "day" extract for MO.
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2019-jun-day-MO.csv"
)

In [81]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jun_2019.drop(
    index=jun_2019.loc[
        (jun_2019["TemperatureExpectedCool"] >= 850)
        | (jun_2019["TemperatureExpectedHeat"] >= 850)
        | (jun_2019["TemperatureExpectedCool"] <= 600)
        | (jun_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,430ca2c1423fc466bc63a4e9bfdebbde58aad1b7,2019-06-01 17:20:00 UTC,auto,hold,752,728,678,MO,Saint Charles,10,False,False,False,Gas
1,383ddbce66c22a2885b4759d11763b7ec820859e,2019-06-16 14:00:00 UTC,cool,auto,795,800,759,MO,Blue Springs,25,False,False,False,Gas
2,602e1aecb042915eaa8f38151f3a882e906514f6,2019-06-17 18:00:00 UTC,cool,auto,738,800,800,MO,Columbia,0,True,False,False,Gas
3,c890fdb4ef69f5fb13ceb23b23ff51a1a29af897,2019-06-18 14:40:00 UTC,cool,hold,733,751,751,MO,Jefferson City,0,True,False,True,Electric
4,dd86ee9a15944c7e75bc24581fa82f989473c165,2019-06-21 19:10:00 UTC,cool,hold,755,751,751,MO,O Fallon,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1209461,2dd2f52c5d516d7274953f6e048303085ffd6b3b,2019-06-29 14:30:00 UTC,cool,hold,733,760,760,MO,st james,50,False,False,True,Electric
1209462,1f66d7737cc67f5c531bd7b66ff99c83f3e8114b,2019-06-30 18:40:00 UTC,cool,auto,763,760,760,MO,Wentzville,0,False,False,False,Gas
1209463,b63a58e0ecc9eac59f0a0828bb45d64feb659c5b,2019-06-22 15:00:00 UTC,cool,hold,747,782,760,MO,St. Louis,60,False,False,False,Gas
1209464,266ab66ef3d8fbe798df7e8d5a65f22fcdeae0c8,2019-06-18 18:55:00 UTC,cool,auto,764,760,760,MO,Columbia,20,False,False,False,Gas


In [82]:
# Tag every row with its year and month so both can serve as group-by keys.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jun_2019_ave = jun_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [85]:
# Write the June 2019 group means to disk; the group keys live in the index,
# so the index is written as well.
jun_2019_ave.to_csv(
    path_or_buf="data/day/MO/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw June 2020 "day" extract for MO.
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2020-jun-day-MO.csv"
)

In [87]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jun_2020.drop(
    index=jun_2020.loc[
        (jun_2020["TemperatureExpectedCool"] >= 850)
        | (jun_2020["TemperatureExpectedHeat"] >= 850)
        | (jun_2020["TemperatureExpectedCool"] <= 600)
        | (jun_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,88dea206563a045450cbfb5c3f41190bfe29185d,2020-06-17 18:30:00 UTC,cool,hold,742,738,738,MO,Saint Charles,20,True,False,False,Gas
2,fc8f32337ae9081c4e5e96073f95f9253b88bd25,2020-06-26 15:50:00 UTC,cool,hold,747,745,745,MO,Green Ridge,10,False,False,True,Electric
3,1a5212a58c52175312785d9892f8464adf0127fe,2020-06-08 19:45:00 UTC,auto,hold,698,695,645,MO,Liberty,10,False,False,True,Electric
4,8768b412890d36db2600b708822ee1ce7a19cd9e,2020-06-21 12:40:00 UTC,cool,hold,666,665,665,MO,Lamar,0,False,False,False,Gas
5,50ed4c44a0c540495ac1ba960b9f1c589ae77fa6,2020-06-09 17:20:00 UTC,cool,hold,805,840,782,MO,Saint Peters,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1254908,5551dc1a8f181a5f18e42b58165040f93cbab48a,2020-06-25 13:25:00 UTC,cool,auto,758,760,760,MO,Higginsville,50,False,False,False,Gas
1254909,cf05dfcc808a6f93e1453000a7868f5d655d7870,2020-06-25 13:00:00 UTC,cool,auto,740,760,760,MO,Saint Louis,69,False,False,False,Gas
1254910,1e5d65747e7f91a9fe04e210038f07b4b687cda5,2020-06-06 13:25:00 UTC,cool,auto,740,760,760,MO,O Fallon,0,False,False,False,Gas
1254911,30d369c3529b511bb706e5b2ad31d3f3dadc10e4,2020-06-13 14:50:00 UTC,cool,hold,760,760,760,MO,St Charles,25,False,False,False,Gas


In [88]:
# Tag every row with its year and month so both can serve as group-by keys.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jun_2020_ave = jun_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [91]:
# Write the June 2020 group means to disk; the group keys live in the index,
# so the index is written as well.
jun_2020_ave.to_csv(
    path_or_buf="data/day/MO/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw June 2021 "day" extract for MO.
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2021-jun-day-MO.csv"
)

In [93]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jun_2021.drop(
    index=jun_2021.loc[
        (jun_2021["TemperatureExpectedCool"] >= 850)
        | (jun_2021["TemperatureExpectedHeat"] >= 850)
        | (jun_2021["TemperatureExpectedCool"] <= 600)
        | (jun_2021["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9db09b9fe1b404484f9023122930605fc00bc388,2021-06-15 18:50:00 UTC,cool,hold,733,730,719,MO,Platte City,50,False,True,False,Gas
1,ed4b52c970d29480d0b6c721e32900f7b68806f9,2021-06-24 16:40:00 UTC,cool,hold,736,739,739,MO,Saint Louis,40,False,False,False,Gas
3,08f90a99ec9083a76581941f929af410e921d8ff,2021-06-15 18:40:00 UTC,cool,hold,737,765,765,MO,Clayton,0,True,False,False,Gas
4,ea9bd5b70580abc6f824b54a80ee1ea137ba6d50,2021-06-06 12:30:00 UTC,auto,hold,710,715,635,MO,Joplin,0,True,False,True,Electric
5,31e61b93b8dd61805639c9c5415826c85f972f4b,2021-06-09 11:50:00 UTC,auto,hold,706,699,649,MO,Kearney,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
743554,6869171e41b74d9403f174c9cc4cd472bed91689,2021-06-04 17:40:00 UTC,cool,hold,754,760,760,MO,Saint Louis,90,False,False,False,Gas
743555,35689ce21f84292485ea9043c98d1fb778a10c16,2021-06-26 12:35:00 UTC,cool,hold,715,760,760,MO,van buren,0,True,False,True,Electric
743556,58f17654ec369fe26801df62efebb53ea369d0f0,2021-06-25 16:10:00 UTC,cool,hold,757,760,760,MO,Columbia,6,True,False,True,Electric
743557,29d8bfa3d0d421eaa3bf893642ff9a19fd0be137,2021-06-27 15:55:00 UTC,cool,hold,768,760,760,MO,MEXICO,0,False,False,True,Electric


In [94]:
# Tag every row with its year and month so both can serve as group-by keys.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jun_2021_ave = jun_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [97]:
# Write the June 2021 group means to disk; the group keys live in the index,
# so the index is written as well.
jun_2021_ave.to_csv(
    path_or_buf="data/day/MO/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine the monthly CSV files
1. Read in every per-year CSV from this month's folder for the state
2. Export the combined data as a single CSV

In [98]:
# List the CSV files in the MO June folder (expected to be the per-year
# *_ave.csv exports). os.listdir returns names in arbitrary order, so they are
# sorted to keep the combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MO/jun/") if f.endswith(".csv"))

In [99]:
# Combine every CSV listed in `files` into a single June frame.
# The frames are collected first and concatenated once: this avoids re-copying the
# accumulated rows on every iteration and avoids seeding the result with an empty
# DataFrame, whose effect on result dtypes is deprecated in recent pandas releases.
# Approach adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
jun_frames = [pd.read_csv("data/day/MO/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
MO_jun = pd.concat(jun_frames) if jun_frames else pd.DataFrame()

MO_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0039c46facfa454852fda813f2c88ed0bbfab21d,jun,2017,auto,hold,Blue Springs,745.333333,759.375000,680.000000,20.0,False,False,False
1,0056cebd6cd72164f185e551f894f028d3d2e319,jun,2017,cool,hold,Clarkson Valley,748.733333,740.000000,740.000000,45.0,True,False,True
2,005b44ffe2812f645179dccec3443da1ec58a68d,jun,2017,cool,hold,Kirkwood,731.318915,730.000000,730.000000,0.0,False,False,False
3,008c4f598020b4161f9e0ee6c31c8f02b87b8a9c,jun,2017,cool,hold,Springfield,734.166667,730.166667,729.166667,25.0,False,False,False
4,01077fe00e6bddb7480aa62c3efecbfc1b5de9c4,jun,2017,auto,auto,Lee's Summit,758.000000,740.333333,690.000000,17.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
942,ffab02b11c20f10ac2c4ca23edd3d249b627d274,jun,2021,cool,hold,Wildwood,751.306828,749.646500,749.404494,16.0,False,False,False
943,ffd2af4ae17ba4fc7304cbad80f57105d2451543,jun,2021,cool,hold,Columbia,699.434567,715.335773,715.335773,5.0,False,False,False
944,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,jun,2021,cool,hold,Kansas city,744.761179,783.081456,719.473137,0.0,False,False,False
945,ffede669ccc5cc0e508b9ce744c4ae67cef9181b,jun,2021,cool,hold,Clayton,744.196850,722.535433,722.535433,0.0,True,False,False


In [100]:
# Save the combined June frame; the row index is not written.
MO_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MO/MO_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 "day" extract for MO.
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2017-jul-day-MO.csv"
)

In [102]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jul_2017.drop(
    index=jul_2017.loc[
        (jul_2017["TemperatureExpectedCool"] >= 850)
        | (jul_2017["TemperatureExpectedHeat"] >= 850)
        | (jul_2017["TemperatureExpectedCool"] <= 600)
        | (jul_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6d36dc244e592b1a07bd7549582f0c5e917bf3dd,2017-07-30 14:25:00 UTC,cool,auto,723,840,730,MO,Creve Coeur,50,False,False,False,Gas
1,74eca1000b9a3aacb0a04e143803064ebe9776b2,2017-07-12 18:10:00 UTC,cool,hold,743,740,740,MO,Ozark,16,False,False,False,Gas
2,1f238d54fcb1a92d890df825fc2e0d687ce02924,2017-07-02 18:35:00 UTC,cool,hold,698,710,710,MO,Ofallon,15,False,False,False,Gas
3,219b156dcea0d3cf08bea0c2b199149494d267b9,2017-07-22 16:55:00 UTC,cool,hold,758,750,750,MO,Brentwood,0,False,False,False,Gas
5,7d5699ca8cc0ae172e2facba1e9d97be37e46849,2017-07-22 17:00:00 UTC,cool,auto,759,750,700,MO,Kansas City,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
493888,f60f0889045a3200ce1594a5db78d776b38cbdba,2017-07-14 17:00:00 UTC,auto,auto,745,740,650,MO,Saint Clair,10,False,False,False,Gas
493889,f60f0889045a3200ce1594a5db78d776b38cbdba,2017-07-19 19:40:00 UTC,auto,auto,741,740,630,MO,Saint Clair,10,False,False,False,Gas
493890,f60f0889045a3200ce1594a5db78d776b38cbdba,2017-07-30 18:25:00 UTC,auto,auto,745,740,630,MO,Saint Clair,10,False,False,False,Gas
493891,f60f0889045a3200ce1594a5db78d776b38cbdba,2017-07-04 11:55:00 UTC,auto,auto,709,750,650,MO,Saint Clair,10,False,False,False,Gas


In [103]:
# Tag every row with its year and month so both can serve as group-by keys.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jul_2017_ave = jul_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [106]:
# Write the July 2017 group means to disk; the group keys live in the index,
# so the index is written as well.
jul_2017_ave.to_csv(
    path_or_buf="data/day/MO/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw July 2018 "day" extract for MO.
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2018-jul-day-MO.csv"
)

In [108]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jul_2018.drop(
    index=jul_2018.loc[
        (jul_2018["TemperatureExpectedCool"] >= 850)
        | (jul_2018["TemperatureExpectedHeat"] >= 850)
        | (jul_2018["TemperatureExpectedCool"] <= 600)
        | (jul_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c71ee76e237561147e8d0f05da3486752baa811f,2018-07-09 08:50:00 UTC,cool,hold,717,716,716,MO,Fulton,10,False,False,False,Gas
1,1e5d65747e7f91a9fe04e210038f07b4b687cda5,2018-07-08 12:20:00 UTC,cool,hold,698,765,765,MO,O Fallon,0,False,False,False,Gas
2,158c57bee90e5c46331cd9959b0c43287263230e,2018-07-24 14:05:00 UTC,cool,auto,719,720,662,MO,Wildwood,35,False,False,True,Electric
3,25174d846d5c5349cb72fe3113a17e98f655b964,2018-07-01 14:05:00 UTC,auto,hold,760,765,715,MO,SPRINGFIELD,10,False,False,False,Gas
6,005b44ffe2812f645179dccec3443da1ec58a68d,2018-07-23 15:20:00 UTC,cool,hold,735,731,731,MO,Kirkwood,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1007389,e764b0c4f57aef372b3c647d11a4341df515572b,2018-07-25 12:25:00 UTC,cool,hold,753,760,760,MO,Saint Charles,7,False,False,False,Gas
1007390,5770fb11edbbfb304739a8d2c377131ba55e3e68,2018-07-09 17:15:00 UTC,cool,hold,762,760,760,MO,Saint Louis,0,False,False,False,Gas
1007391,85ca5335bc3c3ad772def296758968c4aacee52f,2018-07-10 10:40:00 UTC,cool,hold,759,760,760,MO,St. Louis,115,False,False,False,Gas
1007392,d32aeab160f0c821d4860f92b8e6495a047dcce2,2018-07-22 19:45:00 UTC,cool,hold,766,760,760,MO,Kansas City,5,False,False,False,Gas


In [109]:
# Tag every row with its year and month so both can serve as group-by keys.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jul_2018_ave = jul_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [112]:
# Write the July 2018 group means to disk; the group keys live in the index,
# so the index is written as well.
jul_2018_ave.to_csv(
    path_or_buf="data/day/MO/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw July 2019 "day" extract for MO.
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2019-jul-day-MO.csv"
)

In [114]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jul_2019.drop(
    index=jul_2019.loc[
        (jul_2019["TemperatureExpectedCool"] >= 850)
        | (jul_2019["TemperatureExpectedHeat"] >= 850)
        | (jul_2019["TemperatureExpectedCool"] <= 600)
        | (jul_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9d86e480c77a541159c144f95c5dcef6918d72e3,2019-07-05 18:05:00 UTC,cool,auto,723,716,716,MO,St. Louis,89,False,False,False,Gas
1,0624a9c57e51d50857e246ae27e08c4eecb03592,2019-07-03 14:40:00 UTC,cool,hold,703,698,698,MO,Kansas City,89,False,False,False,Gas
2,dd86ee9a15944c7e75bc24581fa82f989473c165,2019-07-31 15:20:00 UTC,cool,hold,718,717,717,MO,O Fallon,10,False,False,False,Gas
3,12e503eab18c9a4e58b8bd9df873f724a43b712d,2019-07-13 16:35:00 UTC,cool,hold,729,722,722,MO,Valley Park,26,False,False,True,Electric
4,eb01b1d294a4231cda7217edb52ca89b8b61f219,2019-07-24 19:25:00 UTC,cool,auto,753,800,800,MO,Columbia,19,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1286111,b63a58e0ecc9eac59f0a0828bb45d64feb659c5b,2019-07-25 13:20:00 UTC,cool,hold,761,762,760,MO,St. Louis,60,False,False,False,Gas
1286112,71d5d4ba1bced39b38f0fb3a8d32323fb49784f4,2019-07-21 12:45:00 UTC,cool,auto,736,740,760,MO,Springfield,5,True,False,False,Gas
1286113,86d922748515df51417a0dcd6ae4db9a46eaeffa,2019-07-20 15:05:00 UTC,cool,hold,754,760,760,MO,Joplin,0,False,False,True,Electric
1286114,d79022b2e92c8e8e93c9f76d63bcab3c3e13f741,2019-07-16 19:35:00 UTC,cool,hold,762,760,760,MO,Springfield,30,True,False,False,Gas


In [115]:
# Tag every row with its year and month so both can serve as group-by keys.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jul_2019_ave = jul_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [118]:
# Write the July 2019 group means to disk; the group keys live in the index,
# so the index is written as well.
jul_2019_ave.to_csv(
    path_or_buf="data/day/MO/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw July 2020 "day" extract for MO.
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2020-jul-day-MO.csv"
)

In [120]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jul_2020.drop(
    index=jul_2020.loc[
        (jul_2020["TemperatureExpectedCool"] >= 850)
        | (jul_2020["TemperatureExpectedHeat"] >= 850)
        | (jul_2020["TemperatureExpectedCool"] <= 600)
        | (jul_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,409a144a1e8eba0cd58b8982779d7b2982edb7ff,2020-07-18 14:05:00 UTC,cool,hold,740,749,749,MO,Oakland,70,True,False,False,Gas
1,f4ca22d228005d9c20c954b2d273b2ad496dff10,2020-07-25 15:50:00 UTC,cool,hold,759,759,759,MO,Lee's Summit,29,False,False,True,Electric
2,8ad204cc0ba08f160ef5e43b4ea46035b53a0f5f,2020-07-10 08:35:00 UTC,cool,hold,708,705,705,MO,Springfield,20,False,False,False,Gas
3,d12c6291b7a5d6019133e414e68ac5ab11cbe5d0,2020-07-25 18:40:00 UTC,auto,auto,749,741,691,MO,Dexter,0,False,False,False,Gas
4,12e503eab18c9a4e58b8bd9df873f724a43b712d,2020-07-23 11:40:00 UTC,auto,hold,725,726,676,MO,Valley Park,26,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1266997,2b9cdacb37fdba36c20f980be301c44138de4fa2,2020-07-02 11:35:00 UTC,cool,hold,760,760,760,MO,Sunrise Beach,0,False,True,True,Electric
1266998,86d922748515df51417a0dcd6ae4db9a46eaeffa,2020-07-04 16:55:00 UTC,cool,auto,782,770,760,MO,Joplin,0,False,False,True,Electric
1266999,89adaaa04c21f0d58670e72b592cf52bc97d47ea,2020-07-19 15:50:00 UTC,cool,hold,763,760,760,MO,Crestwood,60,False,False,False,Gas
1267000,20017e84cc9ed560c575126e6800e6de4345b38c,2020-07-15 11:50:00 UTC,cool,hold,769,760,760,MO,van buren,0,True,False,True,Electric


In [121]:
# Tag every row with its year and month so both can serve as group-by keys.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jul_2020_ave = jul_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [124]:
# Write the July 2020 group means to disk; the group keys live in the index,
# so the index is written as well.
jul_2020_ave.to_csv(
    path_or_buf="data/day/MO/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw July 2021 "day" extract for MO.
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/MO-day/2021-jul-day-MO.csv"
)

In [126]:
# Drop outlier rows in place before aggregating: a reading is removed when either
# expected setpoint (cool or heat) is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree — confirm the unit.
jul_2021.drop(
    index=jul_2021.loc[
        (jul_2021["TemperatureExpectedCool"] >= 850)
        | (jul_2021["TemperatureExpectedHeat"] >= 850)
        | (jul_2021["TemperatureExpectedHeat"] <= 600)
        | (jul_2021["TemperatureExpectedCool"] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ed4b52c970d29480d0b6c721e32900f7b68806f9,2021-07-07 17:20:00 UTC,cool,hold,740,739,739,MO,Saint Louis,40,False,False,False,Gas
1,150df04ae56a04fe38ca3c1f8e6b001e6c544f01,2021-07-06 17:30:00 UTC,cool,hold,742,744,744,MO,St. Louis,70,False,False,False,Gas
2,4eb1fe283c524ae50433639cf7287995588a664a,2021-07-26 14:35:00 UTC,auto,hold,737,735,685,MO,St. Louis,17,False,False,False,Gas
3,ed4b52c970d29480d0b6c721e32900f7b68806f9,2021-07-27 12:55:00 UTC,cool,hold,736,739,739,MO,Saint Louis,40,False,False,False,Gas
4,acc49750825d622cb375dc646e5f91a099399240,2021-07-07 12:35:00 UTC,auto,hold,716,715,665,MO,Riverside,8,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731341,757271553fe4e4a3d8e76426f46925913049ebe5,2021-07-04 16:40:00 UTC,cool,hold,740,760,760,MO,Nixa,35,False,False,False,Gas
731342,a35b2afa72b05b18880ba277e361eb9b37f4e3b4,2021-07-27 16:20:00 UTC,cool,hold,755,760,760,MO,van buren,0,True,False,True,Electric
731343,86d922748515df51417a0dcd6ae4db9a46eaeffa,2021-07-08 18:45:00 UTC,cool,hold,771,760,760,MO,Joplin,0,False,False,True,Electric
731344,3a7d4336557d837c07ec362d6006fd2b969ceff2,2021-07-21 18:20:00 UTC,cool,hold,760,760,760,MO,Overland,75,False,False,False,Gas


In [127]:
# Tag every row with its year and month so both can serve as group-by keys.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" ahead of the group-by so the
# averaged output carries aggregate labels.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per Identifier, month, year, HVAC mode, calendar
# event and city. numeric_only=True is spelled out because pandas >= 2.0 no longer
# silently skips text columns (date_time, ProvinceState, ...) and raises a
# TypeError instead.
jul_2021_ave = jul_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [130]:
# Write the July 2021 group means to disk; the group keys live in the index,
# so the index is written as well.
jul_2021_ave.to_csv(
    path_or_buf="data/day/MO/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine the monthly CSV files
1. Read in every per-year CSV from this month's folder for the state
2. Export the combined data as a single CSV

In [131]:
# List the CSV files in the MO July folder (expected to be the per-year
# *_ave.csv exports). os.listdir returns names in arbitrary order, so they are
# sorted to keep the combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MO/jul/") if f.endswith(".csv"))

In [132]:
# Combine every CSV listed in `files` into a single July frame.
# The frames are collected first and concatenated once: this avoids re-copying the
# accumulated rows on every iteration and avoids seeding the result with an empty
# DataFrame, whose effect on result dtypes is deprecated in recent pandas releases.
# Approach adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
jul_frames = [pd.read_csv("data/day/MO/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame in that case.
MO_jul = pd.concat(jul_frames) if jul_frames else pd.DataFrame()

MO_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0056cebd6cd72164f185e551f894f028d3d2e319,jul,2017,cool,hold,Clarkson Valley,745.800948,734.739336,734.739336,45.0,True,False,True
1,005b44ffe2812f645179dccec3443da1ec58a68d,jul,2017,cool,auto,Kirkwood,753.531532,750.000000,690.000000,0.0,False,False,False
2,005b44ffe2812f645179dccec3443da1ec58a68d,jul,2017,cool,hold,Kirkwood,749.869792,742.899884,742.899884,0.0,False,False,False
3,01088b49408ab4b9d13e0d7132362623ea0db85c,jul,2017,cool,auto,Independence,765.034788,765.000000,715.000000,20.0,False,False,False
4,022ca80b9cb18dbbc868c84e90c30a7eb88e6e56,jul,2017,cool,hold,Wildwood,715.284863,711.979738,711.979738,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
828,ffab02b11c20f10ac2c4ca23edd3d249b627d274,jul,2021,cool,hold,Wildwood,751.625000,750.252820,749.598684,16.0,False,False,False
829,ffd2af4ae17ba4fc7304cbad80f57105d2451543,jul,2021,cool,hold,Columbia,701.616140,725.001091,725.000000,5.0,False,False,False
830,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,jul,2021,cool,hold,Kansas city,675.976316,674.400000,674.228947,0.0,False,False,False
831,ffede669ccc5cc0e508b9ce744c4ae67cef9181b,jul,2021,cool,hold,Clayton,740.459459,734.648649,734.648649,0.0,True,False,False


In [133]:
# Save the combined month; the row labels repeat per source file and carry no
# meaning, so the index is left out of the CSV.
MO_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MO/MO_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 "day" export for MO into a DataFrame.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2017-aug-day-MO.csv")

# aug_2017.head()  # uncomment to preview the raw rows

In [135]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(aug_2017[out_of_range].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,219b156dcea0d3cf08bea0c2b199149494d267b9,2017-08-15 17:35:00 UTC,cool,auto,752,750,770,MO,Brentwood,0,False,False,False,Gas
1,150df04ae56a04fe38ca3c1f8e6b001e6c544f01,2017-08-29 13:35:00 UTC,cool,hold,763,770,770,MO,St. Louis,70,False,False,False,Gas
2,47cfdd1c486d96b7b9b72e635fe4f1c17e49f6e3,2017-08-11 13:05:00 UTC,cool,auto,727,730,730,MO,Ballwin,20,False,False,False,Gas
3,6f5e8f2fa9851002dcf8b4e0143958502e2e4e25,2017-08-25 12:55:00 UTC,cool,hold,717,710,710,MO,University City,80,False,False,False,Gas
4,60d0b471fffc94b0f612cb69305cb47c6ddc7fa6,2017-08-25 15:15:00 UTC,cool,auto,767,820,640,MO,St. Charles,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
525350,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-08-04 17:05:00 UTC,cool,hold,694,690,690,MO,Kansas city,0,False,False,False,Gas
525351,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-08-29 14:35:00 UTC,cool,auto,766,780,690,MO,Kansas city,0,False,False,False,Gas
525352,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-08-11 19:25:00 UTC,cool,hold,694,690,690,MO,Kansas city,0,False,False,False,Gas
525353,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,2017-08-04 14:05:00 UTC,cool,hold,684,690,690,MO,Kansas city,0,False,False,False,Gas


In [136]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave.head()  # uncomment to preview the aggregated rows

In [139]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
aug_2017_ave.to_csv(
    path_or_buf="data/day/MO/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the August 2018 "day" export for MO into a DataFrame.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2018-aug-day-MO.csv")

# aug_2018.head()  # uncomment to preview the raw rows

In [141]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(aug_2018[out_of_range].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,06f37d1143d483a0c01db1c5aded53cebdee0a4c,2018-08-05 18:40:00 UTC,auto,hold,736,735,665,MO,KANSAS CITY,0,False,False,False,Gas
1,5bfac229398bc031538463dc5768275f2ff3118d,2018-08-01 19:50:00 UTC,cool,hold,705,705,705,MO,Chesterfield,55,False,False,False,Gas
2,2ac0caa46bbe221b100d4b8503453d6a4a9909b5,2018-08-17 19:50:00 UTC,auto,hold,777,775,675,MO,Richland,0,False,False,False,Gas
3,21f49c63546a238325be268993112682e42f69fa,2018-08-25 19:15:00 UTC,cool,auto,804,800,800,MO,Troy,17,False,False,False,Gas
5,47cfdd1c486d96b7b9b72e635fe4f1c17e49f6e3,2018-08-10 13:50:00 UTC,cool,hold,754,782,782,MO,Ballwin,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1000358,23f7522c0a31addc1374eb2c8e3929e3d8ad51be,2018-08-05 15:45:00 UTC,cool,hold,759,760,760,MO,Fenton,50,False,False,False,Gas
1000359,85ca5335bc3c3ad772def296758968c4aacee52f,2018-08-25 15:00:00 UTC,cool,hold,759,760,760,MO,St. Louis,115,False,False,False,Gas
1000360,aa725bb5ae9a46140367b4f6cf2d69fa10903597,2018-08-11 08:45:00 UTC,cool,hold,761,760,760,MO,Osage Beach,20,True,False,True,Electric
1000361,d636c3faed30f7dbb24f3ddc57dc09eb20b1ff8e,2018-08-19 19:15:00 UTC,cool,hold,761,760,760,MO,Saint Louis,85,False,False,True,Electric


In [142]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave.head()  # uncomment to preview the aggregated rows

In [145]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
aug_2018_ave.to_csv(
    path_or_buf="data/day/MO/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the August 2019 "day" export for MO into a DataFrame.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2019-aug-day-MO.csv")

# aug_2019.head()  # uncomment to preview the raw rows

In [147]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(aug_2019[out_of_range].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,810648350bbd8e77c53cfa66c3cbb57ae0236b10,2019-08-03 18:55:00 UTC,auto,hold,717,715,665,MO,Chesterfield,0,False,False,False,Gas
1,154c00d52d51aaa89f8542cefb031687a74feda4,2019-08-11 15:00:00 UTC,auto,hold,725,716,666,MO,Kansas City,0,False,False,False,Gas
2,1289b8ea97bd1d04f0f58386bdab4d307298b33b,2019-08-25 13:55:00 UTC,cool,hold,719,716,716,MO,St. Louis,99,True,False,False,Gas
3,20017e84cc9ed560c575126e6800e6de4345b38c,2019-08-10 15:05:00 UTC,cool,hold,701,695,695,MO,van buren,0,True,False,True,Electric
4,93075ffdfd149d7132a9c09f0aba562ca2fc1616,2019-08-14 10:00:00 UTC,cool,hold,746,742,742,MO,Saint Charles,37,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1307296,55311abab5dde140d8a474e535276c7abf5dcc24,2019-08-02 14:30:00 UTC,cool,hold,754,760,760,MO,Arnold,50,False,False,False,Gas
1307297,b63a58e0ecc9eac59f0a0828bb45d64feb659c5b,2019-08-29 18:50:00 UTC,cool,hold,739,782,760,MO,St. Louis,60,False,False,False,Gas
1307298,232108b229bcc51c8e2a97e33df3ded7307f50c1,2019-08-12 15:10:00 UTC,cool,auto,762,760,760,MO,Loma Linda,0,True,False,True,Electric
1307299,88dea206563a045450cbfb5c3f41190bfe29185d,2019-08-05 10:20:00 UTC,cool,auto,760,760,760,MO,Saint Charles,20,True,False,False,Gas


In [148]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave.head()  # uncomment to preview the aggregated rows

In [151]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
aug_2019_ave.to_csv(
    path_or_buf="data/day/MO/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the August 2020 "day" export for MO into a DataFrame.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2020-aug-day-MO.csv")

# aug_2020.head()  # uncomment to preview the raw rows

In [153]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(aug_2020[out_of_range].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6a8bbe04424879812cbcb2ccc1e4bb810f60e777,2020-08-16 11:20:00 UTC,auto,hold,723,752,682,MO,Columbia,59,True,False,False,Gas
1,04a87e9ca74f218daa48c4570716f6ca0f04f0ac,2020-08-15 17:10:00 UTC,auto,hold,792,795,675,MO,Frontenac,30,False,False,False,Gas
2,cf102fddd6d7df7ebb573f9d9a035f86faa8c9be,2020-08-28 14:05:00 UTC,cool,hold,747,749,749,MO,Olivette,70,True,False,False,Gas
3,ce8c6cf3bcbe5c6eeb66055d453d13e61cb51263,2020-08-10 19:25:00 UTC,cool,hold,723,728,728,MO,Cape Girardeau,5,False,False,False,Gas
4,cf102fddd6d7df7ebb573f9d9a035f86faa8c9be,2020-08-28 11:35:00 UTC,cool,hold,754,749,749,MO,Olivette,70,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1221756,c3ca0ef2bce73022036d0eb8bcf9a4cad64c9d6c,2020-08-22 19:05:00 UTC,cool,auto,769,760,760,MO,Saint Charles,30,False,False,False,Gas
1221757,a7a42673c84df7a89f55a698bc9d968cfdadac02,2020-08-20 16:50:00 UTC,cool,hold,743,760,760,MO,Cape Girardeau,29,True,False,False,Gas
1221758,89adaaa04c21f0d58670e72b592cf52bc97d47ea,2020-08-28 12:45:00 UTC,cool,hold,713,760,760,MO,Crestwood,60,False,False,False,Gas
1221759,5e57f6ea3b333d562db1b5fc900a5f271e328416,2020-08-02 13:45:00 UTC,cool,hold,762,760,760,MO,Warrensburg,0,False,False,True,Electric


In [154]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave.head()  # uncomment to preview the aggregated rows

In [157]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
aug_2020_ave.to_csv(
    path_or_buf="data/day/MO/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files
1. Read in the per-year average CSV files from this month's folder for the state
2. Export as one combined CSV

In [158]:
# Collect the CSV file names in the month folder.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/MO/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previously loaded rows on each pass.
frames = [pd.read_csv("data/day/MO/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (the same result the incremental loop produced).
MO_aug = pd.concat(frames) if frames else pd.DataFrame()

MO_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0056cebd6cd72164f185e551f894f028d3d2e319,aug,2017,cool,hold,Clarkson Valley,743.280000,741.440000,738.560000,45.0,True,False,True
1,005b44ffe2812f645179dccec3443da1ec58a68d,aug,2017,cool,hold,Kirkwood,740.748356,740.000000,740.000000,0.0,False,False,False
2,01077fe00e6bddb7480aa62c3efecbfc1b5de9c4,aug,2017,auto,auto,Lee's Summit,728.818182,707.818182,652.727273,17.0,False,False,True
3,01088b49408ab4b9d13e0d7132362623ea0db85c,aug,2017,auto,auto,Independence,766.343137,765.000000,715.000000,20.0,False,False,False
4,01088b49408ab4b9d13e0d7132362623ea0db85c,aug,2017,auto,hold,Independence,757.437745,761.000000,695.000000,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1534,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,aug,2020,cool,hold,Kansas city,681.572503,680.273598,680.273598,0.0,False,False,False
1535,ffede669ccc5cc0e508b9ce744c4ae67cef9181b,aug,2020,cool,auto,Clayton,716.243243,661.324324,680.945946,0.0,True,False,False
1536,ffede669ccc5cc0e508b9ce744c4ae67cef9181b,aug,2020,cool,hold,Clayton,740.074627,730.000000,730.000000,0.0,True,False,False
1537,fff9c9fc157b3018c7b44f37084f7b481f5ea638,aug,2020,cool,auto,Lake Saint Louis,681.696466,680.016632,680.016632,5.0,False,False,False


In [160]:
# Save the combined month; the row labels repeat per source file and carry no
# meaning, so the index is left out of the CSV.
MO_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MO/MO_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 "day" export for MO into a DataFrame.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2017-dec-day-MO.csv")

# dec_2017.head()  # uncomment to preview the raw rows

In [162]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(dec_2017[out_of_range].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,03d081f652048a691168648e95540d11a37325f7,2017-12-09 17:05:00 UTC,heat,auto,707,760,710,MO,holden,15,True,False,True,Electric
1,d3219bd08fc418ffb9b84f006bd1010b2c41f73a,2017-12-24 15:25:00 UTC,heat,auto,744,730,730,MO,Maplewood,70,False,False,False,Gas
2,5028e280a8514a928d35f2168ed86ea0dfcc1a66,2017-12-29 18:25:00 UTC,heat,auto,686,730,680,MO,Pacific,45,False,False,False,Gas
3,8586e6494c7705799e2c81e2fe847607e6550d6b,2017-12-27 16:00:00 UTC,heat,auto,678,680,680,MO,Grandview,15,False,False,False,Gas
5,8bff1c0cc73f36a5d2778831a3980a82d58a9e9f,2017-12-26 09:40:00 UTC,heat,hold,617,650,610,MO,Camdenron,35,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
689807,9ae82166c423634cffc854d75d43d5ad88bf7993,2017-12-21 15:30:00 UTC,heat,hold,647,650,650,MO,Maryland heights,25,False,False,False,Gas
689808,9ae82166c423634cffc854d75d43d5ad88bf7993,2017-12-01 17:50:00 UTC,heat,hold,677,650,650,MO,Maryland heights,25,False,False,False,Gas
689809,2ad5826064172ca49eef4682793678f0de3a8309,2017-12-09 14:00:00 UTC,heat,hold,697,680,680,MO,Maryland heights,50,False,False,False,Gas
689810,9ae82166c423634cffc854d75d43d5ad88bf7993,2017-12-01 15:20:00 UTC,heat,hold,668,650,650,MO,Maryland heights,25,False,False,False,Gas


In [163]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave.head()  # uncomment to preview the aggregated rows

In [166]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
dec_2017_ave.to_csv(
    path_or_buf="data/day/MO/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the December 2018 "day" export for MO into a DataFrame.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2018-dec-day-MO.csv")

# dec_2018.head()  # uncomment to preview the raw rows

In [168]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(dec_2018[out_of_range].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1a5212a58c52175312785d9892f8464adf0127fe,2018-12-01 13:45:00 UTC,heat,hold,667,665,665,MO,Liberty,10,False,False,True,Electric
1,4eb1fe283c524ae50433639cf7287995588a664a,2018-12-30 13:45:00 UTC,auto,hold,725,775,725,MO,St. Louis,17,False,False,False,Gas
2,dd86ee9a15944c7e75bc24581fa82f989473c165,2018-12-07 18:25:00 UTC,heat,hold,731,728,728,MO,O Fallon,10,False,False,False,Gas
3,34bbc9b5362c9360ca5f02a137c2e0e1dd6d5090,2018-12-01 19:15:00 UTC,heat,hold,701,685,685,MO,Maryland Heights,40,False,False,False,Gas
5,e86ab5435f779c83b4c328d0feaca42c932aa535,2018-12-08 18:20:00 UTC,heat,hold,694,695,695,MO,Saint Ann,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1018244,e6b9b85b7dee63103cac7b2dac45386afcd94644,2018-12-11 17:30:00 UTC,auto,hold,698,760,700,MO,Raymore,40,False,False,False,Gas
1018245,e5cd4a96d7caea112d64f19ffd3b9bc8d0d44673,2018-12-30 18:50:00 UTC,auto,hold,683,760,690,MO,Columbia,30,False,False,False,Gas
1018246,f0619c9bc8162a300d697aa57ffe2fc4eeec8f8c,2018-12-11 13:40:00 UTC,auxHeatOnly,auto,712,760,740,MO,Saint Charles,47,False,False,True,Electric
1018247,c8490b30fac5c86ef930bc17242b03c6331d3cec,2018-12-09 15:30:00 UTC,heat,auto,681,760,680,MO,St. Louis,40,True,False,False,Gas


In [169]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave.head()  # uncomment to preview the aggregated rows

In [172]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
dec_2018_ave.to_csv(
    path_or_buf="data/day/MO/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the December 2019 "day" export for MO into a DataFrame.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2019-dec-day-MO.csv")

# dec_2019.head()  # uncomment to preview the raw rows

In [174]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(dec_2019[out_of_range].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4334210b03483bae059fd9d3ad3041169fd7eac5,2019-12-29 15:20:00 UTC,heat,hold,697,699,699,MO,Rolla,0,False,False,False,Gas
1,67520692dc5bc29b9de9e93700ecb7d36ceb0ce1,2019-12-20 18:25:00 UTC,heat,hold,711,699,699,MO,O Fallon,0,False,False,False,Gas
2,d12c6291b7a5d6019133e414e68ac5ab11cbe5d0,2019-12-26 12:45:00 UTC,heat,auto,696,732,700,MO,Dexter,0,False,False,False,Gas
3,2945f6efbf9ccd3d436252c771c2357dd168fa6b,2019-12-29 19:25:00 UTC,heat,hold,694,696,696,MO,Republic,0,False,False,False,Gas
4,2945f6efbf9ccd3d436252c771c2357dd168fa6b,2019-12-01 15:25:00 UTC,heat,hold,685,686,686,MO,Republic,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1174994,42c2cf9bf9cf52c1443ae1933728ab58cb5c708e,2019-12-08 16:10:00 UTC,auto,hold,713,760,710,MO,Defiance,30,True,False,True,Electric
1174995,0b0c235dbe75344232115832b71f399b6bb234e7,2019-12-15 19:00:00 UTC,auto,hold,708,760,710,MO,Columbia,10,True,False,False,Gas
1174996,a075e8ef25c98153e35432e06111578f4404ec6c,2019-12-26 17:10:00 UTC,auto,hold,688,760,690,MO,Raytown,55,True,False,False,Gas
1174997,87538a0f41f1200db29b0a92952ca6c630d37f15,2019-12-02 15:30:00 UTC,auto,hold,685,760,690,MO,Warrenton,10,False,False,False,Gas


In [175]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave.head()  # uncomment to preview the aggregated rows

In [178]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
dec_2019_ave.to_csv(
    path_or_buf="data/day/MO/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the December 2020 "day" export for MO into a DataFrame.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/MO-day/2020-dec-day-MO.csv")

# dec_2020.head()  # uncomment to preview the raw rows

In [180]:
# Remove predetermined outliers before aggregating.
# A row is discarded when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600; missing values are kept.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(dec_2020[out_of_range].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c4b7aec378d8e2dce181074d7d284794fd5e7d2d,2020-12-12 13:55:00 UTC,heat,auto,709,718,710,MO,Springfield,105,True,False,False,Gas
1,9c5698a095500af0c12ab6e32ae7db2620bea84e,2020-12-22 19:50:00 UTC,heat,hold,718,709,709,MO,Ellisville,19,False,False,False,Gas
2,37a63d9a5bde59ef7a305fa83522d5e3f64c09c7,2020-12-22 13:10:00 UTC,auto,hold,713,755,685,MO,Pacific,30,True,False,False,Gas
3,ce08afd7223a5cea5ab62820bda19b6fa68e8c00,2020-12-05 18:55:00 UTC,heat,hold,692,675,675,MO,Jefferson City,5,False,False,False,Gas
4,602e1aecb042915eaa8f38151f3a882e906514f6,2020-12-07 19:45:00 UTC,heat,auto,684,684,650,MO,Columbia,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
997445,abff7f3b6e16858cf0003927dd372858553ab671,2020-12-17 16:15:00 UTC,auto,auto,676,760,680,MO,Wildwood,30,True,False,True,Electric
997446,9c64521bbcb30284159172b256f7be9c3fb43719,2020-12-13 19:50:00 UTC,heat,auto,715,760,720,MO,St. Charles,18,False,False,False,Gas
997447,abff7f3b6e16858cf0003927dd372858553ab671,2020-12-27 15:20:00 UTC,auto,hold,674,760,680,MO,Wildwood,30,True,False,True,Electric
997448,fd3506af03b181b50f67018c08c7ac6365428eb7,2020-12-07 14:55:00 UTC,heat,auto,762,760,760,MO,Camdenton,20,False,False,True,Electric


In [181]:
# Label every row with its source year and month (both stored as strings);
# the aggregation cell uses these two columns as grouping keys.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas >= 2.0 raises a TypeError
# on them instead of silently dropping them as older versions did.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave.head()  # uncomment to preview the aggregated rows

In [184]:
# Write the monthly averages to disk; the grouping keys live in the index,
# so index=True emits them as the leading columns of the file.
dec_2020_ave.to_csv(
    path_or_buf="data/day/MO/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files
1. Read in the per-year average CSV files from this month's folder for the state
2. Export as one combined CSV

In [185]:
# Collect the CSV file names in the month folder.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/MO/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previously loaded rows on each pass.
frames = [pd.read_csv("data/day/MO/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (the same result the incremental loop produced).
MO_dec = pd.concat(frames) if frames else pd.DataFrame()

MO_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0034438232b9c99f7034f4e329ea2203949d0e9a,dec,2017,heat,auto,Manchester,697.310680,780.281553,699.577670,40.0,False,False,False
1,005b44ffe2812f645179dccec3443da1ec58a68d,dec,2017,heat,hold,Kirkwood,688.633333,691.169444,691.169444,0.0,False,False,False
2,01077fe00e6bddb7480aa62c3efecbfc1b5de9c4,dec,2017,auto,hold,Lee's Summit,678.422222,760.000000,680.011111,17.0,False,False,True
3,01088b49408ab4b9d13e0d7132362623ea0db85c,dec,2017,auto,hold,Independence,697.874406,770.000000,700.398417,20.0,False,False,False
4,010b10cbfd269afb215575dcb8620c0e9baf1e55,dec,2017,heat,hold,St. Louis,696.371930,705.905702,699.260965,90.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1511,ffab02b11c20f10ac2c4ca23edd3d249b627d274,dec,2020,heat,hold,Wildwood,695.583333,736.458333,640.583333,16.0,False,False,False
1512,ffd2af4ae17ba4fc7304cbad80f57105d2451543,dec,2020,heat,hold,Columbia,705.658506,706.528190,702.903561,5.0,False,False,False
1513,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,dec,2020,heat,auto,Kansas city,678.426908,697.679172,680.023286,0.0,False,False,False
1514,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,dec,2020,heat,hold,Kansas city,689.373353,691.698389,691.691069,0.0,False,False,False


In [187]:
# Save the combined month; the row labels repeat per source file and carry no
# meaning, so the index is left out of the CSV.
MO_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MO/MO_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files
1. Read in the combined month CSV files from the state's folder
2. Export as one combined CSV

In [188]:
# Collect the combined month CSV file names for the state.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the state-level file reproducible across runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/MO/") if f.endswith(".csv")
)

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every month file first and concatenate once: growing a DataFrame with
# pd.concat inside the loop re-copies all previously loaded rows on each pass.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/MO/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files (the same result the incremental loop produced).
MO_all = pd.concat(frames) if frames else pd.DataFrame()

MO_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0056cebd6cd72164f185e551f894f028d3d2e319,aug,2017,cool,hold,Clarkson Valley,743.280000,741.440000,738.560000,45.0,True,False,True
1,005b44ffe2812f645179dccec3443da1ec58a68d,aug,2017,cool,hold,Kirkwood,740.748356,740.000000,740.000000,0.0,False,False,False
2,01077fe00e6bddb7480aa62c3efecbfc1b5de9c4,aug,2017,auto,auto,Lee's Summit,728.818182,707.818182,652.727273,17.0,False,False,True
3,01088b49408ab4b9d13e0d7132362623ea0db85c,aug,2017,auto,auto,Independence,766.343137,765.000000,715.000000,20.0,False,False,False
4,01088b49408ab4b9d13e0d7132362623ea0db85c,aug,2017,auto,hold,Independence,757.437745,761.000000,695.000000,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6469,ffab02b11c20f10ac2c4ca23edd3d249b627d274,jun,2021,cool,hold,Wildwood,751.306828,749.646500,749.404494,16.0,False,False,False
6470,ffd2af4ae17ba4fc7304cbad80f57105d2451543,jun,2021,cool,hold,Columbia,699.434567,715.335773,715.335773,5.0,False,False,False
6471,ffe3dd2ebf461c6fe1085a577daa4fec78c7c48f,jun,2021,cool,hold,Kansas city,744.761179,783.081456,719.473137,0.0,False,False,False
6472,ffede669ccc5cc0e508b9ce744c4ae67cef9181b,jun,2021,cool,hold,Clayton,744.196850,722.535433,722.535433,0.0,True,False,False


In [190]:
# Save the state-level file; the row labels repeat per source file and carry
# no meaning, so the index is left out of the CSV.
MO_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MO_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in each month/year frame.
frames_to_check = (
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
)

for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['MO']
Unique jan_2018: ['MO']
Unique jan_2019: ['MO']
Unique jan_2020: ['MO']
Unique jan_2021: ['MO']
Unique feb_2017: ['MO']
Unique feb_2018: ['MO']
Unique feb_2019: ['MO']
Unique feb_2020: ['MO']
Unique feb_2021: ['MO']
Unique jun_2017: ['MO']
Unique jun_2018: ['MO']
Unique jun_2019: ['MO']
Unique jun_2020: ['MO']
Unique jun_2021: ['MO']
Unique jul_2017: ['MO']
Unique jul_2018: ['MO']
Unique jul_2019: ['MO']
Unique jul_2020: ['MO']
Unique jul_2021: ['MO']
Unique aug_2017: ['MO']
Unique aug_2018: ['MO']
Unique aug_2019: ['MO']
Unique aug_2020: ['MO']
Unique dec_2017: ['MO']
Unique dec_2018: ['MO']
Unique dec_2019: ['MO']
Unique dec_2020: ['MO']
