# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" extract for SD.
source_csv = "../data_large/SD-day/2017-jan-day-SD.csv"
jan_2017 = pd.read_csv(source_csv)

In [3]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (jan_2017['TemperatureExpectedCool'] >= 850)
    | (jan_2017['TemperatureExpectedHeat'] >= 850)
    | (jan_2017['TemperatureExpectedCool'] <= 600)
    | (jan_2017['TemperatureExpectedHeat'] <= 600)
)
jan_2017 = jan_2017.loc[~is_outlier].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-01-03T19:50:00Z,heat,auto,687,690,690,SD,Watertown,16,False,False,False,Gas
1,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-01-03T19:25:00Z,heat,auto,691,690,690,SD,Watertown,16,False,False,False,Gas
2,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-01-03T19:45:00Z,heat,auto,688,690,690,SD,Watertown,16,False,False,False,Gas
3,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-01-03T19:00:00Z,heat,auto,695,690,690,SD,Watertown,16,False,False,False,Gas
4,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-01-03T19:40:00Z,heat,auto,689,690,690,SD,Watertown,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4321,d37e2aadd7774c2112b3d30f1c00556868867274,2017-01-13T19:05:00Z,heat,hold,721,730,730,SD,Vermillion,45,False,False,False,Gas
4322,d37e2aadd7774c2112b3d30f1c00556868867274,2017-01-13T19:20:00Z,heat,hold,725,730,730,SD,Vermillion,45,False,False,False,Gas
4323,d37e2aadd7774c2112b3d30f1c00556868867274,2017-01-13T19:15:00Z,heat,hold,726,730,730,SD,Vermillion,45,False,False,False,Gas
4324,51e6bafa5298e3366ba871e35aed1d974abe184e,2017-01-30T11:20:00Z,heat,hold,709,730,730,SD,Sioux Falls,19,True,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_labels)

In [6]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
011eb806f7782fc048072fabfb035b54f5d71e67,Jan,2017,auto,hold,Sioux Falls,678.278027,738.2287,680.224215,100.0,False,False,False
045e533587d8a51c00e08e459243ce7ec3cbaaf9,Jan,2017,auto,hold,Rapid City,670.206897,780.0,679.655172,70.0,True,False,False
045e533587d8a51c00e08e459243ce7ec3cbaaf9,Jan,2017,heat,auto,Rapid City,696.348485,780.0,699.0,70.0,True,False,False
05b6a87c2c20aa57b357426c9107705c0bd7ec20,Jan,2017,heat,hold,Sioux Falls,694.484848,700.0,700.0,15.0,True,False,False
0968b16b919c5e8b72ff8de3bda124068f7f322d,Jan,2017,auto,auto,Sioux Falls,687.166667,820.0,690.0,0.0,False,False,False
0968b16b919c5e8b72ff8de3bda124068f7f322d,Jan,2017,auto,hold,Sioux Falls,697.199074,820.0,700.0,0.0,False,False,False
17c1dbdbd6e3202b0db49420e6fc9d36e4cac454,Jan,2017,auto,auto,Sioux Falls,699.851852,760.388889,701.472222,5.0,False,False,False
18cd13efb32ea1f3caa01bc30656d0afbe3d1413,Jan,2017,heat,auto,Sioux Falls,680.520833,683.270833,683.270833,16.0,False,False,False
18cd13efb32ea1f3caa01bc30656d0afbe3d1413,Jan,2017,heat,hold,Sioux Falls,686.6,688.75,687.625,16.0,False,False,False
20f09a9c1d296b532e398fdefb4a44700a36a621,Jan,2017,auto,auto,Mitchell,642.362205,770.0,620.0,40.0,False,False,False


In [7]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/jan/jan_2017_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
jan_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 January Day

In [8]:
# Load the 2018 January "day" extract for SD.
source_csv = "../data_large/SD-day/2018-jan-day-SD.csv"
jan_2018 = pd.read_csv(source_csv)

In [9]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (jan_2018['TemperatureExpectedCool'] >= 850)
    | (jan_2018['TemperatureExpectedHeat'] >= 850)
    | (jan_2018['TemperatureExpectedCool'] <= 600)
    | (jan_2018['TemperatureExpectedHeat'] <= 600)
)
jan_2018 = jan_2018.loc[~is_outlier].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d8a5080a26ef033201748f6bb21a14041b3e5089,2018-01-13T18:55:00Z,heat,hold,739,735,735,SD,Sioux Falls,15,False,False,False,Gas
1,d37e2aadd7774c2112b3d30f1c00556868867274,2018-01-07T16:30:00Z,heat,hold,700,709,709,SD,Vermillion,45,False,False,False,Gas
2,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2018-01-05T19:45:00Z,heat,hold,670,745,673,SD,Sioux Falls,5,False,False,False,Gas
3,3cb5cd50805e286cf3cd45001f0c9e54f6fed6c3,2018-01-06T13:50:00Z,heat,auto,659,738,653,SD,Sioux Falls,20,False,False,False,Gas
4,fbb6b3184e085ae4fe5c86bab30afe6a29793469,2018-01-26T13:15:00Z,heat,hold,689,708,684,SD,Mitchell,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32494,32f1e16c8515275838f75cf8af034ea81b74d96a,2018-01-29T17:50:00Z,heat,hold,668,720,705,SD,Tea,40,False,False,True,Electric
32495,32f1e16c8515275838f75cf8af034ea81b74d96a,2018-01-29T19:35:00Z,heat,hold,697,720,705,SD,Tea,40,False,False,True,Electric
32496,32f1e16c8515275838f75cf8af034ea81b74d96a,2018-01-29T18:55:00Z,heat,hold,691,720,705,SD,Tea,40,False,False,True,Electric
32497,32f1e16c8515275838f75cf8af034ea81b74d96a,2018-01-29T14:40:00Z,heat,hold,643,720,705,SD,Tea,40,False,False,True,Electric


In [10]:
# Tag every row with its year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_labels)

In [12]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/jan/jan_2018_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
jan_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 January Day

In [14]:
# Load the 2019 January "day" extract for SD.
source_csv = "../data_large/SD-day/2019-jan-day-SD.csv"
jan_2019 = pd.read_csv(source_csv)

In [15]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (jan_2019['TemperatureExpectedCool'] >= 850)
    | (jan_2019['TemperatureExpectedHeat'] >= 850)
    | (jan_2019['TemperatureExpectedCool'] <= 600)
    | (jan_2019['TemperatureExpectedHeat'] <= 600)
)
jan_2019 = jan_2019.loc[~is_outlier].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5ea1c0a7cd268a906cded2c8487eee85713feb0e,2019-01-19 16:40:00 UTC,heat,hold,722,743,724,SD,Tea,8,False,False,False,Gas
1,2f9a16dd49a7a56c5d2e1747bad9d2569792cc33,2019-01-12 17:05:00 UTC,heat,hold,703,712,712,SD,Sioux Falls,5,False,False,False,Gas
3,9890e8311a56a9abe9923f5fc2c000c196edf928,2019-01-27 16:45:00 UTC,auto,auto,699,759,709,SD,Sioux Falls,25,False,False,False,Gas
4,217f82385aba5f1ef9dab06f1f59f06467f31658,2019-01-09 11:45:00 UTC,heat,auto,724,755,721,SD,Sioux Falls,65,False,False,False,Gas
6,aa8c27c5be7a5261298e19887aa06a314b0c1308,2019-01-30 14:55:00 UTC,auto,hold,674,742,682,SD,Aberdeen,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62336,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-01-28 17:50:00 UTC,heat,hold,749,740,760,SD,Sioux Falls,0,True,False,False,Gas
62337,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-01-29 12:10:00 UTC,heat,hold,757,760,760,SD,Sioux Falls,0,True,False,False,Gas
62338,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-01-29 11:50:00 UTC,heat,hold,755,760,760,SD,Sioux Falls,0,True,False,False,Gas
62339,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-01-29 12:20:00 UTC,heat,hold,755,760,760,SD,Sioux Falls,0,True,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_labels)

In [18]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/jan/jan_2019_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
jan_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 January Day

In [20]:
# Load the 2020 January "day" extract for SD.
source_csv = "../data_large/SD-day/2020-jan-day-SD.csv"
jan_2020 = pd.read_csv(source_csv)

In [21]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (jan_2020['TemperatureExpectedCool'] >= 850)
    | (jan_2020['TemperatureExpectedHeat'] >= 850)
    | (jan_2020['TemperatureExpectedCool'] <= 600)
    | (jan_2020['TemperatureExpectedHeat'] <= 600)
)
jan_2020 = jan_2020.loc[~is_outlier].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-01-04 14:00:00 UTC,heat,hold,694,698,698,SD,Brookings,5,True,False,False,Gas
1,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-01-04 13:45:00 UTC,heat,hold,699,698,698,SD,Brookings,5,True,False,False,Gas
2,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-01-25 14:30:00 UTC,heat,hold,694,698,698,SD,Brookings,5,True,False,False,Gas
3,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-01-25 13:30:00 UTC,heat,hold,692,698,698,SD,Brookings,5,True,False,False,Gas
4,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-01-09 13:10:00 UTC,heat,hold,696,698,698,SD,Brookings,5,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64709,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2020-01-26 17:40:00 UTC,heat,auto,763,740,760,SD,Sioux Falls,0,True,False,False,Gas
64710,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2020-01-25 15:45:00 UTC,heat,auto,751,740,760,SD,Sioux Falls,0,True,False,False,Gas
64711,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2020-01-25 15:05:00 UTC,heat,auto,759,740,760,SD,Sioux Falls,0,True,False,False,Gas
64712,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2020-01-14 18:15:00 UTC,heat,auto,757,700,760,SD,Sioux Falls,0,True,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_labels)

In [24]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/jan/jan_2020_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
jan_2020_ave.to_csv(out_path, header=True, index=True)

### 2021 January Day

In [26]:
# Load the 2021 January "day" extract for SD.
source_csv = "../data_large/SD-day/2021-jan-day-SD.csv"
jan_2021 = pd.read_csv(source_csv)

In [27]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (jan_2021['TemperatureExpectedCool'] >= 850)
    | (jan_2021['TemperatureExpectedHeat'] >= 850)
    | (jan_2021['TemperatureExpectedCool'] <= 600)
    | (jan_2021['TemperatureExpectedHeat'] <= 600)
)
jan_2021 = jan_2021.loc[~is_outlier].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5a98c243db66a23760e9b890ea746441b7493bdc,2021-01-30 19:00:00 UTC,heat,hold,641,671,671,SD,Vermillion,120,False,False,False,Gas
2,eff352eb35883cf29e1e7c2da025cf60f45f03b8,2021-01-17 16:55:00 UTC,heat,hold,686,752,684,SD,Rapid City,17,False,False,False,Gas
3,fbb6b3184e085ae4fe5c86bab30afe6a29793469,2021-01-17 19:40:00 UTC,auto,hold,684,731,681,SD,Mitchell,30,False,False,False,Gas
4,5a98c243db66a23760e9b890ea746441b7493bdc,2021-01-30 19:35:00 UTC,heat,hold,659,671,671,SD,Vermillion,120,False,False,False,Gas
5,011eb806f7782fc048072fabfb035b54f5d71e67,2021-01-02 17:15:00 UTC,heat,hold,672,719,667,SD,Sioux Falls,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45109,fbba43bcb505094b711bbda734c4088425953b97,2021-01-14 11:25:00 UTC,auto,hold,709,770,710,SD,Sioux Falls,20,False,False,False,Gas
45110,fbba43bcb505094b711bbda734c4088425953b97,2021-01-14 10:20:00 UTC,auto,hold,707,770,710,SD,Sioux Falls,20,False,False,False,Gas
45111,fbba43bcb505094b711bbda734c4088425953b97,2021-01-24 17:40:00 UTC,auto,hold,704,770,710,SD,Sioux Falls,20,False,False,False,Gas
45112,fbba43bcb505094b711bbda734c4088425953b97,2021-01-24 11:35:00 UTC,auto,hold,707,770,710,SD,Sioux Falls,20,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_labels)

In [30]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/jan/jan_2021_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
jan_2021_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the per-year CSV files exported for January.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/SD/jan/") if f.endswith(".csv"))

In [33]:
# Read each per-year January CSV once and combine them with a single concat.
# Growing a DataFrame inside the loop re-copies all earlier rows on every
# pass, and starting from an empty DataFrame is deprecated in recent pandas.
# The names are sorted so the row order does not depend on directory listing order.
frames = [pd.read_csv("data/day/SD/jan/" + file) for file in sorted(files)]

# pd.concat raises on an empty list, so fall back to an empty frame.
SD_jan = pd.concat(frames) if frames else pd.DataFrame()

SD_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,011eb806f7782fc048072fabfb035b54f5d71e67,Jan,2017,auto,hold,Sioux Falls,678.278027,738.228700,680.224215,100.0,False,False,False
1,045e533587d8a51c00e08e459243ce7ec3cbaaf9,Jan,2017,auto,hold,Rapid City,670.206897,780.000000,679.655172,70.0,True,False,False
2,045e533587d8a51c00e08e459243ce7ec3cbaaf9,Jan,2017,heat,auto,Rapid City,696.348485,780.000000,699.000000,70.0,True,False,False
3,05b6a87c2c20aa57b357426c9107705c0bd7ec20,Jan,2017,heat,hold,Sioux Falls,694.484848,700.000000,700.000000,15.0,True,False,False
4,0968b16b919c5e8b72ff8de3bda124068f7f322d,Jan,2017,auto,auto,Sioux Falls,687.166667,820.000000,690.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,Jan,2021,auto,hold,Sioux Falls,726.882353,732.000000,682.000000,30.0,False,False,False
57,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,Jan,2021,heat,hold,Sioux Falls,723.469302,722.886459,722.844407,30.0,False,False,False
58,f55618457070bf0edd01780be7adab261f0423ce,Jan,2021,heat,hold,Sioux Falls,686.000000,740.000000,690.000000,0.0,True,False,False
59,fbb6b3184e085ae4fe5c86bab30afe6a29793469,Jan,2021,auto,hold,Mitchell,677.115385,732.131868,682.131868,30.0,False,False,False


In [34]:
# Export the combined January file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=False leaves the repeated per-file row numbers out.
out_path = "Scraper_Output/State_Month_Day/SD/SD_jan.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
SD_jan.to_csv(out_path, header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" extract for SD.
source_csv = "../data_large/SD-day/2017-feb-day-SD.csv"
feb_2017 = pd.read_csv(source_csv)

In [36]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (feb_2017['TemperatureExpectedCool'] >= 850)
    | (feb_2017['TemperatureExpectedHeat'] >= 850)
    | (feb_2017['TemperatureExpectedCool'] <= 600)
    | (feb_2017['TemperatureExpectedHeat'] <= 600)
)
feb_2017 = feb_2017.loc[~is_outlier].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b82f636e91b827f8c3335e14b4784f234c7fa83e,2017-02-05T14:50:00Z,heat,hold,614,650,631,SD,Sioux Falls,90,False,False,False,Gas
2,d37e2aadd7774c2112b3d30f1c00556868867274,2017-02-02T19:10:00Z,heat,hold,701,725,686,SD,Vermillion,45,False,False,False,Gas
3,d37e2aadd7774c2112b3d30f1c00556868867274,2017-02-10T17:50:00Z,heat,hold,694,711,687,SD,Vermillion,45,False,False,False,Gas
4,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-02-07T11:25:00Z,heat,hold,662,754,664,SD,Sioux Falls,5,False,False,False,Gas
5,d37e2aadd7774c2112b3d30f1c00556868867274,2017-02-16T18:15:00Z,heat,hold,696,735,689,SD,Vermillion,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5608,17c1dbdbd6e3202b0db49420e6fc9d36e4cac454,2017-02-24T14:20:00Z,auto,hold,720,770,720,SD,Sioux Falls,5,False,False,False,Gas
5609,17c1dbdbd6e3202b0db49420e6fc9d36e4cac454,2017-02-24T14:25:00Z,auto,hold,720,770,720,SD,Sioux Falls,5,False,False,False,Gas
5610,17c1dbdbd6e3202b0db49420e6fc9d36e4cac454,2017-02-24T14:10:00Z,auto,hold,712,770,720,SD,Sioux Falls,5,False,False,False,Gas
5611,45a85784a761b7df6242c82ee3b01c01c9d4b647,2017-02-21T19:40:00Z,heat,hold,639,740,740,SD,Hayti,8,True,False,True,Electric


In [37]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the mixed casing is intended.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_labels)

In [39]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/feb/feb_2017_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
feb_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 February Day

In [41]:
# Load the 2018 February "day" extract for SD.
source_csv = "../data_large/SD-day/2018-feb-day-SD.csv"
feb_2018 = pd.read_csv(source_csv)

In [42]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (feb_2018['TemperatureExpectedCool'] >= 850)
    | (feb_2018['TemperatureExpectedHeat'] >= 850)
    | (feb_2018['TemperatureExpectedCool'] <= 600)
    | (feb_2018['TemperatureExpectedHeat'] <= 600)
)
feb_2018 = feb_2018.loc[~is_outlier].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2018-02-27T15:30:00Z,heat,auto,667,670,670,SD,Watertown,16,False,False,False,Gas
1,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2018-02-22T16:10:00Z,heat,hold,665,670,670,SD,Watertown,16,False,False,False,Gas
2,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2018-02-27T19:20:00Z,heat,auto,670,670,670,SD,Watertown,16,False,False,False,Gas
3,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2018-02-27T12:30:00Z,heat,auto,673,670,670,SD,Watertown,16,False,False,False,Gas
4,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2018-02-22T16:00:00Z,heat,hold,650,670,670,SD,Watertown,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30951,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-02-20T17:30:00Z,heat,auto,708,700,700,SD,Sioux Falls,30,False,False,False,Gas
30952,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-02-23T13:50:00Z,heat,auto,705,700,700,SD,Sioux Falls,30,False,False,False,Gas
30953,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-02-21T15:40:00Z,heat,auto,701,700,700,SD,Sioux Falls,30,False,False,False,Gas
30954,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-02-23T15:20:00Z,heat,auto,706,700,700,SD,Sioux Falls,30,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the mixed casing is intended.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_labels)

In [45]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/feb/feb_2018_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
feb_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 February Day

In [47]:
# Load the 2019 February "day" extract for SD.
source_csv = "../data_large/SD-day/2019-feb-day-SD.csv"
feb_2019 = pd.read_csv(source_csv)

In [48]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (feb_2019['TemperatureExpectedCool'] >= 850)
    | (feb_2019['TemperatureExpectedHeat'] >= 850)
    | (feb_2019['TemperatureExpectedCool'] <= 600)
    | (feb_2019['TemperatureExpectedHeat'] <= 600)
)
feb_2019 = feb_2019.loc[~is_outlier].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,aa8c27c5be7a5261298e19887aa06a314b0c1308,2019-02-15 19:50:00 UTC,auto,hold,698,752,702,SD,Aberdeen,0,False,False,False,Gas
1,2f9a16dd49a7a56c5d2e1747bad9d2569792cc33,2019-02-03 14:50:00 UTC,heat,hold,699,702,702,SD,Sioux Falls,5,False,False,False,Gas
2,2f9a16dd49a7a56c5d2e1747bad9d2569792cc33,2019-02-03 18:05:00 UTC,heat,hold,702,702,702,SD,Sioux Falls,5,False,False,False,Gas
3,aa8c27c5be7a5261298e19887aa06a314b0c1308,2019-02-15 18:50:00 UTC,auto,hold,697,752,702,SD,Aberdeen,0,False,False,False,Gas
4,a97d99e22e6def34473c297c299e223a3519c19b,2019-02-17 14:20:00 UTC,auto,hold,599,759,656,SD,Rapid City,85,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
38570,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-02-22 19:10:00 UTC,heat,hold,757,760,760,SD,Sioux Falls,0,True,False,False,Gas
38571,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-02-21 14:50:00 UTC,heat,hold,758,760,760,SD,Sioux Falls,0,True,False,False,Gas
38572,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-02-20 19:55:00 UTC,heat,hold,756,760,760,SD,Sioux Falls,0,True,False,False,Gas
38573,b71168e8b8e81bd538dd6cc70ab898bc25b42e13,2019-02-24 18:25:00 UTC,heat,hold,760,760,760,SD,Sioux Falls,0,True,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the mixed casing is intended.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_labels)

In [51]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/feb/feb_2019_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
feb_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 February Day

In [53]:
# Load the 2020 February "day" extract for SD.
source_csv = "../data_large/SD-day/2020-feb-day-SD.csv"
feb_2020 = pd.read_csv(source_csv)

In [54]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (feb_2020['TemperatureExpectedCool'] >= 850)
    | (feb_2020['TemperatureExpectedHeat'] >= 850)
    | (feb_2020['TemperatureExpectedCool'] <= 600)
    | (feb_2020['TemperatureExpectedHeat'] <= 600)
)
feb_2020 = feb_2020.loc[~is_outlier].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-02-23 14:05:00 UTC,heat,hold,682,678,678,SD,Brookings,5,True,False,False,Gas
1,66fafd5b7131163da637a7660ba442db0a070d92,2020-02-13 19:00:00 UTC,heat,auto,688,691,688,SD,Sioux Falls,6,False,False,False,Gas
2,5ea1c0a7cd268a906cded2c8487eee85713feb0e,2020-02-02 19:25:00 UTC,heat,hold,706,650,645,SD,Tea,8,False,False,False,Gas
3,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-02-02 14:35:00 UTC,heat,hold,682,678,678,SD,Brookings,5,True,False,False,Gas
4,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-02-17 13:55:00 UTC,heat,auto,702,723,678,SD,Brookings,5,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57890,51e6bafa5298e3366ba871e35aed1d974abe184e,2020-02-09 18:55:00 UTC,heat,hold,760,760,760,SD,Sioux Falls,19,True,False,False,Gas
57891,51e6bafa5298e3366ba871e35aed1d974abe184e,2020-02-09 19:35:00 UTC,heat,hold,755,760,760,SD,Sioux Falls,19,True,False,False,Gas
57892,51e6bafa5298e3366ba871e35aed1d974abe184e,2020-02-28 17:55:00 UTC,heat,hold,755,760,760,SD,Sioux Falls,19,True,False,False,Gas
57893,51e6bafa5298e3366ba871e35aed1d974abe184e,2020-02-28 15:25:00 UTC,heat,hold,754,760,760,SD,Sioux Falls,19,True,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the mixed casing is intended.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_labels)

In [57]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/feb/feb_2020_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
feb_2020_ave.to_csv(out_path, header=True, index=True)

### 2021 February Day

In [59]:
# Load the 2021 February "day" extract for SD.
source_csv = "../data_large/SD-day/2021-feb-day-SD.csv"
feb_2021 = pd.read_csv(source_csv)

In [60]:
# Remove predetermined outliers before aggregating.
# A row is an outlier when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600; every other row is kept.
is_outlier = (
    (feb_2021['TemperatureExpectedCool'] >= 850)
    | (feb_2021['TemperatureExpectedHeat'] >= 850)
    | (feb_2021['TemperatureExpectedCool'] <= 600)
    | (feb_2021['TemperatureExpectedHeat'] <= 600)
)
feb_2021 = feb_2021.loc[~is_outlier].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,880406a81fa4bf281a21e6e7788c6d32b611ec04,2021-02-14 19:20:00 UTC,heat,hold,700,703,703,SD,Sioux Falls,0,False,False,False,Gas
1,880406a81fa4bf281a21e6e7788c6d32b611ec04,2021-02-15 12:50:00 UTC,heat,hold,701,703,703,SD,Sioux Falls,0,False,False,False,Gas
2,880406a81fa4bf281a21e6e7788c6d32b611ec04,2021-02-14 14:05:00 UTC,heat,hold,702,703,703,SD,Sioux Falls,0,False,False,False,Gas
5,880406a81fa4bf281a21e6e7788c6d32b611ec04,2021-02-19 13:55:00 UTC,heat,hold,682,683,683,SD,Sioux Falls,0,False,False,False,Gas
8,880406a81fa4bf281a21e6e7788c6d32b611ec04,2021-02-14 12:50:00 UTC,heat,hold,700,703,703,SD,Sioux Falls,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40984,fbba43bcb505094b711bbda734c4088425953b97,2021-02-21 14:05:00 UTC,auto,hold,703,770,710,SD,Sioux Falls,20,False,False,False,Gas
40985,fbba43bcb505094b711bbda734c4088425953b97,2021-02-09 12:05:00 UTC,auto,hold,705,770,710,SD,Sioux Falls,20,False,False,False,Gas
40986,fbba43bcb505094b711bbda734c4088425953b97,2021-02-11 09:45:00 UTC,auto,hold,709,770,710,SD,Sioux Falls,20,False,False,False,Gas
40987,fbba43bcb505094b711bbda734c4088425953b97,2021-02-21 12:35:00 UTC,auto,hold,711,770,710,SD,Sioux Falls,20,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm the mixed casing is intended.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" to label the aggregates.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_labels)

In [63]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the remaining text columns (e.g. date_time,
# ProvinceState); without it pandas >= 2.0 raises a TypeError on them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export CSV file
# to_csv does not create missing folders, so make sure the target directory
# exists first. index=True writes the group keys (the index) into the file.
out_path = "data/day/SD/feb/feb_2021_ave.csv"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
feb_2021_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the per-year CSV files exported for February.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/SD/feb/") if f.endswith(".csv"))

In [66]:
# Read each per-year February CSV once and combine them with a single concat.
# Growing a DataFrame inside the loop re-copies all earlier rows on every
# pass, and starting from an empty DataFrame is deprecated in recent pandas.
# The names are sorted so the row order does not depend on directory listing order.
frames = [pd.read_csv("data/day/SD/feb/" + file) for file in sorted(files)]

# pd.concat raises on an empty list, so fall back to an empty frame.
SD_feb = pd.concat(frames) if frames else pd.DataFrame()

SD_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,011eb806f7782fc048072fabfb035b54f5d71e67,feb,2017,auto,hold,Sioux Falls,667.900000,744.600000,679.100000,100.0,False,False,False
1,045e533587d8a51c00e08e459243ce7ec3cbaaf9,feb,2017,auto,auto,Rapid City,632.523529,819.529412,640.705882,70.0,True,False,False
2,0968b16b919c5e8b72ff8de3bda124068f7f322d,feb,2017,auto,hold,Sioux Falls,687.416667,770.000000,690.000000,0.0,False,False,False
3,17c1dbdbd6e3202b0db49420e6fc9d36e4cac454,feb,2017,auto,hold,Sioux Falls,716.222222,770.000000,720.000000,5.0,False,False,False
4,18cd13efb32ea1f3caa01bc30656d0afbe3d1413,feb,2017,heat,auto,Sioux Falls,686.000000,690.000000,690.000000,16.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,eff352eb35883cf29e1e7c2da025cf60f45f03b8,feb,2021,heat,hold,Rapid City,685.770492,702.196721,688.688525,17.0,False,False,False
51,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,feb,2021,heat,hold,Sioux Falls,702.500000,703.390838,702.650097,30.0,False,False,False
52,f55618457070bf0edd01780be7adab261f0423ce,feb,2021,heat,hold,Sioux Falls,697.666667,700.000000,700.000000,0.0,True,False,False
53,fbb6b3184e085ae4fe5c86bab30afe6a29793469,feb,2021,auto,hold,Mitchell,674.745174,731.625483,680.729730,30.0,False,False,False


In [67]:
# Write the combined month file; the row index is omitted from the CSV.
SD_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/SD/SD_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw 2017 June "day" extract for SD.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2017-jun-day-SD.csv")

In [69]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-06-18T13:40:00Z,cool,hold,759,770,770,SD,Sioux Falls,5,False,False,False,Gas
1,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-06-18T14:00:00Z,cool,hold,760,770,770,SD,Sioux Falls,5,False,False,False,Gas
2,d37e2aadd7774c2112b3d30f1c00556868867274,2017-06-25T19:20:00Z,cool,hold,717,770,770,SD,Vermillion,45,False,False,False,Gas
3,d37e2aadd7774c2112b3d30f1c00556868867274,2017-06-25T18:55:00Z,cool,hold,714,770,770,SD,Vermillion,45,False,False,False,Gas
4,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-06-18T13:15:00Z,cool,hold,759,770,770,SD,Sioux Falls,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18829,51e6bafa5298e3366ba871e35aed1d974abe184e,2017-06-20T10:05:00Z,cool,hold,740,760,760,SD,Sioux Falls,19,True,False,False,Gas
18830,51e6bafa5298e3366ba871e35aed1d974abe184e,2017-06-17T19:30:00Z,cool,hold,746,760,760,SD,Sioux Falls,19,True,False,False,Gas
18831,51e6bafa5298e3366ba871e35aed1d974abe184e,2017-06-17T13:00:00Z,cool,hold,715,760,760,SD,Sioux Falls,19,True,False,False,Gas
18832,51e6bafa5298e3366ba871e35aed1d974abe184e,2017-06-17T13:35:00Z,cool,hold,750,760,760,SD,Sioux Falls,19,True,False,False,Gas


In [70]:
# Label every row with its year and month (both stored as strings).
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jun_2017 = jun_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [72]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jun_2017_ave.to_csv(
    path_or_buf="data/day/SD/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw 2018 June "day" extract for SD.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2018-jun-day-SD.csv")

In [75]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3cb5cd50805e286cf3cd45001f0c9e54f6fed6c3,2018-06-20 10:35:00 UTC,cool,hold,767,770,770,SD,Sioux Falls,20,False,False,False,Gas
1,d8a5080a26ef033201748f6bb21a14041b3e5089,2018-06-24 16:25:00 UTC,cool,hold,753,755,755,SD,Sioux Falls,15,False,False,False,Gas
2,3cb5cd50805e286cf3cd45001f0c9e54f6fed6c3,2018-06-23 17:30:00 UTC,cool,hold,769,770,770,SD,Sioux Falls,20,False,False,False,Gas
3,3cb5cd50805e286cf3cd45001f0c9e54f6fed6c3,2018-06-18 10:30:00 UTC,cool,hold,771,770,770,SD,Sioux Falls,20,False,False,False,Gas
4,3cb5cd50805e286cf3cd45001f0c9e54f6fed6c3,2018-06-28 12:00:00 UTC,cool,auto,765,770,770,SD,Sioux Falls,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
44935,ba824fb3fced9de90a39628266d6ba8bdc24270c,2018-06-26 08:00:00 UTC,cool,hold,758,760,760,SD,Rapid City,5,False,False,False,Gas
44936,ba824fb3fced9de90a39628266d6ba8bdc24270c,2018-06-26 09:30:00 UTC,cool,hold,755,760,760,SD,Rapid City,5,False,False,False,Gas
44937,ba824fb3fced9de90a39628266d6ba8bdc24270c,2018-06-26 08:05:00 UTC,cool,hold,757,760,760,SD,Rapid City,5,False,False,False,Gas
44938,e6dbbaa5309abde52529bb8af3025c5f2fb84ab3,2018-06-19 18:30:00 UTC,cool,hold,750,760,760,SD,Mitchell,27,False,False,True,Electric


In [76]:
# Label every row with its year and month (both stored as strings).
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jun_2018 = jun_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [78]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jun_2018_ave.to_csv(
    path_or_buf="data/day/SD/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw 2019 June "day" extract for SD.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2019-jun-day-SD.csv")

In [81]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2e3f052d974a5f34ab5e4a430a975718633a71c5,2019-06-01 18:15:00 UTC,cool,hold,687,675,655,SD,Sisseton,40,False,False,False,Gas
1,30c9b455802f58170c2be006aab99c5c41a05977,2019-06-04 13:00:00 UTC,auto,hold,729,725,655,SD,aberdeen,45,True,False,False,Gas
2,30c9b455802f58170c2be006aab99c5c41a05977,2019-06-01 15:10:00 UTC,auto,hold,702,745,655,SD,aberdeen,45,True,False,False,Gas
3,30c9b455802f58170c2be006aab99c5c41a05977,2019-06-04 13:20:00 UTC,auto,hold,724,725,655,SD,aberdeen,45,True,False,False,Gas
4,30c9b455802f58170c2be006aab99c5c41a05977,2019-06-01 15:05:00 UTC,auto,hold,701,745,655,SD,aberdeen,45,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68416,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2019-06-09 13:00:00 UTC,cool,auto,740,750,760,SD,Sioux Falls,75,False,False,False,Gas
68417,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2019-06-02 11:00:00 UTC,cool,auto,709,770,760,SD,Sioux Falls,75,False,False,False,Gas
68418,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2019-06-30 11:10:00 UTC,cool,auto,752,750,760,SD,Sioux Falls,75,False,False,False,Gas
68419,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2019-06-09 12:00:00 UTC,cool,auto,742,750,760,SD,Sioux Falls,75,False,False,False,Gas


In [82]:
# Label every row with its year and month (both stored as strings).
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jun_2019 = jun_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [84]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jun_2019_ave.to_csv(
    path_or_buf="data/day/SD/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw 2020 June "day" extract for SD.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2020-jun-day-SD.csv")

In [87]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9c7a789c7d1d74537a88db58945540af52b04c0e,2020-06-13 18:30:00 UTC,auto,auto,745,810,610,SD,Sioux Falls,30,True,False,True,Electric
1,011eb806f7782fc048072fabfb035b54f5d71e67,2020-06-20 15:10:00 UTC,cool,hold,723,731,687,SD,Sioux Falls,100,False,False,False,Gas
2,fbb6b3184e085ae4fe5c86bab30afe6a29793469,2020-06-07 17:15:00 UTC,cool,hold,728,686,686,SD,Mitchell,30,False,False,False,Gas
3,b64acb20839d0b9c1433741624014f6a4b7ce5bd,2020-06-07 13:35:00 UTC,cool,hold,732,725,725,SD,Sioux Falls,17,False,False,False,Gas
4,b64acb20839d0b9c1433741624014f6a4b7ce5bd,2020-06-15 14:40:00 UTC,cool,hold,729,725,725,SD,Sioux Falls,17,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
67266,b64acb20839d0b9c1433741624014f6a4b7ce5bd,2020-06-02 14:00:00 UTC,cool,hold,714,715,715,SD,Sioux Falls,17,False,False,False,Gas
67267,b64acb20839d0b9c1433741624014f6a4b7ce5bd,2020-06-02 12:35:00 UTC,cool,hold,718,715,715,SD,Sioux Falls,17,False,False,False,Gas
67268,b64acb20839d0b9c1433741624014f6a4b7ce5bd,2020-06-02 12:25:00 UTC,cool,hold,722,715,715,SD,Sioux Falls,17,False,False,False,Gas
67269,328bca1050aa07835f0bab2868d5eefde6e06ae2,2020-06-30 13:30:00 UTC,cool,hold,723,715,715,SD,Sioux Falls,80,False,False,False,Gas


In [88]:
# Label every row with its year and month (both stored as strings).
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jun_2020 = jun_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [90]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jun_2020_ave.to_csv(
    path_or_buf="data/day/SD/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw 2021 June "day" extract for SD.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2021-jun-day-SD.csv")

In [93]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2021-06-22 18:25:00 UTC,cool,hold,666,670,670,SD,Elkton,19,False,False,False,Gas
1,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2021-06-21 18:40:00 UTC,cool,hold,665,670,670,SD,Elkton,19,False,False,False,Gas
2,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2021-06-22 15:20:00 UTC,cool,hold,676,670,670,SD,Elkton,19,False,False,False,Gas
3,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2021-06-21 16:25:00 UTC,cool,hold,664,670,670,SD,Elkton,19,False,False,False,Gas
4,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2021-06-21 11:25:00 UTC,cool,hold,668,670,670,SD,Elkton,19,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
45457,ad7b6e90b80dcab9f18c995fe36408f2a12dbc28,2021-06-21 19:05:00 UTC,cool,hold,708,760,760,SD,Sioux Falls,10,False,False,False,Gas
45458,ad7b6e90b80dcab9f18c995fe36408f2a12dbc28,2021-06-21 14:15:00 UTC,cool,hold,718,760,760,SD,Sioux Falls,10,False,False,False,Gas
45459,ad7b6e90b80dcab9f18c995fe36408f2a12dbc28,2021-06-21 15:15:00 UTC,cool,hold,700,760,760,SD,Sioux Falls,10,False,False,False,Gas
45460,ad7b6e90b80dcab9f18c995fe36408f2a12dbc28,2021-06-21 18:10:00 UTC,cool,hold,701,760,760,SD,Sioux Falls,10,False,False,False,Gas


In [94]:
# Label every row with its year and month (both stored as strings).
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jun_2021 = jun_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [96]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jun_2021_ave.to_csv(
    path_or_buf="data/day/SD/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files
1. Read in the per-year average CSV files from this month's folder for the state
2. Concatenate them and export as a single combined CSV

In [98]:
# List the per-year average CSVs for the month. os.listdir returns entries in
# arbitrary order, so sort them to make the combined file's row order reproducible.
files = sorted(f for f in os.listdir("data/day/SD/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year CSV once, then stack them with a single concat. Calling
# pd.concat inside the loop re-copies all accumulated rows on every pass.
SD_jun_parts = []
for file in files:
    df = pd.read_csv("data/day/SD/jun/" + file)
    SD_jun_parts.append(df)

# With no input files, keep the previous result: an empty dataframe.
SD_jun = pd.concat(SD_jun_parts) if SD_jun_parts else pd.DataFrame()

SD_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,011eb806f7782fc048072fabfb035b54f5d71e67,jun,2017,auto,hold,Sioux Falls,730.928571,740.000000,640.000000,100.0,False,False,False
1,045e533587d8a51c00e08e459243ce7ec3cbaaf9,jun,2017,auto,hold,Rapid City,702.142857,702.000000,652.000000,70.0,True,False,False
2,05b6a87c2c20aa57b357426c9107705c0bd7ec20,jun,2017,cool,auto,Sioux Falls,738.913043,740.000000,680.000000,15.0,True,False,False
3,05b6a87c2c20aa57b357426c9107705c0bd7ec20,jun,2017,cool,hold,Sioux Falls,735.309524,747.095238,740.809524,15.0,True,False,False
4,0968b16b919c5e8b72ff8de3bda124068f7f322d,jun,2017,auto,auto,Sioux Falls,748.558366,752.192607,680.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,f54879bd1a53b5447fa7998054e94ab1621745eb,jun,2021,cool,hold,Sioux Falls,715.089552,700.268657,700.268657,95.0,False,False,False
62,f55618457070bf0edd01780be7adab261f0423ce,jun,2021,cool,hold,Sioux Falls,734.083333,738.073333,692.890000,0.0,True,False,False
63,fb9e07d5242ec2ffc3c0820c3e4956038ac410af,jun,2021,auto,hold,Yankton,736.492754,705.405797,655.405797,10.0,False,False,True
64,fbb6b3184e085ae4fe5c86bab30afe6a29793469,jun,2021,auto,hold,Mitchell,717.666667,751.206349,701.206349,30.0,False,False,False


In [100]:
# Write the combined month file; the row index is omitted from the CSV.
SD_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/SD/SD_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw 2017 July "day" extract for SD.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2017-jul-day-SD.csv")

In [102]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-07-18T18:25:00Z,cool,hold,766,771,755,SD,Sioux Falls,5,False,False,False,Gas
1,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-07-04T15:50:00Z,cool,hold,768,770,770,SD,Sioux Falls,5,False,False,False,Gas
2,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-07-04T17:05:00Z,cool,hold,763,773,735,SD,Sioux Falls,5,False,False,False,Gas
3,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-07-04T16:50:00Z,cool,hold,770,770,770,SD,Sioux Falls,5,False,False,False,Gas
4,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-07-12T19:35:00Z,cool,hold,772,770,770,SD,Sioux Falls,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20025,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-07-02T16:45:00Z,cool,auto,713,720,700,SD,Watertown,16,False,False,False,Gas
20026,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-07-03T19:35:00Z,cool,auto,708,720,700,SD,Watertown,16,False,False,False,Gas
20027,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-07-02T12:00:00Z,cool,auto,701,720,700,SD,Watertown,16,False,False,False,Gas
20028,c286bb38a8bbe2053013f8899f27ce0e8fe7e2fc,2017-07-02T14:05:00Z,cool,auto,704,720,700,SD,Watertown,16,False,False,False,Gas


In [103]:
# Label every row with its year and month (both stored as strings).
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jul_2017 = jul_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [105]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jul_2017_ave.to_csv(
    path_or_buf="data/day/SD/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw 2018 July "day" extract for SD.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2018-jul-day-SD.csv")

In [108]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ba6e220a85539b203580b1486b6e183220f6488c,2018-07-11 12:05:00 UTC,auto,hold,707,710,660,SD,Sioux Falls,0,False,False,False,Gas
1,995ba2f4d2b74e5e9c936ec683b65b980cb10a3b,2018-07-02 12:25:00 UTC,auto,hold,748,750,660,SD,Dakota Dunes,27,False,False,False,Gas
2,995ba2f4d2b74e5e9c936ec683b65b980cb10a3b,2018-07-07 11:50:00 UTC,auto,hold,751,750,660,SD,Dakota Dunes,27,False,False,False,Gas
3,ba6e220a85539b203580b1486b6e183220f6488c,2018-07-09 12:45:00 UTC,auto,hold,716,710,660,SD,Sioux Falls,0,False,False,False,Gas
4,995ba2f4d2b74e5e9c936ec683b65b980cb10a3b,2018-07-04 12:00:00 UTC,auto,hold,751,750,660,SD,Dakota Dunes,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52313,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-07-23 11:35:00 UTC,cool,hold,712,710,710,SD,Sioux Falls,30,False,False,False,Gas
52314,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-07-23 12:10:00 UTC,cool,hold,728,710,710,SD,Sioux Falls,30,False,False,False,Gas
52315,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-07-23 12:15:00 UTC,cool,hold,728,710,710,SD,Sioux Falls,30,False,False,False,Gas
52316,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-07-23 11:40:00 UTC,cool,hold,716,710,710,SD,Sioux Falls,30,False,False,False,Gas


In [109]:
# Label every row with its year and month (both stored as strings).
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jul_2018 = jul_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [111]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jul_2018_ave.to_csv(
    path_or_buf="data/day/SD/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw 2019 July "day" extract for SD.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2019-jul-day-SD.csv")

In [114]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0cdb29450b96ad16bdcd65b24b70e0f11aae8af1,2019-07-31 12:20:00 UTC,auto,auto,697,713,663,SD,Sioux Falls,20,False,False,True,Electric
2,b64acb20839d0b9c1433741624014f6a4b7ce5bd,2019-07-02 18:50:00 UTC,cool,hold,753,755,755,SD,Sioux Falls,17,False,False,False,Gas
5,d8a5080a26ef033201748f6bb21a14041b3e5089,2019-07-13 18:05:00 UTC,cool,hold,762,757,757,SD,Sioux Falls,15,False,False,False,Gas
6,d8a5080a26ef033201748f6bb21a14041b3e5089,2019-07-13 15:10:00 UTC,cool,hold,764,757,757,SD,Sioux Falls,15,False,False,False,Gas
8,0cdb29450b96ad16bdcd65b24b70e0f11aae8af1,2019-07-09 11:55:00 UTC,auto,hold,696,713,663,SD,Sioux Falls,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74814,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2019-07-20 18:15:00 UTC,cool,auto,708,710,710,SD,Elkton,19,False,False,False,Gas
74815,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2019-07-20 18:20:00 UTC,cool,auto,708,710,710,SD,Elkton,19,False,False,False,Gas
74816,c569b776f9ecb947ab7a457398e9949d6f68d7f3,2019-07-20 19:15:00 UTC,cool,auto,711,710,710,SD,Elkton,19,False,False,False,Gas
74817,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2019-07-08 13:35:00 UTC,cool,hold,739,744,710,SD,Sioux Falls,30,False,False,False,Gas


In [115]:
# Label every row with its year and month (both stored as strings).
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jul_2019 = jul_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [117]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jul_2019_ave.to_csv(
    path_or_buf="data/day/SD/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw 2020 July "day" extract for SD.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2020-jul-day-SD.csv")

In [120]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,880406a81fa4bf281a21e6e7788c6d32b611ec04,2020-07-10 18:55:00 UTC,cool,auto,719,721,702,SD,Sioux Falls,0,False,False,False,Gas
1,011eb806f7782fc048072fabfb035b54f5d71e67,2020-07-17 12:40:00 UTC,cool,auto,739,770,770,SD,Sioux Falls,100,False,False,False,Gas
2,011eb806f7782fc048072fabfb035b54f5d71e67,2020-07-26 19:35:00 UTC,cool,auto,748,780,780,SD,Sioux Falls,100,False,False,False,Gas
3,3ed0e9cb7b0043eaf232cd07b55d33cb67e0debe,2020-07-15 09:05:00 UTC,cool,hold,711,700,697,SD,Mitchell,15,True,False,True,Electric
4,011eb806f7782fc048072fabfb035b54f5d71e67,2020-07-17 13:30:00 UTC,cool,auto,742,770,770,SD,Sioux Falls,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71118,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-07-22 12:15:00 UTC,cool,hold,734,760,760,SD,Sioux Falls,75,False,False,False,Gas
71119,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-07-22 13:10:00 UTC,cool,hold,736,760,760,SD,Sioux Falls,75,False,False,False,Gas
71120,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-07-22 12:10:00 UTC,cool,hold,733,760,760,SD,Sioux Falls,75,False,False,False,Gas
71121,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-07-20 13:15:00 UTC,cool,hold,733,760,760,SD,Sioux Falls,75,False,False,False,Gas


In [121]:
# Label every row with its year and month (both stored as strings).
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jul_2020 = jul_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [123]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jul_2020_ave.to_csv(
    path_or_buf="data/day/SD/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw 2021 July "day" extract for SD.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2021-jul-day-SD.csv")

In [126]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2021-07-04 13:55:00 UTC,cool,hold,723,719,709,SD,Brookings,5,True,False,False,Gas
1,fbb6b3184e085ae4fe5c86bab30afe6a29793469,2021-07-11 19:50:00 UTC,auto,hold,707,695,645,SD,Mitchell,30,False,False,False,Gas
2,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2021-07-21 12:50:00 UTC,cool,hold,709,687,687,SD,Brookings,5,True,False,False,Gas
3,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2021-07-04 13:15:00 UTC,cool,hold,718,717,717,SD,Brookings,5,True,False,False,Gas
4,8f424d66255e8c5985527f298fd93efdf9fee43d,2021-07-14 12:55:00 UTC,auto,hold,666,700,610,SD,Sioux Falls,39,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46604,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2021-07-28 11:45:00 UTC,cool,hold,762,760,760,SD,Sioux Falls,75,False,False,False,Gas
46605,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2021-07-27 12:25:00 UTC,cool,hold,759,760,760,SD,Sioux Falls,75,False,False,False,Gas
46606,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2021-07-17 12:55:00 UTC,cool,hold,760,760,760,SD,Sioux Falls,75,False,False,False,Gas
46607,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2021-07-28 12:05:00 UTC,cool,hold,763,762,760,SD,Sioux Falls,75,False,False,False,Gas


In [127]:
# Label every row with its year and month (both stored as strings).
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Mark the three temperature columns as averages ahead of the group-by mean.
jul_2021 = jul_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [129]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True leaves the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean explicitly;
# pandas >= 2.0 raises a TypeError on them instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Save the per-group means; index=True writes the group keys as leading CSV columns.
jul_2021_ave.to_csv(
    path_or_buf="data/day/SD/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files
1. Read in the per-year average CSV files from this month's folder for the state
2. Concatenate them and export as a single combined CSV

In [131]:
# List the per-year average CSVs for the month. os.listdir returns entries in
# arbitrary order, so sort them to make the combined file's row order reproducible.
files = sorted(f for f in os.listdir("data/day/SD/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year CSV once, then stack them with a single concat. Calling
# pd.concat inside the loop re-copies all accumulated rows on every pass.
SD_jul_parts = []
for file in files:
    df = pd.read_csv("data/day/SD/jul/" + file)
    SD_jul_parts.append(df)

# With no input files, keep the previous result: an empty dataframe.
SD_jul = pd.concat(SD_jul_parts) if SD_jul_parts else pd.DataFrame()

SD_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,011eb806f7782fc048072fabfb035b54f5d71e67,jul,2017,auto,hold,Sioux Falls,747.710526,720.263158,640.000000,100.0,False,False,False
1,045e533587d8a51c00e08e459243ce7ec3cbaaf9,jul,2017,auto,auto,Rapid City,712.500000,711.000000,661.000000,70.0,True,False,False
2,05b6a87c2c20aa57b357426c9107705c0bd7ec20,jul,2017,cool,hold,Sioux Falls,725.187500,720.000000,720.000000,15.0,True,False,False
3,0968b16b919c5e8b72ff8de3bda124068f7f322d,jul,2017,auto,hold,Sioux Falls,751.188612,753.416370,680.000000,0.0,False,False,False
4,17c1dbdbd6e3202b0db49420e6fc9d36e4cac454,jul,2017,auto,auto,Sioux Falls,744.600000,735.000000,685.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
51,f54879bd1a53b5447fa7998054e94ab1621745eb,jul,2021,cool,hold,Sioux Falls,716.863636,700.045455,700.045455,95.0,False,False,False
52,f55618457070bf0edd01780be7adab261f0423ce,jul,2021,cool,hold,Sioux Falls,748.000000,760.000000,760.000000,0.0,True,False,False
53,fb9e07d5242ec2ffc3c0820c3e4956038ac410af,jul,2021,auto,hold,Yankton,723.800000,700.000000,650.000000,10.0,False,False,True
54,fbb6b3184e085ae4fe5c86bab30afe6a29793469,jul,2021,auto,hold,Mitchell,711.015873,702.968254,652.968254,30.0,False,False,False


In [133]:
# Save the combined July table; the row index is not written.
SD_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/SD/SD_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 "day" extract for SD.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2017-aug-day-SD.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2017.drop(
    index=aug_2017.loc[
        (aug_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (aug_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9c7a789c7d1d74537a88db58945540af52b04c0e,2017-08-17T15:50:00Z,auto,hold,717,750,660,SD,Sioux Falls,30,True,False,True,Electric
1,9c7a789c7d1d74537a88db58945540af52b04c0e,2017-08-17T17:10:00Z,auto,hold,723,750,660,SD,Sioux Falls,30,True,False,True,Electric
2,9c7a789c7d1d74537a88db58945540af52b04c0e,2017-08-17T17:30:00Z,auto,hold,730,750,660,SD,Sioux Falls,30,True,False,True,Electric
3,9c7a789c7d1d74537a88db58945540af52b04c0e,2017-08-17T15:25:00Z,auto,hold,716,750,660,SD,Sioux Falls,30,True,False,True,Electric
4,9c7a789c7d1d74537a88db58945540af52b04c0e,2017-08-17T14:35:00Z,auto,hold,716,750,660,SD,Sioux Falls,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19853,45a85784a761b7df6242c82ee3b01c01c9d4b647,2017-08-20T16:55:00Z,cool,hold,687,690,690,SD,Hayti,8,True,False,True,Electric
19854,45a85784a761b7df6242c82ee3b01c01c9d4b647,2017-08-12T16:05:00Z,cool,hold,694,690,690,SD,Hayti,8,True,False,True,Electric
19855,45a85784a761b7df6242c82ee3b01c01c9d4b647,2017-08-20T16:10:00Z,cool,hold,688,690,690,SD,Hayti,8,True,False,True,Electric
19856,45a85784a761b7df6242c82ee3b01c01c9d4b647,2017-08-19T18:10:00Z,cool,hold,695,690,690,SD,Hayti,8,True,False,True,Electric


In [136]:
# Tag every row with the year and month this extract covers (both stored as strings).
aug_2017["Year"], aug_2017["Month"] = "2017", "aug"

In [137]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2017_ave = (
    aug_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2017_ave

In [139]:
# Write the per-group means to disk; the index is written because it holds the group keys.
aug_2017_ave.to_csv(path_or_buf="data/day/SD/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the August 2018 "day" extract for SD.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2018-aug-day-SD.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2018.drop(
    index=aug_2018.loc[
        (aug_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (aug_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,995ba2f4d2b74e5e9c936ec683b65b980cb10a3b,2018-08-12 12:20:00 UTC,cool,hold,754,770,770,SD,Dakota Dunes,27,False,False,False,Gas
1,995ba2f4d2b74e5e9c936ec683b65b980cb10a3b,2018-08-11 15:30:00 UTC,cool,hold,770,770,770,SD,Dakota Dunes,27,False,False,False,Gas
2,995ba2f4d2b74e5e9c936ec683b65b980cb10a3b,2018-08-11 14:45:00 UTC,cool,hold,772,770,770,SD,Dakota Dunes,27,False,False,False,Gas
3,a351d877220e28248c343ed9b9c1869bc2ec6f2a,2018-08-14 19:45:00 UTC,cool,hold,787,780,770,SD,Sioux Falls,25,False,False,False,Gas
4,c3353456a861b50590e2848ac724e0fdaae797b6,2018-08-13 12:30:00 UTC,cool,hold,769,770,770,SD,Dakota Dunes,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51158,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-08-26 12:55:00 UTC,cool,hold,715,710,710,SD,Sioux Falls,30,False,False,False,Gas
51159,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-08-26 12:40:00 UTC,cool,hold,710,710,710,SD,Sioux Falls,30,False,False,False,Gas
51160,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-08-26 12:20:00 UTC,cool,hold,713,710,710,SD,Sioux Falls,30,False,False,False,Gas
51161,f0a97f2cedfca1f192ad1210ee94c47bbac3d003,2018-08-26 11:30:00 UTC,cool,hold,713,710,710,SD,Sioux Falls,30,False,False,False,Gas


In [142]:
# Tag every row with the year and month this extract covers (both stored as strings).
aug_2018["Year"], aug_2018["Month"] = "2018", "aug"

In [143]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2018_ave = (
    aug_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2018_ave

In [145]:
# Write the per-group means to disk; the index is written because it holds the group keys.
aug_2018_ave.to_csv(path_or_buf="data/day/SD/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the August 2019 "day" extract for SD.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2019-aug-day-SD.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2019.drop(
    index=aug_2019.loc[
        (aug_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (aug_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0cdb29450b96ad16bdcd65b24b70e0f11aae8af1,2019-08-23 12:50:00 UTC,auto,auto,694,713,663,SD,Sioux Falls,20,False,False,True,Electric
1,9890e8311a56a9abe9923f5fc2c000c196edf928,2019-08-13 11:55:00 UTC,auto,hold,711,712,662,SD,Sioux Falls,25,False,False,False,Gas
2,3ed0e9cb7b0043eaf232cd07b55d33cb67e0debe,2019-08-07 14:10:00 UTC,cool,hold,694,693,673,SD,Mitchell,15,True,False,True,Electric
3,d8a5080a26ef033201748f6bb21a14041b3e5089,2019-08-11 19:00:00 UTC,cool,hold,750,747,747,SD,Sioux Falls,15,False,False,False,Gas
4,0cdb29450b96ad16bdcd65b24b70e0f11aae8af1,2019-08-04 11:35:00 UTC,auto,auto,694,713,663,SD,Sioux Falls,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68663,8056f7851f2860fec55c8ffaf094efff7a46ada7,2019-08-04 14:50:00 UTC,cool,hold,751,760,760,SD,Sioux Falls,0,False,False,False,Gas
68664,8056f7851f2860fec55c8ffaf094efff7a46ada7,2019-08-04 12:20:00 UTC,cool,hold,745,760,760,SD,Sioux Falls,0,False,False,False,Gas
68665,ba824fb3fced9de90a39628266d6ba8bdc24270c,2019-08-24 13:25:00 UTC,cool,hold,749,762,760,SD,Rapid City,5,False,False,False,Gas
68666,ba824fb3fced9de90a39628266d6ba8bdc24270c,2019-08-01 10:20:00 UTC,cool,hold,756,760,760,SD,Rapid City,5,False,False,False,Gas


In [148]:
# Tag every row with the year and month this extract covers (both stored as strings).
aug_2019["Year"], aug_2019["Month"] = "2019", "aug"

In [149]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2019_ave = (
    aug_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2019_ave

In [151]:
# Write the per-group means to disk; the index is written because it holds the group keys.
aug_2019_ave.to_csv(path_or_buf="data/day/SD/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the August 2020 "day" extract for SD.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2020-aug-day-SD.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2020.drop(
    index=aug_2020.loc[
        (aug_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (aug_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2020-08-15 12:55:00 UTC,cool,hold,711,703,703,SD,Brookings,5,True,False,False,Gas
1,cc7492974b078d6eceba17e49929a139635baebb,2020-08-08 14:15:00 UTC,cool,hold,726,725,725,SD,Yankton,45,False,False,False,Gas
2,3cb5cd50805e286cf3cd45001f0c9e54f6fed6c3,2020-08-13 11:10:00 UTC,cool,auto,725,737,712,SD,Sioux Falls,20,False,False,False,Gas
3,cc7492974b078d6eceba17e49929a139635baebb,2020-08-09 17:05:00 UTC,cool,hold,729,725,725,SD,Yankton,45,False,False,False,Gas
4,cc7492974b078d6eceba17e49929a139635baebb,2020-08-08 15:00:00 UTC,cool,hold,730,725,725,SD,Yankton,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73072,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-08-20 11:25:00 UTC,cool,auto,741,740,760,SD,Sioux Falls,75,False,False,False,Gas
73073,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-08-20 11:30:00 UTC,cool,auto,741,740,760,SD,Sioux Falls,75,False,False,False,Gas
73074,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-08-20 12:00:00 UTC,cool,auto,743,740,760,SD,Sioux Falls,75,False,False,False,Gas
73075,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-08-20 11:50:00 UTC,cool,auto,740,740,760,SD,Sioux Falls,75,False,False,False,Gas


In [154]:
# Tag every row with the year and month this extract covers (both stored as strings).
aug_2020["Year"], aug_2020["Month"] = "2020", "aug"

In [155]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2020_ave = (
    aug_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2020_ave

In [157]:
# Write the per-group means to disk; the index is written because it holds the group keys.
aug_2020_ave.to_csv(path_or_buf="data/day/SD/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [158]:
# List the CSV files in the August folder, sorted by name: os.listdir returns entries in
# arbitrary order, so sorting keeps the combined row order reproducible between runs.
files = sorted(f for f in os.listdir("data/day/SD/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files` and stack them with a single concat call
# (concatenating inside a loop re-copies the accumulated rows on every pass).
# An empty file list still yields an empty dataframe, as before.
SD_aug = (
    pd.concat([pd.read_csv("data/day/SD/aug/" + name) for name in files])
    if files
    else pd.DataFrame()
)

SD_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004e7f286453271cd8b2b9367719102dc96392c2,aug,2017,auto,auto,Elk Point,748.661654,744.503759,678.390977,20.0,False,False,False
1,004e7f286453271cd8b2b9367719102dc96392c2,aug,2017,auto,hold,Elk Point,738.937500,741.000000,679.750000,20.0,False,False,False
2,011eb806f7782fc048072fabfb035b54f5d71e67,aug,2017,auto,hold,Sioux Falls,727.986486,721.351351,640.000000,100.0,False,False,False
3,045e533587d8a51c00e08e459243ce7ec3cbaaf9,aug,2017,auto,hold,Rapid City,700.777778,701.888889,651.888889,70.0,True,False,False
4,05b6a87c2c20aa57b357426c9107705c0bd7ec20,aug,2017,cool,auto,Sioux Falls,702.000000,740.000000,680.000000,15.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,f55618457070bf0edd01780be7adab261f0423ce,aug,2020,cool,hold,Sioux Falls,740.122449,724.448980,713.326531,0.0,True,False,False
107,fb9e07d5242ec2ffc3c0820c3e4956038ac410af,aug,2020,auto,hold,Yankton,721.000000,720.000000,670.000000,10.0,False,False,True
108,fbb6b3184e085ae4fe5c86bab30afe6a29793469,aug,2020,cool,hold,Mitchell,710.725275,718.010989,718.010989,30.0,False,False,False
109,fbba43bcb505094b711bbda734c4088425953b97,aug,2020,auto,auto,Sioux Falls,720.408602,719.935484,650.000000,20.0,False,False,False


In [160]:
# Save the combined August table; the row index is not written.
SD_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/SD/SD_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 "day" extract for SD.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2017-dec-day-SD.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2017.drop(
    index=dec_2017.loc[
        (dec_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (dec_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a97d99e22e6def34473c297c299e223a3519c19b,2017-12-30T15:55:00Z,heat,hold,671,758,614,SD,Rapid City,85,False,False,False,Gas
1,32f1e16c8515275838f75cf8af034ea81b74d96a,2017-12-18T13:35:00Z,heat,auto,650,720,673,SD,Tea,40,False,False,True,Electric
2,e5a50f71271de0f30b2f068f1e6e3368ddfec11b,2017-12-15T13:05:00Z,heat,hold,675,755,672,SD,Sioux Falls,5,False,False,False,Gas
3,d37e2aadd7774c2112b3d30f1c00556868867274,2017-12-16T15:40:00Z,heat,hold,710,741,703,SD,Vermillion,45,False,False,False,Gas
4,66fafd5b7131163da637a7660ba442db0a070d92,2017-12-21T12:40:00Z,heat,auto,701,701,699,SD,Sioux Falls,6,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27636,a6340c03cdc29b13a96131be745189257be7a35c,2017-12-17T17:15:00Z,heat,hold,698,750,750,SD,Arlington,15,False,False,False,Gas
27637,a6340c03cdc29b13a96131be745189257be7a35c,2017-12-17T17:35:00Z,heat,hold,703,750,750,SD,Arlington,15,False,False,False,Gas
27638,045e533587d8a51c00e08e459243ce7ec3cbaaf9,2017-12-03T19:40:00Z,heat,hold,720,760,760,SD,Rapid City,70,True,False,False,Gas
27639,045e533587d8a51c00e08e459243ce7ec3cbaaf9,2017-12-03T19:50:00Z,heat,hold,729,760,760,SD,Rapid City,70,True,False,False,Gas


In [163]:
# Tag every row with the year and month this extract covers (both stored as strings).
dec_2017["Year"], dec_2017["Month"] = "2017", "dec"

In [164]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2017_ave = (
    dec_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2017_ave

In [166]:
# Write the per-group means to disk; the index is written because it holds the group keys.
dec_2017_ave.to_csv(path_or_buf="data/day/SD/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the December 2018 "day" extract for SD.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2018-dec-day-SD.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2018.drop(
    index=dec_2018.loc[
        (dec_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (dec_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,eff352eb35883cf29e1e7c2da025cf60f45f03b8,2018-12-16 16:20:00 UTC,heat,hold,693,707,698,SD,Rapid City,17,False,False,False,Gas
1,aa8c27c5be7a5261298e19887aa06a314b0c1308,2018-12-01 18:50:00 UTC,auto,hold,701,752,702,SD,Aberdeen,0,False,False,False,Gas
2,aa8c27c5be7a5261298e19887aa06a314b0c1308,2018-12-10 17:25:00 UTC,auto,hold,693,742,692,SD,Aberdeen,0,False,False,False,Gas
3,aa8c27c5be7a5261298e19887aa06a314b0c1308,2018-12-01 18:15:00 UTC,auto,hold,676,752,702,SD,Aberdeen,0,False,False,False,Gas
4,aa8c27c5be7a5261298e19887aa06a314b0c1308,2018-12-20 14:30:00 UTC,auto,hold,675,662,602,SD,Aberdeen,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
58651,431feca8dbf15f07c25ba7065d5935bf21d4cfd6,2018-12-02 15:55:00 UTC,heat,hold,749,750,750,SD,Sioux Falls,5,True,False,False,Gas
58652,431feca8dbf15f07c25ba7065d5935bf21d4cfd6,2018-12-02 16:15:00 UTC,heat,hold,747,750,750,SD,Sioux Falls,5,True,False,False,Gas
58653,431feca8dbf15f07c25ba7065d5935bf21d4cfd6,2018-12-02 15:15:00 UTC,heat,hold,747,750,750,SD,Sioux Falls,5,True,False,False,Gas
58654,431feca8dbf15f07c25ba7065d5935bf21d4cfd6,2018-12-02 15:25:00 UTC,heat,hold,745,750,750,SD,Sioux Falls,5,True,False,False,Gas


In [169]:
# Tag every row with the year and month this extract covers (both stored as strings).
dec_2018["Year"], dec_2018["Month"] = "2018", "dec"

In [170]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2018_ave = (
    dec_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2018_ave

In [172]:
# Write the per-group means to disk; the index is written because it holds the group keys.
dec_2018_ave.to_csv(path_or_buf="data/day/SD/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the December 2019 "day" extract for SD.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2019-dec-day-SD.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2019.drop(
    index=dec_2019.loc[
        (dec_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (dec_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2019-12-19 13:55:00 UTC,heat,hold,692,697,687,SD,Brookings,5,True,False,False,Gas
1,49e91e8df5ec4323dfe01dcc99a1809c297e5f50,2019-12-14 16:20:00 UTC,heat,hold,691,722,722,SD,Sioux Falls,5,True,False,False,Gas
2,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2019-12-24 13:35:00 UTC,heat,hold,688,668,668,SD,Brookings,5,True,False,False,Gas
3,49e91e8df5ec4323dfe01dcc99a1809c297e5f50,2019-12-14 17:55:00 UTC,heat,hold,695,722,722,SD,Sioux Falls,5,True,False,False,Gas
4,1e4743e67aa7bdf1aac8a257b298b7d514329fcb,2019-12-27 13:55:00 UTC,heat,hold,692,699,696,SD,Brookings,5,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64427,51e6bafa5298e3366ba871e35aed1d974abe184e,2019-12-28 16:50:00 UTC,heat,hold,760,760,760,SD,Sioux Falls,19,True,False,False,Gas
64428,51e6bafa5298e3366ba871e35aed1d974abe184e,2019-12-09 15:30:00 UTC,heat,hold,754,760,760,SD,Sioux Falls,19,True,False,False,Gas
64429,51e6bafa5298e3366ba871e35aed1d974abe184e,2019-12-09 18:05:00 UTC,heat,hold,762,760,760,SD,Sioux Falls,19,True,False,False,Gas
64430,51e6bafa5298e3366ba871e35aed1d974abe184e,2019-12-09 16:00:00 UTC,heat,hold,758,760,760,SD,Sioux Falls,19,True,False,False,Gas


In [175]:
# Tag every row with the year and month this extract covers (both stored as strings).
dec_2019["Year"], dec_2019["Month"] = "2019", "dec"

In [176]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2019_ave = (
    dec_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2019_ave

In [178]:
# Write the per-group means to disk; the index is written because it holds the group keys.
dec_2019_ave.to_csv(path_or_buf="data/day/SD/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the December 2020 "day" extract for SD.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/SD-day/2020-dec-day-SD.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2020.drop(
    index=dec_2020.loc[
        (dec_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] >= 850).any(axis=1)
        | (dec_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,161b7899b0bd5766e93d559d878a514694dec8dc,2020-12-03 14:30:00 UTC,heat,hold,650,655,655,SD,Sioux Falls,59,False,False,False,Gas
1,161b7899b0bd5766e93d559d878a514694dec8dc,2020-12-03 13:55:00 UTC,heat,hold,655,655,655,SD,Sioux Falls,59,False,False,False,Gas
2,161b7899b0bd5766e93d559d878a514694dec8dc,2020-12-03 17:05:00 UTC,heat,hold,649,655,655,SD,Sioux Falls,59,False,False,False,Gas
3,161b7899b0bd5766e93d559d878a514694dec8dc,2020-12-03 16:05:00 UTC,heat,hold,653,655,655,SD,Sioux Falls,59,False,False,False,Gas
4,161b7899b0bd5766e93d559d878a514694dec8dc,2020-12-03 17:15:00 UTC,heat,hold,655,655,655,SD,Sioux Falls,59,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57797,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-12-07 12:20:00 UTC,heat,auto,690,670,690,SD,Sioux Falls,75,False,False,False,Gas
57798,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-12-07 12:15:00 UTC,heat,auto,686,670,690,SD,Sioux Falls,75,False,False,False,Gas
57799,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-12-07 13:25:00 UTC,heat,auto,685,670,690,SD,Sioux Falls,75,False,False,False,Gas
57800,442bbae1b4d42fdbfed83178e3a7f95b0e305c58,2020-12-07 12:25:00 UTC,heat,auto,689,670,690,SD,Sioux Falls,75,False,False,False,Gas


In [181]:
# Tag every row with the year and month this extract covers (both stored as strings).
dec_2020["Year"], dec_2020["Month"] = "2020", "dec"

In [182]:
# Suffix the three temperature columns with "_ave"; they hold group means once aggregated.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Mean of every numeric/boolean column per (Identifier, Month, Year, HvacMode, CalendarEvent, City).
# numeric_only=True makes the exclusion of the remaining text columns (e.g. date_time,
# ProvinceState) explicit: pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2020_ave = (
    dec_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2020_ave

In [184]:
# Write the per-group means to disk; the index is written because it holds the group keys.
dec_2020_ave.to_csv(path_or_buf="data/day/SD/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [185]:
# List the CSV files in the December folder, sorted by name: os.listdir returns entries in
# arbitrary order, so sorting keeps the combined row order reproducible between runs.
files = sorted(f for f in os.listdir("data/day/SD/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files` and stack them with a single concat call
# (concatenating inside a loop re-copies the accumulated rows on every pass).
# An empty file list still yields an empty dataframe, as before.
SD_dec = (
    pd.concat([pd.read_csv("data/day/SD/dec/" + name) for name in files])
    if files
    else pd.DataFrame()
)

SD_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004e7f286453271cd8b2b9367719102dc96392c2,dec,2017,auto,hold,Elk Point,697.627358,752.966981,698.297170,20.0,False,False,False
1,011eb806f7782fc048072fabfb035b54f5d71e67,dec,2017,auto,hold,Sioux Falls,656.218182,740.745455,660.345455,100.0,False,False,False
2,045e533587d8a51c00e08e459243ce7ec3cbaaf9,dec,2017,heat,hold,Rapid City,717.714286,760.000000,760.000000,70.0,True,False,False
3,05b6a87c2c20aa57b357426c9107705c0bd7ec20,dec,2017,heat,auto,Sioux Falls,685.129630,729.506173,677.160494,15.0,True,False,False
4,05b6a87c2c20aa57b357426c9107705c0bd7ec20,dec,2017,heat,hold,Sioux Falls,693.867403,701.505525,698.411602,15.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
84,f54879bd1a53b5447fa7998054e94ab1621745eb,dec,2020,heat,hold,Sioux Falls,719.863636,710.000000,710.000000,95.0,False,False,False
85,f55618457070bf0edd01780be7adab261f0423ce,dec,2020,heat,auto,Sioux Falls,697.687500,700.000000,700.000000,0.0,True,False,False
86,f55618457070bf0edd01780be7adab261f0423ce,dec,2020,heat,hold,Sioux Falls,688.642857,740.000000,690.000000,0.0,True,False,False
87,fbb6b3184e085ae4fe5c86bab30afe6a29793469,dec,2020,auto,hold,Mitchell,678.758621,734.827586,684.827586,30.0,False,False,False


In [187]:
# Save the combined December table; the row index is not written.
SD_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/SD/SD_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month files from the state's folder
2. Export as combined CSV

In [188]:
# List the combined month CSV files for SD, sorted by name: os.listdir returns entries in
# arbitrary order, so sorting keeps the combined row order reproducible between runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/SD/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files` and stack them with a single concat call
# (concatenating inside a loop re-copies the accumulated rows on every pass).
# An empty file list still yields an empty dataframe, as before.
SD_all = (
    pd.concat([pd.read_csv("Scraper_Output/State_Month_Day/SD/" + name) for name in files])
    if files
    else pd.DataFrame()
)

SD_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004e7f286453271cd8b2b9367719102dc96392c2,aug,2017,auto,auto,Elk Point,748.661654,744.503759,678.390977,20.0,False,False,False
1,004e7f286453271cd8b2b9367719102dc96392c2,aug,2017,auto,hold,Elk Point,738.937500,741.000000,679.750000,20.0,False,False,False
2,011eb806f7782fc048072fabfb035b54f5d71e67,aug,2017,auto,hold,Sioux Falls,727.986486,721.351351,640.000000,100.0,False,False,False
3,045e533587d8a51c00e08e459243ce7ec3cbaaf9,aug,2017,auto,hold,Rapid City,700.777778,701.888889,651.888889,70.0,True,False,False
4,05b6a87c2c20aa57b357426c9107705c0bd7ec20,aug,2017,cool,auto,Sioux Falls,702.000000,740.000000,680.000000,15.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
466,f54879bd1a53b5447fa7998054e94ab1621745eb,jun,2021,cool,hold,Sioux Falls,715.089552,700.268657,700.268657,95.0,False,False,False
467,f55618457070bf0edd01780be7adab261f0423ce,jun,2021,cool,hold,Sioux Falls,734.083333,738.073333,692.890000,0.0,True,False,False
468,fb9e07d5242ec2ffc3c0820c3e4956038ac410af,jun,2021,auto,hold,Yankton,736.492754,705.405797,655.405797,10.0,False,False,True
469,fbb6b3184e085ae4fe5c86bab30afe6a29793469,jun,2021,auto,hold,Mitchell,717.666667,751.206349,701.206349,30.0,False,False,False


In [190]:
# Save the table combining every SD month file; the row index is not written.
SD_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/SD_all_day.csv", index=False, header=True)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly dataframe.
# The (label, dataframe) pairs are listed in the order they are reported.
frames_to_check = (
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    # August and December have no 2021 dataframe.
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
)

for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['SD']
Unique jan_2018: ['SD']
Unique jan_2019: ['SD']
Unique jan_2020: ['SD']
Unique jan_2021: ['SD']
Unique feb_2017: ['SD']
Unique feb_2018: ['SD']
Unique feb_2019: ['SD']
Unique feb_2020: ['SD']
Unique feb_2021: ['SD']
Unique jun_2017: ['SD']
Unique jun_2018: ['SD']
Unique jun_2019: ['SD']
Unique jun_2020: ['SD']
Unique jun_2021: ['SD']
Unique jul_2017: ['SD']
Unique jul_2018: ['SD']
Unique jul_2019: ['SD']
Unique jul_2020: ['SD']
Unique jul_2021: ['SD']
Unique aug_2017: ['SD']
Unique aug_2018: ['SD']
Unique aug_2019: ['SD']
Unique aug_2020: ['SD']
Unique dec_2017: ['SD']
Unique dec_2018: ['SD']
Unique dec_2019: ['SD']
Unique dec_2020: ['SD']
