# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the KS "day" extract for January 2017 into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2017-jan-day-KS.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
jan_2017 = jan_2017.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-01-12 12:40:00 UTC,heat,hold,675,680,680,KS,Eastborough,79,True,False,False,Gas
1,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-01-13 14:00:00 UTC,heat,hold,678,680,680,KS,Eastborough,79,True,False,False,Gas
2,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-01-01 17:55:00 UTC,heat,hold,673,670,670,KS,Eastborough,79,True,False,False,Gas
3,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-01-14 18:35:00 UTC,heat,hold,687,690,690,KS,Eastborough,79,True,False,False,Gas
4,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-01-13 16:00:00 UTC,heat,hold,681,680,680,KS,Eastborough,79,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95247,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-01-14 19:30:00 UTC,auto,auto,671,740,690,KS,Lawrence,120,False,False,False,Gas
95248,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-01-22 15:25:00 UTC,auto,auto,638,720,640,KS,Lawrence,120,False,False,False,Gas
95249,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-01-21 16:25:00 UTC,auto,auto,654,740,640,KS,Lawrence,120,False,False,False,Gas
95250,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-01-21 16:45:00 UTC,auto,auto,653,740,640,KS,Lawrence,120,False,False,False,Gas


In [4]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,Jan,2017,auto,auto,Fairway,669.450980,820.000000,640.000000,80.0,False,False,False
01a0d7a563a1755d37bb856a01a56e452deb2010,Jan,2017,auto,hold,Olathe,688.583048,799.958904,690.002568,30.0,False,False,False
01a0d7a563a1755d37bb856a01a56e452deb2010,Jan,2017,cool,hold,Olathe,652.261905,713.261905,713.142857,30.0,False,False,False
01a0d7a563a1755d37bb856a01a56e452deb2010,Jan,2017,heat,hold,Olathe,697.493721,698.607535,698.607535,30.0,False,False,False
0448b121ebca96144ab5e5feb5ab88942e98de42,Jan,2017,auxHeatOnly,auto,Olathe,691.707317,780.000000,690.000000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fdadb8143c9f8d15c6bfbffab7628b015c5db3cb,Jan,2017,heat,auto,Olathe,627.546154,653.153846,636.669231,35.0,False,False,False
fdadb8143c9f8d15c6bfbffab7628b015c5db3cb,Jan,2017,heat,hold,Olathe,679.081633,680.489796,680.489796,35.0,False,False,False
fe779904e8eede7395997bd2762cd654983d363a,Jan,2017,heat,auto,Andover,671.520000,650.000000,620.000000,0.0,False,False,False
fec0465e4c757d26b069a20df5b2451426614367,Jan,2017,heat,auto,Overland Park,682.363636,765.454545,683.636364,25.0,False,False,False


In [7]:
# Export the January 2017 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/jan/", exist_ok=True)
jan_2017_ave.to_csv("data/day/KS/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the KS "day" extract for January 2018 into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2018-jan-day-KS.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
jan_2018 = jan_2018.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-01-29 19:05:00 UTC,heat,hold,672,670,670,KS,Paola,117,False,False,False,Gas
1,7f9688a0b01344c1f2804913546cad73007d0cf4,2018-01-26 15:35:00 UTC,heat,hold,734,740,740,KS,Oswego,106,False,False,False,Gas
2,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-01-08 17:45:00 UTC,heat,hold,675,650,650,KS,Paola,117,False,False,False,Gas
3,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-01-17 18:05:00 UTC,heat,hold,678,680,680,KS,Paola,117,False,False,False,Gas
4,af58a413d1a41205bebf1c507761a5ab74a84725,2018-01-14 19:05:00 UTC,heat,hold,694,700,700,KS,Ellis,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
279087,842c02deec3009c2808be94f10e56264890a12f4,2018-01-30 14:30:00 UTC,heat,hold,671,680,680,KS,Scranton,120,False,False,False,Gas
279088,842c02deec3009c2808be94f10e56264890a12f4,2018-01-29 16:25:00 UTC,heat,hold,687,680,680,KS,Scranton,120,False,False,False,Gas
279089,842c02deec3009c2808be94f10e56264890a12f4,2018-01-10 17:20:00 UTC,heat,hold,693,690,690,KS,Scranton,120,False,False,False,Gas
279090,842c02deec3009c2808be94f10e56264890a12f4,2018-01-24 11:40:00 UTC,heat,hold,675,680,680,KS,Scranton,120,False,False,False,Gas


In [10]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the January 2018 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/jan/", exist_ok=True)
jan_2018_ave.to_csv("data/day/KS/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the KS "day" extract for January 2019 into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2019-jan-day-KS.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
jan_2019 = jan_2019.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,62a890bb783d07af9497a160b6a7f4ae41579c67,2019-01-05 13:50:00 UTC,heat,hold,640,650,640,KS,Murdock,118,False,False,False,Gas
1,62a890bb783d07af9497a160b6a7f4ae41579c67,2019-01-05 13:10:00 UTC,heat,hold,639,650,640,KS,Murdock,118,False,False,False,Gas
2,62a890bb783d07af9497a160b6a7f4ae41579c67,2019-01-15 13:25:00 UTC,heat,hold,659,670,670,KS,Murdock,118,False,False,False,Gas
3,7f9688a0b01344c1f2804913546cad73007d0cf4,2019-01-16 17:45:00 UTC,heat,auto,746,760,760,KS,Oswego,106,False,False,False,Gas
4,62a890bb783d07af9497a160b6a7f4ae41579c67,2019-01-31 19:00:00 UTC,heat,hold,642,650,640,KS,Murdock,118,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405084,842c02deec3009c2808be94f10e56264890a12f4,2019-01-03 09:30:00 UTC,heat,auto,669,670,670,KS,Scranton,120,False,False,False,Gas
405085,842c02deec3009c2808be94f10e56264890a12f4,2019-01-28 13:40:00 UTC,heat,hold,687,690,690,KS,Scranton,120,False,False,False,Gas
405086,842c02deec3009c2808be94f10e56264890a12f4,2019-01-23 10:35:00 UTC,heat,hold,667,670,670,KS,Scranton,120,False,False,False,Gas
405087,842c02deec3009c2808be94f10e56264890a12f4,2019-01-22 13:20:00 UTC,heat,hold,697,700,700,KS,Scranton,120,False,False,False,Gas


In [16]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the January 2019 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/jan/", exist_ok=True)
jan_2019_ave.to_csv("data/day/KS/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the KS "day" extract for January 2020 into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2020-jan-day-KS.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
jan_2020 = jan_2020.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-01-05 18:10:00 UTC,heat,hold,682,700,700,KS,Tooeka,67,False,False,False,Gas
1,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-01-12 14:15:00 UTC,heat,hold,676,680,680,KS,Tooeka,67,False,False,False,Gas
2,e73d2071803a834c945f4eedd70daf1ac7d08fea,2020-01-13 15:55:00 UTC,heat,auto,713,720,720,KS,Paola,99,False,False,False,Gas
3,af58a413d1a41205bebf1c507761a5ab74a84725,2020-01-11 14:05:00 UTC,auto,hold,699,750,700,KS,Ellis,100,False,False,False,Gas
4,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-01-11 12:20:00 UTC,heat,hold,675,700,700,KS,Tooeka,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
439140,842c02deec3009c2808be94f10e56264890a12f4,2020-01-02 09:10:00 UTC,heat,hold,649,650,640,KS,Scranton,120,False,False,False,Gas
439141,842c02deec3009c2808be94f10e56264890a12f4,2020-01-15 15:20:00 UTC,heat,hold,695,700,700,KS,Scranton,120,False,False,False,Gas
439142,842c02deec3009c2808be94f10e56264890a12f4,2020-01-07 08:40:00 UTC,heat,hold,655,650,650,KS,Scranton,120,False,False,False,Gas
439143,842c02deec3009c2808be94f10e56264890a12f4,2020-01-14 07:40:00 UTC,heat,auto,697,650,650,KS,Scranton,120,False,False,False,Gas


In [22]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the January 2020 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/jan/", exist_ok=True)
jan_2020_ave.to_csv("data/day/KS/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the KS "day" extract for January 2021 into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2021-jan-day-KS.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
jan_2021 = jan_2021.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-01-15 18:20:00 UTC,heat,hold,702,700,700,KS,Tooeka,67,False,False,False,Gas
1,e73d2071803a834c945f4eedd70daf1ac7d08fea,2021-01-09 17:15:00 UTC,heat,hold,706,710,710,KS,Paola,99,False,False,False,Gas
2,62a890bb783d07af9497a160b6a7f4ae41579c67,2021-01-24 15:10:00 UTC,auto,hold,630,740,640,KS,Murdock,118,False,False,False,Gas
3,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-01-30 17:00:00 UTC,heat,hold,696,700,700,KS,Tooeka,67,False,False,False,Gas
4,62a890bb783d07af9497a160b6a7f4ae41579c67,2021-01-23 13:25:00 UTC,auto,hold,639,740,650,KS,Murdock,118,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288751,842c02deec3009c2808be94f10e56264890a12f4,2021-01-28 15:05:00 UTC,heat,hold,685,690,690,KS,Scranton,120,False,False,False,Gas
288752,842c02deec3009c2808be94f10e56264890a12f4,2021-01-11 13:20:00 UTC,heat,hold,691,690,690,KS,Scranton,120,False,False,False,Gas
288753,842c02deec3009c2808be94f10e56264890a12f4,2021-01-04 19:35:00 UTC,heat,hold,671,670,670,KS,Scranton,120,False,False,False,Gas
288754,842c02deec3009c2808be94f10e56264890a12f4,2021-01-18 14:40:00 UTC,heat,hold,684,690,690,KS,Scranton,120,False,False,False,Gas


In [28]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the January 2021 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/jan/", exist_ok=True)
jan_2021_ave.to_csv("data/day/KS/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the CSV files in the January output folder.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/KS/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once.  Calling pd.concat
# inside the loop re-copies all previously accumulated rows on each pass.
frames = [pd.read_csv("data/day/KS/jan/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to the empty frame the
# original loop produced when no CSVs are present.  Row labels are kept as
# read (they restart at 0 for each file); the CSV export of KS_jan drops them.
KS_jan = pd.concat(frames) if frames else pd.DataFrame()

KS_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,Jan,2017,auto,auto,Fairway,669.450980,820.000000,640.000000,80.0,False,False,False
1,01a0d7a563a1755d37bb856a01a56e452deb2010,Jan,2017,auto,hold,Olathe,688.583048,799.958904,690.002568,30.0,False,False,False
2,01a0d7a563a1755d37bb856a01a56e452deb2010,Jan,2017,cool,hold,Olathe,652.261905,713.261905,713.142857,30.0,False,False,False
3,01a0d7a563a1755d37bb856a01a56e452deb2010,Jan,2017,heat,hold,Olathe,697.493721,698.607535,698.607535,30.0,False,False,False
4,0448b121ebca96144ab5e5feb5ab88942e98de42,Jan,2017,auxHeatOnly,auto,Olathe,691.707317,780.000000,690.000000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,fdf82bf64cb1a67c28f91aa7246724223df57851,Jan,2021,heat,hold,Atchison,691.846491,693.903509,693.903509,40.0,False,False,False
406,fe779904e8eede7395997bd2762cd654983d363a,Jan,2021,heat,hold,Andover,747.720698,750.708229,750.708229,0.0,False,False,False
407,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,Jan,2021,heat,hold,Eastborough,703.534555,709.582015,709.473772,90.0,False,False,False
408,ff0c14deff346a8a3dc87a89fb1ab6632be73334,Jan,2021,heat,hold,Tonganoxie,675.750000,680.000000,680.000000,10.0,False,False,False


In [34]:
# Write the combined January frame without its row labels.  Create the target
# folder first so the write does not fail when the folder is missing.
os.makedirs("Scraper_Output/State_Month_Day/KS/", exist_ok=True)
KS_jan.to_csv("Scraper_Output/State_Month_Day/KS/KS_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the KS "day" extract for February 2017 into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2017-feb-day-KS.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
feb_2017 = feb_2017.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-02-21 19:35:00 UTC,heat,auto,752,820,650,KS,Oswego,106,False,False,False,Gas
1,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-02-22 18:20:00 UTC,heat,auto,751,820,650,KS,Oswego,106,False,False,False,Gas
2,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-02-11 17:50:00 UTC,heat,auto,760,820,650,KS,Oswego,106,False,False,False,Gas
3,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-02-22 19:50:00 UTC,heat,auto,757,820,650,KS,Oswego,106,False,False,False,Gas
4,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-02-28 15:30:00 UTC,heat,auto,743,820,650,KS,Oswego,106,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83063,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-02-19 16:25:00 UTC,auto,auto,645,720,620,KS,Lawrence,120,False,False,False,Gas
83064,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-02-19 18:20:00 UTC,auto,auto,657,720,620,KS,Lawrence,120,False,False,False,Gas
83065,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-02-25 19:10:00 UTC,auto,auto,677,740,680,KS,Lawrence,120,False,False,False,Gas
83066,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-02-12 16:20:00 UTC,auto,auto,647,740,650,KS,Lawrence,120,False,False,False,Gas


In [37]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the February 2017 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/feb/", exist_ok=True)
feb_2017_ave.to_csv("data/day/KS/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the KS "day" extract for February 2018 into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2018-feb-day-KS.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
feb_2018 = feb_2018.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-02-13 12:40:00 UTC,heat,hold,660,670,670,KS,Paola,117,False,False,False,Gas
1,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-02-20 10:05:00 UTC,heat,hold,636,650,640,KS,Paola,117,False,False,False,Gas
2,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-02-06 12:50:00 UTC,heat,auto,629,650,630,KS,Paola,117,False,False,False,Gas
3,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-02-26 13:15:00 UTC,heat,hold,643,670,670,KS,Paola,117,False,False,False,Gas
4,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2018-02-23 13:30:00 UTC,heat,hold,647,670,670,KS,Paola,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255665,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-02-04 19:30:00 UTC,auto,auto,688,760,690,KS,Lawrence,120,False,False,False,Gas
255666,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-02-04 19:50:00 UTC,auto,auto,690,760,690,KS,Lawrence,120,False,False,False,Gas
255667,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-02-04 17:10:00 UTC,auto,auto,646,820,620,KS,Lawrence,120,False,False,False,Gas
255668,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-02-04 17:05:00 UTC,auto,auto,647,820,620,KS,Lawrence,120,False,False,False,Gas


In [43]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the February 2018 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/feb/", exist_ok=True)
feb_2018_ave.to_csv("data/day/KS/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the KS "day" extract for February 2019 into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2019-feb-day-KS.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
feb_2019 = feb_2019.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7bf217daebd9144ce543a1754527180e81166ccf,2019-02-18 13:35:00 UTC,auto,auto,677,750,680,KS,El Dorado,67,False,False,False,Gas
1,7bf217daebd9144ce543a1754527180e81166ccf,2019-02-18 13:30:00 UTC,auto,auto,675,750,680,KS,El Dorado,67,False,False,False,Gas
2,7f9688a0b01344c1f2804913546cad73007d0cf4,2019-02-21 16:05:00 UTC,heat,hold,749,760,760,KS,Oswego,106,False,False,False,Gas
3,7bf217daebd9144ce543a1754527180e81166ccf,2019-02-12 13:35:00 UTC,auto,hold,684,750,680,KS,El Dorado,67,False,False,False,Gas
4,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2019-02-24 16:30:00 UTC,heat,hold,681,680,680,KS,Eastborough,79,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
275215,842c02deec3009c2808be94f10e56264890a12f4,2019-02-28 14:20:00 UTC,heat,hold,701,720,720,KS,Scranton,120,False,False,False,Gas
275216,842c02deec3009c2808be94f10e56264890a12f4,2019-02-07 12:50:00 UTC,heat,hold,659,700,700,KS,Scranton,120,False,False,False,Gas
275217,842c02deec3009c2808be94f10e56264890a12f4,2019-02-13 11:10:00 UTC,heat,auto,671,670,670,KS,Scranton,120,False,False,False,Gas
275218,842c02deec3009c2808be94f10e56264890a12f4,2019-02-05 16:05:00 UTC,heat,auto,677,690,690,KS,Scranton,120,False,False,False,Gas


In [49]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the February 2019 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/feb/", exist_ok=True)
feb_2019_ave.to_csv("data/day/KS/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the KS "day" extract for February 2020 into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2020-feb-day-KS.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
feb_2020 = feb_2020.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7bf217daebd9144ce543a1754527180e81166ccf,2020-02-06 12:30:00 UTC,auto,auto,707,780,710,KS,El Dorado,67,False,False,False,Gas
1,7f9688a0b01344c1f2804913546cad73007d0cf4,2020-02-09 17:55:00 UTC,heat,hold,747,730,730,KS,Oswego,106,False,False,False,Gas
2,e73d2071803a834c945f4eedd70daf1ac7d08fea,2020-02-17 17:00:00 UTC,heat,hold,716,720,720,KS,Paola,99,False,False,False,Gas
3,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-02-22 18:15:00 UTC,heat,hold,678,680,680,KS,Tooeka,67,False,False,False,Gas
4,7f9688a0b01344c1f2804913546cad73007d0cf4,2020-02-09 19:35:00 UTC,heat,hold,729,730,730,KS,Oswego,106,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398602,842c02deec3009c2808be94f10e56264890a12f4,2020-02-03 15:40:00 UTC,heat,hold,685,690,690,KS,Scranton,120,False,False,False,Gas
398603,842c02deec3009c2808be94f10e56264890a12f4,2020-02-04 15:05:00 UTC,heat,hold,699,700,700,KS,Scranton,120,False,False,False,Gas
398604,842c02deec3009c2808be94f10e56264890a12f4,2020-02-13 14:25:00 UTC,heat,hold,688,700,700,KS,Scranton,120,False,False,False,Gas
398605,842c02deec3009c2808be94f10e56264890a12f4,2020-02-20 14:10:00 UTC,heat,hold,697,710,710,KS,Scranton,120,False,False,False,Gas


In [55]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the February 2020 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/feb/", exist_ok=True)
feb_2020_ave.to_csv("data/day/KS/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the KS "day" extract for February 2021 into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/KS-day/2021-feb-day-KS.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600.  Rows where one of those values is missing are kept, exactly as the
# original comparisons did.
# One boolean filter replaces four in-place drops; .copy() yields an
# independent frame so later column assignments do not act on a slice.
feb_2021 = feb_2021.loc[
    lambda d: ~(
        (d['TemperatureExpectedCool'] >= 850)
        | (d['TemperatureExpectedHeat'] >= 850)
        | (d['TemperatureExpectedCool'] <= 600)
        | (d['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e73d2071803a834c945f4eedd70daf1ac7d08fea,2021-02-20 17:15:00 UTC,heat,hold,727,730,730,KS,Paola,99,False,False,False,Gas
1,e73d2071803a834c945f4eedd70daf1ac7d08fea,2021-02-07 16:55:00 UTC,heat,hold,705,720,720,KS,Paola,99,False,False,False,Gas
3,fc006352113dcc3ae14023c9e52f72483f64b65e,2021-02-27 18:10:00 UTC,heat,hold,664,650,650,KS,Arkansas City,100,True,False,False,Gas
5,e73d2071803a834c945f4eedd70daf1ac7d08fea,2021-02-17 15:40:00 UTC,heat,hold,691,710,710,KS,Paola,99,False,False,False,Gas
6,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-02-10 11:35:00 UTC,heat,hold,693,700,700,KS,Tooeka,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
267597,842c02deec3009c2808be94f10e56264890a12f4,2021-02-11 14:05:00 UTC,heat,hold,664,670,670,KS,Scranton,120,False,False,False,Gas
267598,842c02deec3009c2808be94f10e56264890a12f4,2021-02-15 10:40:00 UTC,heat,hold,656,670,670,KS,Scranton,120,False,False,False,Gas
267599,842c02deec3009c2808be94f10e56264890a12f4,2021-02-01 19:25:00 UTC,heat,hold,672,660,660,KS,Scranton,120,False,False,False,Gas
267600,842c02deec3009c2808be94f10e56264890a12f4,2021-02-03 09:55:00 UTC,heat,hold,646,650,650,KS,Scranton,120,False,False,False,Gas


In [61]:
# Stamp every row with its source year and month (both stored as strings);
# the aggregation cell groups on these two columns.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Give the three temperature columns an "_ave" suffix so their names describe
# the values they hold once the frame has been averaged.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.  numeric_only=True is needed on
# pandas >= 2.0, where averaging the remaining text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) raises TypeError instead of
# silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the February 2021 averages; index=True writes the group keys as the
# leading columns.  Create the target folder first so the write does not fail
# when the folder is missing (e.g. on a fresh checkout).
os.makedirs("data/day/KS/feb/", exist_ok=True)
feb_2021_ave.to_csv("data/day/KS/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the CSV files in the February output folder.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/KS/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once.  Calling pd.concat
# inside the loop re-copies all previously accumulated rows on each pass.
frames = [pd.read_csv("data/day/KS/feb/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to the empty frame the
# original loop produced when no CSVs are present.  Row labels are kept as
# read (they restart at 0 for each file); the CSV export of KS_feb drops them.
KS_feb = pd.concat(frames) if frames else pd.DataFrame()

KS_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,feb,2017,auto,hold,Fairway,706.272727,787.878788,687.878788,80.0,False,False,False
1,01a0d7a563a1755d37bb856a01a56e452deb2010,feb,2017,auto,hold,Olathe,700.478645,780.000000,700.000000,30.0,False,False,False
2,01a0d7a563a1755d37bb856a01a56e452deb2010,feb,2017,heat,hold,Olathe,692.076023,690.000000,690.000000,30.0,False,False,False
3,025ec7c58b524de74d8986f4896917c31052701a,feb,2017,heat,auto,Overland Park,653.779614,650.000000,630.000000,5.0,False,False,True
4,0448b121ebca96144ab5e5feb5ab88942e98de42,feb,2017,auxHeatOnly,auto,Olathe,699.107143,780.000000,700.000000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
424,fc006352113dcc3ae14023c9e52f72483f64b65e,feb,2021,heat,hold,Arkansas City,674.253521,661.408451,661.408451,100.0,True,False,False
425,fdadb8143c9f8d15c6bfbffab7628b015c5db3cb,feb,2021,auto,hold,Olathe,702.958946,755.431288,704.079516,35.0,False,False,False
426,fdf82bf64cb1a67c28f91aa7246724223df57851,feb,2021,heat,hold,Atchison,693.121196,695.846739,695.847283,40.0,False,False,False
427,fe779904e8eede7395997bd2762cd654983d363a,feb,2021,heat,hold,Andover,753.238827,756.745810,756.745810,0.0,False,False,False


In [67]:
# Save the combined February table. The row index restarts for every yearly
# file and carries no information, so it is left out of the CSV.
KS_feb.to_csv(
    "Scraper_Output/State_Month_Day/KS/KS_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 day readings for KS.
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2017-jun-day-KS.csv"
)

# jun_2017

In [69]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jun_2017['TemperatureExpectedCool']
heat = jun_2017['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2017.drop(index=jun_2017[out_of_band].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-06-16 15:00:00 UTC,cool,hold,755,760,760,KS,Paola,117,False,False,False,Gas
1,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-06-08 12:50:00 UTC,cool,hold,741,780,780,KS,Eastborough,79,True,False,False,Gas
2,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-06-11 14:40:00 UTC,cool,hold,760,750,750,KS,Paola,117,False,False,False,Gas
3,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-06-18 17:20:00 UTC,cool,hold,765,780,780,KS,Paola,117,False,False,False,Gas
4,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-06-28 19:40:00 UTC,cool,hold,771,750,750,KS,Eastborough,79,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
173561,842c02deec3009c2808be94f10e56264890a12f4,2017-06-30 19:35:00 UTC,cool,auto,775,780,760,KS,Scranton,120,False,False,False,Gas
173562,842c02deec3009c2808be94f10e56264890a12f4,2017-06-20 13:20:00 UTC,cool,auto,736,750,760,KS,Scranton,120,False,False,False,Gas
173563,842c02deec3009c2808be94f10e56264890a12f4,2017-06-20 17:20:00 UTC,cool,hold,770,780,760,KS,Scranton,120,False,False,False,Gas
173564,842c02deec3009c2808be94f10e56264890a12f4,2017-06-21 16:15:00 UTC,cool,hold,779,780,760,KS,Scranton,120,False,False,False,Gas


In [70]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2017"), ("Month", "jun")):
    jun_2017[column] = label

In [71]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jun_2017_ave = (
    jun_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2017_ave

In [73]:
# Write the June 2017 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jun_2017_ave.to_csv(
    "data/day/KS/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw June 2018 day readings for KS.
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2018-jun-day-KS.csv"
)

# jun_2018

In [75]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jun_2018['TemperatureExpectedCool']
heat = jun_2018['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2018.drop(index=jun_2018[out_of_band].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,781ec8442b037999e55ca700ac584e460a0304d6,2018-06-14 11:40:00 UTC,cool,hold,686,730,670,KS,Salina,67,False,False,False,Gas
1,fc006352113dcc3ae14023c9e52f72483f64b65e,2018-06-23 17:15:00 UTC,auto,hold,728,730,640,KS,Arkansas City,100,True,False,False,Gas
2,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2018-06-03 14:30:00 UTC,cool,hold,715,730,730,KS,Tooeka,67,False,False,False,Gas
3,f4ca0722190325549f736870c8f8b5a4560c0708,2018-06-02 18:30:00 UTC,cool,auto,700,700,700,KS,Conway Springs,118,True,True,True,Electric
4,f4ca0722190325549f736870c8f8b5a4560c0708,2018-06-08 18:05:00 UTC,cool,auto,698,700,700,KS,Conway Springs,118,True,True,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
369468,842c02deec3009c2808be94f10e56264890a12f4,2018-06-25 08:15:00 UTC,cool,auto,719,720,720,KS,Scranton,120,False,False,False,Gas
369469,842c02deec3009c2808be94f10e56264890a12f4,2018-06-11 15:05:00 UTC,cool,hold,759,770,770,KS,Scranton,120,False,False,False,Gas
369470,842c02deec3009c2808be94f10e56264890a12f4,2018-06-18 18:10:00 UTC,cool,hold,753,760,760,KS,Scranton,120,False,False,False,Gas
369471,842c02deec3009c2808be94f10e56264890a12f4,2018-06-28 13:20:00 UTC,cool,hold,760,760,760,KS,Scranton,120,False,False,False,Gas


In [76]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2018"), ("Month", "jun")):
    jun_2018[column] = label

In [77]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jun_2018_ave = (
    jun_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2018_ave

In [79]:
# Write the June 2018 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jun_2018_ave.to_csv(
    "data/day/KS/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw June 2019 day readings for KS.
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2019-jun-day-KS.csv"
)

# jun_2019

In [81]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jun_2019['TemperatureExpectedCool']
heat = jun_2019['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2019.drop(index=jun_2019[out_of_band].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,030a1ce1894ca9d7e049cc41dc57f72e597cf256,2019-06-10 12:05:00 UTC,cool,auto,729,770,630,KS,Westwood,99,True,False,False,Gas
1,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2019-06-27 18:35:00 UTC,cool,hold,754,750,750,KS,Eastborough,79,True,False,False,Gas
2,e73d2071803a834c945f4eedd70daf1ac7d08fea,2019-06-28 18:40:00 UTC,cool,hold,746,740,740,KS,Paola,99,False,False,False,Gas
3,7f9688a0b01344c1f2804913546cad73007d0cf4,2019-06-27 16:45:00 UTC,cool,auto,749,760,760,KS,Oswego,106,False,False,False,Gas
4,030a1ce1894ca9d7e049cc41dc57f72e597cf256,2019-06-07 12:40:00 UTC,cool,auto,762,770,630,KS,Westwood,99,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
482113,842c02deec3009c2808be94f10e56264890a12f4,2019-06-03 15:10:00 UTC,cool,hold,733,770,770,KS,Scranton,120,False,False,False,Gas
482114,842c02deec3009c2808be94f10e56264890a12f4,2019-06-12 07:00:00 UTC,cool,hold,746,800,800,KS,Scranton,120,False,False,False,Gas
482115,842c02deec3009c2808be94f10e56264890a12f4,2019-06-20 13:05:00 UTC,cool,hold,721,790,790,KS,Scranton,120,False,False,False,Gas
482116,842c02deec3009c2808be94f10e56264890a12f4,2019-06-10 18:55:00 UTC,heat,hold,721,720,720,KS,Scranton,120,False,False,False,Gas


In [82]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2019"), ("Month", "jun")):
    jun_2019[column] = label

In [83]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jun_2019_ave = (
    jun_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2019_ave

In [85]:
# Write the June 2019 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jun_2019_ave.to_csv(
    "data/day/KS/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw June 2020 day readings for KS.
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2020-jun-day-KS.csv"
)

# jun_2020

In [87]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jun_2020['TemperatureExpectedCool']
heat = jun_2020['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2020.drop(index=jun_2020[out_of_band].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-06-21 18:20:00 UTC,auto,hold,692,690,615,KS,Tooeka,67,False,False,False,Gas
1,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2020-06-01 19:40:00 UTC,cool,hold,689,669,669,KS,Eastborough,79,True,False,False,Gas
2,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-06-14 11:10:00 UTC,auto,hold,693,690,615,KS,Tooeka,67,False,False,False,Gas
3,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-06-18 10:35:00 UTC,auto,hold,691,690,615,KS,Tooeka,67,False,False,False,Gas
4,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2020-06-18 12:55:00 UTC,auto,auto,739,720,640,KS,Dodge City,105,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
492149,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2020-06-30 15:10:00 UTC,cool,auto,723,700,700,KS,Wichita,120,True,False,True,Electric
492150,a1b0dc41acf5beef99934faecb9e1460d7b78173,2020-06-14 19:45:00 UTC,auto,auto,770,780,660,KS,Lawrence,120,False,False,False,Gas
492151,a1b0dc41acf5beef99934faecb9e1460d7b78173,2020-06-13 19:20:00 UTC,auto,auto,767,730,660,KS,Lawrence,120,False,False,False,Gas
492152,a1b0dc41acf5beef99934faecb9e1460d7b78173,2020-06-13 17:45:00 UTC,auto,auto,755,780,660,KS,Lawrence,120,False,False,False,Gas


In [88]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2020"), ("Month", "jun")):
    jun_2020[column] = label

In [89]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jun_2020_ave = (
    jun_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2020_ave

In [91]:
# Write the June 2020 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jun_2020_ave.to_csv(
    "data/day/KS/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw June 2021 day readings for KS.
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2021-jun-day-KS.csv"
)

# jun_2021

In [93]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jun_2021['TemperatureExpectedCool']
heat = jun_2021['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2021.drop(index=jun_2021[out_of_band].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,62a890bb783d07af9497a160b6a7f4ae41579c67,2021-06-12 12:55:00 UTC,cool,hold,728,720,720,KS,Murdock,118,False,False,False,Gas
1,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-06-19 12:55:00 UTC,cool,hold,695,690,690,KS,Tooeka,67,False,False,False,Gas
2,62a890bb783d07af9497a160b6a7f4ae41579c67,2021-06-19 17:10:00 UTC,cool,hold,730,720,720,KS,Murdock,118,False,False,False,Gas
3,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2021-06-11 15:40:00 UTC,cool,hold,697,695,695,KS,Eastborough,79,True,False,False,Gas
4,7bf217daebd9144ce543a1754527180e81166ccf,2021-06-04 11:40:00 UTC,auto,hold,710,710,660,KS,El Dorado,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
311073,842c02deec3009c2808be94f10e56264890a12f4,2021-06-03 13:10:00 UTC,cool,hold,694,740,740,KS,Scranton,120,False,False,False,Gas
311074,842c02deec3009c2808be94f10e56264890a12f4,2021-06-21 07:40:00 UTC,cool,hold,714,690,690,KS,Scranton,120,False,False,False,Gas
311075,842c02deec3009c2808be94f10e56264890a12f4,2021-06-17 08:00:00 UTC,cool,hold,735,680,680,KS,Scranton,120,False,False,False,Gas
311076,842c02deec3009c2808be94f10e56264890a12f4,2021-06-09 08:50:00 UTC,cool,hold,709,680,680,KS,Scranton,120,False,False,False,Gas


In [94]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2021"), ("Month", "jun")):
    jun_2021[column] = label

In [95]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jun_2021_ave = (
    jun_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jun_2021_ave

In [97]:
# Write the June 2021 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jun_2021_ave.to_csv(
    "data/day/KS/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files saved above for this month
2. Concatenate them and export a single combined CSV for the state

In [98]:
# List this month's per-year average files. os.listdir returns names in
# arbitrary order, so sort them to keep the combined rows in the same
# order on every run.
files = sorted(f for f in os.listdir("data/day/KS/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file first and concatenate once: growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each pass.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/KS/jun/" + file)
    yearly_frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the original
# "no files -> empty frame" result.
KS_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

KS_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,jun,2017,cool,hold,Fairway,760.782609,736.869565,736.173913,80.0,False,False,False
1,01a0d7a563a1755d37bb856a01a56e452deb2010,jun,2017,auto,hold,Olathe,749.553657,754.137520,677.127186,30.0,False,False,False
2,025ec7c58b524de74d8986f4896917c31052701a,jun,2017,cool,hold,Overland Park,721.419820,720.009610,719.966366,5.0,False,False,True
3,0448b121ebca96144ab5e5feb5ab88942e98de42,jun,2017,cool,auto,Olathe,740.900826,750.000000,700.000000,10.0,False,False,False
4,0448b121ebca96144ab5e5feb5ab88942e98de42,jun,2017,cool,hold,Olathe,743.645833,750.000000,749.312500,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
431,fc006352113dcc3ae14023c9e52f72483f64b65e,jun,2021,auto,hold,Arkansas City,732.458333,730.000000,670.000000,100.0,True,False,False
432,fdf82bf64cb1a67c28f91aa7246724223df57851,jun,2021,auto,hold,Atchison,705.988506,730.689655,680.517241,40.0,False,False,False
433,fe779904e8eede7395997bd2762cd654983d363a,jun,2021,cool,hold,Andover,752.840278,748.888889,748.888889,0.0,False,False,False
434,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,jun,2021,cool,hold,Eastborough,702.469314,696.205776,696.054152,90.0,False,False,False


In [100]:
# Save the combined June table. The row index restarts for every yearly
# file and carries no information, so it is left out of the CSV.
KS_jun.to_csv(
    "Scraper_Output/State_Month_Day/KS/KS_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 day readings for KS.
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2017-jul-day-KS.csv"
)

# jul_2017

In [102]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jul_2017['TemperatureExpectedCool']
heat = jul_2017['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2017.drop(index=jul_2017[out_of_band].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-07-29 18:55:00 UTC,cool,hold,742,760,760,KS,Paola,117,False,False,False,Gas
1,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-07-20 17:00:00 UTC,cool,hold,765,760,760,KS,Paola,117,False,False,False,Gas
2,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-07-26 19:45:00 UTC,cool,hold,758,755,755,KS,Paola,117,False,False,False,Gas
3,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-07-05 19:05:00 UTC,cool,hold,748,750,750,KS,Paola,117,False,False,False,Gas
4,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-07-01 19:30:00 UTC,cool,hold,761,760,760,KS,Paola,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203528,842c02deec3009c2808be94f10e56264890a12f4,2017-07-08 17:30:00 UTC,cool,hold,796,770,760,KS,Scranton,120,False,False,False,Gas
203529,842c02deec3009c2808be94f10e56264890a12f4,2017-07-10 16:50:00 UTC,cool,auto,786,770,760,KS,Scranton,120,False,False,False,Gas
203530,842c02deec3009c2808be94f10e56264890a12f4,2017-07-02 17:50:00 UTC,cool,hold,758,770,760,KS,Scranton,120,False,False,False,Gas
203531,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-07-09 16:40:00 UTC,auto,auto,739,730,660,KS,Lawrence,120,False,False,False,Gas


In [103]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2017"), ("Month", "jul")):
    jul_2017[column] = label

In [104]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jul_2017_ave = (
    jul_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2017_ave

In [106]:
# Write the July 2017 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jul_2017_ave.to_csv(
    "data/day/KS/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw July 2018 day readings for KS.
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2018-jul-day-KS.csv"
)

# jul_2018

In [108]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jul_2018['TemperatureExpectedCool']
heat = jul_2018['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2018.drop(index=jul_2018[out_of_band].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af58a413d1a41205bebf1c507761a5ab74a84725,2018-07-28 15:20:00 UTC,cool,hold,770,800,790,KS,Ellis,100,False,False,False,Gas
1,af58a413d1a41205bebf1c507761a5ab74a84725,2018-07-29 15:55:00 UTC,cool,hold,794,800,790,KS,Ellis,100,False,False,False,Gas
2,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2018-07-24 11:10:00 UTC,cool,auto,724,720,720,KS,Dodge City,105,False,False,False,Gas
3,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2018-07-24 15:10:00 UTC,cool,auto,722,720,720,KS,Dodge City,105,False,False,False,Gas
4,af58a413d1a41205bebf1c507761a5ab74a84725,2018-07-08 16:55:00 UTC,cool,hold,795,800,790,KS,Ellis,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
384284,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-07-01 17:20:00 UTC,heat,auto,854,820,620,KS,Lawrence,120,False,False,False,Gas
384285,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-07-01 18:30:00 UTC,heat,auto,855,820,620,KS,Lawrence,120,False,False,False,Gas
384286,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-07-01 15:40:00 UTC,heat,auto,852,820,620,KS,Lawrence,120,False,False,False,Gas
384287,a1b0dc41acf5beef99934faecb9e1460d7b78173,2018-07-01 15:55:00 UTC,heat,auto,853,820,620,KS,Lawrence,120,False,False,False,Gas


In [109]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2018"), ("Month", "jul")):
    jul_2018[column] = label

In [110]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jul_2018_ave = (
    jul_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2018_ave

In [112]:
# Write the July 2018 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jul_2018_ave.to_csv(
    "data/day/KS/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw July 2019 day readings for KS.
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2019-jul-day-KS.csv"
)

# jul_2019

In [114]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jul_2019['TemperatureExpectedCool']
heat = jul_2019['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2019.drop(index=jul_2019[out_of_band].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7bf217daebd9144ce543a1754527180e81166ccf,2019-07-10 12:30:00 UTC,auto,auto,725,720,650,KS,El Dorado,67,False,False,False,Gas
1,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2019-07-14 13:55:00 UTC,cool,auto,702,700,650,KS,Tooeka,67,False,False,False,Gas
2,fc006352113dcc3ae14023c9e52f72483f64b65e,2019-07-14 17:10:00 UTC,auto,hold,733,732,672,KS,Arkansas City,100,True,False,False,Gas
3,7f9688a0b01344c1f2804913546cad73007d0cf4,2019-07-27 16:40:00 UTC,cool,hold,737,710,710,KS,Oswego,106,False,False,False,Gas
4,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2019-07-06 12:55:00 UTC,cool,hold,741,740,740,KS,Tooeka,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
513999,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2019-07-28 12:45:00 UTC,cool,auto,718,720,717,KS,Wichita,120,True,False,True,Electric
514000,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2019-07-21 18:40:00 UTC,cool,auto,736,730,730,KS,Wichita,120,True,False,True,Electric
514001,a1b0dc41acf5beef99934faecb9e1460d7b78173,2019-07-28 19:10:00 UTC,auto,auto,751,680,630,KS,Lawrence,120,False,False,False,Gas
514002,a1b0dc41acf5beef99934faecb9e1460d7b78173,2019-07-28 19:20:00 UTC,auto,auto,751,680,630,KS,Lawrence,120,False,False,False,Gas


In [115]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2019"), ("Month", "jul")):
    jul_2019[column] = label

In [116]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jul_2019_ave = (
    jul_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2019_ave

In [118]:
# Write the July 2019 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jul_2019_ave.to_csv(
    "data/day/KS/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw July 2020 day readings for KS.
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2020-jul-day-KS.csv"
)

# jul_2020

In [120]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jul_2020['TemperatureExpectedCool']
heat = jul_2020['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2020.drop(index=jul_2020[out_of_band].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2020-07-19 12:10:00 UTC,cool,auto,737,720,720,KS,Dodge City,105,False,False,False,Gas
1,af58a413d1a41205bebf1c507761a5ab74a84725,2020-07-19 16:20:00 UTC,cool,hold,725,730,730,KS,Ellis,100,False,False,False,Gas
2,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-07-14 10:40:00 UTC,auto,hold,704,700,615,KS,Tooeka,67,False,False,False,Gas
3,c8dad0e2cdfb05dd04f8dbb5ec56efa00fb9db03,2020-07-18 19:05:00 UTC,cool,auto,833,840,620,KS,Parsons,49,False,False,False,Gas
4,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-07-12 10:45:00 UTC,auto,hold,698,700,615,KS,Tooeka,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
480669,842c02deec3009c2808be94f10e56264890a12f4,2020-07-15 18:10:00 UTC,cool,auto,736,740,660,KS,Scranton,120,False,False,False,Gas
480670,842c02deec3009c2808be94f10e56264890a12f4,2020-07-15 14:40:00 UTC,cool,auto,724,740,660,KS,Scranton,120,False,False,False,Gas
480671,842c02deec3009c2808be94f10e56264890a12f4,2020-07-28 17:50:00 UTC,cool,auto,735,740,680,KS,Scranton,120,False,False,False,Gas
480672,842c02deec3009c2808be94f10e56264890a12f4,2020-07-13 10:10:00 UTC,cool,hold,683,680,680,KS,Scranton,120,False,False,False,Gas


In [121]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2020"), ("Month", "jul")):
    jul_2020[column] = label

In [122]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jul_2020_ave = (
    jul_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2020_ave

In [124]:
# Write the July 2020 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jul_2020_ave.to_csv(
    "data/day/KS/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw July 2021 day readings for KS.
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/KS-day/2021-jul-day-KS.csv"
)

# jul_2021

In [126]:
# Drop readings whose cooling or heating setpoint is a predetermined outlier
# (at or above 850, or at or below 600) so they cannot skew the monthly means.
# NOTE(review): setpoints look like tenths of a degree F - confirm the unit.
cool = jul_2021['TemperatureExpectedCool']
heat = jul_2021['TemperatureExpectedHeat']
out_of_band = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2021.drop(index=jul_2021[out_of_band].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-07-25 19:50:00 UTC,cool,hold,688,690,690,KS,Tooeka,67,False,False,False,Gas
1,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-07-03 18:50:00 UTC,cool,hold,690,690,690,KS,Tooeka,67,False,False,False,Gas
2,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-07-10 18:45:00 UTC,cool,hold,692,690,690,KS,Tooeka,67,False,False,False,Gas
3,62a890bb783d07af9497a160b6a7f4ae41579c67,2021-07-18 12:05:00 UTC,cool,hold,720,710,710,KS,Murdock,118,False,False,False,Gas
4,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2021-07-29 11:00:00 UTC,cool,hold,689,690,690,KS,Tooeka,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295917,842c02deec3009c2808be94f10e56264890a12f4,2021-07-15 13:25:00 UTC,cool,hold,702,740,740,KS,Scranton,120,False,False,False,Gas
295918,842c02deec3009c2808be94f10e56264890a12f4,2021-07-07 15:45:00 UTC,cool,hold,727,760,760,KS,Scranton,120,False,False,False,Gas
295919,842c02deec3009c2808be94f10e56264890a12f4,2021-07-15 12:40:00 UTC,cool,hold,692,740,740,KS,Scranton,120,False,False,False,Gas
295920,842c02deec3009c2808be94f10e56264890a12f4,2021-07-07 15:10:00 UTC,cool,hold,720,760,760,KS,Scranton,120,False,False,False,Gas


In [127]:
# Stamp every row with its period; both labels are grouping keys later on.
for column, label in (("Year", "2021"), ("Month", "jul")):
    jul_2021[column] = label

In [128]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so
# the averaged output is already labelled as such.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Mean of every numeric column per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is spelled out because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases silently dropped them.
jul_2021_ave = (
    jul_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jul_2021_ave

In [130]:
# Write the July 2021 averages to the month folder. The grouping keys
# live in the index of the averaged frame, so the index is written too.
jul_2021_ave.to_csv(
    "data/day/KS/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files saved above for this month
2. Concatenate them and export a single combined CSV for the state

In [131]:
# List this month's per-year average files. os.listdir returns names in
# arbitrary order, so sort them to keep the combined rows in the same
# order on every run.
files = sorted(f for f in os.listdir("data/day/KS/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file first and concatenate once: growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each pass.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/KS/jul/" + file)
    yearly_frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the original
# "no files -> empty frame" result.
KS_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

KS_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,jul,2017,cool,auto,Fairway,769.333333,741.083333,690.000000,80.0,False,False,False
1,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,jul,2017,cool,hold,Fairway,749.854839,738.274194,736.677419,80.0,False,False,False
2,01a0d7a563a1755d37bb856a01a56e452deb2010,jul,2017,auto,hold,Olathe,755.055806,754.794830,690.000000,30.0,False,False,False
3,025ec7c58b524de74d8986f4896917c31052701a,jul,2017,cool,hold,Overland Park,725.541578,724.058991,719.939588,5.0,False,False,True
4,050b44b61d052cae8d52e817cc0f2bb78bb91535,jul,2017,cool,auto,Overland Park,731.580645,726.903226,679.677419,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
381,fc006352113dcc3ae14023c9e52f72483f64b65e,jul,2021,auto,hold,Arkansas City,730.883333,730.100000,669.933333,100.0,True,False,False
382,fdf82bf64cb1a67c28f91aa7246724223df57851,jul,2021,auto,hold,Atchison,694.779134,690.724832,620.043319,40.0,False,False,False
383,fe779904e8eede7395997bd2762cd654983d363a,jul,2021,cool,hold,Andover,745.988889,738.000000,738.000000,0.0,False,False,False
384,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,jul,2021,cool,hold,Eastborough,701.937838,698.797297,698.797297,90.0,False,False,False


In [133]:
# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("Scraper_Output/State_Month_Day/KS", exist_ok=True)
# index=False: the per-file row numbers carried over by the concat are not written
KS_jul.to_csv("Scraper_Output/State_Month_Day/KS/KS_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 "day" CSV for KS
aug_2017 = pd.read_csv(Path("../data_large/KS-day") / "2017-aug-day-KS.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = aug_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2017.drop(index=aug_2017[is_outlier].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-08-23 14:05:00 UTC,cool,hold,736,750,750,KS,Paola,117,False,False,False,Gas
1,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-08-29 19:30:00 UTC,cool,auto,745,750,680,KS,Oswego,106,False,False,False,Gas
2,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-08-22 09:40:00 UTC,cool,hold,754,750,750,KS,Paola,117,False,False,False,Gas
3,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-08-18 18:30:00 UTC,cool,hold,756,770,770,KS,Paola,117,False,False,False,Gas
4,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-08-14 13:35:00 UTC,cool,hold,752,750,750,KS,Paola,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201029,b17eb28878c2d7b571d48d9558d8145afaa4757a,2017-08-27 16:00:00 UTC,auto,hold,749,750,690,KS,Olathe,20,False,False,False,Gas
201030,b17eb28878c2d7b571d48d9558d8145afaa4757a,2017-08-10 19:00:00 UTC,auto,auto,750,750,690,KS,Olathe,20,False,False,False,Gas
201031,b17eb28878c2d7b571d48d9558d8145afaa4757a,2017-08-29 15:20:00 UTC,auto,hold,737,750,690,KS,Olathe,20,False,False,False,Gas
201032,b17eb28878c2d7b571d48d9558d8145afaa4757a,2017-08-12 13:50:00 UTC,auto,auto,737,750,690,KS,Olathe,20,False,False,False,Gas


In [136]:
# Tag each row with its year and month labels (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Mark the three temperature columns as averages ahead of the groupby mean
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/aug", exist_ok=True)
aug_2017_ave.to_csv("data/day/KS/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Load the August 2018 "day" CSV for KS
aug_2018 = pd.read_csv(Path("../data_large/KS-day") / "2018-aug-day-KS.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = aug_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2018.drop(index=aug_2018[is_outlier].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f4ca0722190325549f736870c8f8b5a4560c0708,2018-08-18 17:30:00 UTC,cool,auto,702,700,690,KS,Conway Springs,118,True,True,True,Electric
1,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2018-08-19 18:55:00 UTC,cool,hold,763,760,760,KS,Eastborough,79,True,False,False,Gas
2,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2018-08-28 16:25:00 UTC,cool,hold,735,730,730,KS,Dodge City,105,False,False,False,Gas
3,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2018-08-13 11:00:00 UTC,cool,hold,737,740,740,KS,Dodge City,105,False,False,False,Gas
4,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2018-08-20 11:35:00 UTC,cool,hold,742,750,750,KS,Dodge City,105,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
371706,842c02deec3009c2808be94f10e56264890a12f4,2018-08-07 10:15:00 UTC,cool,hold,701,700,700,KS,Scranton,120,False,False,False,Gas
371707,842c02deec3009c2808be94f10e56264890a12f4,2018-08-22 19:05:00 UTC,cool,hold,733,770,770,KS,Scranton,120,False,False,False,Gas
371708,842c02deec3009c2808be94f10e56264890a12f4,2018-08-07 08:15:00 UTC,cool,hold,683,700,700,KS,Scranton,120,False,False,False,Gas
371709,842c02deec3009c2808be94f10e56264890a12f4,2018-08-20 16:50:00 UTC,cool,auto,735,780,780,KS,Scranton,120,False,False,False,Gas


In [142]:
# Tag each row with its year and month labels (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Mark the three temperature columns as averages ahead of the groupby mean
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/aug", exist_ok=True)
aug_2018_ave.to_csv("data/day/KS/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Load the August 2019 "day" CSV for KS
aug_2019 = pd.read_csv(Path("../data_large/KS-day") / "2019-aug-day-KS.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = aug_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2019.drop(index=aug_2019[is_outlier].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fc006352113dcc3ae14023c9e52f72483f64b65e,2019-08-24 15:25:00 UTC,auto,hold,732,733,644,KS,Arkansas City,100,True,False,False,Gas
1,7f9688a0b01344c1f2804913546cad73007d0cf4,2019-08-09 15:20:00 UTC,cool,auto,731,730,730,KS,Oswego,106,False,False,False,Gas
2,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2019-08-31 15:15:00 UTC,cool,hold,686,690,690,KS,Tooeka,67,False,False,False,Gas
3,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2019-08-12 16:50:00 UTC,cool,hold,750,750,750,KS,Eastborough,79,True,False,False,Gas
4,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2019-08-12 16:10:00 UTC,cool,hold,756,750,750,KS,Eastborough,79,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
490024,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2019-08-24 18:25:00 UTC,cool,auto,726,720,693,KS,Wichita,120,True,False,True,Electric
490025,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2019-08-25 11:05:00 UTC,cool,auto,729,720,693,KS,Wichita,120,True,False,True,Electric
490026,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2019-08-25 17:20:00 UTC,cool,auto,717,710,710,KS,Wichita,120,True,False,True,Electric
490027,7aa978dfdfd62ddb38b4a3cbb14f2b26ae1a026c,2019-08-25 09:30:00 UTC,cool,auto,720,720,693,KS,Wichita,120,True,False,True,Electric


In [148]:
# Tag each row with its year and month labels (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Mark the three temperature columns as averages ahead of the groupby mean
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/aug", exist_ok=True)
aug_2019_ave.to_csv("data/day/KS/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Load the August 2020 "day" CSV for KS
aug_2020 = pd.read_csv(Path("../data_large/KS-day") / "2020-aug-day-KS.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = aug_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2020.drop(index=aug_2020[is_outlier].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fc006352113dcc3ae14023c9e52f72483f64b65e,2020-08-15 19:55:00 UTC,auto,hold,738,732,672,KS,Arkansas City,100,True,False,False,Gas
1,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-08-28 17:25:00 UTC,cool,hold,696,695,695,KS,Tooeka,67,False,False,False,Gas
2,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-08-07 14:35:00 UTC,cool,auto,696,690,650,KS,Tooeka,67,False,False,False,Gas
3,7bf217daebd9144ce543a1754527180e81166ccf,2020-08-12 13:00:00 UTC,auto,auto,730,730,650,KS,El Dorado,67,False,False,False,Gas
4,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2020-08-15 14:50:00 UTC,cool,hold,703,700,700,KS,Tooeka,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
463907,842c02deec3009c2808be94f10e56264890a12f4,2020-08-31 09:25:00 UTC,cool,auto,681,680,660,KS,Scranton,120,False,False,False,Gas
463908,842c02deec3009c2808be94f10e56264890a12f4,2020-08-25 07:15:00 UTC,cool,hold,681,666,666,KS,Scranton,120,False,False,False,Gas
463909,842c02deec3009c2808be94f10e56264890a12f4,2020-08-24 15:45:00 UTC,cool,auto,735,740,660,KS,Scranton,120,False,False,False,Gas
463910,842c02deec3009c2808be94f10e56264890a12f4,2020-08-31 12:35:00 UTC,cool,auto,702,740,660,KS,Scranton,120,False,False,False,Gas


In [154]:
# Tag each row with its year and month labels (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Mark the three temperature columns as averages ahead of the groupby mean
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/aug", exist_ok=True)
aug_2020_ave.to_csv("data/day/KS/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files saved for this month
2. Export as combined CSV

In [158]:
# Create variable for files in directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/KS/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once: calling pd.concat inside
# the loop re-copies all previously accumulated rows on each iteration.
frames = [pd.read_csv("data/day/KS/aug/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
KS_aug = pd.concat(frames) if frames else pd.DataFrame()

KS_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,aug,2017,cool,hold,Fairway,759.714286,746.342857,745.914286,80.0,False,False,False
1,01a0d7a563a1755d37bb856a01a56e452deb2010,aug,2017,auto,hold,Olathe,753.352660,761.052565,671.215992,30.0,False,False,False
2,025ec7c58b524de74d8986f4896917c31052701a,aug,2017,cool,hold,Overland Park,730.612368,740.000000,720.000000,5.0,False,False,True
3,0448b121ebca96144ab5e5feb5ab88942e98de42,aug,2017,cool,hold,Olathe,733.516129,742.516129,741.741935,10.0,False,False,False
4,050b44b61d052cae8d52e817cc0f2bb78bb91535,aug,2017,auto,auto,Overland Park,728.400000,749.200000,680.000000,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
684,fdf82bf64cb1a67c28f91aa7246724223df57851,aug,2020,auto,auto,Atchison,700.440442,699.397290,621.079529,40.0,False,False,False
685,fdf82bf64cb1a67c28f91aa7246724223df57851,aug,2020,auto,hold,Atchison,705.173611,699.565972,627.795139,40.0,False,False,False
686,fe779904e8eede7395997bd2762cd654983d363a,aug,2020,cool,hold,Andover,762.723881,780.000000,780.000000,0.0,False,False,False
687,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,aug,2020,cool,auto,Eastborough,702.521368,692.076923,692.076923,90.0,False,False,False


In [160]:
# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("Scraper_Output/State_Month_Day/KS", exist_ok=True)
# index=False: the per-file row numbers carried over by the concat are not written
KS_aug.to_csv("Scraper_Output/State_Month_Day/KS/KS_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 "day" CSV for KS
dec_2017 = pd.read_csv(Path("../data_large/KS-day") / "2017-dec-day-KS.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = dec_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2017.drop(index=dec_2017[is_outlier].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,beb1c75205a06d7650da1b7f6c7784ebd2cd0322,2017-12-25 14:15:00 UTC,heat,hold,695,700,700,KS,Eastborough,79,True,False,False,Gas
1,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2017-12-31 17:35:00 UTC,heat,hold,617,680,680,KS,Tooeka,67,False,False,False,Gas
2,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-12-14 18:35:00 UTC,heat,hold,756,760,760,KS,Oswego,106,False,False,False,Gas
3,7f9688a0b01344c1f2804913546cad73007d0cf4,2017-12-13 18:10:00 UTC,heat,hold,779,780,780,KS,Oswego,106,False,False,False,Gas
5,6348b0d66bdaf7ddf0f2dbec20785a694ac00335,2017-12-26 11:00:00 UTC,heat,hold,628,650,635,KS,Paola,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260796,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-12-31 15:20:00 UTC,auto,auto,612,730,619,KS,Lawrence,120,False,False,False,Gas
260797,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-12-31 15:15:00 UTC,auto,auto,609,730,619,KS,Lawrence,120,False,False,False,Gas
260798,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-12-17 19:15:00 UTC,auto,auto,683,820,620,KS,Lawrence,120,False,False,False,Gas
260799,a1b0dc41acf5beef99934faecb9e1460d7b78173,2017-12-17 18:55:00 UTC,auto,auto,691,820,620,KS,Lawrence,120,False,False,False,Gas


In [163]:
# Tag each row with its year and month labels (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Mark the three temperature columns as averages ahead of the groupby mean
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/dec", exist_ok=True)
dec_2017_ave.to_csv("data/day/KS/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Load the December 2018 "day" CSV for KS
dec_2018 = pd.read_csv(Path("../data_large/KS-day") / "2018-dec-day-KS.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = dec_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2018.drop(index=dec_2018[is_outlier].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,62a890bb783d07af9497a160b6a7f4ae41579c67,2018-12-26 17:10:00 UTC,heat,hold,628,650,630,KS,Murdock,118,False,False,False,Gas
1,62a890bb783d07af9497a160b6a7f4ae41579c67,2018-12-13 18:30:00 UTC,heat,hold,676,680,680,KS,Murdock,118,False,False,False,Gas
2,62a890bb783d07af9497a160b6a7f4ae41579c67,2018-12-12 16:00:00 UTC,heat,hold,677,680,680,KS,Murdock,118,False,False,False,Gas
3,f4ca0722190325549f736870c8f8b5a4560c0708,2018-12-23 16:05:00 UTC,heat,auto,693,730,700,KS,Conway Springs,118,True,True,True,Electric
4,62a890bb783d07af9497a160b6a7f4ae41579c67,2018-12-09 15:40:00 UTC,heat,hold,652,670,670,KS,Murdock,118,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
400642,842c02deec3009c2808be94f10e56264890a12f4,2018-12-13 15:50:00 UTC,heat,hold,691,680,680,KS,Scranton,120,False,False,False,Gas
400643,842c02deec3009c2808be94f10e56264890a12f4,2018-12-24 11:20:00 UTC,heat,hold,668,660,660,KS,Scranton,120,False,False,False,Gas
400644,842c02deec3009c2808be94f10e56264890a12f4,2018-12-24 10:00:00 UTC,heat,auto,676,690,680,KS,Scranton,120,False,False,False,Gas
400645,842c02deec3009c2808be94f10e56264890a12f4,2018-12-03 11:55:00 UTC,heat,hold,658,660,669,KS,Scranton,120,False,False,False,Gas


In [169]:
# Tag each row with its year and month labels (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Mark the three temperature columns as averages ahead of the groupby mean
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/dec", exist_ok=True)
dec_2018_ave.to_csv("data/day/KS/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Load the December 2019 "day" CSV for KS
dec_2019 = pd.read_csv(Path("../data_large/KS-day") / "2019-dec-day-KS.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = dec_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2019.drop(index=dec_2019[is_outlier].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,781ec8442b037999e55ca700ac584e460a0304d6,2019-12-13 13:15:00 UTC,heat,hold,622,678,637,KS,Salina,67,False,False,False,Gas
1,7f9688a0b01344c1f2804913546cad73007d0cf4,2019-12-11 18:15:00 UTC,heat,auto,722,720,720,KS,Oswego,106,False,False,False,Gas
2,fc006352113dcc3ae14023c9e52f72483f64b65e,2019-12-07 16:15:00 UTC,auto,hold,682,732,682,KS,Arkansas City,100,True,False,False,Gas
3,f7a1ab91ae0f0bad91b2b3dcb13d4fffbeaeb9b5,2019-12-28 17:05:00 UTC,heat,hold,665,670,670,KS,Tooeka,67,False,False,False,Gas
4,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2019-12-16 16:05:00 UTC,heat,auto,657,660,660,KS,Dodge City,105,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
449661,842c02deec3009c2808be94f10e56264890a12f4,2019-12-24 18:10:00 UTC,heat,auto,678,680,680,KS,Scranton,120,False,False,False,Gas
449662,842c02deec3009c2808be94f10e56264890a12f4,2019-12-31 13:30:00 UTC,heat,hold,696,710,710,KS,Scranton,120,False,False,False,Gas
449663,842c02deec3009c2808be94f10e56264890a12f4,2019-12-03 10:25:00 UTC,heat,hold,658,660,660,KS,Scranton,120,False,False,False,Gas
449664,842c02deec3009c2808be94f10e56264890a12f4,2019-12-05 08:00:00 UTC,heat,hold,664,650,650,KS,Scranton,120,False,False,False,Gas


In [175]:
# Tag each row with its year and month labels (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Mark the three temperature columns as averages ahead of the groupby mean
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/dec", exist_ok=True)
dec_2019_ave.to_csv("data/day/KS/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Load the December 2020 "day" CSV for KS
dec_2020 = pd.read_csv(Path("../data_large/KS-day") / "2020-dec-day-KS.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
expected_temps = dec_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2020.drop(index=dec_2020[is_outlier].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e73d2071803a834c945f4eedd70daf1ac7d08fea,2020-12-20 17:15:00 UTC,heat,auto,705,710,710,KS,Paola,99,False,False,False,Gas
1,62a890bb783d07af9497a160b6a7f4ae41579c67,2020-12-26 18:45:00 UTC,auto,hold,646,750,650,KS,Murdock,118,False,False,False,Gas
2,db4a1cf416aa978037224279ee7abb5a7f3e57f1,2020-12-08 18:05:00 UTC,auto,auto,670,750,670,KS,Dodge City,105,False,False,False,Gas
3,7f9688a0b01344c1f2804913546cad73007d0cf4,2020-12-20 17:20:00 UTC,heat,auto,734,730,730,KS,Oswego,106,False,False,False,Gas
4,7bf217daebd9144ce543a1754527180e81166ccf,2020-12-06 12:40:00 UTC,auto,auto,713,800,720,KS,El Dorado,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429117,842c02deec3009c2808be94f10e56264890a12f4,2020-12-02 11:50:00 UTC,heat,auto,669,700,680,KS,Scranton,120,False,False,False,Gas
429118,842c02deec3009c2808be94f10e56264890a12f4,2020-12-08 10:30:00 UTC,heat,auto,663,650,650,KS,Scranton,120,False,False,False,Gas
429119,842c02deec3009c2808be94f10e56264890a12f4,2020-12-07 14:10:00 UTC,heat,hold,698,680,680,KS,Scranton,120,False,False,False,Gas
429120,842c02deec3009c2808be94f10e56264890a12f4,2020-12-07 16:35:00 UTC,heat,hold,677,680,680,KS,Scranton,120,False,False,False,Gas


In [181]:
# Tag each row with its year and month labels (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Mark the three temperature columns as averages ahead of the groupby mean
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (e.g. date_time, ProvinceState) explicitly;
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file; index=True writes the group keys as the leading columns

# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("data/day/KS/dec", exist_ok=True)
dec_2020_ave.to_csv("data/day/KS/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files saved for this month
2. Export as combined CSV

In [185]:
# Create variable for files in directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/KS/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once: calling pd.concat inside
# the loop re-copies all previously accumulated rows on each iteration.
frames = [pd.read_csv("data/day/KS/dec/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
KS_dec = pd.concat(frames) if frames else pd.DataFrame()

KS_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,dec,2017,auto,auto,Fairway,698.909091,760.000000,700.000000,80.0,False,False,False
1,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,dec,2017,heat,auto,Fairway,631.895833,820.000000,640.000000,80.0,False,False,False
2,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,dec,2017,heat,hold,Fairway,699.307692,700.923077,700.000000,80.0,False,False,False
3,01a0d7a563a1755d37bb856a01a56e452deb2010,dec,2017,auto,hold,Olathe,694.353659,798.077475,693.259326,30.0,False,False,False
4,025ec7c58b524de74d8986f4896917c31052701a,dec,2017,heat,hold,Overland Park,680.000000,680.000000,680.000000,5.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
691,fe779904e8eede7395997bd2762cd654983d363a,dec,2020,heat,hold,Andover,753.827957,750.473118,750.473118,0.0,False,False,False
692,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,dec,2020,auto,auto,Eastborough,686.310160,736.684492,686.684492,90.0,False,False,False
693,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,dec,2020,auto,hold,Eastborough,704.208804,761.207675,709.655756,90.0,False,False,False
694,ff0c14deff346a8a3dc87a89fb1ab6632be73334,dec,2020,heat,hold,Tonganoxie,676.125000,680.000000,680.000000,10.0,False,False,False


In [187]:
# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("Scraper_Output/State_Month_Day/KS", exist_ok=True)
# index=False: the per-file row numbers carried over by the concat are not written
KS_dec.to_csv("Scraper_Output/State_Month_Day/KS/KS_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files saved for the state
2. Export as combined CSV

In [188]:
# Create variable for files in directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/KS/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file first, then concatenate once: calling pd.concat inside
# the loop re-copies all previously accumulated rows on each iteration.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/KS/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
KS_all = pd.concat(frames) if frames else pd.DataFrame()

KS_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00d2d0e5e17f6dc9a19aa132916fdc6ddacb983e,aug,2017,cool,hold,Fairway,759.714286,746.342857,745.914286,80.0,False,False,False
1,01a0d7a563a1755d37bb856a01a56e452deb2010,aug,2017,auto,hold,Olathe,753.352660,761.052565,671.215992,30.0,False,False,False
2,025ec7c58b524de74d8986f4896917c31052701a,aug,2017,cool,hold,Overland Park,730.612368,740.000000,720.000000,5.0,False,False,True
3,0448b121ebca96144ab5e5feb5ab88942e98de42,aug,2017,cool,hold,Olathe,733.516129,742.516129,741.741935,10.0,False,False,False
4,050b44b61d052cae8d52e817cc0f2bb78bb91535,aug,2017,auto,auto,Overland Park,728.400000,749.200000,680.000000,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3019,fc006352113dcc3ae14023c9e52f72483f64b65e,jun,2021,auto,hold,Arkansas City,732.458333,730.000000,670.000000,100.0,True,False,False
3020,fdf82bf64cb1a67c28f91aa7246724223df57851,jun,2021,auto,hold,Atchison,705.988506,730.689655,680.517241,40.0,False,False,False
3021,fe779904e8eede7395997bd2762cd654983d363a,jun,2021,cool,hold,Andover,752.840278,748.888889,748.888889,0.0,False,False,False
3022,fe84f839012ce8d6db47aa28b3b5d7bafb8ece60,jun,2021,cool,hold,Eastborough,702.469314,696.205776,696.054152,90.0,False,False,False


In [190]:
# Make sure the target folder exists so the export also works on a fresh checkout
os.makedirs("Scraper_Output/State_Month_Day", exist_ok=True)
# index=False: the per-file row numbers carried over by the concat are not written
KS_all.to_csv("Scraper_Output/State_Month_Day/KS_all_day.csv", header=True, index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in every monthly frame.

frames_to_check = [
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
]

# One line per frame, in the same order as the sections above
for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['KS']
Unique jan_2018: ['KS']
Unique jan_2019: ['KS']
Unique jan_2020: ['KS']
Unique jan_2021: ['KS']
Unique feb_2017: ['KS']
Unique feb_2018: ['KS']
Unique feb_2019: ['KS']
Unique feb_2020: ['KS']
Unique feb_2021: ['KS']
Unique jun_2017: ['KS']
Unique jun_2018: ['KS']
Unique jun_2019: ['KS']
Unique jun_2020: ['KS']
Unique jun_2021: ['KS']
Unique jul_2017: ['KS']
Unique jul_2018: ['KS']
Unique jul_2019: ['KS']
Unique jul_2020: ['KS']
Unique jul_2021: ['KS']
Unique aug_2017: ['KS']
Unique aug_2018: ['KS']
Unique aug_2019: ['KS']
Unique aug_2020: ['KS']
Unique dec_2017: ['KS']
Unique dec_2018: ['KS']
Unique dec_2019: ['KS']
Unique dec_2020: ['KS']
