# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" CSV for KY.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2017-jan-day-KY.csv")

In [3]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2017.drop(
    index=jan_2017.index[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,96a4ef74ea4ae0b41c2902ac436f65ef8260bc24,2017-01-07 16:55:00 UTC,heat,auto,605,780,620,KY,Prospect,46,True,False,True,Electric
1,96a4ef74ea4ae0b41c2902ac436f65ef8260bc24,2017-01-23 15:20:00 UTC,heat,hold,673,730,640,KY,Prospect,46,True,False,True,Electric
2,96a4ef74ea4ae0b41c2902ac436f65ef8260bc24,2017-01-06 16:20:00 UTC,heat,auto,628,730,640,KY,Prospect,46,True,False,True,Electric
3,96a4ef74ea4ae0b41c2902ac436f65ef8260bc24,2017-01-11 17:50:00 UTC,heat,auto,633,730,640,KY,Prospect,46,True,False,True,Electric
4,96a4ef74ea4ae0b41c2902ac436f65ef8260bc24,2017-01-06 19:40:00 UTC,heat,auto,630,730,640,KY,Prospect,46,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113212,ed058081234a88c2d304bb87c8c6a24895c46ec0,2017-01-15 14:40:00 UTC,auto,auto,689,807,653,KY,Nicholasville,10,True,False,True,Electric
113213,ed058081234a88c2d304bb87c8c6a24895c46ec0,2017-01-10 18:25:00 UTC,auto,auto,680,730,670,KY,Nicholasville,10,True,False,True,Electric
113214,ed058081234a88c2d304bb87c8c6a24895c46ec0,2017-01-12 18:20:00 UTC,auto,auto,700,740,690,KY,Nicholasville,10,True,False,True,Electric
113215,ed058081234a88c2d304bb87c8c6a24895c46ec0,2017-01-10 19:00:00 UTC,auto,auto,673,730,670,KY,Nicholasville,10,True,False,True,Electric


In [4]:
# Tag every row with its year and month (both stored as strings).
jan_2017["Year"], jan_2017["Month"] = "2017", "Jan"

In [5]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
008cfa92481086d7def321eeabb85c0d3eb846de,Jan,2017,heat,auto,Campbellsville,690.518519,707.222222,695.703704,5.0,True,False,True
020ded524bca03fd8503e9676cbd617ae62f12d4,Jan,2017,auto,auto,Louisville,703.571429,740.250000,710.178571,15.0,False,False,False
020ded524bca03fd8503e9676cbd617ae62f12d4,Jan,2017,auto,hold,Louisville,696.992366,741.167939,693.796438,15.0,False,False,False
0475b61636d4d1ba679fd87d374f57927081105b,Jan,2017,heat,auto,Independence,673.257713,771.941924,682.733212,20.0,False,False,False
0475b61636d4d1ba679fd87d374f57927081105b,Jan,2017,heat,hold,Independence,706.769231,750.000000,748.230769,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fb40300d76c790bc649a1c76c50cc653a033bdf2,Jan,2017,cool,auto,Lexington,647.333333,723.333333,659.333333,15.0,False,False,True
fbc6ca24b330ad08582428e1e6c4b41ada6665ba,Jan,2017,heat,auto,Glasgow,703.083333,697.509259,697.268519,60.0,False,False,False
fbc6ca24b330ad08582428e1e6c4b41ada6665ba,Jan,2017,heat,hold,Glasgow,723.048866,723.361838,723.311809,60.0,False,False,False
ff1640c73f967b557d8c64dff6f8139a85e71d38,Jan,2017,heat,auto,Georgetown,715.790960,739.531073,726.785311,15.0,True,False,True


In [7]:
# Save the January 2017 averages; index=True keeps the group keys, which live
# in the index after the groupby.
jan_2017_ave.to_csv(
    path_or_buf="data/day/KY/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the 2018 January "day" CSV for KY.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2018-jan-day-KY.csv")

In [9]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2018.drop(
    index=jan_2018.index[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-01-27 15:40:00 UTC,auto,hold,675,760,680,KY,Crestview Hills,38,True,False,True,Electric
1,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2018-01-03 10:20:00 UTC,heat,auto,677,680,680,KY,Mt. Washington,47,True,False,True,Electric
3,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2018-01-06 11:10:00 UTC,auto,auto,691,745,695,KY,Goshen,36,False,False,True,Electric
4,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-01-25 17:25:00 UTC,auto,hold,680,760,680,KY,Crestview Hills,38,True,False,True,Electric
5,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-01-28 14:05:00 UTC,auto,hold,658,760,660,KY,Crestview Hills,38,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300280,628b89c27b7633e93a2abe430b726e3018266a4a,2018-01-08 10:40:00 UTC,heat,hold,651,670,670,KY,Georgetown,120,False,False,True,Electric
300281,628b89c27b7633e93a2abe430b726e3018266a4a,2018-01-10 17:15:00 UTC,heat,hold,635,650,630,KY,Georgetown,120,False,False,True,Electric
300283,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2018-01-02 11:20:00 UTC,heat,hold,644,650,640,KY,Georgetown,120,True,False,True,Electric
300284,628b89c27b7633e93a2abe430b726e3018266a4a,2018-01-08 11:40:00 UTC,heat,hold,662,670,670,KY,Georgetown,120,False,False,True,Electric


In [10]:
# Tag every row with its year and month (both stored as strings).
jan_2018["Year"], jan_2018["Month"] = "2018", "Jan"


In [11]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Save the January 2018 averages; index=True keeps the group keys, which live
# in the index after the groupby.
jan_2018_ave.to_csv(
    path_or_buf="data/day/KY/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the 2019 January "day" CSV for KY.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2019-jan-day-KY.csv")

In [15]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2019.drop(
    index=jan_2019.index[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3d0e9a4e04c2fee543eef7b52013cf815ae50345,2019-01-05 18:55:00 UTC,auto,hold,694,770,700,KY,Wilmore,118,True,False,True,Electric
1,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-01-20 16:30:00 UTC,heat,auto,695,690,690,KY,Vine Grove,28,True,False,True,Electric
2,a85176f7816955f77859fb3fbc12843017170f61,2019-01-14 12:15:00 UTC,auto,hold,696,770,700,KY,Newport,110,False,False,False,Gas
3,a85176f7816955f77859fb3fbc12843017170f61,2019-01-27 14:55:00 UTC,auto,hold,719,790,710,KY,Newport,110,False,False,False,Gas
4,00105867bef9463f0a62d5257d0e91b8c8d19dee,2019-01-26 16:45:00 UTC,heat,hold,645,650,640,KY,Southgate,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
484766,628b89c27b7633e93a2abe430b726e3018266a4a,2019-01-01 15:15:00 UTC,heat,hold,686,690,690,KY,Georgetown,120,False,False,True,Electric
484767,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2019-01-30 11:05:00 UTC,heat,hold,673,670,670,KY,Georgetown,120,True,False,True,Electric
484768,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2019-01-24 16:40:00 UTC,heat,hold,693,700,700,KY,Georgetown,120,True,False,True,Electric
484769,628b89c27b7633e93a2abe430b726e3018266a4a,2019-01-24 15:35:00 UTC,heat,hold,683,700,700,KY,Georgetown,120,False,False,True,Electric


In [16]:
# Tag every row with its year and month (both stored as strings).
jan_2019["Year"], jan_2019["Month"] = "2019", "Jan"


In [17]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Save the January 2019 averages; index=True keeps the group keys, which live
# in the index after the groupby.
jan_2019_ave.to_csv(
    path_or_buf="data/day/KY/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the 2020 January "day" CSV for KY.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2020-jan-day-KY.csv")

In [21]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2020.drop(
    index=jan_2020.index[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a85176f7816955f77859fb3fbc12843017170f61,2020-01-25 14:25:00 UTC,heat,auto,684,700,700,KY,Newport,110,False,False,False,Gas
1,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-01-18 14:50:00 UTC,heat,hold,685,700,700,KY,Vine Grove,28,True,False,True,Electric
2,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-01-19 18:00:00 UTC,heat,auto,687,680,680,KY,Vine Grove,28,True,False,True,Electric
3,4f4ffa6020a9cb61a4b227fc6a0768e8fa5e8d22,2020-01-02 19:10:00 UTC,heat,auto,681,680,680,KY,Carrollton,19,False,False,False,Gas
4,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-01-18 16:05:00 UTC,heat,hold,698,700,700,KY,Vine Grove,28,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543087,628b89c27b7633e93a2abe430b726e3018266a4a,2020-01-07 12:40:00 UTC,heat,hold,665,670,670,KY,Georgetown,120,False,False,True,Electric
543088,628b89c27b7633e93a2abe430b726e3018266a4a,2020-01-17 13:15:00 UTC,heat,hold,672,680,680,KY,Georgetown,120,False,False,True,Electric
543089,628b89c27b7633e93a2abe430b726e3018266a4a,2020-01-20 19:50:00 UTC,heat,hold,686,700,700,KY,Georgetown,120,False,False,True,Electric
543090,628b89c27b7633e93a2abe430b726e3018266a4a,2020-01-09 17:15:00 UTC,heat,hold,698,700,700,KY,Georgetown,120,False,False,True,Electric


In [22]:
# Tag every row with its year and month (both stored as strings).
jan_2020["Year"], jan_2020["Month"] = "2020", "Jan"


In [23]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Save the January 2020 averages; index=True keeps the group keys, which live
# in the index after the groupby.
jan_2020_ave.to_csv(
    path_or_buf="data/day/KY/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the 2021 January "day" CSV for KY.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2021-jan-day-KY.csv")

In [27]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2021.drop(
    index=jan_2021.index[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2021-01-26 19:15:00 UTC,heat,hold,683,680,680,KY,Goshen,36,False,False,True,Electric
1,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2021-01-22 19:10:00 UTC,heat,hold,677,680,680,KY,Goshen,36,False,False,True,Electric
2,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2021-01-02 17:00:00 UTC,heat,hold,682,680,680,KY,Highlands,100,False,False,False,Gas
3,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2021-01-20 18:45:00 UTC,heat,hold,690,695,695,KY,Mount Washington,39,False,False,True,Electric
4,00105867bef9463f0a62d5257d0e91b8c8d19dee,2021-01-17 14:50:00 UTC,heat,hold,675,700,700,KY,Southgate,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309051,628b89c27b7633e93a2abe430b726e3018266a4a,2021-01-17 18:35:00 UTC,heat,hold,703,710,710,KY,Georgetown,120,False,False,True,Electric
309052,628b89c27b7633e93a2abe430b726e3018266a4a,2021-01-25 16:00:00 UTC,heat,hold,698,700,700,KY,Georgetown,120,False,False,True,Electric
309053,628b89c27b7633e93a2abe430b726e3018266a4a,2021-01-16 18:15:00 UTC,heat,hold,707,710,710,KY,Georgetown,120,False,False,True,Electric
309054,628b89c27b7633e93a2abe430b726e3018266a4a,2021-01-13 16:50:00 UTC,heat,hold,692,690,690,KY,Georgetown,120,False,False,True,Electric


In [28]:
# Tag every row with its year and month (both stored as strings).
jan_2021["Year"], jan_2021["Month"] = "2021", "Jan"


In [29]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Save the January 2021 averages; index=True keeps the group keys, which live
# in the index after the groupby.
jan_2021_ave.to_csv(
    path_or_buf="data/day/KY/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the per-year January CSV files.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/KY/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year January file, then stack them with a single concat.
# Concatenating once avoids re-copying the accumulated frame on each iteration
# and avoids seeding the result with an empty DataFrame, whose effect on the
# result dtypes is deprecated in recent pandas.
jan_frames = [pd.read_csv("data/day/KY/jan/" + file) for file in files]

# pd.concat raises on an empty list; fall back to an empty frame so the result
# matches what the loop-based version produced when no CSV files were found.
KY_jan = pd.concat(jan_frames) if jan_frames else pd.DataFrame()

KY_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,008cfa92481086d7def321eeabb85c0d3eb846de,Jan,2017,heat,auto,Campbellsville,690.518519,707.222222,695.703704,5.0,True,False,True
1,020ded524bca03fd8503e9676cbd617ae62f12d4,Jan,2017,auto,auto,Louisville,703.571429,740.250000,710.178571,15.0,False,False,False
2,020ded524bca03fd8503e9676cbd617ae62f12d4,Jan,2017,auto,hold,Louisville,696.992366,741.167939,693.796438,15.0,False,False,False
3,0475b61636d4d1ba679fd87d374f57927081105b,Jan,2017,heat,auto,Independence,673.257713,771.941924,682.733212,20.0,False,False,False
4,0475b61636d4d1ba679fd87d374f57927081105b,Jan,2017,heat,hold,Independence,706.769231,750.000000,748.230769,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
369,fbc6ca24b330ad08582428e1e6c4b41ada6665ba,Jan,2021,heat,hold,Glasgow,725.780488,736.926829,734.585366,60.0,False,False,False
370,fbdfee9610a20af314ae084b9e6b9f48c07e0fc2,Jan,2021,heat,hold,Louisville,697.044994,699.695817,699.695817,45.0,False,False,False
371,fca3aecc617d455ff79c8654219bbc7628348f90,Jan,2021,heat,hold,Whitley City,710.460000,716.260000,715.580000,0.0,True,False,False
372,fd49781b1b98414ed0495e079dd5435a549a7a86,Jan,2021,heat,hold,Alexandria,685.934104,690.113873,690.113873,5.0,False,False,True


In [34]:
# Write the combined January file without the row index.
KY_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/KY/KY_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" CSV for KY.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2017-feb-day-KY.csv")

In [36]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2017.drop(
    index=feb_2017.index[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2017-02-24 11:15:00 UTC,auto,auto,683,715,665,KY,Goshen,36,False,False,True,Electric
1,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2017-02-23 11:30:00 UTC,auto,auto,680,715,665,KY,Goshen,36,False,False,True,Electric
2,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2017-02-23 11:05:00 UTC,auto,auto,680,715,665,KY,Goshen,36,False,False,True,Electric
3,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2017-02-23 11:25:00 UTC,auto,auto,680,715,665,KY,Goshen,36,False,False,True,Electric
4,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2017-02-23 11:45:00 UTC,auto,auto,682,715,665,KY,Goshen,36,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110642,93a99c8c1f855b66361b16e00f1ea69f13216c11,2017-02-16 17:45:00 UTC,auto,hold,695,780,700,KY,Lexington,90,False,False,False,Gas
110643,93a99c8c1f855b66361b16e00f1ea69f13216c11,2017-02-02 19:55:00 UTC,auto,hold,706,780,700,KY,Lexington,90,False,False,False,Gas
110644,93a99c8c1f855b66361b16e00f1ea69f13216c11,2017-02-08 14:35:00 UTC,auto,hold,693,780,680,KY,Lexington,90,False,False,False,Gas
110645,93a99c8c1f855b66361b16e00f1ea69f13216c11,2017-02-22 17:10:00 UTC,auto,hold,713,780,690,KY,Lexington,90,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this uses lowercase "feb" -- confirm which casing downstream code expects.
feb_2017["Year"], feb_2017["Month"] = "2017", "feb"

In [38]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Save the February 2017 averages; index=True keeps the group keys, which live
# in the index after the groupby.
feb_2017_ave.to_csv(
    path_or_buf="data/day/KY/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the 2018 February "day" CSV for KY.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2018-feb-day-KY.csv")

In [42]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2018.drop(
    index=feb_2018.index[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-02-22 19:20:00 UTC,auto,hold,684,760,680,KY,Crestview Hills,38,True,False,True,Electric
1,3d0e9a4e04c2fee543eef7b52013cf815ae50345,2018-02-23 09:55:00 UTC,auto,hold,675,820,680,KY,Wilmore,118,True,False,True,Electric
2,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-02-13 14:55:00 UTC,heat,auto,690,760,690,KY,Crestview Hills,38,True,False,True,Electric
3,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-02-19 17:15:00 UTC,auto,auto,690,760,690,KY,Crestview Hills,38,True,False,True,Electric
4,3d0e9a4e04c2fee543eef7b52013cf815ae50345,2018-02-04 19:15:00 UTC,auto,hold,668,770,680,KY,Wilmore,118,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
288118,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2018-02-10 13:35:00 UTC,heat,hold,628,650,630,KY,Georgetown,120,True,False,True,Electric
288121,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-02-18 18:10:00 UTC,auto,hold,652,780,650,KY,Louisville,120,True,False,False,Gas
288122,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-02-18 14:50:00 UTC,auto,hold,644,780,650,KY,Louisville,120,True,False,False,Gas
288130,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-02-18 19:45:00 UTC,auto,hold,646,780,650,KY,Louisville,120,True,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this uses lowercase "feb" -- confirm which casing downstream code expects.
feb_2018["Year"], feb_2018["Month"] = "2018", "feb"


In [44]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Save the February 2018 averages; index=True keeps the group keys, which live
# in the index after the groupby.
feb_2018_ave.to_csv(
    path_or_buf="data/day/KY/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the 2019 February "day" CSV for KY.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2019-feb-day-KY.csv")

In [48]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2019.drop(
    index=feb_2019.index[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-02-22 11:25:00 UTC,heat,auto,684,680,680,KY,Vine Grove,28,True,False,True,Electric
1,a85176f7816955f77859fb3fbc12843017170f61,2019-02-02 15:35:00 UTC,auto,hold,711,780,720,KY,Newport,110,False,False,False,Gas
2,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-02-24 14:35:00 UTC,heat,auto,676,670,670,KY,Vine Grove,28,True,False,True,Electric
3,a85176f7816955f77859fb3fbc12843017170f61,2019-02-23 16:05:00 UTC,auto,hold,704,770,710,KY,Newport,110,False,False,False,Gas
4,a85176f7816955f77859fb3fbc12843017170f61,2019-02-16 16:10:00 UTC,auto,auto,686,750,690,KY,Newport,110,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
334725,628b89c27b7633e93a2abe430b726e3018266a4a,2019-02-04 10:30:00 UTC,heat,auto,676,680,680,KY,Georgetown,120,False,False,True,Electric
334726,628b89c27b7633e93a2abe430b726e3018266a4a,2019-02-13 11:55:00 UTC,heat,hold,677,680,680,KY,Georgetown,120,False,False,True,Electric
334727,628b89c27b7633e93a2abe430b726e3018266a4a,2019-02-28 16:15:00 UTC,heat,auto,680,680,680,KY,Georgetown,120,False,False,True,Electric
334728,628b89c27b7633e93a2abe430b726e3018266a4a,2019-02-23 18:45:00 UTC,heat,hold,680,680,680,KY,Georgetown,120,False,False,True,Electric


In [49]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this uses lowercase "feb" -- confirm which casing downstream code expects.
feb_2019["Year"], feb_2019["Month"] = "2019", "feb"


In [50]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Save the February 2019 averages; index=True keeps the group keys, which live
# in the index after the groupby.
feb_2019_ave.to_csv(
    path_or_buf="data/day/KY/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the 2020 February "day" CSV for KY.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2020-feb-day-KY.csv")

In [54]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2020.drop(
    index=feb_2020.index[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-02-20 10:55:00 UTC,heat,auto,674,680,680,KY,Vine Grove,28,True,False,True,Electric
1,a85176f7816955f77859fb3fbc12843017170f61,2020-02-16 15:45:00 UTC,heat,auto,689,710,710,KY,Newport,110,False,False,False,Gas
2,8c1eae3eccaf4af9d9dcaa4e25ddb883d6339700,2020-02-22 16:10:00 UTC,heat,hold,696,700,700,KY,Versailles,39,True,False,True,Electric
3,a85176f7816955f77859fb3fbc12843017170f61,2020-02-02 15:45:00 UTC,heat,auto,653,720,660,KY,Newport,110,False,False,False,Gas
4,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2020-02-25 14:25:00 UTC,heat,hold,648,650,650,KY,Highlands,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
491624,628b89c27b7633e93a2abe430b726e3018266a4a,2020-02-11 19:05:00 UTC,heat,hold,677,680,680,KY,Georgetown,120,False,False,True,Electric
491625,628b89c27b7633e93a2abe430b726e3018266a4a,2020-02-02 15:35:00 UTC,heat,hold,697,700,700,KY,Georgetown,120,False,False,True,Electric
491626,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2020-02-27 13:00:00 UTC,heat,hold,671,680,680,KY,Georgetown,120,True,False,True,Electric
491627,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2020-02-27 16:25:00 UTC,heat,hold,676,680,680,KY,Georgetown,120,True,False,True,Electric


In [55]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this uses lowercase "feb" -- confirm which casing downstream code expects.
feb_2020["Year"], feb_2020["Month"] = "2020", "feb"


In [56]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Save the February 2020 averages; index=True keeps the group keys, which live
# in the index after the groupby.
feb_2020_ave.to_csv(
    path_or_buf="data/day/KY/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the 2021 February "day" CSV for KY.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2021-feb-day-KY.csv")

In [60]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2021.drop(
    index=feb_2021.index[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a85176f7816955f77859fb3fbc12843017170f61,2021-02-22 16:10:00 UTC,heat,hold,691,700,700,KY,Newport,110,False,False,False,Gas
1,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2021-02-12 15:20:00 UTC,heat,hold,674,680,680,KY,Highlands,100,False,False,False,Gas
2,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2021-02-08 16:35:00 UTC,heat,hold,675,680,680,KY,Highlands,100,False,False,False,Gas
3,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2021-02-28 15:20:00 UTC,heat,hold,692,692,692,KY,Mount Washington,39,False,False,True,Electric
4,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2021-02-26 18:15:00 UTC,heat,hold,678,692,692,KY,Mount Washington,39,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
280109,628b89c27b7633e93a2abe430b726e3018266a4a,2021-02-21 19:00:00 UTC,heat,hold,694,700,700,KY,Georgetown,120,False,False,True,Electric
280110,628b89c27b7633e93a2abe430b726e3018266a4a,2021-02-05 19:30:00 UTC,heat,hold,684,690,690,KY,Georgetown,120,False,False,True,Electric
280111,628b89c27b7633e93a2abe430b726e3018266a4a,2021-02-01 13:15:00 UTC,heat,hold,687,700,700,KY,Georgetown,120,False,False,True,Electric
280112,628b89c27b7633e93a2abe430b726e3018266a4a,2021-02-15 16:00:00 UTC,heat,hold,680,690,690,KY,Georgetown,120,False,False,True,Electric


In [61]:
# Tag every row with its year and month (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this uses lowercase "feb" -- confirm which casing downstream code expects.
feb_2021["Year"], feb_2021["Month"] = "2021", "feb"


In [62]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where the default became False
# and the remaining text columns (e.g. date_time, ProvinceState) make mean()
# raise a TypeError; on older pandas it matches the previous default.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Save the February 2021 averages; index=True keeps the group keys, which live
# in the index after the groupby.
feb_2021_ave.to_csv(
    path_or_buf="data/day/KY/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the per-year February CSV files.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# keep the row order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/KY/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year February file, then stack them with a single concat.
# Concatenating once avoids re-copying the accumulated frame on each iteration
# and avoids seeding the result with an empty DataFrame, whose effect on the
# result dtypes is deprecated in recent pandas.
feb_frames = [pd.read_csv("data/day/KY/feb/" + file) for file in files]

# pd.concat raises on an empty list; fall back to an empty frame so the result
# matches what the loop-based version produced when no CSV files were found.
KY_feb = pd.concat(feb_frames) if feb_frames else pd.DataFrame()

KY_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,008cfa92481086d7def321eeabb85c0d3eb846de,feb,2017,auto,auto,Campbellsville,699.600000,699.750000,679.750000,5.0,True,False,True
1,008cfa92481086d7def321eeabb85c0d3eb846de,feb,2017,auto,hold,Campbellsville,698.333333,680.000000,660.000000,5.0,True,False,True
2,008cfa92481086d7def321eeabb85c0d3eb846de,feb,2017,heat,auto,Campbellsville,699.305556,700.166667,699.833333,5.0,True,False,True
3,020ded524bca03fd8503e9676cbd617ae62f12d4,feb,2017,auto,hold,Louisville,697.160171,734.989322,690.764015,15.0,False,False,False
4,0475b61636d4d1ba679fd87d374f57927081105b,feb,2017,heat,auto,Independence,677.318182,771.586364,682.754545,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,fbdfee9610a20af314ae084b9e6b9f48c07e0fc2,feb,2021,heat,hold,Louisville,697.073493,700.000000,700.000000,45.0,False,False,False
385,fca3aecc617d455ff79c8654219bbc7628348f90,feb,2021,heat,hold,Whitley City,708.562500,715.500000,714.666667,0.0,True,False,False
386,fd49781b1b98414ed0495e079dd5435a549a7a86,feb,2021,heat,hold,Alexandria,685.110897,687.702025,687.702025,5.0,False,False,True
387,ff1640c73f967b557d8c64dff6f8139a85e71d38,feb,2021,auto,hold,Georgetown,691.000000,730.000000,710.000000,15.0,True,False,True


In [67]:
# Write the combined month file without the row index.
# to_csv does not create missing folders, so make sure the output directory exists first.
os.makedirs("Scraper_Output/State_Month_Day/KY", exist_ok=True)
KY_feb.to_csv("Scraper_Output/State_Month_Day/KY/KY_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 day-level CSV for KY
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2017-jun-day-KY.csv")

In [69]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jun_2017.drop(jun_2017[out_of_range].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
12,a4d27551dc30f97fff14f7a02593970117a80349,2017-06-21 10:20:00 UTC,auto,hold,715,750,650,KY,Union,16,False,False,False,Gas
13,a4d27551dc30f97fff14f7a02593970117a80349,2017-06-18 12:55:00 UTC,auto,hold,769,810,650,KY,Union,16,False,False,False,Gas
14,a4d27551dc30f97fff14f7a02593970117a80349,2017-06-17 19:55:00 UTC,auto,hold,766,810,650,KY,Union,16,False,False,False,Gas
15,a4d27551dc30f97fff14f7a02593970117a80349,2017-06-11 12:40:00 UTC,auto,hold,730,730,650,KY,Union,16,False,False,False,Gas
16,a4d27551dc30f97fff14f7a02593970117a80349,2017-06-10 14:00:00 UTC,auto,hold,709,730,650,KY,Union,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197830,262c461792e30453d14b0e95ebe729512187b0f4,2017-06-18 11:05:00 UTC,cool,hold,724,720,720,KY,Louisville,90,False,False,False,Gas
197831,441f78bdc893c61cc6bf174cf95fea1fe597fc91,2017-06-09 11:00:00 UTC,cool,hold,700,740,740,KY,Louisville,90,False,False,False,Gas
197832,262c461792e30453d14b0e95ebe729512187b0f4,2017-06-19 17:00:00 UTC,cool,auto,752,760,720,KY,Louisville,90,False,False,False,Gas
197833,262c461792e30453d14b0e95ebe729512187b0f4,2017-06-08 18:10:00 UTC,cool,hold,712,729,729,KY,Louisville,90,False,False,False,Gas


In [70]:
# Tag every row with the year and month this file covers
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(ave_labels, axis="columns")

In [72]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jun", exist_ok=True)
jun_2017_ave.to_csv("data/day/KY/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the June 2018 day-level CSV for KY
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2018-jun-day-KY.csv")

In [75]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jun_2018.drop(jun_2018[out_of_range].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-06-19 11:05:00 UTC,cool,hold,722,720,720,KY,Crestview Hills,38,True,False,True,Electric
1,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2018-06-18 17:05:00 UTC,cool,auto,726,720,720,KY,Mt. Washington,47,True,False,True,Electric
2,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2018-06-02 16:35:00 UTC,cool,hold,709,700,700,KY,Mt. Washington,47,True,False,True,Electric
3,a85176f7816955f77859fb3fbc12843017170f61,2018-06-16 18:05:00 UTC,auto,auto,681,680,620,KY,Newport,110,False,False,False,Gas
4,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-06-23 18:25:00 UTC,cool,hold,701,700,700,KY,Crestview Hills,38,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
426855,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-06-15 12:00:00 UTC,auto,hold,711,712,662,KY,Louisville,120,True,False,False,Gas
426856,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-06-16 12:30:00 UTC,auto,hold,717,712,662,KY,Louisville,120,True,False,False,Gas
426857,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-06-22 17:00:00 UTC,auto,hold,715,712,662,KY,Louisville,120,True,False,False,Gas
426858,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-06-16 17:40:00 UTC,auto,hold,716,712,662,KY,Louisville,120,True,False,False,Gas


In [76]:
# Tag every row with the year and month this file covers
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(ave_labels, axis="columns")

In [78]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jun", exist_ok=True)
jun_2018_ave.to_csv("data/day/KY/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the June 2019 day-level CSV for KY
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2019-jun-day-KY.csv")

In [81]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jun_2019.drop(jun_2019[out_of_range].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6afa8c18218f1a5d0f4532476084ce250c75141f,2019-06-17 12:30:00 UTC,cool,hold,664,660,660,KY,Shepherdsville,10,False,False,False,Gas
1,f32b0994d982668680970ad64e06e3ab916520e9,2019-06-20 16:55:00 UTC,cool,hold,718,740,740,KY,Shepherdsville,10,True,False,False,Gas
2,6afa8c18218f1a5d0f4532476084ce250c75141f,2019-06-23 12:05:00 UTC,cool,auto,670,670,670,KY,Shepherdsville,10,False,False,False,Gas
3,6afa8c18218f1a5d0f4532476084ce250c75141f,2019-06-28 11:50:00 UTC,cool,auto,681,680,680,KY,Shepherdsville,10,False,False,False,Gas
4,f32b0994d982668680970ad64e06e3ab916520e9,2019-06-29 09:20:00 UTC,cool,hold,700,700,700,KY,Shepherdsville,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
589736,6afa8c18218f1a5d0f4532476084ce250c75141f,2019-06-02 11:45:00 UTC,cool,auto,675,670,670,KY,Shepherdsville,10,False,False,False,Gas
589737,f32b0994d982668680970ad64e06e3ab916520e9,2019-06-16 07:30:00 UTC,cool,auto,741,740,735,KY,Shepherdsville,10,True,False,False,Gas
589738,6afa8c18218f1a5d0f4532476084ce250c75141f,2019-06-23 13:50:00 UTC,cool,auto,675,670,670,KY,Shepherdsville,10,False,False,False,Gas
589739,f32b0994d982668680970ad64e06e3ab916520e9,2019-06-09 17:20:00 UTC,cool,hold,704,700,700,KY,Shepherdsville,10,True,False,False,Gas


In [82]:
# Tag every row with the year and month this file covers
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(ave_labels, axis="columns")

In [84]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jun", exist_ok=True)
jun_2019_ave.to_csv("data/day/KY/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the June 2020 day-level CSV for KY
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2020-jun-day-KY.csv")

In [87]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jun_2020.drop(jun_2020[out_of_range].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-06-25 11:30:00 UTC,cool,hold,681,690,690,KY,Vine Grove,28,True,False,True,Electric
1,00105867bef9463f0a62d5257d0e91b8c8d19dee,2020-06-17 11:00:00 UTC,cool,hold,697,700,700,KY,Southgate,75,False,False,False,Gas
2,0c63b911c56082695f9c7416781cd37dbcad2262,2020-06-30 14:55:00 UTC,auto,auto,751,760,690,KY,Murray,58,False,False,False,Gas
3,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2020-06-06 11:25:00 UTC,cool,auto,728,730,730,KY,Highlands,100,False,False,False,Gas
4,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-06-03 11:25:00 UTC,cool,auto,690,690,640,KY,Vine Grove,28,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
575740,628b89c27b7633e93a2abe430b726e3018266a4a,2020-06-16 16:30:00 UTC,cool,hold,708,740,740,KY,Georgetown,120,False,False,True,Electric
575741,628b89c27b7633e93a2abe430b726e3018266a4a,2020-06-12 14:10:00 UTC,cool,hold,727,730,730,KY,Georgetown,120,False,False,True,Electric
575742,628b89c27b7633e93a2abe430b726e3018266a4a,2020-06-28 16:35:00 UTC,cool,hold,737,740,740,KY,Georgetown,120,False,False,True,Electric
575743,628b89c27b7633e93a2abe430b726e3018266a4a,2020-06-26 17:50:00 UTC,cool,hold,730,730,730,KY,Georgetown,120,False,False,True,Electric


In [88]:
# Tag every row with the year and month this file covers
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(ave_labels, axis="columns")

In [90]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jun", exist_ok=True)
jun_2020_ave.to_csv("data/day/KY/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the June 2021 day-level CSV for KY
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2021-jun-day-KY.csv")

In [93]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jun_2021.drop(jun_2021[out_of_range].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2021-06-19 16:10:00 UTC,cool,hold,690,675,675,KY,Goshen,36,False,False,True,Electric
1,00105867bef9463f0a62d5257d0e91b8c8d19dee,2021-06-05 13:15:00 UTC,cool,hold,704,705,705,KY,Southgate,75,False,False,False,Gas
2,36e5a77bf644b37f395da007bc371ed9bcc4160b,2021-06-06 17:50:00 UTC,cool,hold,719,720,720,KY,Benton,19,False,False,True,Electric
3,d61a5a18aab25a00f0f96411957106752a9b4c2e,2021-06-07 19:05:00 UTC,cool,hold,709,700,700,KY,Salyersville,39,True,False,True,Electric
4,a85176f7816955f77859fb3fbc12843017170f61,2021-06-07 18:15:00 UTC,auto,hold,699,700,650,KY,Newport,110,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
347500,628b89c27b7633e93a2abe430b726e3018266a4a,2021-06-15 15:40:00 UTC,cool,hold,742,760,760,KY,Georgetown,120,False,False,True,Electric
347501,628b89c27b7633e93a2abe430b726e3018266a4a,2021-06-20 17:45:00 UTC,cool,hold,755,760,760,KY,Georgetown,120,False,False,True,Electric
347502,628b89c27b7633e93a2abe430b726e3018266a4a,2021-06-24 15:05:00 UTC,cool,hold,708,760,760,KY,Georgetown,120,False,False,True,Electric
347503,628b89c27b7633e93a2abe430b726e3018266a4a,2021-06-19 11:30:00 UTC,cool,hold,750,760,760,KY,Georgetown,120,False,False,True,Electric


In [94]:
# Tag every row with the year and month this file covers
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(ave_labels, axis="columns")

In [96]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jun", exist_ok=True)
jun_2021_ave.to_csv("data/day/KY/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every per-year CSV file from the state's month folder
2. Export as combined CSV

In [98]:
# List the CSV files in the month folder. Sorted by name because os.listdir
# returns entries in arbitrary order, which would make the combined row order unstable.
files = sorted(f for f in os.listdir("data/day/KY/jun/") if f.endswith(".csv"))

In [99]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year aggregate first, then stack them with a single concat
# (concatenating inside the loop re-copies the accumulated frame on every pass).
frames = [pd.read_csv("data/day/KY/jun/" + file) for file in files]

# pd.concat raises on an empty list, so keep the empty-frame result when no files were found
KY_jun = pd.concat(frames) if frames else pd.DataFrame()

KY_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,008cfa92481086d7def321eeabb85c0d3eb846de,jun,2017,cool,auto,Campbellsville,700.013423,694.348993,680.000000,5.0,True,False,True
1,008cfa92481086d7def321eeabb85c0d3eb846de,jun,2017,cool,hold,Campbellsville,702.457399,695.493274,695.430493,5.0,True,False,True
2,020ded524bca03fd8503e9676cbd617ae62f12d4,jun,2017,auto,auto,Louisville,700.947368,705.000000,645.000000,15.0,False,False,False
3,020ded524bca03fd8503e9676cbd617ae62f12d4,jun,2017,auto,hold,Louisville,694.773554,704.347107,642.375207,15.0,False,False,False
4,0475b61636d4d1ba679fd87d374f57927081105b,jun,2017,auto,auto,Independence,750.932258,774.785484,661.664516,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
422,fd49781b1b98414ed0495e079dd5435a549a7a86,jun,2021,auto,hold,Alexandria,701.587084,696.886497,643.115460,5.0,False,False,True
423,fd49781b1b98414ed0495e079dd5435a549a7a86,jun,2021,heat,hold,Alexandria,703.504950,688.287129,688.287129,5.0,False,False,True
424,fefca5ecf57c50c2f337a1de4577dae3d020f2cd,jun,2021,cool,hold,Lexington,729.707865,720.415730,719.842697,8.0,True,False,True
425,ff1640c73f967b557d8c64dff6f8139a85e71d38,jun,2021,auto,hold,Georgetown,691.604651,686.279070,646.674419,15.0,True,False,True


In [100]:
# Write the combined month file without the row index.
# to_csv does not create missing folders, so make sure the output directory exists first.
os.makedirs("Scraper_Output/State_Month_Day/KY", exist_ok=True)
KY_jun.to_csv("Scraper_Output/State_Month_Day/KY/KY_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 day-level CSV for KY
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2017-jul-day-KY.csv")

In [102]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jul_2017.drop(jul_2017[out_of_range].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-07-09 19:00:00 UTC,cool,hold,715,700,700,KY,Southgate,75,False,False,False,Gas
5,890efba3b0690140b7849184f0b02a28012d6b9a,2017-07-28 11:20:00 UTC,cool,auto,700,720,740,KY,Ashland,85,False,False,False,Gas
6,890efba3b0690140b7849184f0b02a28012d6b9a,2017-07-23 12:10:00 UTC,cool,hold,744,730,730,KY,Ashland,85,False,False,False,Gas
7,890efba3b0690140b7849184f0b02a28012d6b9a,2017-07-30 15:40:00 UTC,cool,auto,693,690,740,KY,Ashland,85,False,False,False,Gas
8,890efba3b0690140b7849184f0b02a28012d6b9a,2017-07-27 11:00:00 UTC,cool,auto,683,680,740,KY,Ashland,85,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234023,441f78bdc893c61cc6bf174cf95fea1fe597fc91,2017-07-05 18:00:00 UTC,cool,hold,748,750,750,KY,Louisville,90,False,False,False,Gas
234024,262c461792e30453d14b0e95ebe729512187b0f4,2017-07-02 15:25:00 UTC,cool,hold,726,720,720,KY,Louisville,90,False,False,False,Gas
234025,7eb6c3424f78bab4592372fd0a430b52146e201e,2017-07-18 15:25:00 UTC,cool,hold,734,730,730,KY,Louisville,90,False,False,False,Gas
234026,441f78bdc893c61cc6bf174cf95fea1fe597fc91,2017-07-20 12:45:00 UTC,cool,auto,734,730,740,KY,Louisville,90,False,False,False,Gas


In [103]:
# Tag every row with the year and month this file covers
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(ave_labels, axis="columns")

In [105]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jul", exist_ok=True)
jul_2017_ave.to_csv("data/day/KY/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the July 2018 day-level CSV for KY
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2018-jul-day-KY.csv")

In [108]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jul_2018.drop(jul_2018[out_of_range].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-07-29 10:45:00 UTC,cool,hold,699,700,700,KY,Crestview Hills,38,True,False,True,Electric
1,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-07-29 10:50:00 UTC,cool,hold,699,700,700,KY,Crestview Hills,38,True,False,True,Electric
2,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-07-21 12:30:00 UTC,cool,hold,702,700,700,KY,Crestview Hills,38,True,False,True,Electric
3,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-07-12 12:50:00 UTC,cool,auto,710,710,680,KY,Crestview Hills,38,True,False,True,Electric
4,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2018-07-09 10:45:00 UTC,cool,auto,727,730,730,KY,Mt. Washington,47,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
490679,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-07-04 16:10:00 UTC,auto,hold,730,722,672,KY,Louisville,120,True,False,False,Gas
490680,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-07-04 14:25:00 UTC,auto,hold,727,722,672,KY,Louisville,120,True,False,False,Gas
490681,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-07-02 16:50:00 UTC,auto,hold,741,722,672,KY,Louisville,120,True,False,False,Gas
490682,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-07-03 14:40:00 UTC,auto,hold,724,722,672,KY,Louisville,120,True,False,False,Gas


In [109]:
# Tag every row with the year and month this file covers
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(ave_labels, axis="columns")

In [111]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jul", exist_ok=True)
jul_2018_ave.to_csv("data/day/KY/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the July 2019 day-level CSV for KY
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2019-jul-day-KY.csv")

In [114]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jul_2019.drop(jul_2019[out_of_range].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-07-30 15:00:00 UTC,cool,auto,695,690,690,KY,Vine Grove,28,True,False,True,Electric
1,0f739392cb73489df16d6bd5cce782aa26047ea0,2019-07-20 13:05:00 UTC,cool,auto,710,710,710,KY,Hickman,85,False,False,False,Gas
2,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-07-16 14:05:00 UTC,cool,auto,691,690,690,KY,Vine Grove,28,True,False,True,Electric
3,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-07-16 13:25:00 UTC,cool,auto,696,690,690,KY,Vine Grove,28,True,False,True,Electric
4,0f739392cb73489df16d6bd5cce782aa26047ea0,2019-07-13 12:35:00 UTC,cool,auto,731,730,720,KY,Hickman,85,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
622258,628b89c27b7633e93a2abe430b726e3018266a4a,2019-07-19 16:55:00 UTC,cool,auto,740,740,740,KY,Georgetown,120,False,False,True,Electric
622259,628b89c27b7633e93a2abe430b726e3018266a4a,2019-07-26 09:45:00 UTC,cool,hold,733,740,740,KY,Georgetown,120,False,False,True,Electric
622260,628b89c27b7633e93a2abe430b726e3018266a4a,2019-07-20 14:15:00 UTC,cool,hold,736,740,740,KY,Georgetown,120,False,False,True,Electric
622261,628b89c27b7633e93a2abe430b726e3018266a4a,2019-07-22 16:35:00 UTC,cool,hold,755,750,750,KY,Georgetown,120,False,False,True,Electric


In [115]:
# Tag every row with the year and month this file covers
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(ave_labels, axis="columns")

In [117]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jul", exist_ok=True)
jul_2019_ave.to_csv("data/day/KY/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the July 2020 day-level CSV for KY
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2020-jul-day-KY.csv")

In [120]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jul_2020.drop(jul_2020[out_of_range].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2020-07-05 10:55:00 UTC,cool,hold,738,760,760,KY,Highlands,100,False,False,False,Gas
2,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2020-07-01 19:00:00 UTC,auto,auto,719,710,620,KY,Highlands,100,False,False,False,Gas
3,d61a5a18aab25a00f0f96411957106752a9b4c2e,2020-07-22 17:40:00 UTC,cool,hold,705,700,700,KY,Salyersville,39,True,False,True,Electric
4,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2020-07-26 14:05:00 UTC,cool,auto,687,690,690,KY,Mount Washington,39,False,False,True,Electric
5,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-07-03 13:55:00 UTC,cool,auto,679,680,630,KY,Vine Grove,28,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595532,628b89c27b7633e93a2abe430b726e3018266a4a,2020-07-27 10:05:00 UTC,cool,hold,740,740,740,KY,Georgetown,120,False,False,True,Electric
595533,628b89c27b7633e93a2abe430b726e3018266a4a,2020-07-28 11:55:00 UTC,cool,hold,736,740,740,KY,Georgetown,120,False,False,True,Electric
595534,628b89c27b7633e93a2abe430b726e3018266a4a,2020-07-31 17:15:00 UTC,cool,hold,718,720,720,KY,Georgetown,120,False,False,True,Electric
595535,628b89c27b7633e93a2abe430b726e3018266a4a,2020-07-11 10:30:00 UTC,cool,hold,728,730,730,KY,Georgetown,120,False,False,True,Electric


In [121]:
# Tag every row with the year and month this file covers
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(ave_labels, axis="columns")

In [123]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jul", exist_ok=True)
jul_2020_ave.to_csv("data/day/KY/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the July 2021 day-level CSV for KY
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/KY-day/2021-jul-day-KY.csv")

In [126]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
setpoints = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = setpoints.ge(850).any(axis=1) | setpoints.le(600).any(axis=1)
jul_2021.drop(jul_2021[out_of_range].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2021-07-01 16:05:00 UTC,cool,hold,705,710,710,KY,Highlands,100,False,False,False,Gas
1,a85176f7816955f77859fb3fbc12843017170f61,2021-07-29 17:45:00 UTC,cool,hold,705,680,680,KY,Newport,110,False,False,False,Gas
2,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2021-07-13 17:40:00 UTC,cool,hold,749,720,720,KY,Highlands,100,False,False,False,Gas
3,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2021-07-14 13:00:00 UTC,cool,hold,718,720,720,KY,Highlands,100,False,False,False,Gas
4,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2021-07-21 16:10:00 UTC,cool,hold,746,750,750,KY,Mt. Washington,47,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
318631,628b89c27b7633e93a2abe430b726e3018266a4a,2021-07-13 19:00:00 UTC,cool,hold,716,720,720,KY,Georgetown,120,False,False,True,Electric
318632,628b89c27b7633e93a2abe430b726e3018266a4a,2021-07-01 16:10:00 UTC,cool,hold,742,770,770,KY,Georgetown,120,False,False,True,Electric
318633,628b89c27b7633e93a2abe430b726e3018266a4a,2021-07-13 17:30:00 UTC,cool,hold,728,720,720,KY,Georgetown,120,False,False,True,Electric
318634,628b89c27b7633e93a2abe430b726e3018266a4a,2021-07-04 17:20:00 UTC,cool,hold,736,770,770,KY,Georgetown,120,False,False,True,Electric


In [127]:
# Tag every row with the year and month this file covers
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(ave_labels, axis="columns")

In [129]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City group.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError on the
# remaining text columns (e.g. date_time) instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Export the aggregated frame; the group keys are in the index, so index=True keeps them.
# to_csv does not create missing folders, so make sure the target directory exists first.
os.makedirs("data/day/KY/jul", exist_ok=True)
jul_2021_ave.to_csv("data/day/KY/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every per-year CSV file from the state's month folder
2. Export as combined CSV

In [131]:
# List the CSV files in the month folder. Sorted by name because os.listdir
# returns entries in arbitrary order, which would make the combined row order unstable.
files = sorted(f for f in os.listdir("data/day/KY/jul/") if f.endswith(".csv"))

In [132]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year aggregate first, then stack them with a single concat
# (concatenating inside the loop re-copies the accumulated frame on every pass).
frames = [pd.read_csv("data/day/KY/jul/" + file) for file in files]

# pd.concat raises on an empty list, so keep the empty-frame result when no files were found
KY_jul = pd.concat(frames) if frames else pd.DataFrame()

KY_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00105867bef9463f0a62d5257d0e91b8c8d19dee,jul,2017,cool,hold,Southgate,715.631579,700.000000,700.000000,75.0,False,False,False
1,008cfa92481086d7def321eeabb85c0d3eb846de,jul,2017,cool,auto,Campbellsville,706.532468,700.058442,680.000000,5.0,True,False,True
2,008cfa92481086d7def321eeabb85c0d3eb846de,jul,2017,cool,hold,Campbellsville,704.132353,695.573529,695.573529,5.0,True,False,True
3,0475b61636d4d1ba679fd87d374f57927081105b,jul,2017,auto,auto,Independence,763.537634,780.000000,660.000000,20.0,False,False,False
4,06949103a84504053d0e29bef4cb3451160997f0,jul,2017,auto,auto,Louisville,674.350877,665.000000,615.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
377,fd49781b1b98414ed0495e079dd5435a549a7a86,jul,2021,auto,hold,Alexandria,692.630926,689.433409,638.193002,5.0,False,False,True
378,fd8aaf4038f451870fc23ab85245f4ca4a83a354,jul,2021,cool,hold,Lexington,666.745665,667.560694,666.693642,18.0,False,False,True
379,fefca5ecf57c50c2f337a1de4577dae3d020f2cd,jul,2021,cool,hold,Lexington,734.782609,726.000000,726.000000,8.0,True,False,True
380,ff1640c73f967b557d8c64dff6f8139a85e71d38,jul,2021,auto,hold,Georgetown,724.770115,724.793103,657.011494,15.0,True,False,True


In [133]:
# Save the combined July frame for KY; the row labels are left out of the file.
KY_jul_csv = "Scraper_Output/State_Month_Day/KY/KY_jul.csv"
KY_jul.to_csv(KY_jul_csv, index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the raw August 2017 "day" extract for KY from the large-data folder.
aug_2017_csv = "../data_large/KY-day/2017-aug-day-KY.csv"
aug_2017 = pd.read_csv(aug_2017_csv)

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
aug_2017_setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
aug_2017_outliers = ((aug_2017_setpoints >= 850) | (aug_2017_setpoints <= 600)).any(axis=1)
aug_2017.drop(aug_2017[aug_2017_outliers].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-08-26 18:40:00 UTC,cool,hold,727,700,700,KY,Southgate,75,False,False,False,Gas
2,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-08-26 19:20:00 UTC,cool,hold,723,700,700,KY,Southgate,75,False,False,False,Gas
3,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-08-26 17:40:00 UTC,cool,hold,744,700,700,KY,Southgate,75,False,False,False,Gas
5,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-08-26 17:50:00 UTC,cool,hold,741,700,700,KY,Southgate,75,False,False,False,Gas
6,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-08-26 18:10:00 UTC,cool,hold,733,700,700,KY,Southgate,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
240547,df93077f04f2403ad7e0a12bce9f526332cc3cc1,2017-08-28 17:10:00 UTC,cool,auto,722,720,730,KY,Louisville,90,False,False,False,Gas
240548,7eb6c3424f78bab4592372fd0a430b52146e201e,2017-08-24 11:50:00 UTC,cool,hold,725,730,730,KY,Louisville,90,False,False,False,Gas
240549,7eb6c3424f78bab4592372fd0a430b52146e201e,2017-08-09 18:00:00 UTC,cool,hold,735,730,730,KY,Louisville,90,False,False,False,Gas
240550,441f78bdc893c61cc6bf174cf95fea1fe597fc91,2017-08-18 10:30:00 UTC,cool,hold,732,730,730,KY,Louisville,90,False,False,False,Gas


In [136]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
aug_2017_period = {"Year": "2017", "Month": "aug"}
aug_2017 = aug_2017.assign(**aug_2017_period)

In [137]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
aug_2017_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=aug_2017_ave_names)

In [138]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
aug_2017_ave_csv = "data/day/KY/aug/aug_2017_ave.csv"
aug_2017_ave.to_csv(aug_2017_ave_csv, index=True, header=True)

### 2018 August Day

In [140]:
# Load the raw August 2018 "day" extract for KY from the large-data folder.
aug_2018_csv = "../data_large/KY-day/2018-aug-day-KY.csv"
aug_2018 = pd.read_csv(aug_2018_csv)

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
aug_2018_setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
aug_2018_outliers = ((aug_2018_setpoints >= 850) | (aug_2018_setpoints <= 600)).any(axis=1)
aug_2018.drop(aug_2018[aug_2018_outliers].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-08-18 19:15:00 UTC,cool,hold,701,700,700,KY,Crestview Hills,38,True,False,True,Electric
1,c2c20f09a15771decb656a0e7fd0be6f529101cd,2018-08-20 12:05:00 UTC,auto,hold,710,709,650,KY,Vine Grove,28,True,False,True,Electric
2,c2c20f09a15771decb656a0e7fd0be6f529101cd,2018-08-28 11:30:00 UTC,auto,hold,699,700,650,KY,Vine Grove,28,True,False,True,Electric
3,a85176f7816955f77859fb3fbc12843017170f61,2018-08-12 14:55:00 UTC,auto,hold,702,700,640,KY,Newport,110,False,False,False,Gas
4,cbdb5db5ce0fcdae40f28f81305bfbbfa747dc4c,2018-08-08 18:50:00 UTC,cool,hold,783,780,780,KY,Crestview Hills,38,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461153,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-08-08 16:45:00 UTC,auto,hold,747,742,672,KY,Louisville,120,True,False,False,Gas
461154,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-08-08 10:55:00 UTC,auto,hold,744,742,672,KY,Louisville,120,True,False,False,Gas
461155,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-08-08 19:10:00 UTC,auto,hold,743,742,672,KY,Louisville,120,True,False,False,Gas
461156,af074e178c199f0fff346054a7f6c3114bc27bb0,2018-08-08 17:25:00 UTC,auto,hold,742,742,672,KY,Louisville,120,True,False,False,Gas


In [142]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
aug_2018_period = {"Year": "2018", "Month": "aug"}
aug_2018 = aug_2018.assign(**aug_2018_period)

In [143]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
aug_2018_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=aug_2018_ave_names)

In [144]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
aug_2018_ave_csv = "data/day/KY/aug/aug_2018_ave.csv"
aug_2018_ave.to_csv(aug_2018_ave_csv, index=True, header=True)

### 2019 August Day

In [146]:
# Load the raw August 2019 "day" extract for KY from the large-data folder.
aug_2019_csv = "../data_large/KY-day/2019-aug-day-KY.csv"
aug_2019 = pd.read_csv(aug_2019_csv)

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
aug_2019_setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
aug_2019_outliers = ((aug_2019_setpoints >= 850) | (aug_2019_setpoints <= 600)).any(axis=1)
aug_2019.drop(aug_2019[aug_2019_outliers].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2019-08-03 17:15:00 UTC,cool,auto,735,730,730,KY,Highlands,100,False,False,False,Gas
1,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-08-07 16:05:00 UTC,cool,auto,705,700,700,KY,Vine Grove,28,True,False,True,Electric
2,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-08-09 16:10:00 UTC,cool,auto,702,700,700,KY,Vine Grove,28,True,False,True,Electric
3,4f4ffa6020a9cb61a4b227fc6a0768e8fa5e8d22,2019-08-18 18:45:00 UTC,cool,auto,733,730,730,KY,Carrollton,19,False,False,False,Gas
4,c2c20f09a15771decb656a0e7fd0be6f529101cd,2019-08-30 10:55:00 UTC,cool,hold,694,700,700,KY,Vine Grove,28,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
598819,628b89c27b7633e93a2abe430b726e3018266a4a,2019-08-21 12:50:00 UTC,cool,hold,749,750,750,KY,Georgetown,120,False,False,True,Electric
598820,628b89c27b7633e93a2abe430b726e3018266a4a,2019-08-12 09:45:00 UTC,cool,hold,747,750,750,KY,Georgetown,120,False,False,True,Electric
598821,628b89c27b7633e93a2abe430b726e3018266a4a,2019-08-09 19:15:00 UTC,cool,hold,741,740,740,KY,Georgetown,120,False,False,True,Electric
598822,628b89c27b7633e93a2abe430b726e3018266a4a,2019-08-10 11:55:00 UTC,cool,hold,744,750,750,KY,Georgetown,120,False,False,True,Electric


In [148]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
aug_2019_period = {"Year": "2019", "Month": "aug"}
aug_2019 = aug_2019.assign(**aug_2019_period)

In [149]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
aug_2019_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=aug_2019_ave_names)

In [150]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
aug_2019_ave_csv = "data/day/KY/aug/aug_2019_ave.csv"
aug_2019_ave.to_csv(aug_2019_ave_csv, index=True, header=True)

### 2020 August Day

In [152]:
# Load the raw August 2020 "day" extract for KY from the large-data folder.
aug_2020_csv = "../data_large/KY-day/2020-aug-day-KY.csv"
aug_2020 = pd.read_csv(aug_2020_csv)

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
aug_2020_setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
aug_2020_outliers = ((aug_2020_setpoints >= 850) | (aug_2020_setpoints <= 600)).any(axis=1)
aug_2020.drop(aug_2020[aug_2020_outliers].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-08-05 12:15:00 UTC,cool,hold,689,690,690,KY,Vine Grove,28,True,False,True,Electric
1,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2020-08-28 15:20:00 UTC,cool,auto,702,710,710,KY,Mount Washington,39,False,False,True,Electric
2,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-08-17 15:35:00 UTC,cool,auto,679,680,640,KY,Vine Grove,28,True,False,True,Electric
3,a85176f7816955f77859fb3fbc12843017170f61,2020-08-05 13:00:00 UTC,cool,auto,722,740,680,KY,Newport,110,False,False,False,Gas
4,6c752ab573d305a9436acfd7f1d2d90027644b4f,2020-08-26 11:50:00 UTC,cool,hold,696,690,690,KY,LOUISVILLE,95,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
576438,628b89c27b7633e93a2abe430b726e3018266a4a,2020-08-23 11:35:00 UTC,cool,auto,721,740,740,KY,Georgetown,120,False,False,True,Electric
576439,628b89c27b7633e93a2abe430b726e3018266a4a,2020-08-29 18:00:00 UTC,cool,hold,724,720,720,KY,Georgetown,120,False,False,True,Electric
576440,628b89c27b7633e93a2abe430b726e3018266a4a,2020-08-03 19:15:00 UTC,cool,hold,727,730,730,KY,Georgetown,120,False,False,True,Electric
576441,628b89c27b7633e93a2abe430b726e3018266a4a,2020-08-21 10:45:00 UTC,cool,auto,720,720,720,KY,Georgetown,120,False,False,True,Electric


In [154]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
aug_2020_period = {"Year": "2020", "Month": "aug"}
aug_2020 = aug_2020.assign(**aug_2020_period)

In [155]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
aug_2020_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=aug_2020_ave_names)

In [156]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
aug_2020_ave_csv = "data/day/KY/aug/aug_2020_ave.csv"
aug_2020_ave.to_csv(aug_2020_ave_csv, index=True, header=True)

---

### Combine month CSV Files
1. Read in the per-year aggregate files from this month's folder
2. Export as one combined CSV for the month

In [158]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the row order
# of the combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/KY/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year August aggregate, then concatenate once. Building the list first
# avoids re-copying the growing frame on each iteration and avoids concatenating onto
# an empty DataFrame, which recent pandas versions deprecate.
aug_frames = [pd.read_csv("data/day/KY/aug/" + file) for file in files]

# Row labels are kept as read from each file (they restart at 0 per file).
# pd.concat raises on an empty list, so fall back to an empty DataFrame when the
# folder holds no CSV files.
KY_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

KY_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00105867bef9463f0a62d5257d0e91b8c8d19dee,aug,2017,cool,hold,Southgate,728.285714,700.000000,700.000000,75.0,False,False,False
1,008cfa92481086d7def321eeabb85c0d3eb846de,aug,2017,cool,auto,Campbellsville,702.335294,699.223529,680.000000,5.0,True,False,True
2,008cfa92481086d7def321eeabb85c0d3eb846de,aug,2017,cool,hold,Campbellsville,706.809160,700.091603,699.908397,5.0,True,False,True
3,06e700cffb486061579c6d328729f3a60e9092a3,aug,2017,cool,auto,louisville,719.000000,740.000000,690.000000,30.0,False,True,True
4,06e700cffb486061579c6d328729f3a60e9092a3,aug,2017,cool,hold,louisville,725.250000,750.000000,750.000000,30.0,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
708,fd49781b1b98414ed0495e079dd5435a549a7a86,aug,2020,cool,hold,Alexandria,703.346320,697.038961,697.038961,5.0,False,False,True
709,fd8aaf4038f451870fc23ab85245f4ca4a83a354,aug,2020,cool,auto,Lexington,681.000000,690.000000,663.000000,18.0,False,False,True
710,fd8aaf4038f451870fc23ab85245f4ca4a83a354,aug,2020,cool,hold,Lexington,678.763158,682.868421,680.500000,18.0,False,False,True
711,ff1640c73f967b557d8c64dff6f8139a85e71d38,aug,2020,auto,hold,Georgetown,728.826531,736.326531,656.020408,15.0,True,False,True


In [160]:
# Save the combined August frame for KY; the row labels are left out of the file.
KY_aug_csv = "Scraper_Output/State_Month_Day/KY/KY_aug.csv"
KY_aug.to_csv(KY_aug_csv, index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw December 2017 "day" extract for KY from the large-data folder.
dec_2017_csv = "../data_large/KY-day/2017-dec-day-KY.csv"
dec_2017 = pd.read_csv(dec_2017_csv)

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
dec_2017_setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
dec_2017_outliers = ((dec_2017_setpoints >= 850) | (dec_2017_setpoints <= 600)).any(axis=1)
dec_2017.drop(dec_2017[dec_2017_outliers].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,00105867bef9463f0a62d5257d0e91b8c8d19dee,2017-12-30 19:25:00 UTC,heat,auto,624,650,630,KY,Southgate,75,False,False,False,Gas
1,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2017-12-09 19:05:00 UTC,heat,auto,721,710,710,KY,Mt. Washington,47,True,False,True,Electric
2,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2017-12-20 11:35:00 UTC,heat,auto,698,690,690,KY,Mt. Washington,47,True,False,True,Electric
3,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2017-12-17 17:25:00 UTC,heat,hold,710,700,700,KY,Mt. Washington,47,True,False,True,Electric
4,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2017-12-04 16:40:00 UTC,auto,auto,699,745,695,KY,Mt. Washington,47,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299943,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2017-12-05 18:35:00 UTC,heat,hold,683,680,680,KY,Georgetown,120,True,False,True,Electric
299944,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2017-12-08 10:40:00 UTC,heat,auto,686,680,680,KY,Georgetown,120,True,False,True,Electric
299945,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2017-12-21 15:30:00 UTC,heat,hold,678,680,680,KY,Georgetown,120,True,False,True,Electric
299946,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2017-12-13 19:05:00 UTC,heat,hold,672,680,680,KY,Georgetown,120,True,False,True,Electric


In [163]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
dec_2017_period = {"Year": "2017", "Month": "dec"}
dec_2017 = dec_2017.assign(**dec_2017_period)

In [164]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
dec_2017_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=dec_2017_ave_names)

In [165]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
dec_2017_ave_csv = "data/day/KY/dec/dec_2017_ave.csv"
dec_2017_ave.to_csv(dec_2017_ave_csv, index=True, header=True)

### 2018 December Day

In [167]:
# Load the raw December 2018 "day" extract for KY from the large-data folder.
dec_2018_csv = "../data_large/KY-day/2018-dec-day-KY.csv"
dec_2018 = pd.read_csv(dec_2018_csv)

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
dec_2018_setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
dec_2018_outliers = ((dec_2018_setpoints >= 850) | (dec_2018_setpoints <= 600)).any(axis=1)
dec_2018.drop(dec_2018[dec_2018_outliers].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d117ee90b026d4dc02dbe36b7e9d1d250274bb5d,2018-12-01 16:30:00 UTC,heat,auto,675,780,620,KY,Mt. Washington,47,True,False,True,Electric
1,c2c20f09a15771decb656a0e7fd0be6f529101cd,2018-12-08 16:00:00 UTC,heat,hold,691,700,700,KY,Vine Grove,28,True,False,True,Electric
2,0c63b911c56082695f9c7416781cd37dbcad2262,2018-12-10 13:40:00 UTC,auto,auto,690,780,690,KY,Murray,58,False,False,False,Gas
3,a85176f7816955f77859fb3fbc12843017170f61,2018-12-29 14:50:00 UTC,auto,hold,691,770,690,KY,Newport,110,False,False,False,Gas
4,c2c20f09a15771decb656a0e7fd0be6f529101cd,2018-12-09 16:55:00 UTC,heat,hold,693,700,700,KY,Vine Grove,28,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
461274,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2018-12-16 12:20:00 UTC,heat,auto,688,700,700,KY,Georgetown,120,True,False,True,Electric
461275,1284a01c8a2c0320c4d7a69983a5e2e33658f3ce,2018-12-31 18:00:00 UTC,heat,hold,687,680,680,KY,Georgetown,120,True,False,True,Electric
461276,628b89c27b7633e93a2abe430b726e3018266a4a,2018-12-03 17:10:00 UTC,heat,auto,675,680,680,KY,Georgetown,120,False,False,True,Electric
461277,628b89c27b7633e93a2abe430b726e3018266a4a,2018-12-24 15:10:00 UTC,heat,hold,691,700,700,KY,Georgetown,120,False,False,True,Electric


In [169]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
dec_2018_period = {"Year": "2018", "Month": "dec"}
dec_2018 = dec_2018.assign(**dec_2018_period)

In [170]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
dec_2018_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=dec_2018_ave_names)

In [171]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
dec_2018_ave_csv = "data/day/KY/dec/dec_2018_ave.csv"
dec_2018_ave.to_csv(dec_2018_ave_csv, index=True, header=True)

### 2019 December Day

In [173]:
# Load the raw December 2019 "day" extract for KY from the large-data folder.
dec_2019_csv = "../data_large/KY-day/2019-dec-day-KY.csv"
dec_2019 = pd.read_csv(dec_2019_csv)

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
dec_2019_setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
dec_2019_outliers = ((dec_2019_setpoints >= 850) | (dec_2019_setpoints <= 600)).any(axis=1)
dec_2019.drop(dec_2019[dec_2019_outliers].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8c1eae3eccaf4af9d9dcaa4e25ddb883d6339700,2019-12-21 19:05:00 UTC,heat,hold,694,670,670,KY,Versailles,39,True,False,True,Electric
1,a85176f7816955f77859fb3fbc12843017170f61,2019-12-01 15:30:00 UTC,heat,auto,692,700,700,KY,Newport,110,False,False,False,Gas
2,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2019-12-14 13:45:00 UTC,heat,hold,627,650,629,KY,Highlands,100,False,False,False,Gas
3,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2019-12-12 12:10:00 UTC,heat,hold,643,650,649,KY,Highlands,100,False,False,False,Gas
4,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2019-12-19 14:00:00 UTC,heat,hold,679,700,700,KY,Mount Washington,39,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
543801,628b89c27b7633e93a2abe430b726e3018266a4a,2019-12-31 16:20:00 UTC,heat,hold,697,700,700,KY,Georgetown,120,False,False,True,Electric
543802,628b89c27b7633e93a2abe430b726e3018266a4a,2019-12-14 15:30:00 UTC,heat,hold,693,700,700,KY,Georgetown,120,False,False,True,Electric
543803,628b89c27b7633e93a2abe430b726e3018266a4a,2019-12-20 16:15:00 UTC,heat,hold,675,680,680,KY,Georgetown,120,False,False,True,Electric
543804,628b89c27b7633e93a2abe430b726e3018266a4a,2019-12-16 15:05:00 UTC,heat,hold,679,680,680,KY,Georgetown,120,False,False,True,Electric


In [175]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
dec_2019_period = {"Year": "2019", "Month": "dec"}
dec_2019 = dec_2019.assign(**dec_2019_period)

In [176]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
dec_2019_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=dec_2019_ave_names)

In [177]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
dec_2019_ave_csv = "data/day/KY/dec/dec_2019_ave.csv"
dec_2019_ave.to_csv(dec_2019_ave_csv, index=True, header=True)

### 2020 December Day

In [179]:
# Load the raw December 2020 "day" extract for KY from the large-data folder.
dec_2020_csv = "../data_large/KY-day/2020-dec-day-KY.csv"
dec_2020 = pd.read_csv(dec_2020_csv)

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint (cool or heat) is >= 850 or <= 600;
# the four checks are combined into one mask and removed in a single pass.
dec_2020_setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
dec_2020_outliers = ((dec_2020_setpoints >= 850) | (dec_2020_setpoints <= 600)).any(axis=1)
dec_2020.drop(dec_2020[dec_2020_outliers].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2020-12-20 13:20:00 UTC,heat,hold,672,695,695,KY,Mount Washington,39,False,False,True,Electric
1,c2c20f09a15771decb656a0e7fd0be6f529101cd,2020-12-02 11:35:00 UTC,heat,auto,682,750,690,KY,Vine Grove,28,True,False,True,Electric
2,56f54fb3fa14bf1c16c79b63de32965c79394dc5,2020-12-06 15:30:00 UTC,heat,hold,694,697,697,KY,Goshen,36,False,False,True,Electric
3,31ed9378d5a4cc5c6883c1f09c95c15cec33373d,2020-12-14 15:20:00 UTC,heat,auto,687,700,700,KY,Mount Washington,39,False,False,True,Electric
4,3ddcf8c99249bbefb2b23450d38a8b3f25dc19a7,2020-12-02 17:45:00 UTC,heat,hold,691,690,690,KY,Highlands,100,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465169,628b89c27b7633e93a2abe430b726e3018266a4a,2020-12-25 19:45:00 UTC,heat,hold,712,710,710,KY,Georgetown,120,False,False,True,Electric
465170,628b89c27b7633e93a2abe430b726e3018266a4a,2020-12-06 18:55:00 UTC,heat,hold,690,700,700,KY,Georgetown,120,False,False,True,Electric
465171,628b89c27b7633e93a2abe430b726e3018266a4a,2020-12-20 14:45:00 UTC,heat,hold,705,710,710,KY,Georgetown,120,False,False,True,Electric
465172,628b89c27b7633e93a2abe430b726e3018266a4a,2020-12-13 12:15:00 UTC,heat,hold,672,683,683,KY,Georgetown,120,False,False,True,Electric


In [181]:
# Add year and month
# Both tags are stored as strings; they are used as group keys when the frame is averaged.
dec_2020_period = {"Year": "2020", "Month": "dec"}
dec_2020 = dec_2020.assign(**dec_2020_period)

In [182]:
# Rename columns to label the aggregates
# Only the three temperature columns receive the "_ave" suffix.
dec_2020_ave_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=dec_2020_ave_names)

In [183]:
# Average the numeric and boolean columns for each Identifier / period / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly leaves out the text columns that are not group keys
# (e.g. date_time); pandas >= 2.0 raises a TypeError on them instead of dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file
# The group keys live in the index of the aggregate, so the index is written out too.
dec_2020_ave_csv = "data/day/KY/dec/dec_2020_ave.csv"
dec_2020_ave.to_csv(dec_2020_ave_csv, index=True, header=True)

---

### Combine month CSV Files
1. Read in the per-year aggregate files from this month's folder
2. Export as one combined CSV for the month

In [185]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the row order
# of the combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/KY/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year December aggregate, then concatenate once. Building the list first
# avoids re-copying the growing frame on each iteration and avoids concatenating onto
# an empty DataFrame, which recent pandas versions deprecate.
dec_frames = [pd.read_csv("data/day/KY/dec/" + file) for file in files]

# Row labels are kept as read from each file (they restart at 0 per file).
# pd.concat raises on an empty list, so fall back to an empty DataFrame when the
# folder holds no CSV files.
KY_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

KY_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00105867bef9463f0a62d5257d0e91b8c8d19dee,dec,2017,heat,auto,Southgate,649.070588,650.000000,643.176471,75.0,False,False,False
1,00105867bef9463f0a62d5257d0e91b8c8d19dee,dec,2017,heat,hold,Southgate,676.944444,672.111111,668.222222,75.0,False,False,False
2,003e168f638289edc59e7df30f5a63ff7bae7a2e,dec,2017,heat,auto,Science Hill,720.093750,722.968750,723.562500,5.0,True,False,True
3,003e168f638289edc59e7df30f5a63ff7bae7a2e,dec,2017,heat,hold,Science Hill,713.600000,706.550000,707.350000,5.0,True,False,True
4,008cfa92481086d7def321eeabb85c0d3eb846de,dec,2017,heat,auto,Campbellsville,661.038462,661.384615,661.230769,5.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
698,fd8aaf4038f451870fc23ab85245f4ca4a83a354,dec,2020,auto,auto,Lexington,605.981132,631.264151,610.415094,18.0,False,False,True
699,fd8aaf4038f451870fc23ab85245f4ca4a83a354,dec,2020,auto,hold,Lexington,623.116279,720.000000,615.162791,18.0,False,False,True
700,fefca5ecf57c50c2f337a1de4577dae3d020f2cd,dec,2020,heat,auto,Lexington,728.101227,740.235583,679.871166,8.0,True,False,True
701,ff1640c73f967b557d8c64dff6f8139a85e71d38,dec,2020,auto,hold,Georgetown,681.409709,732.732039,686.102913,15.0,True,False,True


In [187]:
# Save the combined December frame for KY; the row labels are left out of the file.
KY_dec_csv = "Scraper_Output/State_Month_Day/KY/KY_dec.csv"
KY_dec.to_csv(KY_dec_csv, index=False, header=True)

----

----

---

### Combine state CSV Files
1. Read in the combined month files from the state's folder
2. Export as one combined CSV for the state

In [188]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the row order
# of the combined state file reproducible from run to run.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/KY/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every combined month file for KY, then concatenate once. Building the list first
# avoids re-copying the growing frame on each iteration and avoids concatenating onto
# an empty DataFrame, which recent pandas versions deprecate.
month_frames = [pd.read_csv("Scraper_Output/State_Month_Day/KY/" + file) for file in files]

# Row labels are kept as read from each file (they restart at 0 per file).
# pd.concat raises on an empty list, so fall back to an empty DataFrame when the
# folder holds no CSV files.
KY_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

KY_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00105867bef9463f0a62d5257d0e91b8c8d19dee,aug,2017,cool,hold,Southgate,728.285714,700.000000,700.000000,75.0,False,False,False
1,008cfa92481086d7def321eeabb85c0d3eb846de,aug,2017,cool,auto,Campbellsville,702.335294,699.223529,680.000000,5.0,True,False,True
2,008cfa92481086d7def321eeabb85c0d3eb846de,aug,2017,cool,hold,Campbellsville,706.809160,700.091603,699.908397,5.0,True,False,True
3,06e700cffb486061579c6d328729f3a60e9092a3,aug,2017,cool,auto,louisville,719.000000,740.000000,690.000000,30.0,False,True,True
4,06e700cffb486061579c6d328729f3a60e9092a3,aug,2017,cool,hold,louisville,725.250000,750.000000,750.000000,30.0,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2927,fd49781b1b98414ed0495e079dd5435a549a7a86,jun,2021,auto,hold,Alexandria,701.587084,696.886497,643.115460,5.0,False,False,True
2928,fd49781b1b98414ed0495e079dd5435a549a7a86,jun,2021,heat,hold,Alexandria,703.504950,688.287129,688.287129,5.0,False,False,True
2929,fefca5ecf57c50c2f337a1de4577dae3d020f2cd,jun,2021,cool,hold,Lexington,729.707865,720.415730,719.842697,8.0,True,False,True
2930,ff1640c73f967b557d8c64dff6f8139a85e71d38,jun,2021,auto,hold,Georgetown,691.604651,686.279070,646.674419,15.0,True,False,True


In [190]:
# Save the all-months frame for KY one level above the per-month folder; row labels are left out.
KY_all_csv = "Scraper_Output/State_Month_Day/KY_all_day.csv"
KY_all.to_csv(KY_all_csv, index=False, header=True)

In [191]:
# Data check to make sure the state was selected correctly in the BigQuery SQL queries:
# print the distinct ProvinceState values found in every monthly frame.
monthly_frames = {
    "jan_2017": jan_2017,
    "jan_2018": jan_2018,
    "jan_2019": jan_2019,
    "jan_2020": jan_2020,
    "jan_2021": jan_2021,
    "feb_2017": feb_2017,
    "feb_2018": feb_2018,
    "feb_2019": feb_2019,
    "feb_2020": feb_2020,
    "feb_2021": feb_2021,
    "jun_2017": jun_2017,
    "jun_2018": jun_2018,
    "jun_2019": jun_2019,
    "jun_2020": jun_2020,
    "jun_2021": jun_2021,
    "jul_2017": jul_2017,
    "jul_2018": jul_2018,
    "jul_2019": jul_2019,
    "jul_2020": jul_2020,
    "jul_2021": jul_2021,
    "aug_2017": aug_2017,
    "aug_2018": aug_2018,
    "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017,
    "dec_2018": dec_2018,
    "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# Dicts preserve insertion order, so the lines print in the order listed above.
for frame_name, frame in monthly_frames.items():
    print(f"Unique {frame_name}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['KY']
Unique jan_2018: ['KY']
Unique jan_2019: ['KY']
Unique jan_2020: ['KY']
Unique jan_2021: ['KY']
Unique feb_2017: ['KY']
Unique feb_2018: ['KY']
Unique feb_2019: ['KY']
Unique feb_2020: ['KY']
Unique feb_2021: ['KY']
Unique jun_2017: ['KY']
Unique jun_2018: ['KY']
Unique jun_2019: ['KY']
Unique jun_2020: ['KY']
Unique jun_2021: ['KY']
Unique jul_2017: ['KY']
Unique jul_2018: ['KY']
Unique jul_2019: ['KY']
Unique jul_2020: ['KY']
Unique jul_2021: ['KY']
Unique aug_2017: ['KY']
Unique aug_2018: ['KY']
Unique aug_2019: ['KY']
Unique aug_2020: ['KY']
Unique dec_2017: ['KY']
Unique dec_2018: ['KY']
Unique dec_2019: ['KY']
Unique dec_2020: ['KY']
