# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the 5 years (2017-2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" extract for OH into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2017-jan-day-OH.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((jan_2017[setpoint_cols] >= 850) | (jan_2017[setpoint_cols] <= 600)).any(axis=1)
jan_2017 = jan_2017.drop(index=jan_2017.index[is_outlier.to_numpy()])

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,80284b61e03d5549598520c1638b1df28725275d,2017-01-01 11:50:00 UTC,heat,hold,686,680,680,OH,Barberton,100,False,False,False,Gas
1,4d03087a9e3ec9aea793811b5b18a51ee2e9105c,2017-01-10 13:35:00 UTC,heat,auto,636,640,640,OH,Akron,0,True,False,False,Gas
2,62fa780fbd9270a5e5f44566877c2268164103f8,2017-01-07 19:00:00 UTC,heat,hold,700,700,700,OH,Greenville,65,False,False,False,Gas
3,896eac8a5abb0d5eafdeef9aebf557aa01f2db27,2017-01-21 19:35:00 UTC,heat,hold,672,659,686,OH,Grove City,15,False,False,False,Gas
4,2c7458fa73fa31803c17e4afb79e341bf6c05f78,2017-01-15 15:35:00 UTC,heat,auto,685,690,690,OH,Worthington,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380931,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-01-09 18:45:00 UTC,heat,auto,641,650,640,OH,Cincinnati,115,False,False,False,Gas
380932,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-01-11 15:50:00 UTC,heat,hold,663,650,640,OH,Cincinnati,115,False,False,False,Gas
380933,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-01-24 17:05:00 UTC,heat,hold,652,650,650,OH,Cincinnati,115,False,False,False,Gas
380934,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-01-11 15:05:00 UTC,heat,hold,668,650,640,OH,Cincinnati,115,False,False,False,Gas


In [4]:
# Tag every row with its source year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_column_names)

In [6]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
009c68a7b26c494e4ebacfae61ef65b502eea795,Jan,2017,heat,auto,Worthington,670.000000,781.000000,652.000000,55.0,False,False,False
011b43c5a16016373499bcec61b788772b0d7b50,Jan,2017,heat,auto,Saint Bernard,714.527378,715.636888,715.636888,110.0,False,False,False
011b43c5a16016373499bcec61b788772b0d7b50,Jan,2017,heat,hold,Saint Bernard,717.884687,717.938004,717.938004,110.0,False,False,False
016e7f0b54c10c0156112e069c90dedec6be0afa,Jan,2017,heat,auto,Columbus,656.258065,779.709677,610.677419,45.0,False,False,False
016e7f0b54c10c0156112e069c90dedec6be0afa,Jan,2017,heat,hold,Columbus,689.434783,700.739130,699.695652,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fe3606accf6709a05203758e35c4d4f3ad47cd8a,Jan,2017,heat,hold,Cincinnati,667.637993,667.245878,663.488889,115.0,False,False,False
fe3f33fd0cd9f3230a6558a6afe6ee11101dde70,Jan,2017,heat,hold,Avon,703.476190,706.317460,705.365079,0.0,True,False,False
ff2621b99a533a2fdf32c4aa63e8d7dc3907f35a,Jan,2017,auto,auto,Clyde,637.607143,710.000000,634.773810,60.0,False,False,False
ff343b1c3e538fb14818836cd9b5cb9796af1af5,Jan,2017,heat,auto,Dayton,688.426829,690.085366,690.085366,15.0,False,False,False


In [7]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
jan_2017_ave.to_csv(path_or_buf="data/day/OH/jan/jan_2017_ave.csv", index=True, header=True)

### 2018 January Day

In [8]:
# Load the 2018 January "day" extract for OH into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2018-jan-day-OH.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((jan_2018[setpoint_cols] >= 850) | (jan_2018[setpoint_cols] <= 600)).any(axis=1)
jan_2018 = jan_2018.drop(index=jan_2018.index[is_outlier.to_numpy()])

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0df8706d8417531484d8ad82c0f198775481d7b8,2018-01-07 18:05:00 UTC,heat,hold,776,675,675,OH,Chardon,6,False,False,False,Gas
1,a7fbbb09ebc1569ad4d23c226db968de77d0bd31,2018-01-24 14:35:00 UTC,auto,hold,736,785,735,OH,New Philadelphia,35,False,False,False,Gas
2,221d242d12b21c5ad6329cb44dfd47a5253c912f,2018-01-14 19:40:00 UTC,heat,hold,690,695,695,OH,Cleveland heights,0,True,False,False,Gas
3,f9fe7774bc9e750737ae5ee5e4e72e8212206a24,2018-01-29 10:25:00 UTC,heat,hold,680,679,679,OH,Pepper Pike,30,False,False,False,Gas
4,eaa838ee68527adc30409737bea8b4639b75729f,2018-01-23 14:55:00 UTC,heat,auto,737,733,733,OH,Columbus,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
898650,1621d5218d23b849fb1105a74b54d8e25f6dfaef,2018-01-17 16:10:00 UTC,auto,auto,656,760,660,OH,Columbus,5,False,False,False,Gas
898651,cad0f83cfc16638b1dd5cb32e8ad389e1e8a04a6,2018-01-25 11:50:00 UTC,auto,hold,676,760,680,OH,Canal Winchester,18,False,False,False,Gas
898652,900132fbc2d5215844524e66ca4b8c2b02b1bf7c,2018-01-09 16:20:00 UTC,heat,auto,698,760,700,OH,Dublin,10,False,False,True,Electric
898653,623e67bd6798ab5a76f9c95d9512c10cb993207e,2018-01-14 17:50:00 UTC,auto,hold,685,760,690,OH,Cincinnati,117,False,False,False,Gas


In [10]:
# Tag every row with its source year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_column_names)

In [12]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
jan_2018_ave.to_csv(path_or_buf="data/day/OH/jan/jan_2018_ave.csv", index=True, header=True)

### 2019 January Day

In [14]:
# Load the 2019 January "day" extract for OH into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2019-jan-day-OH.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((jan_2019[setpoint_cols] >= 850) | (jan_2019[setpoint_cols] <= 600)).any(axis=1)
jan_2019 = jan_2019.drop(index=jan_2019.index[is_outlier.to_numpy()])

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b2e25758f03c4bf68b8314b63f8d66ee14ffb5c4,2019-01-04 17:55:00 UTC,heat,hold,769,737,770,OH,Columbus,40,True,False,False,Gas
1,b2e25758f03c4bf68b8314b63f8d66ee14ffb5c4,2019-01-23 13:35:00 UTC,heat,hold,748,737,750,OH,Columbus,40,True,False,False,Gas
2,9ccef5a4260e584b4e40f1e3ea6f0d4b6e24f754,2019-01-17 15:55:00 UTC,auto,hold,711,775,725,OH,Powell,15,False,False,False,Gas
3,96ba197b72498eba5ed3b4b8e2e16b94f5409098,2019-01-18 18:35:00 UTC,auto,auto,725,775,725,OH,Wheelersburg,0,True,False,True,Electric
4,6a7ef1ea49bfe85c33c98284d27e15b56cc04b60,2019-01-23 19:45:00 UTC,auto,hold,704,755,695,OH,Kent,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1692508,b8d4e6d969350a452070a280344d1d3d83472999,2019-01-23 10:45:00 UTC,auto,hold,695,760,700,OH,Cincinnati,70,False,False,False,Gas
1692509,e79ce48976b37605b3476cc035103dfafdbecd21,2019-01-31 12:40:00 UTC,heat,hold,754,760,760,OH,Silver Lake,0,False,False,False,Gas
1692510,6d4b27215d130980cc6a100b8ec97b2fe6bc2155,2019-01-16 15:50:00 UTC,heat,auto,747,760,760,OH,Pataskala,20,True,False,False,Gas
1692511,b013b5c434f1e0b39450e27a80a6cf75f8028ddd,2019-01-11 18:45:00 UTC,auto,hold,709,760,710,OH,Lake Milton,0,False,False,False,Gas


In [16]:
# Tag every row with its source year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_column_names)

In [18]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
jan_2019_ave.to_csv(path_or_buf="data/day/OH/jan/jan_2019_ave.csv", index=True, header=True)

### 2020 January Day

In [20]:
# Load the 2020 January "day" extract for OH into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2020-jan-day-OH.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((jan_2020[setpoint_cols] >= 850) | (jan_2020[setpoint_cols] <= 600)).any(axis=1)
jan_2020 = jan_2020.drop(index=jan_2020.index[is_outlier.to_numpy()])

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,12d18d15c296b6d7f4847c6dbcfa90c19bc3224d,2020-01-04 19:35:00 UTC,heat,hold,680,685,685,OH,Arlington,9,True,False,False,Gas
1,1c3107c4a07fad34c7e49cee1438f67d775f34e3,2020-01-16 14:40:00 UTC,heat,hold,707,709,709,OH,Parma,60,True,False,False,Gas
2,573424176639b83053179b88d792954548fb7ea6,2020-01-06 18:30:00 UTC,heat,hold,713,723,723,OH,Plain City,0,True,False,False,Gas
3,9ca13e364ddac522b569bac959e27029c1cedde0,2020-01-03 11:50:00 UTC,auto,hold,665,722,670,OH,Plain City,0,False,False,False,Gas
4,0ac1b2d451e76db496bff3147115017b85f8cb1c,2020-01-21 17:25:00 UTC,auto,auto,700,772,697,OH,Columbus,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1813374,20cda76cb274e1ee3bcb804281a1994341deb3b7,2020-01-13 19:25:00 UTC,auto,auto,679,760,680,OH,New Albany,0,False,False,True,Electric
1813375,55154379d445742cee34122bfa856217852d6dae,2020-01-28 13:55:00 UTC,heat,auto,696,760,700,OH,Columbus,55,False,False,False,Gas
1813376,55154379d445742cee34122bfa856217852d6dae,2020-01-23 09:50:00 UTC,heat,hold,686,760,680,OH,Columbus,55,False,False,False,Gas
1813377,70b85ce083f1efae9e9d40f99feeab38ac58f507,2020-01-04 11:45:00 UTC,auto,hold,699,760,700,OH,Clayton,65,False,False,False,Gas


In [22]:
# Tag every row with its source year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_column_names)

In [24]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
jan_2020_ave.to_csv(path_or_buf="data/day/OH/jan/jan_2020_ave.csv", index=True, header=True)

### 2021 January Day

In [26]:
# Load the 2021 January "day" extract for OH into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2021-jan-day-OH.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((jan_2021[setpoint_cols] >= 850) | (jan_2021[setpoint_cols] <= 600)).any(axis=1)
jan_2021 = jan_2021.drop(index=jan_2021.index[is_outlier.to_numpy()])

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,311a6a24e918582bbfd47e11313ac9cf1a78851f,2021-01-23 18:40:00 UTC,heat,hold,641,641,641,OH,Groveport,0,True,False,False,Gas
1,33ca5c8fa4f78b557b27693aed64dbc558f655d5,2021-01-04 15:25:00 UTC,auto,hold,724,788,726,OH,University Heights,70,False,False,False,Gas
3,464fab203a2e49eff4a1df1359f4699647384115,2021-01-05 16:15:00 UTC,heat,hold,703,703,703,OH,Columbia Station,5,False,False,True,Electric
4,5d632e9aa10c3020726b0eab554781fc388ff757,2021-01-18 17:15:00 UTC,heat,hold,654,630,630,OH,Brecksville,79,True,False,False,Gas
5,f54ba177a31735d8cfd26bcf39d586d799a49394,2021-01-31 15:20:00 UTC,auto,hold,692,835,695,OH,Strongsville,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1083472,54c47043d91026d0142704ce3d79515d023019d9,2021-01-18 13:15:00 UTC,auto,hold,676,760,680,OH,Miamisburg,10,False,False,False,Gas
1083473,4bffaa083be900e4874ccd92653597981185c141,2021-01-17 16:15:00 UTC,auto,hold,689,760,690,OH,Mentor,5,True,False,False,Gas
1083474,95d6c5e3a7253f2a9a1d1eaab9e4c869d13a5db7,2021-01-29 13:00:00 UTC,auto,hold,675,760,680,OH,Northfield,68,False,False,False,Gas
1083475,55154379d445742cee34122bfa856217852d6dae,2021-01-02 13:30:00 UTC,heat,hold,651,760,660,OH,Columbus,55,False,False,False,Gas


In [28]:
# Tag every row with its source year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_column_names)

In [30]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
jan_2021_ave.to_csv(path_or_buf="data/day/OH/jan/jan_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the CSV files in the January output directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the order in which the files are later combined reproducible.
files = sorted(f for f in os.listdir("data/day/OH/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV named in `files` and concatenate them in a single call.
# Collecting the frames first avoids re-copying the accumulated data on each
# iteration and keeps an empty placeholder DataFrame out of the concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/OH/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the directory held no CSV files.
OH_jan = pd.concat(frames) if frames else pd.DataFrame()

OH_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,009c68a7b26c494e4ebacfae61ef65b502eea795,Jan,2017,heat,auto,Worthington,670.000000,781.000000,652.000000,55.0,False,False,False
1,011b43c5a16016373499bcec61b788772b0d7b50,Jan,2017,heat,auto,Saint Bernard,714.527378,715.636888,715.636888,110.0,False,False,False
2,011b43c5a16016373499bcec61b788772b0d7b50,Jan,2017,heat,hold,Saint Bernard,717.884687,717.938004,717.938004,110.0,False,False,False
3,016e7f0b54c10c0156112e069c90dedec6be0afa,Jan,2017,heat,auto,Columbus,656.258065,779.709677,610.677419,45.0,False,False,False
4,016e7f0b54c10c0156112e069c90dedec6be0afa,Jan,2017,heat,hold,Columbus,689.434783,700.739130,699.695652,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1279,fec504f45d44ded4ff359b64d2bb70a1a019f345,Jan,2021,heat,hold,Fairborn,685.741379,691.387931,691.318966,45.0,True,False,False
1280,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,Jan,2021,heat,hold,Mansfield,702.527830,705.736875,705.736875,0.0,False,False,False
1281,ff35d6fd6ca8dbb43a99e3efae22030989e2fb6b,Jan,2021,heat,hold,Toledo,724.158537,730.054878,729.890244,0.0,False,False,False
1282,ffb9ae558cdffd5abb2322361fd731c6ff75bd78,Jan,2021,heat,hold,Columbus,685.630137,698.945205,698.945205,0.0,False,False,False


In [34]:
# Save the combined January frame; index=False leaves the row index out of the file.
OH_jan.to_csv(path_or_buf="Scraper_Output/State_Month_Day/OH/OH_jan.csv", index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" extract for OH into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2017-feb-day-OH.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((feb_2017[setpoint_cols] >= 850) | (feb_2017[setpoint_cols] <= 600)).any(axis=1)
feb_2017 = feb_2017.drop(index=feb_2017.index[is_outlier.to_numpy()])

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6d4b27215d130980cc6a100b8ec97b2fe6bc2155,2017-02-28 17:25:00 UTC,heat,auto,691,720,690,OH,Pataskala,20,True,False,False,Gas
1,2a1515b0a5dd04d318a296946be1c7ce2bd06ddf,2017-02-10 19:50:00 UTC,heat,hold,665,670,670,OH,New Albany,25,False,False,False,Gas
2,84857b85942f30ee5c91cdc37df7b3447783d927,2017-02-08 19:35:00 UTC,heat,auto,693,690,690,OH,Warren,20,False,False,False,Gas
3,84c217250a6abbf8900a420ef693055e48e88d0b,2017-02-12 18:40:00 UTC,heat,auto,671,670,670,OH,West Chester,40,True,False,True,Electric
4,6d24e4a2ae6bdae593224e742aea8969515e9c43,2017-02-12 17:35:00 UTC,heat,hold,637,650,630,OH,Marysville,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
337720,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-02-11 18:05:00 UTC,heat,hold,690,680,680,OH,Cincinnati,115,False,False,False,Gas
337721,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-02-12 18:35:00 UTC,heat,auto,705,700,700,OH,Cincinnati,115,False,False,False,Gas
337722,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-02-20 12:25:00 UTC,heat,hold,653,680,680,OH,Cincinnati,115,False,False,False,Gas
337723,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2017-02-28 15:20:00 UTC,heat,hold,680,680,680,OH,Cincinnati,115,False,False,False,Gas


In [37]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lower-case "feb" - confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_column_names)

In [39]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
feb_2017_ave.to_csv(path_or_buf="data/day/OH/feb/feb_2017_ave.csv", index=True, header=True)

### 2018 February Day

In [41]:
# Load the 2018 February "day" extract for OH into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2018-feb-day-OH.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((feb_2018[setpoint_cols] >= 850) | (feb_2018[setpoint_cols] <= 600)).any(axis=1)
feb_2018 = feb_2018.drop(index=feb_2018.index[is_outlier.to_numpy()])

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,94ddac7cd73f9c4691fe231ef7665ee09fd6e985,2018-02-10 18:35:00 UTC,auto,hold,675,840,680,OH,Findlay,40,False,False,False,Gas
1,e55c141bbc0d842083d223e75d25a29d2c72647a,2018-02-09 16:30:00 UTC,heat,hold,666,665,665,OH,Chardon,6,False,False,False,Gas
2,175735f454c6f7739fefb6dfba73217450545a40,2018-02-01 10:50:00 UTC,heat,hold,710,715,715,OH,Columbus,69,True,False,False,Gas
3,3e4f91678686e55efb1c3f959d0e2ae3e77dc029,2018-02-04 16:55:00 UTC,auto,hold,694,715,695,OH,Dublin,5,False,False,False,Gas
4,24f892e6a5ae933d8bab077356bb682b44fdec23,2018-02-01 11:05:00 UTC,heat,hold,743,630,630,OH,Cincinnati,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
845192,dd61c8f5309598aab20a10dd1f9c195883f7ad26,2018-02-18 16:15:00 UTC,auto,hold,709,760,710,OH,Canfield,50,False,False,False,Gas
845193,9001027bb0338929f43ca74f7ed16fb3a9eb189b,2018-02-20 12:55:00 UTC,auto,auto,710,760,700,OH,Lancaster,27,False,False,False,Gas
845194,6eec3548ab9aa89bc38491edc2364f0ec36cb491,2018-02-20 18:55:00 UTC,auto,hold,731,760,700,OH,Liberty Township,10,False,False,False,Gas
845195,6d56ddd1c2fc5015ff2e70d18d15bda7111ad019,2018-02-02 12:15:00 UTC,heat,hold,754,760,760,OH,Columbus,60,False,False,False,Gas


In [43]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lower-case "feb" - confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_column_names)

In [45]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
feb_2018_ave.to_csv(path_or_buf="data/day/OH/feb/feb_2018_ave.csv", index=True, header=True)

### 2019 February Day

In [47]:
# Load the 2019 February "day" extract for OH into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2019-feb-day-OH.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((feb_2019[setpoint_cols] >= 850) | (feb_2019[setpoint_cols] <= 600)).any(axis=1)
feb_2019 = feb_2019.drop(index=feb_2019.index[is_outlier.to_numpy()])

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,63c5b58ff22cec6533fc978e34f014423c7f15bb,2019-02-28 13:30:00 UTC,heat,hold,689,685,685,OH,Springfield,40,False,False,False,Gas
1,c3360d2bd9bf79b07b0f6f9fbb72110efcf2fb5b,2019-02-22 16:30:00 UTC,heat,hold,719,719,719,OH,North Royalton,0,False,False,False,Gas
2,e51c0222888ea3f787b1006bf8612c07d7a78293,2019-02-25 12:35:00 UTC,heat,hold,654,729,670,OH,Cincinnati,10,True,False,True,Electric
3,2de0253bf4cd7105ea87f815860df50544f4d6b6,2019-02-05 11:40:00 UTC,heat,hold,685,689,689,OH,Columbus,70,False,False,False,Gas
4,cdbab601a551d3f07e58ffec0a81a6f5f133ea98,2019-02-13 17:25:00 UTC,auto,hold,689,813,653,OH,Hamilton,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1176501,6b3e59f39b725c428d638f327956fda1253c10d7,2019-02-13 18:15:00 UTC,auto,hold,699,765,705,OH,Lisbon,0,True,False,True,Electric
1176502,7e1b47f479db0b6c67b1bb1d9ccb945caf77398f,2019-02-21 19:45:00 UTC,auto,hold,704,765,705,OH,Columbus,10,False,False,False,Gas
1176503,7e1b47f479db0b6c67b1bb1d9ccb945caf77398f,2019-02-27 13:10:00 UTC,auto,hold,708,765,705,OH,Columbus,10,False,False,False,Gas
1176504,04bf4b129a5e320dbb00db5170dc0040a2ac9f0c,2019-02-25 14:25:00 UTC,auto,hold,710,765,715,OH,Oxford,5,False,False,True,Electric


In [49]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lower-case "feb" - confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_column_names)

In [51]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
feb_2019_ave.to_csv(path_or_buf="data/day/OH/feb/feb_2019_ave.csv", index=True, header=True)

### 2020 February Day

In [53]:
# Load the 2020 February "day" extract for OH into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2020-feb-day-OH.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((feb_2020[setpoint_cols] >= 850) | (feb_2020[setpoint_cols] <= 600)).any(axis=1)
feb_2020 = feb_2020.drop(index=feb_2020.index[is_outlier.to_numpy()])

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e3620660402328ff25728ea4311648e453d05f2a,2020-02-07 18:30:00 UTC,heat,hold,700,705,705,OH,McDonald,100,False,False,False,Gas
1,97f8fae10c08dc1fd970ee3ae38178299e5611b9,2020-02-09 07:15:00 UTC,heat,hold,672,674,674,OH,North Canton,15,True,False,False,Gas
2,24f892e6a5ae933d8bab077356bb682b44fdec23,2020-02-24 09:25:00 UTC,heat,auto,720,764,704,OH,Cincinnati,20,True,False,False,Gas
3,aab92214721d41aa3f5760873353ef2d5d096aa2,2020-02-27 13:40:00 UTC,heat,hold,667,672,672,OH,North Ridgeville,0,False,False,False,Gas
4,cb2327097b3f4df248da91aa6dc7b3659ac96f96,2020-02-06 17:20:00 UTC,heat,auto,720,725,725,OH,Columbus,80,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1654677,1f52554603f12d496fc139ebba95e1d01be7f7f4,2020-02-06 18:10:00 UTC,auto,auto,700,760,700,OH,Springboro,0,False,False,False,Gas
1654678,55154379d445742cee34122bfa856217852d6dae,2020-02-09 17:55:00 UTC,heat,auto,701,760,710,OH,Columbus,55,False,False,False,Gas
1654679,b816dad77e7d102dbae8bab80756f8dbf4728b7e,2020-02-10 13:30:00 UTC,auto,auto,697,760,700,OH,Barberton,0,False,False,False,Gas
1654680,921f69e62f95da37485f02d1db7a7eb0589421b6,2020-02-15 13:00:00 UTC,auto,hold,698,760,700,OH,Upper Arlington,45,False,False,False,Gas


In [55]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lower-case "feb" - confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_column_names)

In [57]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
feb_2020_ave.to_csv(path_or_buf="data/day/OH/feb/feb_2020_ave.csv", index=True, header=True)

### 2021 February Day

In [59]:
# Load the 2021 February "day" extract for OH into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2021-feb-day-OH.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
is_outlier = ((feb_2021[setpoint_cols] >= 850) | (feb_2021[setpoint_cols] <= 600)).any(axis=1)
feb_2021 = feb_2021.drop(index=feb_2021.index[is_outlier.to_numpy()])

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e799d377e5952c301b55e7ad34636a008ffb6257,2021-02-27 14:50:00 UTC,heat,hold,634,630,630,OH,broadview hts,50,False,False,False,Gas
1,f54ba177a31735d8cfd26bcf39d586d799a49394,2021-02-13 19:30:00 UTC,auto,hold,695,835,695,OH,Strongsville,30,False,False,False,Gas
2,aab92214721d41aa3f5760873353ef2d5d096aa2,2021-02-01 16:40:00 UTC,heat,hold,718,712,712,OH,North Ridgeville,0,False,False,False,Gas
3,1e95fd8ed77f3dc21aa85dc573ba686f5490de89,2021-02-18 15:50:00 UTC,auto,hold,674,732,682,OH,Dayton,45,True,False,True,Electric
4,3756c4c08950b45f277fe65d28f01ea3f9540702,2021-02-21 17:45:00 UTC,heat,hold,725,719,729,OH,Parma,49,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
950468,70b85ce083f1efae9e9d40f99feeab38ac58f507,2021-02-03 12:40:00 UTC,heat,hold,764,760,760,OH,Clayton,65,False,False,False,Gas
950469,4c8d984c2c30340dc7f12130c0eef8ea9e28201c,2021-02-09 14:50:00 UTC,auto,hold,698,760,710,OH,Powell,25,False,False,False,Gas
950470,4aa426d28dda7701935cc73194c2cec4eccbb7b1,2021-02-09 13:20:00 UTC,auto,hold,698,760,700,OH,Bellbrook,5,True,False,False,Gas
950471,4aa426d28dda7701935cc73194c2cec4eccbb7b1,2021-02-28 17:50:00 UTC,auto,hold,699,760,700,OH,Bellbrook,5,True,False,False,Gas


In [61]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one uses lower-case "feb" - confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Label the three temperature columns as averages ("_ave") ahead of the aggregation.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_column_names)

In [63]:
# Mean of every numeric column per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is passed explicitly because
# the frame also holds text columns (e.g. date_time, ProvinceState), and
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Write the aggregated frame to CSV; index=True stores the group-by keys
# (which form the index) as the leading columns of the file.
feb_2021_ave.to_csv(path_or_buf="data/day/OH/feb/feb_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the CSV files in the February output directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the order in which the files are later combined reproducible.
files = sorted(f for f in os.listdir("data/day/OH/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV named in `files` and concatenate them in a single call.
# Collecting the frames first avoids re-copying the accumulated data on each
# iteration and keeps an empty placeholder DataFrame out of the concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/OH/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the directory held no CSV files.
OH_feb = pd.concat(frames) if frames else pd.DataFrame()

OH_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,009c68a7b26c494e4ebacfae61ef65b502eea795,feb,2017,cool,hold,Worthington,681.000000,650.000000,650.000000,55.0,False,False,False
1,009c68a7b26c494e4ebacfae61ef65b502eea795,feb,2017,heat,hold,Worthington,685.068966,678.724138,678.724138,55.0,False,False,False
2,011b43c5a16016373499bcec61b788772b0d7b50,feb,2017,heat,auto,Saint Bernard,711.294658,711.483630,711.483630,110.0,False,False,False
3,011b43c5a16016373499bcec61b788772b0d7b50,feb,2017,heat,hold,Saint Bernard,709.849336,707.630757,707.630757,110.0,False,False,False
4,016e7f0b54c10c0156112e069c90dedec6be0afa,feb,2017,heat,auto,Columbus,669.392857,779.000000,612.214286,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1212,fed9e3b26b40cf465b48adbb5240a68a28d57dfd,feb,2021,auto,hold,Columbus,672.205882,735.000000,675.000000,20.0,False,False,False
1213,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,feb,2021,heat,hold,Mansfield,711.616726,715.240569,715.245907,0.0,False,False,False
1214,ff35d6fd6ca8dbb43a99e3efae22030989e2fb6b,feb,2021,heat,hold,Toledo,725.733333,730.000000,730.000000,0.0,False,False,False
1215,ff35df97caa10f2dab17f309525c11d9f9ea2f22,feb,2021,heat,hold,Chagrin Falls,715.000000,684.000000,674.000000,10.0,False,False,False


In [67]:
# Save the combined February frame; the row index is not written
OH_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/OH/OH_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 readings for Ohio
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2017-jun-day-OH.csv")

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jun_2017.drop(
    index=jun_2017.loc[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,67f6c95b705d8bf7e95553f87143cb6a9e7edf7f,2017-06-01 17:00:00 UTC,cool,hold,695,710,710,OH,New Albany,0,False,False,False,Gas
1,2fd00106b7aaee28c57b2f19fbe85cce6c4ae316,2017-06-29 12:10:00 UTC,cool,hold,680,675,675,OH,Toledo,65,False,False,False,Gas
2,471bc45347168c4b2c8018ed3e413d577e914f4c,2017-06-23 11:40:00 UTC,cool,hold,734,760,730,OH,Columbus,120,False,False,False,Gas
3,c34dbfe5eaefca8019db79eb7a6d2f3f69d094a0,2017-06-29 17:50:00 UTC,cool,auto,742,740,740,OH,Walton Hills,25,False,False,False,Gas
4,af0a9f8e7d0744eecb023e32c7e50a1630ef194f,2017-06-07 18:05:00 UTC,heat,hold,694,670,670,OH,Gnadenhutten,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
533889,fddaeba6b916a3b0ebf90a8ad6aff597eebf1862,2017-06-15 17:25:00 UTC,auto,auto,745,740,640,OH,Cincinnati,0,False,False,False,Gas
533890,fddaeba6b916a3b0ebf90a8ad6aff597eebf1862,2017-06-04 15:30:00 UTC,cool,hold,739,730,730,OH,Cincinnati,0,False,False,False,Gas
533891,fddaeba6b916a3b0ebf90a8ad6aff597eebf1862,2017-06-18 18:30:00 UTC,auto,hold,760,760,640,OH,Cincinnati,0,False,False,False,Gas
533892,fddaeba6b916a3b0ebf90a8ad6aff597eebf1862,2017-06-18 14:55:00 UTC,auto,hold,762,760,640,OH,Cincinnati,0,False,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings)
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the June 2017 averages to disk; the index is written as well
jun_2017_ave.to_csv(path_or_buf="data/day/OH/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the raw June 2018 readings for Ohio
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2018-jun-day-OH.csv")

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jun_2018.drop(
    index=jun_2018.loc[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c3107d08ad2c49384298802ffe55168bb4cd894b,2018-06-01 11:10:00 UTC,cool,auto,733,783,735,OH,Brecksville,60,False,False,False,Gas
1,1514efc93d4ab6fdab0c26a1cd04900977383d72,2018-06-24 19:00:00 UTC,auto,auto,750,750,655,OH,Columbus,50,False,False,False,Gas
2,e57876a557aae9cc75b09b18e146952e57a5c1fa,2018-06-03 18:45:00 UTC,auto,hold,725,723,673,OH,Copley,40,False,False,False,Gas
3,b85794524caf3789cbd05f58863703b051bfd430,2018-06-27 16:00:00 UTC,auto,hold,716,725,675,OH,Shaker Heights,80,False,False,False,Gas
4,a755f3ccb0094232655c829653d9c2a0f520f30f,2018-06-24 17:15:00 UTC,auto,hold,721,725,675,OH,Struthers,65,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1277310,29ac5a9b4ef25cba0725f515af75f84b9c00e910,2018-06-26 14:05:00 UTC,cool,hold,747,760,760,OH,Cincinnati,10,True,False,False,Gas
1277311,84ff482e5caf916ec2272816ca8141e76384d5a5,2018-06-30 18:40:00 UTC,cool,auto,764,760,760,OH,Lakewood,90,False,False,False,Gas
1277312,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2018-06-16 16:00:00 UTC,cool,hold,762,760,760,OH,Cincinnati,115,False,False,False,Gas
1277313,a4efa2f2dde746408a61d9aaa63a469b5a9825a4,2018-06-19 12:05:00 UTC,cool,hold,762,760,760,OH,Lewis Center,5,False,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings)
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the June 2018 averages to disk; the index is written as well
jun_2018_ave.to_csv(path_or_buf="data/day/OH/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the raw June 2019 readings for Ohio
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2019-jun-day-OH.csv")

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jun_2019.drop(
    index=jun_2019.loc[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bd6cad2bfa36dc25c8403c4200433d253269f158,2019-06-14 15:35:00 UTC,auto,hold,748,757,677,OH,Wooster,35,True,False,True,Electric
1,903bc79c396aae816137e4a9405c0d19122e3723,2019-06-28 10:35:00 UTC,cool,auto,716,710,673,OH,Wadsworth,8,False,False,False,Gas
2,1b687b348356dc9c6148ea48cd2c52d67862b10f,2019-06-01 15:55:00 UTC,auto,hold,710,720,610,OH,Heath,40,False,False,False,Gas
3,306806ddbcfe52d070787d741e09cd4c67fc3c69,2019-06-01 19:00:00 UTC,cool,hold,734,748,684,OH,commercial point,15,False,False,False,Gas
4,7e1b47f479db0b6c67b1bb1d9ccb945caf77398f,2019-06-18 11:35:00 UTC,cool,hold,708,705,705,OH,Columbus,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1883448,d66aefc6f2cb719a6ed3258d59add7d30cbf1f76,2019-06-28 14:15:00 UTC,cool,auto,763,760,760,OH,Columbus,10,False,False,False,Gas
1883449,85e613bd17cb70626b1920551edad9c35696a2e7,2019-06-18 18:10:00 UTC,cool,hold,705,760,760,OH,Swanton,15,False,False,False,Gas
1883450,58c5e6ee753ebac81189bdaa25c07d199c2f3272,2019-06-23 17:15:00 UTC,cool,auto,784,800,760,OH,Pataskala,20,False,False,False,Gas
1883451,6da179f977b52899bd6a7d9dcbd516237bfd7957,2019-06-07 11:45:00 UTC,cool,auto,748,750,760,OH,Sweetwater,30,False,False,False,Gas


In [82]:
# Tag every row with its year and month (both stored as strings)
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the June 2019 averages to disk; the index is written as well
jun_2019_ave.to_csv(path_or_buf="data/day/OH/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the raw June 2020 readings for Ohio
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2020-jun-day-OH.csv")

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jun_2020.drop(
    index=jun_2020.loc[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a41ced239935715cfc1f67aa2799d19292ff36dc,2020-06-11 12:30:00 UTC,cool,hold,742,749,749,OH,Cincinnati,40,False,False,False,Gas
1,fb847842ee1a025b9577d4be7f325601b1041563,2020-06-24 16:40:00 UTC,cool,auto,737,750,696,OH,Medina,0,True,False,False,Gas
2,e2feab072846f2c7a77601163bb43b1b434b9a37,2020-06-16 12:35:00 UTC,cool,hold,730,735,735,OH,Huber Heights,60,False,False,True,Electric
3,5d696899ed693079eff7e4aecd098bf5d8561133,2020-06-01 17:40:00 UTC,auto,hold,686,770,644,OH,Columbus,10,True,False,False,Gas
5,6c554bb37415799c6e716cf50b837033321a079a,2020-06-05 15:25:00 UTC,auto,hold,731,717,657,OH,Cincinnati,95,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1887614,7ed378dd1f06909eeaea239282158a797c4bbef4,2020-06-04 14:50:00 UTC,cool,hold,756,760,760,OH,Cincinnati,35,True,False,False,Gas
1887615,dfa80e67d12707669249aad19e06f40ca6b0e32b,2020-06-23 12:30:00 UTC,cool,hold,761,760,760,OH,Cincinnati,77,False,False,False,Gas
1887616,ab915ab60c82a79a0677ad9d788bb05b6f9e1d32,2020-06-09 15:00:00 UTC,cool,auto,765,760,760,OH,Dublin,10,True,False,False,Gas
1887617,c27cc911bcf2537ba38a872d129f35343cfba248,2020-06-11 15:05:00 UTC,cool,hold,744,760,760,OH,Wooster,15,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings)
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the June 2020 averages to disk; the index is written as well
jun_2020_ave.to_csv(path_or_buf="data/day/OH/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the raw June 2021 readings for Ohio
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2021-jun-day-OH.csv")

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jun_2021.drop(
    index=jun_2021.loc[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4f670188fa30ea473fbe240808478827a2e3860a,2021-06-29 17:05:00 UTC,cool,hold,666,665,665,OH,,0,False,False,False,Gas
1,a26f6b9b3cc9502bbb52336d02264d7ede4366d4,2021-06-09 14:40:00 UTC,auto,hold,720,717,667,OH,Hilliard,15,False,False,False,Gas
3,63af810e8a1c560b9cb2c79fc60fee8b5c47e4ae,2021-06-13 14:50:00 UTC,cool,hold,692,687,687,OH,Galloway,50,False,False,False,Gas
4,6c1f9b6116f131884ca88e2c3649b6f248108621,2021-06-03 15:05:00 UTC,auto,hold,702,699,649,OH,Dublin,0,False,False,False,Gas
5,a2e684582faf67e5e325e05a9c9cbb9037178bb5,2021-06-02 14:25:00 UTC,cool,hold,715,692,692,OH,Highland Heights,5,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1065820,6b633807ed8c6b61f5f2654f9c3793097ff9e326,2021-06-16 11:25:00 UTC,cool,hold,728,760,760,OH,Lewis Center,20,False,False,False,Gas
1065821,10065cc2a678c00428084c38b0fdd80f5f7c25ce,2021-06-07 12:05:00 UTC,cool,hold,760,760,760,OH,Liberty Township,30,False,False,False,Gas
1065822,adbb008f21ea6ef5491522c172ebfca1ac28aed0,2021-06-16 19:30:00 UTC,cool,hold,748,760,760,OH,Mount Vernon,50,False,False,False,Gas
1065823,e105f6890e9ab28a4363c4162dd54e61ad5ec459,2021-06-20 19:40:00 UTC,cool,hold,764,760,760,OH,Middletown,15,False,False,False,Gas


In [94]:
# Tag every row with its year and month (both stored as strings)
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the June 2021 averages to disk; the index is written as well
jun_2021_ave.to_csv(path_or_buf="data/day/OH/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in every per-year average CSV from this month's folder for the state
2. Export them as a single combined CSV

In [98]:
# List the per-year average CSVs for June.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/OH/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies all previously accumulated rows on every pass.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/OH/jun/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list, so keep the old result (an empty frame) when no files exist
OH_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

OH_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0060e7a0af03651e6426442471e5ff5857f6594a,jun,2017,auto,hold,Ashland,713.438596,715.000000,665.000000,0.0,False,False,False
1,008fb72df6f08eeb7284d5fa4f148833251ff5e3,jun,2017,auto,hold,Columbus,693.688931,690.714958,640.714958,90.0,False,False,True
2,009c68a7b26c494e4ebacfae61ef65b502eea795,jun,2017,cool,hold,Worthington,754.881579,741.447368,741.447368,55.0,False,False,False
3,01193423d765e95e26315c356123ba9f02b6c584,jun,2017,cool,auto,Berea,708.050847,709.949153,699.179661,7.0,False,False,False
4,01193423d765e95e26315c356123ba9f02b6c584,jun,2017,cool,hold,Berea,709.126214,708.679612,708.475728,7.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1507,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jun,2021,auto,hold,Mansfield,731.915663,732.542169,643.638554,0.0,False,False,False
1508,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jun,2021,cool,hold,Mansfield,724.798413,725.266449,725.241717,0.0,False,False,False
1509,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jun,2021,heat,hold,Mansfield,707.884393,675.522158,675.522158,0.0,False,False,False
1510,ffb9ae558cdffd5abb2322361fd731c6ff75bd78,jun,2021,cool,hold,Columbus,742.434783,740.000000,740.000000,0.0,False,False,False


In [100]:
# Save the combined June frame; the row index is not written
OH_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/OH/OH_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 readings for Ohio
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2017-jul-day-OH.csv")

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jul_2017.drop(
    index=jul_2017.loc[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af0a9f8e7d0744eecb023e32c7e50a1630ef194f,2017-07-15 11:45:00 UTC,cool,hold,759,760,760,OH,Gnadenhutten,60,False,False,False,Gas
1,a995e508a6f0a9821aa4dc47cbc68a1d12f29eb5,2017-07-09 12:25:00 UTC,cool,auto,699,700,670,OH,Avon Lake,60,False,False,False,Gas
2,84c217250a6abbf8900a420ef693055e48e88d0b,2017-07-16 16:25:00 UTC,cool,hold,761,770,770,OH,West Chester,40,True,False,True,Electric
3,e08fdbd62896b4e4bad67cfd10db44afc531b562,2017-07-30 17:15:00 UTC,auto,hold,705,700,630,OH,Galena,25,False,False,False,Gas
4,07a4af86e62b2cfbae7dbc9c3d39654d6ab7336f,2017-07-09 19:05:00 UTC,auto,auto,709,710,650,OH,Twinsburg,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
678436,a73f3b35d6a66d7f61485569381d52a4a020b12b,2017-07-26 14:35:00 UTC,cool,hold,700,700,700,OH,North Ridgeville,0,False,False,False,Gas
678437,c4a14fa64b84b36a49792fe4d2e7fe7480e47da9,2017-07-23 19:15:00 UTC,cool,hold,733,730,720,OH,North Ridgeville,5,False,False,False,Gas
678438,15b5087ead26b36704ceedaac06615366b3cbc88,2017-07-01 17:50:00 UTC,auto,auto,740,740,680,OH,North Ridgeville,5,False,False,False,Gas
678439,a73f3b35d6a66d7f61485569381d52a4a020b12b,2017-07-27 13:45:00 UTC,cool,hold,699,700,700,OH,North Ridgeville,0,False,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings)
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the July 2017 averages to disk; the index is written as well
jul_2017_ave.to_csv(path_or_buf="data/day/OH/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the raw July 2018 readings for Ohio
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2018-jul-day-OH.csv")

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jul_2018.drop(
    index=jul_2018.loc[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b67364625f51fa60b7ed5106dda6e1798bfe90d1,2018-07-21 17:30:00 UTC,auto,hold,734,735,665,OH,Russellville,8,True,False,True,Electric
1,84d15a2a950231abdcdf5a1be386ac235899d805,2018-07-17 18:50:00 UTC,auto,hold,717,715,665,OH,Willowick,50,False,False,False,Gas
2,01d9f0cfe5da6643600a7812c866cdf8900ac495,2018-07-15 12:00:00 UTC,cool,hold,727,725,725,OH,Amherst,20,False,False,False,Gas
3,065121c3ed20e29b8427db09d79d6652897ee9ad,2018-07-16 15:45:00 UTC,cool,hold,730,725,725,OH,Mason,0,False,False,False,Gas
4,01dea9eda7bb26be14d25b749be1b047cccce34b,2018-07-22 14:15:00 UTC,cool,hold,653,707,707,OH,Amherst,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1519278,9ba1f8a7059062449ba227d4827ab4b8eb140c64,2018-07-29 16:50:00 UTC,cool,hold,743,760,760,OH,South Euclid,80,False,False,False,Gas
1519279,712b15a44e5910e4e4d84cf681bb2fba1f231009,2018-07-05 11:50:00 UTC,cool,hold,761,760,760,OH,Avon Lake,10,False,False,False,Gas
1519280,264ad7f2bcdfead7153236b98a5c9e899a899133,2018-07-06 15:15:00 UTC,cool,hold,745,760,760,OH,Lyndhurst,55,False,False,False,Gas
1519281,58c5e6ee753ebac81189bdaa25c07d199c2f3272,2018-07-17 13:55:00 UTC,cool,auto,763,760,760,OH,Pataskala,20,False,False,False,Gas


In [109]:
# Tag every row with its year and month (both stored as strings)
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the July 2018 averages to disk; the index is written as well
jul_2018_ave.to_csv(path_or_buf="data/day/OH/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the raw July 2019 readings for Ohio
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2019-jul-day-OH.csv")

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jul_2019.drop(
    index=jul_2019.loc[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,937bdeecacbcf2ea33bc8fbb8fd6583f5610e0b9,2019-07-22 14:25:00 UTC,auto,hold,728,745,645,OH,Westerville,50,False,False,True,Electric
1,c6455e14fc4fc945b14ccbb0064f10d9b35e2da4,2019-07-29 17:05:00 UTC,cool,hold,737,728,728,OH,Columbus,60,False,False,False,Gas
2,64b652c09b4637db1eae5343cd4d245d55b93cc2,2019-07-03 14:20:00 UTC,cool,hold,694,695,655,OH,Akron,39,False,False,False,Gas
3,c318a76d716ff865c3709e73afe1d8f98f49fee6,2019-07-31 17:00:00 UTC,cool,hold,708,707,707,OH,Loveland,90,False,False,True,Electric
4,af23e1735a0f78ca556eccc78d453b5d406c8fd6,2019-07-19 18:45:00 UTC,cool,hold,733,735,735,OH,Columbus,9,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2168359,eb73d3c2af864fce075fb91c195268aa4ffc4c47,2019-07-15 17:10:00 UTC,cool,hold,767,760,760,OH,Lewis Center,7,False,False,False,Gas
2168360,fc5c7128ae0de2425dfd9af85e140839dd8329ec,2019-07-03 13:20:00 UTC,cool,hold,754,760,760,OH,Dublin,50,False,False,False,Gas
2168361,e227fbc107ea9a7ec589cea88089aa932032731d,2019-07-13 19:10:00 UTC,cool,hold,767,760,760,OH,Cincinnati,80,False,False,False,Gas
2168362,6d245f4dc87906ae4724751e83e2449b37b3f70a,2019-07-26 12:35:00 UTC,cool,auto,741,760,760,OH,Pickerington,19,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings)
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the July 2019 averages to disk; the index is written as well
jul_2019_ave.to_csv(path_or_buf="data/day/OH/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the raw July 2020 readings for Ohio
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2020-jul-day-OH.csv")

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jul_2020.drop(
    index=jul_2020.loc[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,97ab5ce11ce37ebacadc1739f61e8648875cf8d7,2020-07-17 18:05:00 UTC,cool,auto,680,680,665,OH,Akron,77,True,False,False,Gas
1,12d18d15c296b6d7f4847c6dbcfa90c19bc3224d,2020-07-19 12:10:00 UTC,auto,auto,701,700,625,OH,Arlington,9,True,False,False,Gas
2,d637d34d3f41957618ba27116c2ac7763a2baf8c,2020-07-15 13:45:00 UTC,cool,hold,725,720,699,OH,Dayton,99,True,False,False,Gas
3,8f0b3aeda65f3ea3f21d353d276484b19d2c2fdb,2020-07-06 13:35:00 UTC,cool,auto,717,730,723,OH,Dublin,15,False,False,False,Gas
5,a759e96fabe6a7528d6c566ec219a30380779d73,2020-07-03 10:30:00 UTC,cool,hold,750,759,759,OH,Cleveland,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2125097,6b633807ed8c6b61f5f2654f9c3793097ff9e326,2020-07-30 18:15:00 UTC,cool,auto,762,760,760,OH,Lewis Center,20,False,False,False,Gas
2125098,039338b03d16ddfcb271a4baa9cb33c96e1cceca,2020-07-08 19:15:00 UTC,cool,hold,761,760,760,OH,Cleveland,70,False,False,False,Gas
2125099,ade8238209514b54156dd9cfa5cb9e3528f4fd71,2020-07-17 18:50:00 UTC,cool,hold,764,760,760,OH,Parma,0,False,False,False,Gas
2125100,d50a991159bd133759a8730af11b9b89a7aaab9b,2020-07-09 18:15:00 UTC,auto,hold,787,780,760,OH,Brecksville,45,False,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings)
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the July 2020 averages to disk; the index is written as well
jul_2020_ave.to_csv(path_or_buf="data/day/OH/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the raw July 2021 readings for Ohio
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2021-jul-day-OH.csv")

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped (in place) when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# NOTE(review): values look like tenths of a degree (850 -> 85.0) - confirm units.
jul_2021.drop(
    index=jul_2021.loc[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0a23ccf453156f758ff4aaa7cb79286a6a0b943a,2021-07-28 19:35:00 UTC,cool,hold,708,705,705,OH,Dayton,0,False,False,False,Gas
1,220863762ee1a0e25fccb9610d9a46c3209290b1,2021-07-29 15:55:00 UTC,auto,hold,733,726,676,OH,Beavercreek Township,17,False,False,False,Gas
2,f50bcffde442b05b31bb337552db6f67270ffa2f,2021-07-23 19:10:00 UTC,cool,hold,717,711,711,OH,Akron,90,False,False,False,Gas
4,4f841c0f66bf405db71cd92d8caf1bb7766cf0dd,2021-07-13 19:25:00 UTC,cool,hold,765,675,625,OH,Gates Mills,40,True,False,True,Electric
5,dd953d0a5dbdb5828e656b6cf03bc11fc318230e,2021-07-03 19:20:00 UTC,cool,hold,677,672,672,OH,Columbus,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1062392,a4052f486eff664274e7f05760425a0a66787ed1,2021-07-21 17:15:00 UTC,cool,hold,751,760,760,OH,Cincinnati,90,False,False,False,Gas
1062393,ee8e24019cdaafd0a975cbd49a11efac7541bb99,2021-07-22 11:35:00 UTC,cool,hold,739,760,760,OH,Solon,68,False,False,False,Gas
1062394,c27cc911bcf2537ba38a872d129f35343cfba248,2021-07-28 11:25:00 UTC,cool,hold,759,760,760,OH,Wooster,15,False,False,False,Gas
1062395,d0ae27636077bba08ffb2e6f733d51ad1688c047,2021-07-06 11:40:00 UTC,cool,hold,694,760,760,OH,Maumee,17,False,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings)
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" to label them as aggregates
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average every numeric column per (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean: pandas 2.x raises TypeError on them,
# while older versions dropped them silently, so results are unchanged there.
# NOTE(review): groupby discards rows whose key is NaN by default (e.g. a missing City) - confirm intended.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the July 2021 averages to disk; the index is written as well
jul_2021_ave.to_csv(path_or_buf="data/day/OH/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in every per-year average CSV from this month's folder for the state
2. Export them as a single combined CSV

In [131]:
# List the per-year average CSVs for July.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/OH/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies all previously accumulated rows on every pass.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/OH/jul/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list, so keep the old result (an empty frame) when no files exist
OH_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

OH_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0060e7a0af03651e6426442471e5ff5857f6594a,jul,2017,auto,auto,Ashland,717.882236,721.113772,647.964072,0.0,False,False,False
1,0060e7a0af03651e6426442471e5ff5857f6594a,jul,2017,auto,hold,Ashland,714.655007,716.655350,651.269890,0.0,False,False,False
2,008fb72df6f08eeb7284d5fa4f148833251ff5e3,jul,2017,auto,auto,Columbus,810.946154,749.192308,651.492308,90.0,False,False,True
3,008fb72df6f08eeb7284d5fa4f148833251ff5e3,jul,2017,auto,hold,Columbus,698.826248,695.769231,637.692308,90.0,False,False,True
4,008fb72df6f08eeb7284d5fa4f148833251ff5e3,jul,2017,cool,auto,Columbus,850.400000,710.000000,720.000000,90.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1288,fe9b616a98ebdb5ffc5de3f7b4379052fc26dfb9,jul,2021,cool,hold,Newark,724.572614,726.360996,726.360996,10.0,False,False,False
1289,fea443aeb4d3a25e9db891ea0c53b74c640cccb5,jul,2021,cool,hold,Cortland,708.760661,714.773716,714.773716,48.0,False,False,False
1290,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jul,2021,cool,hold,Mansfield,723.098893,724.484094,724.486860,0.0,False,False,False
1291,ff35d6fd6ca8dbb43a99e3efae22030989e2fb6b,jul,2021,cool,hold,Toledo,735.014286,730.000000,730.000000,0.0,False,False,False


In [133]:
# Write the combined July frame; index=False leaves the row index out of the file.
OH_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/OH/OH_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 "day" extract for OH into a DataFrame.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2017-aug-day-OH.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(index=aug_2017.index[out_of_range], inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af14cbc15366711ebe10f833534e1bba382bcae9,2017-08-17 07:15:00 UTC,auto,auto,722,720,650,OH,Brook Park,80,True,False,False,Gas
1,47aaaf2595775635318a8c2cf7d7985d4cc43292,2017-08-20 16:10:00 UTC,cool,hold,729,710,710,OH,Granville,120,False,False,False,Gas
2,78b4fcc52eac46e47cee290d39e78b42146f0e1c,2017-08-13 18:15:00 UTC,auto,hold,701,700,640,OH,Mantua,40,True,False,True,Electric
3,e814522b2762dcdbbe0a55c7079568a28b25ce05,2017-08-16 18:40:00 UTC,cool,hold,736,740,740,OH,Amberley,0,False,False,False,Gas
4,9a3a15675914d628fdc6d3ef64d34664331c5a85,2017-08-06 12:10:00 UTC,cool,auto,740,750,690,OH,Waynesville,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
623979,aab92214721d41aa3f5760873353ef2d5d096aa2,2017-08-04 19:55:00 UTC,cool,hold,681,680,680,OH,North Ridgeville,0,False,False,False,Gas
623980,aab92214721d41aa3f5760873353ef2d5d096aa2,2017-08-10 16:15:00 UTC,cool,hold,686,680,680,OH,North Ridgeville,0,False,False,False,Gas
623981,a5ad499d883c3d5441678731297b9ab11855fecf,2017-08-26 13:40:00 UTC,auto,hold,707,740,670,OH,North Ridgeville,5,False,False,False,Gas
623982,a73f3b35d6a66d7f61485569381d52a4a020b12b,2017-08-02 16:05:00 UTC,cool,hold,719,720,720,OH,North Ridgeville,0,False,False,False,Gas


In [136]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
aug_2017 = aug_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export the August 2017 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
aug_2017_ave.to_csv(
    path_or_buf="data/day/OH/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the August 2018 "day" extract for OH into a DataFrame.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2018-aug-day-OH.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(index=aug_2018.index[out_of_range], inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,868c84e3b1632137eb94b07e43724f4c351fb331,2018-08-19 14:00:00 UTC,cool,hold,710,739,739,OH,Centerville,50,False,False,False,Gas
1,e385611263226fd366215fd0eb287046fea3b27a,2018-08-06 10:55:00 UTC,auto,hold,727,725,645,OH,Akron,0,False,False,False,Gas
2,432d2c2dd15de01705b481dd30673dfe7df1110a,2018-08-28 12:45:00 UTC,cool,auto,698,716,657,OH,Centerburg,0,False,False,False,Gas
3,80284b61e03d5549598520c1638b1df28725275d,2018-08-31 16:45:00 UTC,auto,hold,723,720,655,OH,Barberton,100,False,False,False,Gas
4,7afd86909296d156f416b6402e61544d70280151,2018-08-28 15:00:00 UTC,cool,hold,725,725,725,OH,Medina,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1536735,2bea7b3a62e0f0010bf6f411a5fce894e1a0f8e3,2018-08-27 17:00:00 UTC,cool,hold,764,760,760,OH,Springfield,25,False,False,False,Gas
1536736,84ff482e5caf916ec2272816ca8141e76384d5a5,2018-08-27 11:15:00 UTC,cool,hold,748,760,760,OH,Lakewood,90,False,False,False,Gas
1536737,a41ced239935715cfc1f67aa2799d19292ff36dc,2018-08-22 11:15:00 UTC,cool,hold,757,760,760,OH,Cincinnati,40,False,False,False,Gas
1536738,b2e25758f03c4bf68b8314b63f8d66ee14ffb5c4,2018-08-16 15:25:00 UTC,cool,auto,760,760,760,OH,Columbus,40,True,False,False,Gas


In [142]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
aug_2018 = aug_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export the August 2018 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
aug_2018_ave.to_csv(
    path_or_buf="data/day/OH/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the August 2019 "day" extract for OH into a DataFrame.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2019-aug-day-OH.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(index=aug_2019.index[out_of_range], inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4d089486756e563f508be606afa4267a0d3b7a8c,2019-08-24 18:10:00 UTC,auto,hold,712,711,651,OH,Green,15,False,False,False,Gas
1,561a6eb070b0d33db7a47796f124d924209c2e56,2019-08-12 10:55:00 UTC,auto,hold,741,745,665,OH,Miamisburg,45,False,False,False,Gas
2,eb4c793b3ce0c1547bbe48bbeca8f20c41a59901,2019-08-05 19:40:00 UTC,cool,hold,723,722,722,OH,Toledo,50,False,False,False,Gas
3,dcf2d553d44cc491d3c5a44ca0ccbd1562852a03,2019-08-21 12:40:00 UTC,cool,auto,729,730,721,OH,Grove City,20,True,False,False,Gas
4,70b85ce083f1efae9e9d40f99feeab38ac58f507,2019-08-31 18:55:00 UTC,auto,hold,736,745,695,OH,Clayton,65,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023263,fe3606accf6709a05203758e35c4d4f3ad47cd8a,2019-08-19 09:35:00 UTC,cool,auto,761,760,760,OH,Cincinnati,115,False,False,False,Gas
2023264,f4cd4abfe9626feaf5dea408b27271ec0a784c4d,2019-08-04 15:55:00 UTC,cool,hold,751,760,760,OH,Pickerington,0,True,False,True,Electric
2023265,e38e51628f7e07408399c66396520e748ac86340,2019-08-07 15:30:00 UTC,cool,hold,763,760,760,OH,Austintown,0,True,False,False,Gas
2023266,6b633807ed8c6b61f5f2654f9c3793097ff9e326,2019-08-08 12:45:00 UTC,cool,hold,758,760,760,OH,Lewis Center,20,False,False,False,Gas


In [148]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
aug_2019 = aug_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export the August 2019 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
aug_2019_ave.to_csv(
    path_or_buf="data/day/OH/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the August 2020 "day" extract for OH into a DataFrame.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2020-aug-day-OH.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(index=aug_2020.index[out_of_range], inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,93bb1790d741a72d30bdf597f5845ae4ec3d6c91,2020-08-17 19:50:00 UTC,auto,hold,781,780,702,OH,Sandusky,90,False,False,False,Gas
1,2797747224eb6c68096a9e9eec32cc53d1547165,2020-08-21 13:55:00 UTC,cool,hold,747,745,745,OH,Columbus,25,False,False,False,Gas
2,39edfe40b5ce081bca703fab4da0548c351654b6,2020-08-17 13:35:00 UTC,auto,hold,728,725,635,OH,Beavercreek,10,False,False,False,Gas
3,d53f0d707805f8def2983e795aaea834b1684eaa,2020-08-15 14:25:00 UTC,cool,auto,764,761,752,OH,Loveland,25,False,False,False,Gas
4,bf14d2592d79863a0fd075dac5d10428efe1e634,2020-08-03 17:10:00 UTC,cool,auto,691,680,655,OH,Dayton,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025400,213fad33cb79efb3fa5b7c5abc240f2d97fff26b,2020-08-17 19:15:00 UTC,cool,hold,760,760,760,OH,Columbus,30,False,False,False,Gas
2025401,ea952110e47753d306c4a1052f1b8b8c59154676,2020-08-01 11:30:00 UTC,cool,hold,742,760,760,OH,Valley View,89,False,False,False,Gas
2025402,f91d7e37f752e2151c024fed6f845a1544430d1b,2020-08-17 12:20:00 UTC,cool,hold,723,760,760,OH,Mineral City,40,False,False,False,Gas
2025403,ee3ba1ffffe26fb4880f878d1747395ea9c45a82,2020-08-19 13:40:00 UTC,cool,auto,725,760,760,OH,Columbus,0,True,False,True,Electric


In [154]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
aug_2020 = aug_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export the August 2020 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
aug_2020_ave.to_csv(
    path_or_buf="data/day/OH/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [158]:
# Collect the CSV file names in the August folder.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/OH/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year August CSV once, then stack them with a single concat.
# Growing a DataFrame with pd.concat inside a loop re-copies all earlier rows
# on every iteration and seeds the result with an empty frame.
frames = [pd.read_csv("data/day/OH/aug/" + file) for file in files]

# pd.concat raises on an empty list, so keep the empty-frame result
# when the folder holds no CSV files.
OH_aug = pd.concat(frames) if frames else pd.DataFrame()

OH_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0060e7a0af03651e6426442471e5ff5857f6594a,aug,2017,auto,hold,Ashland,714.937354,720.600703,649.933255,0.0,False,False,False
1,008fb72df6f08eeb7284d5fa4f148833251ff5e3,aug,2017,auto,hold,Columbus,723.145449,720.000000,670.000000,90.0,False,False,True
2,009c68a7b26c494e4ebacfae61ef65b502eea795,aug,2017,cool,hold,Worthington,755.052174,735.234783,735.234783,55.0,False,False,False
3,01193423d765e95e26315c356123ba9f02b6c584,aug,2017,cool,auto,Berea,713.723684,706.184211,720.473684,7.0,False,False,False
4,01193423d765e95e26315c356123ba9f02b6c584,aug,2017,cool,hold,Berea,716.596491,701.736842,700.649123,7.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2400,ff35df97caa10f2dab17f309525c11d9f9ea2f22,aug,2020,cool,hold,Chagrin Falls,670.555133,670.022814,669.992395,10.0,False,False,False
2401,ff9b595f4d0899e85e5d9cf1a466898257fae09f,aug,2020,cool,auto,Madison,690.642857,680.000000,680.000000,5.0,False,False,False
2402,ffb9ae558cdffd5abb2322361fd731c6ff75bd78,aug,2020,cool,auto,Columbus,728.695908,730.000000,680.000000,0.0,False,False,False
2403,ffe9b1c80744e98b9edfb7a91d3793630023a634,aug,2020,cool,auto,Columbus,724.666667,720.000000,720.000000,5.0,False,False,False


In [160]:
# Write the combined August frame; index=False leaves the row index out of the file.
OH_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/OH/OH_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 "day" extract for OH into a DataFrame.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2017-dec-day-OH.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(index=dec_2017.index[out_of_range], inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0b997730d0c3383aac47a091fe9d82dda775bd83,2017-12-31 17:55:00 UTC,heat,hold,698,702,702,OH,Thompson,15,True,False,True,Electric
1,a7fbbb09ebc1569ad4d23c226db968de77d0bd31,2017-12-26 14:50:00 UTC,auto,auto,729,785,735,OH,New Philadelphia,35,False,False,False,Gas
2,4d41d4b24145a87fc8e16e0f7963295f81d6c135,2017-12-24 15:30:00 UTC,heat,hold,685,685,685,OH,Massillon,20,False,False,False,Gas
3,6b347cac75b760dfbc548b3275b0e5ba24d3ce37,2017-12-19 11:30:00 UTC,auto,hold,668,789,669,OH,Pickerington,45,False,False,True,Electric
4,4421b8a4ea8abe8cb20d563e8b47c395a4dcaf73,2017-12-10 17:20:00 UTC,auto,auto,703,810,710,OH,Portsmouth,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
822269,e714158702f05def8ba54160f2e9b8480dd6aaa1,2017-12-30 17:35:00 UTC,auto,auto,706,760,710,OH,Springboro,20,False,False,False,Gas
822270,c8e621039b1650aef4cc147d09f5914ab4c5fe38,2017-12-06 15:45:00 UTC,auto,auto,591,760,690,OH,Akron,25,False,False,False,Gas
822271,5b620cf256ebbdff4737d7fa4cf27129b4978823,2017-12-18 19:00:00 UTC,heat,auto,674,760,640,OH,Twinsburg,37,False,False,False,Gas
822273,6af158187e86e0a3b887b0f6b78a22741204b387,2017-12-21 14:25:00 UTC,cool,auto,682,760,760,OH,Dayton,40,True,False,False,Gas


In [163]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
dec_2017 = dec_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export the December 2017 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
dec_2017_ave.to_csv(
    path_or_buf="data/day/OH/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the December 2018 "day" extract for OH into a DataFrame.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2018-dec-day-OH.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(index=dec_2018.index[out_of_range], inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2de0253bf4cd7105ea87f815860df50544f4d6b6,2018-12-15 11:25:00 UTC,heat,hold,696,699,699,OH,Columbus,70,False,False,False,Gas
1,02d135064d9dd3fee7019921e09b5c118f0f16dd,2018-12-27 14:35:00 UTC,heat,auto,657,719,710,OH,Cleveland,75,False,False,False,Gas
2,2de0253bf4cd7105ea87f815860df50544f4d6b6,2018-12-09 13:45:00 UTC,heat,hold,700,709,709,OH,Columbus,70,False,False,False,Gas
3,cb87b984545343d53a037f1ed018080b603996df,2018-12-08 16:00:00 UTC,heat,hold,681,685,685,OH,Bowling Green,60,False,False,False,Gas
4,265f5fe5f7419bfb87d7fb8b2111a6fb21c0bcb6,2018-12-13 19:10:00 UTC,heat,hold,634,620,620,OH,Avon Lake,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1653280,9b5dbdf8f9f9bf24535b0e8dda631cd36933e232,2018-12-09 12:45:00 UTC,auto,auto,728,760,710,OH,New Albany,35,True,False,True,Electric
1653281,ee642e47f470e29571039cbe9228f84503f9f903,2018-12-23 14:00:00 UTC,auto,auto,625,760,610,OH,New Albany,0,False,False,False,Gas
1653282,5c3c8df14b5fad84716b601b51ba0f5861ed4237,2018-12-01 15:45:00 UTC,auto,hold,727,760,730,OH,Washington Township,30,True,False,True,Electric
1653283,45d62177a7abf81c7465e250f49b866e08b3c3b6,2018-12-23 19:10:00 UTC,heat,hold,758,760,760,OH,Lyndhurst,70,False,False,False,Gas


In [169]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
dec_2018 = dec_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export the December 2018 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
dec_2018_ave.to_csv(
    path_or_buf="data/day/OH/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the December 2019 "day" extract for OH into a DataFrame.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2019-dec-day-OH.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(index=dec_2019.index[out_of_range], inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,12a343118d47db036443e43331895335b26a21df,2019-12-02 19:20:00 UTC,heat,auto,617,620,620,OH,Columbus,69,False,False,False,Gas
1,311a6a24e918582bbfd47e11313ac9cf1a78851f,2019-12-28 13:45:00 UTC,heat,auto,618,638,620,OH,Groveport,0,True,False,False,Gas
2,4921fee7755b4dab730fe026a966df256b907b5b,2019-12-27 13:30:00 UTC,auto,auto,679,830,680,OH,Oakwood,90,False,False,False,Gas
3,b67364625f51fa60b7ed5106dda6e1798bfe90d1,2019-12-06 10:00:00 UTC,auto,hold,700,755,705,OH,Russellville,8,True,False,True,Electric
4,42f5c8695847408643da52b133c5abce8e319e68,2019-12-20 19:35:00 UTC,heat,hold,734,734,734,OH,Columbus,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1854577,defac5652f00cf838a3472182768f0ccbe4d0aa0,2019-12-17 17:00:00 UTC,heat,auto,738,760,760,OH,Hough,100,False,False,False,Gas
1854578,70b85ce083f1efae9e9d40f99feeab38ac58f507,2019-12-28 19:25:00 UTC,auto,hold,696,760,700,OH,Clayton,65,False,False,False,Gas
1854579,5f00f518bfe674057489bd7b8b05cfc0f3c08441,2019-12-27 10:00:00 UTC,auto,auto,726,760,720,OH,Cincinnati,0,False,False,False,Gas
1854580,0a7bd8f706d48a84ec2e42fda3ea8720320e2530,2019-12-13 17:25:00 UTC,auto,hold,649,760,650,OH,Bellevue,9,True,False,False,Gas


In [175]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
dec_2019 = dec_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export the December 2019 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
dec_2019_ave.to_csv(
    path_or_buf="data/day/OH/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the December 2020 "day" extract for OH into a DataFrame.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/OH-day/2020-dec-day-OH.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
out_of_range = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(index=dec_2020.index[out_of_range], inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ce29ec1ddef1dd74908629dfd477a8f03b0c69de,2020-12-23 18:10:00 UTC,heat,hold,719,719,719,OH,Pataskala,0,False,False,False,Gas
1,b3cae957e880d8b243166af893fcccdb46c43a3d,2020-12-16 19:25:00 UTC,auto,hold,730,784,734,OH,Columbus,40,True,False,False,Gas
2,988c5d21b7009a782e03d0b0cc6b88cc6d572a88,2020-12-13 19:35:00 UTC,heat,hold,678,682,682,OH,Barberton,50,False,False,False,Gas
3,c6e63317f9e15d643d00c3a7bb75e97560532922,2020-12-22 16:45:00 UTC,heat,hold,701,706,706,OH,Columbus,5,True,False,False,Gas
4,396830596d2bc0a1435399ce9a5917f3284c9e9b,2020-12-22 18:25:00 UTC,heat,hold,724,719,719,OH,Brunswick,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1695219,95d6c5e3a7253f2a9a1d1eaab9e4c869d13a5db7,2020-12-21 13:45:00 UTC,auto,hold,688,760,690,OH,Northfield,68,False,False,False,Gas
1695220,1541ce7969fd56626df8e80292b685025f4bee53,2020-12-11 14:40:00 UTC,heat,auto,677,760,670,OH,Avon,25,True,False,False,Gas
1695221,b9f3e6ac9b47d8581e1e1f87c5c9540e96d77889,2020-12-18 17:55:00 UTC,auto,hold,678,760,680,OH,Canal Fulton,28,False,False,False,Gas
1695222,b9f3e6ac9b47d8581e1e1f87c5c9540e96d77889,2020-12-07 16:30:00 UTC,auto,hold,658,760,665,OH,Canal Fulton,28,False,False,False,Gas


In [181]:
# Tag every row with its source year and month (both stored as strings);
# these columns are used as grouping keys in the aggregation cell.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as such.
dec_2020 = dec_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises a TypeError
# instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export the December 2020 averages. index=True writes the group keys
# (held in the index after the groupby) as the leading CSV columns.
dec_2020_ave.to_csv(
    path_or_buf="data/day/OH/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from this month's folder
2. Export as combined CSV

In [185]:
# Collect the CSV file names in the December folder.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/OH/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year December CSV once, then stack them with a single concat.
# Growing a DataFrame with pd.concat inside a loop re-copies all earlier rows
# on every iteration and seeds the result with an empty frame.
frames = [pd.read_csv("data/day/OH/dec/" + file) for file in files]

# pd.concat raises on an empty list, so keep the empty-frame result
# when the folder holds no CSV files.
OH_dec = pd.concat(frames) if frames else pd.DataFrame()

OH_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0060e7a0af03651e6426442471e5ff5857f6594a,dec,2017,auto,auto,Ashland,703.314732,754.879464,704.837054,0.0,False,False,False
1,0060e7a0af03651e6426442471e5ff5857f6594a,dec,2017,auto,hold,Ashland,659.280379,765.740960,658.492590,0.0,False,False,False
2,008708a18059f2847b8385699bc376034e386c8d,dec,2017,auto,hold,Harrison,688.818182,745.000000,695.000000,0.0,False,False,False
3,008fb72df6f08eeb7284d5fa4f148833251ff5e3,dec,2017,heat,hold,Columbus,676.000000,730.833333,729.833333,90.0,False,False,True
4,009c68a7b26c494e4ebacfae61ef65b502eea795,dec,2017,heat,hold,Worthington,686.872340,691.872340,691.872340,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2385,ff35d6fd6ca8dbb43a99e3efae22030989e2fb6b,dec,2020,heat,auto,Toledo,682.011628,664.662791,640.918605,0.0,False,False,False
2386,ff35d6fd6ca8dbb43a99e3efae22030989e2fb6b,dec,2020,heat,hold,Toledo,706.733333,710.000000,710.000000,0.0,False,False,False
2387,ff35df97caa10f2dab17f309525c11d9f9ea2f22,dec,2020,heat,auto,Chagrin Falls,677.417031,697.347162,697.347162,10.0,False,False,False
2388,ff9b595f4d0899e85e5d9cf1a466898257fae09f,dec,2020,heat,auto,Madison,687.570213,687.757447,687.621277,5.0,False,False,False


In [187]:
# Write the combined December frame; index=False leaves the row index out of the file.
OH_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/OH/OH_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined per-month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the per-month combined CSV file names in the OH output folder.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/OH/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-month OH CSV once, then stack them with a single concat.
# Growing a DataFrame with pd.concat inside a loop re-copies all earlier rows
# on every iteration and seeds the result with an empty frame.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/OH/" + file) for file in files]

# pd.concat raises on an empty list, so keep the empty-frame result
# when the folder holds no CSV files.
OH_all = pd.concat(frames) if frames else pd.DataFrame()

OH_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0060e7a0af03651e6426442471e5ff5857f6594a,aug,2017,auto,hold,Ashland,714.937354,720.600703,649.933255,0.0,False,False,False
1,008fb72df6f08eeb7284d5fa4f148833251ff5e3,aug,2017,auto,hold,Columbus,723.145449,720.000000,670.000000,90.0,False,False,True
2,009c68a7b26c494e4ebacfae61ef65b502eea795,aug,2017,cool,hold,Worthington,755.052174,735.234783,735.234783,55.0,False,False,False
3,01193423d765e95e26315c356123ba9f02b6c584,aug,2017,cool,auto,Berea,713.723684,706.184211,720.473684,7.0,False,False,False
4,01193423d765e95e26315c356123ba9f02b6c584,aug,2017,cool,hold,Berea,716.596491,701.736842,700.649123,7.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10645,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jun,2021,auto,hold,Mansfield,731.915663,732.542169,643.638554,0.0,False,False,False
10646,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jun,2021,cool,hold,Mansfield,724.798413,725.266449,725.241717,0.0,False,False,False
10647,fee0f7b8328e5e8dafc178dcea1fb6685d4372ff,jun,2021,heat,hold,Mansfield,707.884393,675.522158,675.522158,0.0,False,False,False
10648,ffb9ae558cdffd5abb2322361fd731c6ff75bd78,jun,2021,cool,hold,Columbus,742.434783,740.000000,740.000000,0.0,False,False,False


In [190]:
# Write the all-months OH frame; index=False leaves the row index out of the file.
OH_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/OH_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in every monthly frame.
monthly_frames = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019, "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019, "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019, "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019, "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019, "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019, "dec_2020": dec_2020,
}

# Dicts keep insertion order, so the lines print in the order listed above.
for label, frame in monthly_frames.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['OH']
Unique jan_2018: ['OH']
Unique jan_2019: ['OH']
Unique jan_2020: ['OH']
Unique jan_2021: ['OH']
Unique feb_2017: ['OH']
Unique feb_2018: ['OH']
Unique feb_2019: ['OH']
Unique feb_2020: ['OH']
Unique feb_2021: ['OH']
Unique jun_2017: ['OH']
Unique jun_2018: ['OH']
Unique jun_2019: ['OH']
Unique jun_2020: ['OH']
Unique jun_2021: ['OH']
Unique jul_2017: ['OH']
Unique jul_2018: ['OH']
Unique jul_2019: ['OH']
Unique jul_2020: ['OH']
Unique jul_2021: ['OH']
Unique aug_2017: ['OH']
Unique aug_2018: ['OH']
Unique aug_2019: ['OH']
Unique aug_2020: ['OH']
Unique dec_2017: ['OH']
Unique dec_2018: ['OH']
Unique dec_2019: ['OH']
Unique dec_2020: ['OH']
