# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw January 2017 day readings for SC into a DataFrame
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2017-jan-day-SC.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = jan_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
jan_2017 = jan_2017.loc[~outlier_rows].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-01-28 12:10:00 UTC,auto,auto,709,765,735,SC,Orangeburg,30,True,False,True,Electric
4,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-01-31 10:40:00 UTC,auto,auto,742,775,745,SC,Orangeburg,30,True,False,True,Electric
7,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-01-31 12:05:00 UTC,auto,auto,752,775,745,SC,Orangeburg,30,True,False,True,Electric
14,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-01-31 14:35:00 UTC,auto,auto,743,775,745,SC,Orangeburg,30,True,False,True,Electric
21,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-01-31 12:20:00 UTC,auto,auto,751,775,745,SC,Orangeburg,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187636,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-01-09 18:15:00 UTC,auto,auto,756,780,760,SC,Boiling Springs,15,False,False,False,Gas
187637,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-01-30 16:00:00 UTC,auto,hold,755,780,760,SC,Boiling Springs,15,False,False,False,Gas
187638,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-01-06 19:35:00 UTC,auto,auto,758,780,760,SC,Boiling Springs,15,False,False,False,Gas
187639,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-01-06 19:55:00 UTC,auto,auto,756,780,760,SC,Boiling Springs,15,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings)
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
02b28369b0d5e758bcdd07a5a1b653c80ac5bf74,Jan,2017,heat,auto,Elgin,661.964286,662.202381,662.202381,15.0,True,False,True
02b28369b0d5e758bcdd07a5a1b653c80ac5bf74,Jan,2017,heat,hold,Elgin,655.080321,653.670683,653.020080,15.0,True,False,True
03bef4aae91c5ff7d053e64e8b9ecbb87b716259,Jan,2017,auto,auto,Mount Pleasant,659.235602,820.000000,640.000000,5.0,False,False,False
03bef4aae91c5ff7d053e64e8b9ecbb87b716259,Jan,2017,auto,hold,Mount Pleasant,709.171429,764.885714,714.885714,5.0,False,False,False
044b7bf990f4b3e898d8dd96c1a4a4dbfcc02a34,Jan,2017,auto,auto,Charleston,686.420748,789.919431,650.075829,6.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
f761de99818358ffbb78abbcd57a96b363dcd9fb,Jan,2017,heat,hold,Mount Pleasant,658.050000,668.450000,666.900000,5.0,False,False,False
fbc4018b01a73b33d58854b50b84b7378d94339a,Jan,2017,auto,hold,Clover,658.508772,760.000000,662.859649,10.0,False,False,True
fce2d041fb3e938ef55ba1ddfebd247dbded44b9,Jan,2017,auto,auto,Greer,697.602620,780.000000,700.000000,15.0,False,False,False
fce2d041fb3e938ef55ba1ddfebd247dbded44b9,Jan,2017,auto,hold,Greer,669.415842,780.000000,670.000000,15.0,False,False,False


In [7]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/jan", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
jan_2017_ave.to_csv("data/day/SC/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw January 2018 day readings for SC into a DataFrame
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2018-jan-day-SC.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = jan_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
jan_2018 = jan_2018.loc[~outlier_rows].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7c6aa64bcc9fe28ff05e75c663ed0cac76a0e25c,2018-01-06 17:20:00 UTC,heat,hold,679,735,735,SC,Cheraw,40,False,False,True,Electric
1,8e23c84d4572303c4c0f4d9ed9c6a3bfa88252e5,2018-01-05 19:50:00 UTC,auto,auto,643,685,635,SC,Georgetown,10,True,False,True,Electric
2,8e23c84d4572303c4c0f4d9ed9c6a3bfa88252e5,2018-01-09 13:55:00 UTC,heat,hold,642,650,645,SC,Georgetown,10,True,False,True,Electric
3,374b84f3a250a86b28d684b24d0ab272bab92ca7,2018-01-06 16:25:00 UTC,heat,hold,603,708,708,SC,johnston,5,True,False,True,Electric
4,8e23c84d4572303c4c0f4d9ed9c6a3bfa88252e5,2018-01-09 18:15:00 UTC,heat,hold,658,650,645,SC,Georgetown,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711428,8cf44cafd40dfdaf4aa4e2fdbc709bb7543118e2,2018-01-22 18:45:00 UTC,heat,hold,769,760,760,SC,Summerville,17,False,False,False,Gas
711429,8cf44cafd40dfdaf4aa4e2fdbc709bb7543118e2,2018-01-30 13:35:00 UTC,heat,hold,758,760,760,SC,Summerville,17,False,False,False,Gas
711430,8cf44cafd40dfdaf4aa4e2fdbc709bb7543118e2,2018-01-03 12:40:00 UTC,heat,hold,756,760,760,SC,Summerville,17,False,False,False,Gas
711431,8cf44cafd40dfdaf4aa4e2fdbc709bb7543118e2,2018-01-02 16:10:00 UTC,heat,hold,756,760,760,SC,Summerville,17,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings)
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/jan", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
jan_2018_ave.to_csv("data/day/SC/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw January 2019 day readings for SC into a DataFrame
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2019-jan-day-SC.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = jan_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
jan_2019 = jan_2019.loc[~outlier_rows].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
3,7fa3e2b98ecea862f6677c34e2789fdde2fa43f5,2019-01-02 15:15:00 UTC,auto,hold,620,800,610,SC,Townville,7,False,False,True,Electric
4,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-01-30 19:25:00 UTC,heat,hold,697,650,605,SC,Mullins,0,True,False,True,Electric
6,353ec8b093d15d7620dc59c8e136da5167decf51,2019-01-01 14:30:00 UTC,auto,hold,692,695,645,SC,Laurens,40,False,False,False,Gas
10,1510049e5406d48aadda4afdca54c8165abedc15,2019-01-02 15:40:00 UTC,auto,hold,676,695,645,SC,Leesville,0,True,False,True,Electric
12,2b905babddf91744026ff378aded1dae206a8489,2019-01-19 18:45:00 UTC,auto,hold,652,725,655,SC,Belton,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048149,1e14759f6ed52ebd25f4d175458b8161f3f02968,2019-01-26 12:45:00 UTC,heat,hold,723,730,730,SC,North Myrtle Beach,10,True,False,True,Electric
1048150,f688617a0c530e664495649220d5975cfa0ae77e,2019-01-21 14:15:00 UTC,heat,auto,727,730,730,SC,North Myrtle Beach,15,True,False,True,Electric
1048151,9219f797641dae6d7e17c29742d42e53f39a78ee,2019-01-11 14:45:00 UTC,heat,auto,730,730,740,SC,North Myrtle Beach,5,True,False,True,Electric
1048152,9219f797641dae6d7e17c29742d42e53f39a78ee,2019-01-11 15:35:00 UTC,heat,auto,734,730,740,SC,North Myrtle Beach,5,True,False,True,Electric


In [16]:
# Tag every row with its year and month (both stored as strings)
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/jan", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
jan_2019_ave.to_csv("data/day/SC/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw January 2020 day readings for SC into a DataFrame
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2020-jan-day-SC.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = jan_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
jan_2020 = jan_2020.loc[~outlier_rows].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8929c0c5f09a398e0d53aa8f36d965089e27c6f8,2020-01-04 11:55:00 UTC,heat,hold,667,664,664,SC,Greenwood,45,True,False,False,Gas
1,18de35ed8732f95fdcc60b01eb1dfaec1e51f2fb,2020-01-28 15:25:00 UTC,heat,hold,622,650,628,SC,Charlestonp,0,True,False,True,Electric
4,18de35ed8732f95fdcc60b01eb1dfaec1e51f2fb,2020-01-20 14:25:00 UTC,heat,hold,652,658,658,SC,Charlestonp,0,True,False,True,Electric
7,810c070c5f8c3de55f0ba4cf5419f5bbd183bf20,2020-01-15 15:55:00 UTC,cool,auto,671,680,678,SC,Ware Shoals,0,True,False,True,Electric
8,810c070c5f8c3de55f0ba4cf5419f5bbd183bf20,2020-01-12 19:50:00 UTC,heat,auto,760,700,678,SC,Ware Shoals,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1145617,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-01-19 14:10:00 UTC,heat,auto,727,730,730,SC,North Myrtle Beach,10,True,False,True,Electric
1145618,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-01-19 19:05:00 UTC,heat,auto,728,730,730,SC,North Myrtle Beach,10,True,False,True,Electric
1145619,f688617a0c530e664495649220d5975cfa0ae77e,2020-01-03 14:15:00 UTC,heat,auto,727,737,730,SC,North Myrtle Beach,15,True,False,True,Electric
1145620,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-01-19 16:25:00 UTC,heat,auto,732,730,730,SC,North Myrtle Beach,10,True,False,True,Electric


In [22]:
# Tag every row with its year and month (both stored as strings)
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/jan", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
jan_2020_ave.to_csv("data/day/SC/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw January 2021 day readings for SC into a DataFrame
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2021-jan-day-SC.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = jan_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
jan_2021 = jan_2021.loc[~outlier_rows].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,66d46d8dedc5a4d2e2d1b3c16bab0a11b3142a13,2021-01-12 13:50:00 UTC,auto,hold,684,732,682,SC,belton,10,True,False,True,Electric
2,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-01-03 14:45:00 UTC,cool,hold,640,638,638,SC,Nichols,9,False,False,False,Gas
3,66d46d8dedc5a4d2e2d1b3c16bab0a11b3142a13,2021-01-20 14:35:00 UTC,auto,hold,699,742,692,SC,belton,10,True,False,True,Electric
5,0ce033a5254b9032f4060c81b15e9ba49a483fa2,2021-01-07 15:55:00 UTC,auto,hold,745,795,745,SC,Awendaw,5,False,False,False,Gas
7,cae24000584fa7e9d99d79977b165efdfc58ee18,2021-01-29 19:20:00 UTC,heat,hold,651,680,655,SC,Cross Hill,60,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
687167,ce38cabf84623250c44f6a37602ccc716d89a0e2,2021-01-03 12:20:00 UTC,heat,hold,757,760,760,SC,Greenville,15,True,False,False,Gas
687168,ce38cabf84623250c44f6a37602ccc716d89a0e2,2021-01-02 18:00:00 UTC,heat,hold,759,760,760,SC,Greenville,15,True,False,False,Gas
687169,ce38cabf84623250c44f6a37602ccc716d89a0e2,2021-01-02 18:20:00 UTC,heat,hold,760,760,760,SC,Greenville,15,True,False,False,Gas
687170,ce38cabf84623250c44f6a37602ccc716d89a0e2,2021-01-02 12:20:00 UTC,heat,hold,755,760,760,SC,Greenville,15,True,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings)
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/jan", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
jan_2021_ave.to_csv("data/day/SC/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV file names in the January output folder.
# sorted() makes the order deterministic: os.listdir returns entries in arbitrary order,
# which would otherwise make the row order of the combined frame vary between runs.
files = sorted(f for f in os.listdir("data/day/SC/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV once, then concatenate in a single call instead of growing the frame
# inside the loop (which re-copies all earlier rows on each iteration).
frames = [pd.read_csv("data/day/SC/jan/" + file) for file in files]

# ignore_index=True gives the combined frame one continuous row index rather than
# restarting at 0 for each file; an empty folder still yields an empty DataFrame.
SC_jan = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

SC_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,02b28369b0d5e758bcdd07a5a1b653c80ac5bf74,Jan,2017,heat,auto,Elgin,661.964286,662.202381,662.202381,15.0,True,False,True
1,02b28369b0d5e758bcdd07a5a1b653c80ac5bf74,Jan,2017,heat,hold,Elgin,655.080321,653.670683,653.020080,15.0,True,False,True
2,03bef4aae91c5ff7d053e64e8b9ecbb87b716259,Jan,2017,auto,auto,Mount Pleasant,659.235602,820.000000,640.000000,5.0,False,False,False
3,03bef4aae91c5ff7d053e64e8b9ecbb87b716259,Jan,2017,auto,hold,Mount Pleasant,709.171429,764.885714,714.885714,5.0,False,False,False
4,044b7bf990f4b3e898d8dd96c1a4a4dbfcc02a34,Jan,2017,auto,auto,Charleston,686.420748,789.919431,650.075829,6.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
869,fde66b501061e0ca119e16e5b82e943588156f93,Jan,2021,heat,hold,Fountain Inn,679.428571,685.095238,684.047619,0.0,True,False,False
870,fe1856e458d0dffbf7be56839c5aa76f6d9d1c11,Jan,2021,heat,hold,Inman,711.882237,714.653637,714.653637,0.0,False,False,False
871,fe686174646f2f879c73848d3ba235cc47663579,Jan,2021,heat,hold,Easley,713.914685,720.008790,720.008790,40.0,True,False,True
872,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,Jan,2021,auto,hold,Charleston,684.019231,739.230769,689.192308,10.0,False,False,True


In [34]:
# Create the output folder if it is missing, then write the combined January frame
# without its row index.
os.makedirs("Scraper_Output/State_Month_Day/SC", exist_ok=True)
SC_jan.to_csv("Scraper_Output/State_Month_Day/SC/SC_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw February 2017 day readings for SC into a DataFrame
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2017-feb-day-SC.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = feb_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
feb_2017 = feb_2017.loc[~outlier_rows].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-02-17 10:45:00 UTC,auto,auto,737,775,745,SC,Orangeburg,30,True,False,True,Electric
1,5b0843d8f988a065731449ab788b6f1c37a1479e,2017-02-13 16:45:00 UTC,auto,hold,672,705,655,SC,Tega Cay,15,False,False,False,Gas
2,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-02-14 16:30:00 UTC,auto,auto,743,795,765,SC,Orangeburg,30,True,False,True,Electric
4,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2017-02-16 16:45:00 UTC,auto,auto,768,795,765,SC,Orangeburg,30,True,False,True,Electric
5,0ced570195657b0d3cfd1b6e3c15eb671570b7e3,2017-02-16 16:15:00 UTC,auto,hold,713,770,717,SC,McCormick,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174438,05cc0b66531e7e3766970dda03f2d887af548f44,2017-02-18 15:55:00 UTC,heat,auto,759,760,760,SC,Summerville,6,False,False,False,Gas
174439,05cc0b66531e7e3766970dda03f2d887af548f44,2017-02-18 15:40:00 UTC,heat,auto,759,760,760,SC,Summerville,6,False,False,False,Gas
174440,65d07df3c73b3cfe74ee2f1bd0df7e9b489b0363,2017-02-05 18:35:00 UTC,heat,auto,755,760,760,SC,Summerville,5,False,False,False,Gas
174441,05cc0b66531e7e3766970dda03f2d887af548f44,2017-02-18 19:25:00 UTC,heat,auto,761,760,760,SC,Summerville,6,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): lowercase "feb" differs from the capitalised "Jan" used for the January
# frames — confirm which form downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/feb", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
feb_2017_ave.to_csv("data/day/SC/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw February 2018 day readings for SC into a DataFrame
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2018-feb-day-SC.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = feb_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
feb_2018 = feb_2018.loc[~outlier_rows].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,374b84f3a250a86b28d684b24d0ab272bab92ca7,2018-02-03 16:45:00 UTC,heat,hold,735,735,735,SC,johnston,5,True,False,True,Electric
1,a6c1c4e7080b901d22012c01bc49d441ef9f67e0,2018-02-17 16:30:00 UTC,heat,hold,717,719,719,SC,Piedmont,0,False,False,False,Gas
2,a6c1c4e7080b901d22012c01bc49d441ef9f67e0,2018-02-13 18:35:00 UTC,heat,hold,720,718,718,SC,Piedmont,0,False,False,False,Gas
3,a6c1c4e7080b901d22012c01bc49d441ef9f67e0,2018-02-17 11:40:00 UTC,heat,hold,715,719,719,SC,Piedmont,0,False,False,False,Gas
5,374b84f3a250a86b28d684b24d0ab272bab92ca7,2018-02-01 16:25:00 UTC,heat,hold,745,735,735,SC,johnston,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
626936,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-02-15 19:35:00 UTC,cool,auto,723,780,720,SC,North Myrtle Beach,5,True,False,True,Electric
626937,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-02-16 15:05:00 UTC,cool,auto,757,780,720,SC,North Myrtle Beach,5,True,False,True,Electric
626938,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-02-16 14:45:00 UTC,cool,auto,757,780,720,SC,North Myrtle Beach,5,True,False,True,Electric
626939,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-02-16 15:50:00 UTC,cool,auto,761,780,720,SC,North Myrtle Beach,5,True,False,True,Electric


In [43]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): lowercase "feb" differs from the capitalised "Jan" used for the January
# frames — confirm which form downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/feb", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
feb_2018_ave.to_csv("data/day/SC/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw February 2019 day readings for SC into a DataFrame
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2019-feb-day-SC.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = feb_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
feb_2019 = feb_2019.loc[~outlier_rows].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1256cd86583094ecdd244c7b9a049b174b893830,2019-02-15 14:50:00 UTC,auto,auto,711,768,718,SC,Briarcliffe Acres,20,True,False,True,Electric
1,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-02-14 18:15:00 UTC,heat,hold,717,650,625,SC,Mullins,0,True,False,True,Electric
4,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-02-25 13:20:00 UTC,heat,hold,680,755,755,SC,Mullins,0,True,False,True,Electric
10,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-02-21 19:25:00 UTC,heat,hold,664,655,655,SC,Mullins,0,True,False,True,Electric
13,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-02-03 12:30:00 UTC,heat,hold,695,650,645,SC,Mullins,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696775,9367462bdf76e7e2bd5e50a1932ffdad9d7cb9e1,2019-02-22 09:10:00 UTC,cool,auto,727,760,760,SC,Summerville,10,False,False,False,Gas
696776,178c5b0217ffcfe0633a6f6006954c5268477280,2019-02-12 16:10:00 UTC,heat,hold,758,760,760,SC,Summerville,5,False,False,False,Gas
696777,7b63f2442559ec444fccb3f00fbc8bdf4d8a2552,2019-02-19 14:25:00 UTC,auto,auto,758,810,760,SC,Moncks Corner,0,False,False,True,Electric
696778,7b63f2442559ec444fccb3f00fbc8bdf4d8a2552,2019-02-19 14:05:00 UTC,auto,auto,759,810,760,SC,Moncks Corner,0,False,False,True,Electric


In [49]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): lowercase "feb" differs from the capitalised "Jan" used for the January
# frames — confirm which form downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/feb", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
feb_2019_ave.to_csv("data/day/SC/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw February 2020 day readings for SC into a DataFrame
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2020-feb-day-SC.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = feb_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
feb_2020 = feb_2020.loc[~outlier_rows].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2020-02-03 15:00:00 UTC,heat,hold,743,745,745,SC,Mullins,0,True,False,True,Electric
2,2bb0f0613c810a4e9c36015cc8b85aa70fd0a4c9,2020-02-05 14:00:00 UTC,auto,hold,704,752,702,SC,Hartsville,15,True,False,True,Electric
3,2bb0f0613c810a4e9c36015cc8b85aa70fd0a4c9,2020-02-02 14:30:00 UTC,auto,hold,710,762,712,SC,Hartsville,15,True,False,True,Electric
5,21b1c1c4d35ff318c55c4b8972f50a1b686bccff,2020-02-09 12:50:00 UTC,cool,hold,546,671,671,SC,Little River,15,True,False,True,Electric
6,0f5207efbffc736c18da7650e7d2cbca76e7fa8e,2020-02-08 16:05:00 UTC,heat,hold,656,673,673,SC,Pickens,40,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1015675,f3e108e03ab604aef64760a476b9fe96f20acf94,2020-02-15 18:05:00 UTC,cool,auto,731,760,760,SC,Murrells Inlet,10,True,False,True,Electric
1015676,732365684243e569182d28e98b222f46b76305f1,2020-02-16 19:25:00 UTC,heat,auto,749,780,760,SC,Murrells Inlet,25,False,False,True,Electric
1015677,f3e108e03ab604aef64760a476b9fe96f20acf94,2020-02-15 18:30:00 UTC,cool,auto,726,760,760,SC,Murrells Inlet,10,True,False,True,Electric
1015678,f3e108e03ab604aef64760a476b9fe96f20acf94,2020-02-15 16:55:00 UTC,cool,auto,748,760,760,SC,Murrells Inlet,10,True,False,True,Electric


In [55]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): lowercase "feb" differs from the capitalised "Jan" used for the January
# frames — confirm which form downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/feb", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
feb_2020_ave.to_csv("data/day/SC/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw February 2021 day readings for SC into a DataFrame
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2021-feb-day-SC.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating: discard every row where either
# setpoint column (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
# NOTE(review): the thresholds look like tenths of a degree (85.0 / 60.0) — confirm the units.
expected = feb_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
outlier_rows = ((expected >= 850) | (expected <= 600)).any(axis=1)

# One boolean mask replaces four in-place drops; .copy() gives an independent frame so the
# column assignments in later cells do not raise SettingWithCopyWarning.
feb_2021 = feb_2021.loc[~outlier_rows].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,581600a4b97ded6ce5a4732f5d59534796e12330,2021-02-25 13:50:00 UTC,heat,hold,672,650,610,SC,Elloree,90,False,False,False,Gas
1,0ce033a5254b9032f4060c81b15e9ba49a483fa2,2021-02-08 19:20:00 UTC,auto,hold,773,805,755,SC,Awendaw,5,False,False,False,Gas
2,581600a4b97ded6ce5a4732f5d59534796e12330,2021-02-24 18:15:00 UTC,heat,hold,680,650,610,SC,Elloree,90,False,False,False,Gas
3,7c6aa64bcc9fe28ff05e75c663ed0cac76a0e25c,2021-02-22 18:20:00 UTC,heat,hold,745,735,735,SC,Cheraw,40,False,False,True,Electric
4,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-02-10 07:10:00 UTC,cool,hold,631,638,638,SC,Nichols,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
586509,14ab1e2607805d47f010bb6902763c610f462f11,2021-02-08 14:30:00 UTC,heat,hold,758,760,760,SC,North Charleston,0,False,False,False,Gas
586510,14ab1e2607805d47f010bb6902763c610f462f11,2021-02-08 12:35:00 UTC,heat,hold,753,760,760,SC,North Charleston,0,False,False,False,Gas
586511,14ab1e2607805d47f010bb6902763c610f462f11,2021-02-08 19:35:00 UTC,heat,hold,763,760,760,SC,North Charleston,0,False,False,False,Gas
586512,14ab1e2607805d47f010bb6902763c610f462f11,2021-02-09 16:40:00 UTC,heat,hold,766,760,760,SC,North Charleston,0,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): lowercase "feb" differs from the capitalised "Jan" used for the January
# frames — confirm which form downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Label the three temperature columns as averages; the mean itself is computed
# by the groupby step that follows.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where mean() raises TypeError on the
# remaining text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Export CSV file
# Create the output folder first so the export does not fail when the directory is missing.
os.makedirs("data/day/SC/feb", exist_ok=True)

# index=True writes the six group keys (held in the index) as the leading CSV columns.
feb_2021_ave.to_csv("data/day/SC/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV file names in the February output folder.
# sorted() makes the order deterministic: os.listdir returns entries in arbitrary order,
# which would otherwise make the row order of the combined frame vary between runs.
files = sorted(f for f in os.listdir("data/day/SC/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV once, then concatenate in a single call instead of growing the frame
# inside the loop (which re-copies all earlier rows on each iteration).
frames = [pd.read_csv("data/day/SC/feb/" + file) for file in files]

# ignore_index=True gives the combined frame one continuous row index rather than
# restarting at 0 for each file; an empty folder still yields an empty DataFrame.
SC_feb = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

SC_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,03bef4aae91c5ff7d053e64e8b9ecbb87b716259,feb,2017,auto,hold,Mount Pleasant,710.208333,764.916667,714.916667,5.0,False,False,False
1,044b7bf990f4b3e898d8dd96c1a4a4dbfcc02a34,feb,2017,auto,auto,Charleston,691.669523,789.895961,650.099755,6.0,False,False,False
2,04e03135f1ece7f646e2553fa92b2979ab79c7d3,feb,2017,auto,auto,Fort Mill,708.797546,738.466258,687.036810,0.0,False,False,True
3,04e03135f1ece7f646e2553fa92b2979ab79c7d3,feb,2017,auto,hold,Fort Mill,705.468750,740.000000,690.000000,0.0,False,False,True
4,05cc0b66531e7e3766970dda03f2d887af548f44,feb,2017,cool,auto,Summerville,762.600000,780.000000,780.000000,6.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
822,fdebcbc35418bce8754c073bfe0f10443b2270e3,feb,2021,heat,hold,Fort Mill,707.343284,702.283582,702.283582,10.0,True,False,True
823,fe1856e458d0dffbf7be56839c5aa76f6d9d1c11,feb,2021,heat,hold,Inman,703.976325,705.914882,705.914882,0.0,False,False,False
824,fe686174646f2f879c73848d3ba235cc47663579,feb,2021,heat,hold,Easley,713.477076,718.282528,718.282528,40.0,True,False,True
825,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,feb,2021,auto,hold,Charleston,676.895833,730.000000,679.645833,10.0,False,False,True


In [67]:
# Write the combined February frame to CSV; index=False leaves out the row
# labels carried over from the individual per-year files.
SC_feb.to_csv("Scraper_Output/State_Month_Day/SC/SC_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw 2017 June "day" readings for SC from the per-state CSV
jun_2017 = pd.read_csv("../data_large/SC-day/2017-jun-day-SC.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2017.drop(jun_2017[is_outlier].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
4,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-06-24 15:20:00 UTC,auto,auto,702,685,635,SC,Longs,20,False,False,False,Gas
8,427a734bc6574fc7493217cdd49fbe29b7f7b519,2017-06-24 14:50:00 UTC,cool,hold,749,741,741,SC,Irmo,10,False,False,False,Gas
13,f00082b931d00b8405e9fc592f41d9f9a99d6327,2017-06-13 18:00:00 UTC,cool,auto,761,811,643,SC,Goose creek,0,False,False,False,Gas
27,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-06-25 17:35:00 UTC,auto,auto,694,685,635,SC,Longs,20,False,False,False,Gas
37,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-06-24 17:55:00 UTC,auto,auto,750,685,635,SC,Longs,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
385170,29b0a0cf84e448ab3dcbbda83246957b89c7c99a,2017-06-30 12:35:00 UTC,cool,auto,724,720,760,SC,Travelers Rest,5,True,False,True,Electric
385171,29b0a0cf84e448ab3dcbbda83246957b89c7c99a,2017-06-04 15:50:00 UTC,cool,hold,760,760,760,SC,Travelers Rest,5,True,False,True,Electric
385172,29b0a0cf84e448ab3dcbbda83246957b89c7c99a,2017-06-02 16:00:00 UTC,cool,auto,763,760,760,SC,Travelers Rest,5,True,False,True,Electric
385173,29b0a0cf84e448ab3dcbbda83246957b89c7c99a,2017-06-02 16:35:00 UTC,cool,auto,760,760,760,SC,Travelers Rest,5,True,False,True,Electric


In [70]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jun_2017 = jun_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [72]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jun_2017_ave.

jun_2017_ave.to_csv("data/day/SC/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the raw 2018 June "day" readings for SC from the per-state CSV
jun_2018 = pd.read_csv("../data_large/SC-day/2018-jun-day-SC.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2018.drop(jun_2018[is_outlier].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-06-17 17:50:00 UTC,auto,auto,730,730,672,SC,Liberty,0,False,False,True,Electric
1,b5408b31c5aaa770c267cc4d9224f0fe7be85da8,2018-06-10 15:25:00 UTC,auto,hold,740,745,645,SC,greer,5,False,False,False,Gas
3,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-06-25 18:20:00 UTC,auto,auto,730,730,672,SC,Liberty,0,False,False,True,Electric
6,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-06-18 13:10:00 UTC,auto,auto,734,730,672,SC,Liberty,0,False,False,True,Electric
10,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-06-30 11:15:00 UTC,auto,auto,733,730,672,SC,Liberty,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
973170,1e14759f6ed52ebd25f4d175458b8161f3f02968,2018-06-23 13:05:00 UTC,cool,hold,740,740,730,SC,North Myrtle Beach,10,True,False,True,Electric
973171,f688617a0c530e664495649220d5975cfa0ae77e,2018-06-30 10:55:00 UTC,cool,auto,747,760,730,SC,North Myrtle Beach,15,True,False,True,Electric
973172,f688617a0c530e664495649220d5975cfa0ae77e,2018-06-22 12:10:00 UTC,cool,hold,741,740,730,SC,North Myrtle Beach,15,True,False,True,Electric
973173,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-06-29 14:25:00 UTC,cool,auto,786,740,740,SC,North Myrtle Beach,5,True,False,True,Electric


In [76]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jun_2018 = jun_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [78]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jun_2018_ave.

jun_2018_ave.to_csv("data/day/SC/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the raw 2019 June "day" readings for SC from the per-state CSV
jun_2019 = pd.read_csv("../data_large/SC-day/2019-jun-day-SC.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2019.drop(jun_2019[is_outlier].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8705c1c782ef42c2337596d372572e33a345fa92,2019-06-30 11:45:00 UTC,cool,auto,717,720,610,SC,Liberty,30,False,False,False,Gas
1,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-06-15 19:10:00 UTC,cool,hold,764,795,795,SC,Mullins,0,True,False,True,Electric
3,d2a4f1852c3d4b3df58d4bbf5df07c26fe973273,2019-06-10 16:05:00 UTC,cool,auto,696,720,662,SC,Moncks corner,0,False,False,False,Gas
4,aafc6a0da72df7797952c19824c9222195b7adc2,2019-06-26 17:45:00 UTC,cool,hold,787,784,784,SC,Forest Acres,9,False,False,False,Gas
6,33572c25ce9ed19203ed52bc4f630742c06d53b4,2019-06-24 18:00:00 UTC,cool,hold,745,655,655,SC,Daufuskie Island,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1328285,d652e965da436cc8684607cbefc19653576252f8,2019-06-21 17:30:00 UTC,cool,hold,743,770,760,SC,North Charleston,40,False,False,True,Electric
1328286,8876b36c1f0facd23c0336e79ae5855472b35905,2019-06-28 16:15:00 UTC,cool,auto,760,760,760,SC,North Charleston,57,False,False,False,Gas
1328287,d652e965da436cc8684607cbefc19653576252f8,2019-06-15 13:50:00 UTC,cool,auto,729,760,760,SC,North Charleston,40,False,False,True,Electric
1328288,e025b236c20e35225d4387680298355a4ab15eb1,2019-06-06 15:05:00 UTC,cool,hold,735,760,760,SC,North Charleston,10,False,False,False,Gas


In [82]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jun_2019 = jun_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [84]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jun_2019_ave.

jun_2019_ave.to_csv("data/day/SC/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the raw 2020 June "day" readings for SC from the per-state CSV
jun_2020 = pd.read_csv("../data_large/SC-day/2020-jun-day-SC.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2020.drop(jun_2020[is_outlier].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1256cd86583094ecdd244c7b9a049b174b893830,2020-06-25 17:15:00 UTC,auto,hold,727,721,641,SC,Briarcliffe Acres,20,True,False,True,Electric
1,5d62c17fc2faf932e9f7e5abe60b5a89de0b0956,2020-06-03 19:30:00 UTC,auto,hold,732,735,655,SC,Stonefield,5,False,False,False,Gas
3,810c070c5f8c3de55f0ba4cf5419f5bbd183bf20,2020-06-23 13:00:00 UTC,cool,hold,721,740,678,SC,Ware Shoals,0,True,False,True,Electric
4,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2020-06-24 14:50:00 UTC,cool,hold,777,775,775,SC,Mullins,0,True,False,True,Electric
5,7c6aa64bcc9fe28ff05e75c663ed0cac76a0e25c,2020-06-06 15:00:00 UTC,cool,hold,750,745,745,SC,Cheraw,40,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1270314,8876b36c1f0facd23c0336e79ae5855472b35905,2020-06-01 15:15:00 UTC,cool,auto,760,760,760,SC,North Charleston,57,False,False,False,Gas
1270315,8876b36c1f0facd23c0336e79ae5855472b35905,2020-06-01 11:35:00 UTC,cool,auto,765,760,760,SC,North Charleston,57,False,False,False,Gas
1270316,128d494d0786c1a0e230d0c0e329294a3eaca8bc,2020-06-28 15:55:00 UTC,cool,auto,750,760,760,SC,North Charleston,5,True,False,True,Electric
1270317,128d494d0786c1a0e230d0c0e329294a3eaca8bc,2020-06-28 19:15:00 UTC,cool,auto,761,760,760,SC,North Charleston,5,True,False,True,Electric


In [88]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jun_2020 = jun_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [90]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jun_2020_ave.

jun_2020_ave.to_csv("data/day/SC/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the raw 2021 June "day" readings for SC from the per-state CSV
jun_2021 = pd.read_csv("../data_large/SC-day/2021-jun-day-SC.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2021.drop(jun_2021[is_outlier].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-06-29 17:20:00 UTC,cool,hold,659,638,638,SC,Nichols,9,False,False,False,Gas
2,21b1c1c4d35ff318c55c4b8972f50a1b686bccff,2021-06-26 07:55:00 UTC,cool,hold,726,731,731,SC,Little River,15,True,False,True,Electric
3,5d62c17fc2faf932e9f7e5abe60b5a89de0b0956,2021-06-15 19:30:00 UTC,auto,hold,724,745,655,SC,Stonefield,5,False,False,False,Gas
4,5d62c17fc2faf932e9f7e5abe60b5a89de0b0956,2021-06-18 19:30:00 UTC,auto,hold,739,745,655,SC,Stonefield,5,False,False,False,Gas
5,0ce033a5254b9032f4060c81b15e9ba49a483fa2,2021-06-27 12:55:00 UTC,auto,hold,755,747,687,SC,Awendaw,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
813673,1d1f183d6d63a4e7ddc5f80628a2f7266bb228e0,2021-06-26 16:20:00 UTC,cool,hold,723,720,720,SC,North Myrtle Beach,5,False,False,False,Gas
813674,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2021-06-02 16:40:00 UTC,cool,hold,709,720,720,SC,North Myrtle Beach,5,False,False,False,Gas
813675,1d1f183d6d63a4e7ddc5f80628a2f7266bb228e0,2021-06-25 14:45:00 UTC,cool,hold,724,720,720,SC,North Myrtle Beach,5,False,False,False,Gas
813676,1e14759f6ed52ebd25f4d175458b8161f3f02968,2021-06-27 14:45:00 UTC,cool,hold,735,730,730,SC,North Myrtle Beach,10,True,False,True,Electric


In [94]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jun_2021 = jun_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [96]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jun_2021_ave.

jun_2021_ave.to_csv("data/day/SC/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate files from this month's folder for the state
2. Export them as one combined CSV

In [98]:
# List the per-year CSVs in the month's folder. os.listdir returns entries in
# arbitrary order, so sort the names to keep the combined row order stable
# between runs.
files = sorted(f for f in os.listdir("data/day/SC/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then combine them with one concat call; calling
# pd.concat inside the loop re-copies the accumulated rows on every pass.
jun_frames = []
for file in files:
    df = pd.read_csv("data/day/SC/jun/" + file)
    jun_frames.append(df)

# pd.concat raises ValueError when given no frames, so an empty folder still
# yields the empty DataFrame that the accumulating loop produced.
SC_jun = pd.concat(jun_frames) if jun_frames else pd.DataFrame()

SC_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00f84a3c7f1096a53cbe252d757eadf37f79631a,jun,2017,cool,auto,Mount Pleasant,801.039750,812.367973,642.862010,0.0,False,False,True
1,00f84a3c7f1096a53cbe252d757eadf37f79631a,jun,2017,cool,hold,Mount Pleasant,744.870707,747.521212,747.262626,0.0,False,False,True
2,03bef4aae91c5ff7d053e64e8b9ecbb87b716259,jun,2017,cool,auto,Mount Pleasant,821.406746,820.000000,640.000000,5.0,False,False,False
3,044b7bf990f4b3e898d8dd96c1a4a4dbfcc02a34,jun,2017,auto,auto,Charleston,750.611686,789.710286,650.213634,6.0,False,False,False
4,04e03135f1ece7f646e2553fa92b2979ab79c7d3,jun,2017,auto,auto,Fort Mill,698.600559,699.913408,645.678771,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
776,fe686174646f2f879c73848d3ba235cc47663579,jun,2021,cool,hold,Easley,727.077906,737.147900,728.926354,40.0,True,False,True
777,fe97495c3aa5c64e64f6a187430256c7d05391dd,jun,2021,cool,hold,Greenville,691.950617,693.777778,693.469136,60.0,True,False,True
778,fedeb36160778da45a6a9d87c447ab79d6773221,jun,2021,cool,hold,Moore,701.533960,702.386561,702.051301,5.0,False,False,True
779,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,jun,2021,auto,hold,Charleston,701.698113,697.547170,647.433962,10.0,False,False,True


In [100]:
# Write the combined June frame to CSV; index=False leaves out the row labels
# carried over from the individual per-year files.
SC_jun.to_csv("Scraper_Output/State_Month_Day/SC/SC_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the raw 2017 July "day" readings for SC from the per-state CSV
jul_2017 = pd.read_csv("../data_large/SC-day/2017-jul-day-SC.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2017.drop(jul_2017[is_outlier].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-07-01 17:50:00 UTC,auto,auto,708,685,635,SC,Longs,20,False,False,False,Gas
2,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-07-01 11:05:00 UTC,auto,auto,686,685,635,SC,Longs,20,False,False,False,Gas
3,ebfc2830e310ec6af04bba75d5234ae6e4b61578,2017-07-21 19:15:00 UTC,auto,hold,751,745,645,SC,Abbeville,17,False,False,False,Gas
5,34445d99ca41f06d2cb40df52f9d67bf2750dc2e,2017-07-01 14:50:00 UTC,cool,hold,763,818,682,SC,Elloree,90,False,False,False,Gas
6,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-07-02 18:55:00 UTC,auto,auto,766,685,635,SC,Longs,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473359,9219f797641dae6d7e17c29742d42e53f39a78ee,2017-07-17 15:25:00 UTC,cool,hold,737,740,740,SC,North Myrtle Beach,5,True,False,True,Electric
473360,9219f797641dae6d7e17c29742d42e53f39a78ee,2017-07-14 18:20:00 UTC,cool,hold,741,740,740,SC,North Myrtle Beach,5,True,False,True,Electric
473361,9219f797641dae6d7e17c29742d42e53f39a78ee,2017-07-17 16:50:00 UTC,cool,hold,744,740,740,SC,North Myrtle Beach,5,True,False,True,Electric
473362,9219f797641dae6d7e17c29742d42e53f39a78ee,2017-07-14 16:25:00 UTC,cool,hold,744,740,740,SC,North Myrtle Beach,5,True,False,True,Electric


In [103]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jul_2017 = jul_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [105]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jul_2017_ave.

jul_2017_ave.to_csv("data/day/SC/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the raw 2018 July "day" readings for SC from the per-state CSV
jul_2018 = pd.read_csv("../data_large/SC-day/2018-jul-day-SC.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2018.drop(jul_2018[is_outlier].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-07-04 16:20:00 UTC,auto,auto,731,730,672,SC,Liberty,0,False,False,True,Electric
2,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-07-01 11:25:00 UTC,auto,auto,734,730,672,SC,Liberty,0,False,False,True,Electric
3,1510049e5406d48aadda4afdca54c8165abedc15,2018-07-18 14:50:00 UTC,auto,hold,744,710,610,SC,Leesville,0,True,False,True,Electric
4,2e482754513032c218de994cd25cc6228584c852,2018-07-26 14:50:00 UTC,cool,hold,725,723,723,SC,Gaffney,10,True,False,True,Electric
5,b84d2472561ee83d8eae01e1f13d79656e478daf,2018-07-06 15:10:00 UTC,auto,auto,730,730,672,SC,Liberty,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1075609,f688617a0c530e664495649220d5975cfa0ae77e,2018-07-01 15:05:00 UTC,cool,auto,753,740,730,SC,North Myrtle Beach,15,True,False,True,Electric
1075610,f688617a0c530e664495649220d5975cfa0ae77e,2018-07-21 17:30:00 UTC,cool,hold,750,760,730,SC,North Myrtle Beach,15,True,False,True,Electric
1075611,1e14759f6ed52ebd25f4d175458b8161f3f02968,2018-07-20 19:00:00 UTC,cool,auto,762,760,730,SC,North Myrtle Beach,10,True,False,True,Electric
1075612,f688617a0c530e664495649220d5975cfa0ae77e,2018-07-28 17:35:00 UTC,cool,auto,742,740,730,SC,North Myrtle Beach,15,True,False,True,Electric


In [109]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jul_2018 = jul_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [111]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jul_2018_ave.

jul_2018_ave.to_csv("data/day/SC/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the raw 2019 July "day" readings for SC from the per-state CSV
jul_2019 = pd.read_csv("../data_large/SC-day/2019-jul-day-SC.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2019.drop(jul_2019[is_outlier].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-07-08 14:55:00 UTC,cool,hold,792,795,795,SC,Mullins,0,True,False,True,Electric
3,37791c5a6328f44b889b366c2b115030388d80d9,2019-07-21 19:00:00 UTC,cool,hold,751,745,745,SC,Winnsboro,9,True,False,True,Electric
4,d2a4f1852c3d4b3df58d4bbf5df07c26fe973273,2019-07-21 18:20:00 UTC,cool,hold,733,732,732,SC,Moncks corner,0,False,False,False,Gas
5,d2a4f1852c3d4b3df58d4bbf5df07c26fe973273,2019-07-10 11:55:00 UTC,cool,hold,713,732,732,SC,Moncks corner,0,False,False,False,Gas
6,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2019-07-24 17:10:00 UTC,cool,hold,776,795,795,SC,Mullins,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1407617,1442b14c747ad216239ab8cef48d6f035cb890f0,2019-07-25 13:55:00 UTC,cool,auto,758,760,760,SC,North Myrtle Beach,19,True,False,True,Electric
1407618,1442b14c747ad216239ab8cef48d6f035cb890f0,2019-07-24 11:00:00 UTC,cool,auto,748,760,760,SC,North Myrtle Beach,19,True,False,True,Electric
1407619,1442b14c747ad216239ab8cef48d6f035cb890f0,2019-07-24 17:00:00 UTC,cool,auto,760,760,760,SC,North Myrtle Beach,19,True,False,True,Electric
1407620,1442b14c747ad216239ab8cef48d6f035cb890f0,2019-07-25 14:45:00 UTC,cool,auto,762,760,760,SC,North Myrtle Beach,19,True,False,True,Electric


In [115]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jul_2019 = jul_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [117]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jul_2019_ave.

jul_2019_ave.to_csv("data/day/SC/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the raw 2020 July "day" readings for SC from the per-state CSV
jul_2020 = pd.read_csv("../data_large/SC-day/2020-jul-day-SC.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2020.drop(jul_2020[is_outlier].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b84d2472561ee83d8eae01e1f13d79656e478daf,2020-07-13 11:35:00 UTC,auto,hold,733,732,662,SC,Liberty,0,False,False,True,Electric
1,87e431301fab531093b6ebcbaaa3505a17bc6e15,2020-07-08 12:25:00 UTC,auto,hold,689,685,615,SC,Lyman,0,False,False,False,Gas
2,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2020-07-07 16:15:00 UTC,cool,hold,781,785,785,SC,Mullins,0,True,False,True,Electric
4,810c070c5f8c3de55f0ba4cf5419f5bbd183bf20,2020-07-12 18:50:00 UTC,cool,hold,770,760,678,SC,Ware Shoals,0,True,False,True,Electric
5,e7caa108d4b377e746d684dca71e5e7a4dfc05dc,2020-07-04 13:35:00 UTC,cool,hold,751,755,755,SC,Mullins,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1285208,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2020-07-13 15:45:00 UTC,cool,auto,730,730,730,SC,North Myrtle Beach,5,False,False,False,Gas
1285209,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2020-07-15 18:05:00 UTC,cool,hold,731,730,730,SC,North Myrtle Beach,5,False,False,False,Gas
1285210,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2020-07-05 14:55:00 UTC,cool,auto,724,720,730,SC,North Myrtle Beach,5,False,False,False,Gas
1285211,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2020-07-08 11:40:00 UTC,cool,auto,718,720,730,SC,North Myrtle Beach,5,False,False,False,Gas


In [121]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jul_2020 = jul_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [123]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jul_2020_ave.

jul_2020_ave.to_csv("data/day/SC/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the raw 2021 July "day" readings for SC from the per-state CSV
jul_2021 = pd.read_csv("../data_large/SC-day/2021-jul-day-SC.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2021.drop(jul_2021[is_outlier].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-07-02 16:30:00 UTC,cool,hold,680,668,668,SC,Nichols,9,False,False,False,Gas
2,2bb0f0613c810a4e9c36015cc8b85aa70fd0a4c9,2021-07-26 15:45:00 UTC,auto,hold,747,730,678,SC,Hartsville,15,True,False,True,Electric
4,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-07-09 13:45:00 UTC,cool,hold,672,668,668,SC,Nichols,9,False,False,False,Gas
5,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-07-18 17:45:00 UTC,cool,hold,657,638,638,SC,Nichols,9,False,False,False,Gas
6,50e73c399c87e75a0415e096f2cef95f17e098d8,2021-07-08 14:45:00 UTC,cool,hold,672,668,668,SC,Nichols,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
800085,c6e5a59e87f4068be38d1d16ce14d81a9edd0114,2021-07-28 13:35:00 UTC,cool,hold,753,760,760,SC,North Charleston,60,True,False,True,Electric
800086,8876b36c1f0facd23c0336e79ae5855472b35905,2021-07-17 16:50:00 UTC,cool,hold,768,760,760,SC,North Charleston,57,False,False,False,Gas
800087,c6e5a59e87f4068be38d1d16ce14d81a9edd0114,2021-07-28 19:10:00 UTC,cool,hold,759,760,760,SC,North Charleston,60,True,False,True,Electric
800088,8876b36c1f0facd23c0336e79ae5855472b35905,2021-07-24 16:15:00 UTC,cool,hold,758,760,760,SC,North Charleston,57,False,False,False,Gas


In [127]:
# Tag every reading with its year and month so both labels can be used as
# grouping keys when the data is aggregated.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" ahead of the groupby so the
# aggregated output carries names that mark the values as averages.
jul_2021 = jul_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [129]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True keeps text columns such as date_time, ProvinceState and
# Auxilliary_Heat_Fuel_Type out of the mean; pandas 2.0+ raises a TypeError on
# them when the flag is omitted.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Export the per-group averages to CSV. index=True is required here because the
# groupby keys (Identifier, Month, Year, HvacMode, CalendarEvent, City) live in
# the index of jul_2021_ave.

jul_2021_ave.to_csv("data/day/SC/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate files from this month's folder for the state
2. Export them as one combined CSV

In [131]:
# List the per-year CSVs in the month's folder. os.listdir returns entries in
# arbitrary order, so sort the names to keep the combined row order stable
# between runs.
files = sorted(f for f in os.listdir("data/day/SC/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then combine them with one concat call; calling
# pd.concat inside the loop re-copies the accumulated rows on every pass.
jul_frames = []
for file in files:
    df = pd.read_csv("data/day/SC/jul/" + file)
    jul_frames.append(df)

# pd.concat raises ValueError when given no frames, so an empty folder still
# yields the empty DataFrame that the accumulating loop produced.
SC_jul = pd.concat(jul_frames) if jul_frames else pd.DataFrame()

SC_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00f84a3c7f1096a53cbe252d757eadf37f79631a,jul,2017,cool,auto,Mount Pleasant,814.916925,822.013932,648.785346,0.0,False,False,True
1,00f84a3c7f1096a53cbe252d757eadf37f79631a,jul,2017,cool,hold,Mount Pleasant,741.814136,739.905759,739.654450,0.0,False,False,True
2,02b28369b0d5e758bcdd07a5a1b653c80ac5bf74,jul,2017,cool,hold,Elgin,789.333333,790.000000,790.000000,15.0,True,False,True
3,03bef4aae91c5ff7d053e64e8b9ecbb87b716259,jul,2017,cool,auto,Mount Pleasant,826.900615,820.000000,640.000000,5.0,False,False,False
4,044b7bf990f4b3e898d8dd96c1a4a4dbfcc02a34,jul,2017,auto,auto,Charleston,750.215728,789.778959,650.163656,6.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
759,fe686174646f2f879c73848d3ba235cc47663579,jul,2021,cool,hold,Easley,725.701425,734.211140,730.882772,40.0,True,False,True
760,fedeb36160778da45a6a9d87c447ab79d6773221,jul,2021,auto,hold,Moore,719.684564,726.265101,676.446309,5.0,False,False,True
761,fedeb36160778da45a6a9d87c447ab79d6773221,jul,2021,cool,hold,Moore,707.334755,706.942431,706.613362,5.0,False,False,True
762,fedeb36160778da45a6a9d87c447ab79d6773221,jul,2021,heat,hold,Moore,691.000000,730.000000,730.000000,5.0,False,False,True


In [133]:
# Save the combined July table; the index only repeats per-file row numbers, so it is left out.
SC_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/SC/SC_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 extract for SC
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2017-aug-day-SC.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = aug_2017[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2017 = aug_2017.loc[~is_outlier].copy()

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,a9aa583c00d764f67661d84680bd8578850346ac,2017-08-05 17:25:00 UTC,cool,auto,743,756,752,SC,Irmo,10,True,False,True,Electric
3,034f7acc5406d2acf9aefbf56054f175989922e5,2017-08-21 19:45:00 UTC,cool,hold,742,747,747,SC,Seneca,5,False,False,True,Electric
4,034f7acc5406d2acf9aefbf56054f175989922e5,2017-08-20 17:00:00 UTC,cool,hold,747,742,742,SC,Seneca,5,False,False,True,Electric
5,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-08-20 16:55:00 UTC,auto,auto,685,665,615,SC,Longs,20,False,False,False,Gas
6,b71e707805e966fee06d3398d63f9604a9f0b78a,2017-08-26 19:40:00 UTC,auto,auto,694,665,615,SC,Longs,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
518284,d4f7172a56a1d4c111af76f7152405cb3c9deaad,2017-08-11 12:30:00 UTC,cool,auto,736,730,730,SC,North Myrtle Beach,17,False,False,True,Electric
518285,d4f7172a56a1d4c111af76f7152405cb3c9deaad,2017-08-11 12:10:00 UTC,cool,auto,731,730,730,SC,North Myrtle Beach,17,False,False,True,Electric
518286,d4f7172a56a1d4c111af76f7152405cb3c9deaad,2017-08-21 16:00:00 UTC,cool,auto,734,710,730,SC,North Myrtle Beach,17,False,False,True,Electric
518287,d4f7172a56a1d4c111af76f7152405cb3c9deaad,2017-08-11 15:20:00 UTC,cool,auto,713,730,730,SC,North Myrtle Beach,17,False,False,True,Electric


In [136]:
# Tag every row with the year and month this extract covers

aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the August 2017 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

aug_2017_ave.to_csv(
    path_or_buf="data/day/SC/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the August 2018 extract for SC
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2018-aug-day-SC.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = aug_2018[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2018 = aug_2018.loc[~is_outlier].copy()

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8e23c84d4572303c4c0f4d9ed9c6a3bfa88252e5,2018-08-28 12:15:00 UTC,cool,hold,717,718,718,SC,Georgetown,10,True,False,True,Electric
1,1510049e5406d48aadda4afdca54c8165abedc15,2018-08-16 16:50:00 UTC,auto,hold,755,742,692,SC,Leesville,0,True,False,True,Electric
2,9908ac3a885821ee31a68ca6f738189b7864395e,2018-08-19 12:00:00 UTC,cool,hold,750,752,752,SC,Forest Acres,20,False,False,True,Electric
3,1510049e5406d48aadda4afdca54c8165abedc15,2018-08-16 11:25:00 UTC,auto,hold,745,742,692,SC,Leesville,0,True,False,True,Electric
4,351eaaf3750eb411d9a11090baf16240dd63f8fe,2018-08-13 11:15:00 UTC,cool,auto,662,714,669,SC,Williamston,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1096299,128d494d0786c1a0e230d0c0e329294a3eaca8bc,2018-08-27 17:50:00 UTC,cool,auto,757,760,760,SC,North Charleston,5,True,False,True,Electric
1096300,e025b236c20e35225d4387680298355a4ab15eb1,2018-08-22 17:10:00 UTC,cool,hold,745,760,760,SC,North Charleston,10,False,False,False,Gas
1096301,8876b36c1f0facd23c0336e79ae5855472b35905,2018-08-20 16:25:00 UTC,cool,auto,760,760,760,SC,North Charleston,57,False,False,False,Gas
1096302,b0de5793f4495b9b8620e73c5351dfb7b0deb160,2018-08-29 19:15:00 UTC,cool,hold,765,760,760,SC,Hilton Head Island,0,True,False,True,Electric


In [142]:
# Tag every row with the year and month this extract covers

aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the August 2018 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

aug_2018_ave.to_csv(
    path_or_buf="data/day/SC/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the August 2019 extract for SC
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2019-aug-day-SC.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = aug_2019[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2019 = aug_2019.loc[~is_outlier].copy()

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bda07a019ad8664335ea15aeb7bf2d89a1b7745c,2019-08-25 14:50:00 UTC,auto,hold,776,780,610,SC,Edisto Beach,19,True,False,True,Electric
1,8929c0c5f09a398e0d53aa8f36d965089e27c6f8,2019-08-27 13:30:00 UTC,cool,auto,777,770,658,SC,Greenwood,45,True,False,False,Gas
2,bda07a019ad8664335ea15aeb7bf2d89a1b7745c,2019-08-22 19:00:00 UTC,auto,hold,778,780,610,SC,Edisto Beach,19,True,False,True,Electric
3,d2a4f1852c3d4b3df58d4bbf5df07c26fe973273,2019-08-16 18:00:00 UTC,cool,hold,742,732,732,SC,Moncks corner,0,False,False,False,Gas
5,bda07a019ad8664335ea15aeb7bf2d89a1b7745c,2019-08-21 18:10:00 UTC,auto,hold,779,780,610,SC,Edisto Beach,19,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1421132,8876b36c1f0facd23c0336e79ae5855472b35905,2019-08-23 14:05:00 UTC,cool,auto,768,760,760,SC,North Charleston,57,False,False,False,Gas
1421133,8876b36c1f0facd23c0336e79ae5855472b35905,2019-08-10 16:15:00 UTC,cool,auto,770,760,760,SC,North Charleston,57,False,False,False,Gas
1421134,8876b36c1f0facd23c0336e79ae5855472b35905,2019-08-22 15:15:00 UTC,cool,auto,771,760,760,SC,North Charleston,57,False,False,False,Gas
1421135,1442b14c747ad216239ab8cef48d6f035cb890f0,2019-08-30 19:25:00 UTC,cool,hold,765,760,760,SC,North Myrtle Beach,19,True,False,True,Electric


In [148]:
# Tag every row with the year and month this extract covers

aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the August 2019 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

aug_2019_ave.to_csv(
    path_or_buf="data/day/SC/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the August 2020 extract for SC
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2020-aug-day-SC.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = aug_2020[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2020 = aug_2020.loc[~is_outlier].copy()

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b84d2472561ee83d8eae01e1f13d79656e478daf,2020-08-14 16:30:00 UTC,auto,hold,738,732,662,SC,Liberty,0,False,False,True,Electric
1,87e431301fab531093b6ebcbaaa3505a17bc6e15,2020-08-10 17:40:00 UTC,auto,hold,695,685,615,SC,Lyman,0,False,False,False,Gas
2,66d46d8dedc5a4d2e2d1b3c16bab0a11b3142a13,2020-08-09 11:15:00 UTC,auto,hold,721,722,672,SC,belton,10,True,False,True,Electric
3,b84d2472561ee83d8eae01e1f13d79656e478daf,2020-08-19 16:45:00 UTC,auto,hold,728,722,662,SC,Liberty,0,False,False,True,Electric
4,5d62c17fc2faf932e9f7e5abe60b5a89de0b0956,2020-08-08 16:30:00 UTC,auto,hold,744,745,655,SC,Stonefield,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1289977,1d1f183d6d63a4e7ddc5f80628a2f7266bb228e0,2020-08-14 17:40:00 UTC,cool,hold,741,740,740,SC,North Myrtle Beach,5,False,False,False,Gas
1289978,1d1f183d6d63a4e7ddc5f80628a2f7266bb228e0,2020-08-26 17:25:00 UTC,cool,hold,741,740,740,SC,North Myrtle Beach,5,False,False,False,Gas
1289979,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2020-08-04 18:30:00 UTC,cool,auto,734,730,740,SC,North Myrtle Beach,5,False,False,False,Gas
1289980,ca4ca1971033e783af9ec9283f06b9547e4a4f65,2020-08-05 10:40:00 UTC,cool,auto,721,720,740,SC,North Myrtle Beach,5,False,False,False,Gas


In [154]:
# Tag every row with the year and month this extract covers

aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the August 2020 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

aug_2020_ave.to_csv(
    path_or_buf="data/day/SC/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [158]:
# Collect the August CSV file names. os.listdir returns entries in arbitrary
# order, so sort them to keep the row order of the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/SC/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly August file first, then concatenate once. Growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each pass, and seeding
# it with an empty DataFrame relies on concat behaviour that newer pandas deprecates.
aug_frames = [pd.read_csv("data/day/SC/aug/" + file) for file in files]

# pd.concat raises ValueError on an empty list; keep the previous result
# (an empty frame) when the folder holds no CSV files. Each file keeps its own
# row index, exactly as before.
SC_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

SC_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0011fb83dfa5644ad133058b0540e2af72598827,aug,2017,cool,auto,Hanahan,772.196721,799.803279,779.934426,10.0,True,False,True
1,0011fb83dfa5644ad133058b0540e2af72598827,aug,2017,cool,hold,Hanahan,769.545288,784.017841,780.357731,10.0,True,False,True
2,00f84a3c7f1096a53cbe252d757eadf37f79631a,aug,2017,cool,auto,Mount Pleasant,805.408812,829.966537,640.045733,0.0,False,False,True
3,00f84a3c7f1096a53cbe252d757eadf37f79631a,aug,2017,cool,hold,Mount Pleasant,749.521645,754.463203,748.077922,0.0,False,False,True
4,034f7acc5406d2acf9aefbf56054f175989922e5,aug,2017,cool,auto,Seneca,757.000000,750.500000,690.000000,5.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1327,fe686174646f2f879c73848d3ba235cc47663579,aug,2020,cool,hold,Easley,727.787716,736.078664,733.403017,40.0,True,False,True
1328,fe97495c3aa5c64e64f6a187430256c7d05391dd,aug,2020,auto,auto,Greenville,736.570175,730.000000,680.000000,60.0,True,False,True
1329,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,aug,2020,auto,auto,Charleston,711.920000,713.640000,643.733333,10.0,False,False,True
1330,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,aug,2020,auto,hold,Charleston,720.622449,708.367347,644.642857,10.0,False,False,True


In [160]:
# Save the combined August table; the index only repeats per-file row numbers, so it is left out.
SC_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/SC/SC_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 extract for SC
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2017-dec-day-SC.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = dec_2017[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2017 = dec_2017.loc[~is_outlier].copy()

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8e23c84d4572303c4c0f4d9ed9c6a3bfa88252e5,2017-12-30 14:50:00 UTC,heat,auto,709,758,718,SC,Georgetown,10,True,False,True,Electric
2,86021749dea646954a372d0644196cfdd48ed3ce,2017-12-29 18:20:00 UTC,auto,auto,762,815,765,SC,Piedmont,37,False,False,False,Gas
4,86021749dea646954a372d0644196cfdd48ed3ce,2017-12-29 17:15:00 UTC,auto,auto,769,815,765,SC,Piedmont,37,False,False,False,Gas
5,8e23c84d4572303c4c0f4d9ed9c6a3bfa88252e5,2017-12-31 15:50:00 UTC,heat,hold,698,696,696,SC,Georgetown,10,True,False,True,Electric
10,86021749dea646954a372d0644196cfdd48ed3ce,2017-12-29 18:50:00 UTC,auto,auto,760,815,765,SC,Piedmont,37,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
630028,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-12-29 19:20:00 UTC,auto,auto,755,790,760,SC,Boiling Springs,15,False,False,False,Gas
630029,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-12-29 17:05:00 UTC,auto,auto,766,790,760,SC,Boiling Springs,15,False,False,False,Gas
630030,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-12-27 16:10:00 UTC,auto,auto,751,790,760,SC,Boiling Springs,15,False,False,False,Gas
630031,70255f854c31d52aea23eca924d0a84e6a9ec13c,2017-12-27 19:15:00 UTC,auto,auto,761,790,760,SC,Boiling Springs,15,False,False,False,Gas


In [163]:
# Tag every row with the year and month this extract covers

dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the December 2017 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

dec_2017_ave.to_csv(
    path_or_buf="data/day/SC/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the December 2018 extract for SC
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2018-dec-day-SC.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = dec_2018[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2018 = dec_2018.loc[~is_outlier].copy()

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,7fa3e2b98ecea862f6677c34e2789fdde2fa43f5,2018-12-28 12:35:00 UTC,auto,hold,615,800,610,SC,Townville,7,False,False,True,Electric
2,8929c0c5f09a398e0d53aa8f36d965089e27c6f8,2018-12-23 17:25:00 UTC,heat,auto,695,848,628,SC,Greenwood,45,True,False,False,Gas
8,810c070c5f8c3de55f0ba4cf5419f5bbd183bf20,2018-12-17 15:55:00 UTC,heat,auto,666,649,610,SC,Ware Shoals,0,True,False,True,Electric
9,4e20054a8ac679adb887307262eff272eb9ad729,2018-12-08 15:30:00 UTC,heat,hold,610,650,610,SC,Hodges,0,False,False,True,Electric
15,4e20054a8ac679adb887307262eff272eb9ad729,2018-12-10 13:45:00 UTC,heat,hold,613,650,610,SC,Hodges,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1046299,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-12-28 17:55:00 UTC,heat,hold,734,730,730,SC,North Myrtle Beach,5,True,False,True,Electric
1046300,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-12-17 15:50:00 UTC,heat,hold,726,730,730,SC,North Myrtle Beach,5,True,False,True,Electric
1046301,1e14759f6ed52ebd25f4d175458b8161f3f02968,2018-12-01 12:45:00 UTC,heat,hold,730,730,730,SC,North Myrtle Beach,10,True,False,True,Electric
1046302,9219f797641dae6d7e17c29742d42e53f39a78ee,2018-12-17 19:20:00 UTC,heat,hold,729,730,730,SC,North Myrtle Beach,5,True,False,True,Electric


In [169]:
# Tag every row with the year and month this extract covers

dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the December 2018 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

dec_2018_ave.to_csv(
    path_or_buf="data/day/SC/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the December 2019 extract for SC
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2019-dec-day-SC.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = dec_2019[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2019 = dec_2019.loc[~is_outlier].copy()

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,581600a4b97ded6ce5a4732f5d59534796e12330,2019-12-22 11:30:00 UTC,heat,hold,625,650,610,SC,Elloree,90,False,False,False,Gas
2,581600a4b97ded6ce5a4732f5d59534796e12330,2019-12-21 10:05:00 UTC,heat,auto,619,650,610,SC,Elloree,90,False,False,False,Gas
4,18de35ed8732f95fdcc60b01eb1dfaec1e51f2fb,2019-12-31 12:50:00 UTC,heat,hold,637,650,628,SC,Charlestonp,0,True,False,True,Electric
6,810c070c5f8c3de55f0ba4cf5419f5bbd183bf20,2019-12-07 12:10:00 UTC,heat,hold,644,650,648,SC,Ware Shoals,0,True,False,True,Electric
7,bbed2be4e33f80f296bcf163de1b30f5dd75994b,2019-12-07 18:30:00 UTC,auto,hold,696,693,643,SC,Greenwood,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1150164,17dc39ba30ca6365c4dbef2c035ae087dad561b4,2019-12-18 19:05:00 UTC,auto,hold,760,810,760,SC,Summerville,17,False,False,False,Gas
1150165,17dc39ba30ca6365c4dbef2c035ae087dad561b4,2019-12-11 11:30:00 UTC,auto,hold,759,810,760,SC,Summerville,17,False,False,False,Gas
1150166,17dc39ba30ca6365c4dbef2c035ae087dad561b4,2019-12-27 19:40:00 UTC,auto,hold,766,810,760,SC,Summerville,17,False,False,False,Gas
1150167,17dc39ba30ca6365c4dbef2c035ae087dad561b4,2019-12-17 14:55:00 UTC,auto,hold,757,810,760,SC,Summerville,17,False,False,False,Gas


In [175]:
# Tag every row with the year and month this extract covers

dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the December 2019 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

dec_2019_ave.to_csv(
    path_or_buf="data/day/SC/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the December 2020 extract for SC
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/SC-day/2020-dec-day-SC.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: discard every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoint_cols = ['TemperatureExpectedCool', 'TemperatureExpectedHeat']
setpoints = dec_2020[setpoint_cols]

# A row is an outlier if either setpoint falls outside the open interval (600, 850).
# Missing values compare False on both sides, so such rows are kept.
is_outlier = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2020 = dec_2020.loc[~is_outlier].copy()

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,108d1e24a1b0a2db05f4a7275a9e4bfd7c53a1b2,2020-12-25 16:15:00 UTC,auto,hold,620,806,643,SC,Salem,10,False,False,False,Gas
1,fde66b501061e0ca119e16e5b82e943588156f93,2020-12-04 16:20:00 UTC,heat,hold,673,650,645,SC,Fountain Inn,0,True,False,False,Gas
2,50e73c399c87e75a0415e096f2cef95f17e098d8,2020-12-18 07:35:00 UTC,heat,hold,657,658,658,SC,Nichols,9,False,False,False,Gas
3,bbed2be4e33f80f296bcf163de1b30f5dd75994b,2020-12-08 15:05:00 UTC,auto,hold,672,724,674,SC,Greenwood,49,True,False,False,Gas
4,0dc1cbe8afc966c9c35f4e92d16d98919fc119ae,2020-12-26 19:05:00 UTC,heat,hold,742,756,756,SC,Orangeburg,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1033719,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-12-17 17:50:00 UTC,auto,auto,724,770,730,SC,North Myrtle Beach,10,True,False,True,Electric
1033720,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-12-10 19:50:00 UTC,auto,auto,731,770,730,SC,North Myrtle Beach,10,True,False,True,Electric
1033721,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-12-03 16:55:00 UTC,auto,auto,729,770,730,SC,North Myrtle Beach,10,True,False,True,Electric
1033722,1e14759f6ed52ebd25f4d175458b8161f3f02968,2020-12-10 15:20:00 UTC,auto,auto,725,770,730,SC,North Myrtle Beach,10,True,False,True,Electric


In [181]:
# Tag every row with the year and month this extract covers

dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean

dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True skips the text columns that are not
# grouping keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); without it
# pandas >= 2.0 raises TypeError instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the December 2020 averages to disk; the grouping keys live in the index,
# so the index is written out together with the header row.

dec_2020_ave.to_csv(
    path_or_buf="data/day/SC/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [185]:
# Collect the December CSV file names. os.listdir returns entries in arbitrary
# order, so sort them to keep the row order of the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/SC/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly December file first, then concatenate once. Growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each pass, and seeding
# it with an empty DataFrame relies on concat behaviour that newer pandas deprecates.
dec_frames = [pd.read_csv("data/day/SC/dec/" + file) for file in files]

# pd.concat raises ValueError on an empty list; keep the previous result
# (an empty frame) when the folder holds no CSV files. Each file keeps its own
# row index, exactly as before.
SC_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

SC_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0000ac445144d0c6debe6d9952ead47342bdd9f7,dec,2017,auto,auto,York,680.571429,799.142857,678.571429,15.0,False,False,False
1,0000ac445144d0c6debe6d9952ead47342bdd9f7,dec,2017,auto,hold,York,690.615385,798.692308,688.000000,15.0,False,False,False
2,0000ac445144d0c6debe6d9952ead47342bdd9f7,dec,2017,heat,auto,York,688.187500,712.500000,690.000000,15.0,False,False,False
3,0000ac445144d0c6debe6d9952ead47342bdd9f7,dec,2017,heat,hold,York,688.653333,689.573333,689.300000,15.0,False,False,False
4,00f84a3c7f1096a53cbe252d757eadf37f79631a,dec,2017,auto,auto,Mount Pleasant,644.361878,830.000000,634.339779,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1581,fe686174646f2f879c73848d3ba235cc47663579,dec,2020,heat,auto,Easley,703.968085,710.063830,710.063830,40.0,True,False,True
1582,fe686174646f2f879c73848d3ba235cc47663579,dec,2020,heat,hold,Easley,714.278298,720.402553,720.402553,40.0,True,False,True
1583,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,dec,2020,auto,hold,Charleston,639.744186,730.000000,640.000000,10.0,False,False,True
1584,ffb6b508c8f15eab92611a5d86947fece7ae5440,dec,2020,auto,auto,Summerville,704.171196,780.683424,706.096467,0.0,False,False,False


In [187]:
# Save the combined December table; the index only repeats per-file row numbers, so it is left out.
SC_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/SC/SC_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [188]:
# Collect the per-month combined CSV file names for the state. os.listdir returns
# entries in arbitrary order, so sort them to keep the row order of the combined
# file reproducible.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/SC/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-month file first, then concatenate once. Growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each pass, and seeding
# it with an empty DataFrame relies on concat behaviour that newer pandas deprecates.
month_frames = [pd.read_csv("Scraper_Output/State_Month_Day/SC/" + file) for file in files]

# pd.concat raises ValueError on an empty list; keep the previous result
# (an empty frame) when the folder holds no CSV files. Each file keeps its own
# row index, exactly as before.
SC_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

SC_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0011fb83dfa5644ad133058b0540e2af72598827,aug,2017,cool,auto,Hanahan,772.196721,799.803279,779.934426,10.0,True,False,True
1,0011fb83dfa5644ad133058b0540e2af72598827,aug,2017,cool,hold,Hanahan,769.545288,784.017841,780.357731,10.0,True,False,True
2,00f84a3c7f1096a53cbe252d757eadf37f79631a,aug,2017,cool,auto,Mount Pleasant,805.408812,829.966537,640.045733,0.0,False,False,True
3,00f84a3c7f1096a53cbe252d757eadf37f79631a,aug,2017,cool,hold,Mount Pleasant,749.521645,754.463203,748.077922,0.0,False,False,True
4,034f7acc5406d2acf9aefbf56054f175989922e5,aug,2017,cool,auto,Seneca,757.000000,750.500000,690.000000,5.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5452,fe686174646f2f879c73848d3ba235cc47663579,jun,2021,cool,hold,Easley,727.077906,737.147900,728.926354,40.0,True,False,True
5453,fe97495c3aa5c64e64f6a187430256c7d05391dd,jun,2021,cool,hold,Greenville,691.950617,693.777778,693.469136,60.0,True,False,True
5454,fedeb36160778da45a6a9d87c447ab79d6773221,jun,2021,cool,hold,Moore,701.533960,702.386561,702.051301,5.0,False,False,True
5455,ff5369f8d3d7df70eaefc8e74d1924b0e9cd0192,jun,2021,auto,hold,Charleston,701.698113,697.547170,647.433962,10.0,False,False,True


In [190]:
# Save the all-months table for the state; the index only repeats per-file row numbers, so it is left out.
SC_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/SC_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# every monthly extract should contain no ProvinceState other than the expected one.

expected_state = "SC"

# Label -> frame, in the order the report is printed.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019, "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019, "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019, "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019, "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019, "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019, "dec_2020": dec_2020,
}

unexpected_states = {}
for label, frame in frames_by_label.items():
    states = frame['ProvinceState'].unique()
    print(f"Unique {label}: {states}")

    # Remember any extract that contains a value other than the expected state.
    extra = set(states) - {expected_state}
    if extra:
        unexpected_states[label] = sorted(map(str, extra))

# Report all extracts first, then fail loudly so a wrong query cannot go unnoticed.
assert not unexpected_states, f"Unexpected ProvinceState values: {unexpected_states}"

Unique jan_2017: ['SC']
Unique jan_2018: ['SC']
Unique jan_2019: ['SC']
Unique jan_2020: ['SC']
Unique jan_2021: ['SC']
Unique feb_2017: ['SC']
Unique feb_2018: ['SC']
Unique feb_2019: ['SC']
Unique feb_2020: ['SC']
Unique feb_2021: ['SC']
Unique jun_2017: ['SC']
Unique jun_2018: ['SC']
Unique jun_2019: ['SC']
Unique jun_2020: ['SC']
Unique jun_2021: ['SC']
Unique jul_2017: ['SC']
Unique jul_2018: ['SC']
Unique jul_2019: ['SC']
Unique jul_2020: ['SC']
Unique jul_2021: ['SC']
Unique aug_2017: ['SC']
Unique aug_2018: ['SC']
Unique aug_2019: ['SC']
Unique aug_2020: ['SC']
Unique dec_2017: ['SC']
Unique dec_2018: ['SC']
Unique dec_2019: ['SC']
Unique dec_2020: ['SC']
