# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 "day" export for LA
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2017-jan-day-LA.csv")

In [3]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
jan_2017 = jan_2017.loc[~is_outlier].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-01-23 12:00:00 UTC,cool,auto,710,718,706,LA,New Orleans,116,False,False,False,Gas
1,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-01-31 12:05:00 UTC,auto,auto,633,745,693,LA,New Orleans,116,False,False,False,Gas
2,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-01-18 13:50:00 UTC,cool,auto,710,719,706,LA,New Orleans,116,False,False,False,Gas
3,8fa13e7e50301f8a651baabde86f373451ec5ce5,2017-01-09 13:40:00 UTC,heat,auto,646,702,702,LA,New Orleans,110,True,False,False,Gas
4,8fa13e7e50301f8a651baabde86f373451ec5ce5,2017-01-28 12:30:00 UTC,heat,hold,645,650,650,LA,New Orleans,110,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87553,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-01-13 12:05:00 UTC,auto,hold,712,765,715,LA,New Orleans,105,False,False,False,Gas
87554,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-01-21 13:00:00 UTC,auto,hold,746,765,715,LA,New Orleans,105,False,False,False,Gas
87555,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-01-08 19:50:00 UTC,auto,hold,683,765,715,LA,New Orleans,105,False,False,False,Gas
87556,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-01-06 12:40:00 UTC,auto,hold,675,800,730,LA,New Orleans,105,False,False,False,Gas


In [4]:
# Tag every row with its year and month; both are used as group keys in the aggregation
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,auto,auto,Pearl River,715.264331,755.300955,703.503185,5.0,False,False,False
052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,auto,hold,Pearl River,706.764706,748.305882,697.552941,5.0,False,False,False
052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,cool,auto,Pearl River,724.530159,739.060317,719.879365,5.0,False,False,False
052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,cool,hold,Pearl River,718.092784,717.484536,715.783505,5.0,False,False,False
052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,heat,auto,Pearl River,718.740458,737.858779,718.889313,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
febbf001775f53721f273a9d945abe91195ba8e7,Jan,2017,auto,auto,Kenner,704.885714,744.685714,714.685714,35.0,False,False,False
febbf001775f53721f273a9d945abe91195ba8e7,Jan,2017,auto,hold,Kenner,696.186667,756.573333,723.533333,35.0,False,False,False
febbf001775f53721f273a9d945abe91195ba8e7,Jan,2017,cool,hold,Kenner,722.812500,712.145833,711.916667,35.0,False,False,False
febbf001775f53721f273a9d945abe91195ba8e7,Jan,2017,heat,auto,Kenner,700.000000,720.000000,720.000000,35.0,False,False,False


In [7]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
jan_2017_ave.to_csv(
    path_or_buf="data/day/LA/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the January 2018 "day" export for LA
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2018-jan-day-LA.csv")

In [9]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
jan_2018 = jan_2018.loc[~is_outlier].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,84f63f709a4ff07fc5643dd033d4b177b9fc6057,2018-01-12 19:05:00 UTC,heat,hold,688,688,679,LA,Deridder,77,False,False,False,Gas
1,8fa13e7e50301f8a651baabde86f373451ec5ce5,2018-01-18 16:10:00 UTC,heat,auto,640,650,634,LA,New Orleans,110,True,False,False,Gas
2,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-01-25 12:30:00 UTC,auto,hold,723,775,725,LA,New Orleans,117,False,False,False,Gas
3,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-01-25 15:20:00 UTC,auto,hold,718,775,725,LA,New Orleans,117,False,False,False,Gas
4,48e85da341cc609461e45d9aeda5a049dd62dd32,2018-01-28 17:20:00 UTC,heat,auto,699,686,682,LA,Denham Springs,28,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317656,317355f77acab7c643125a0ad60a681803675b66,2018-01-05 15:30:00 UTC,auto,auto,733,790,740,LA,New Orleans,120,False,False,False,Gas
317657,317355f77acab7c643125a0ad60a681803675b66,2018-01-03 17:35:00 UTC,auto,auto,739,790,740,LA,New Orleans,120,False,False,False,Gas
317658,317355f77acab7c643125a0ad60a681803675b66,2018-01-04 19:30:00 UTC,auto,auto,739,790,740,LA,New Orleans,120,False,False,False,Gas
317659,317355f77acab7c643125a0ad60a681803675b66,2018-01-04 14:20:00 UTC,auto,auto,727,790,740,LA,New Orleans,120,False,False,False,Gas


In [10]:
# Tag every row with its year and month; both are used as group keys in the aggregation
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
jan_2018_ave.to_csv(
    path_or_buf="data/day/LA/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the January 2019 "day" export for LA
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2019-jan-day-LA.csv")

In [15]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
jan_2019 = jan_2019.loc[~is_outlier].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2019-01-05 14:45:00 UTC,auto,hold,710,783,713,LA,New Orleans,117,False,False,False,Gas
1,8fa13e7e50301f8a651baabde86f373451ec5ce5,2019-01-21 18:55:00 UTC,heat,hold,700,760,682,LA,New Orleans,110,True,False,False,Gas
3,8fa13e7e50301f8a651baabde86f373451ec5ce5,2019-01-24 18:30:00 UTC,heat,hold,680,760,661,LA,New Orleans,110,True,False,False,Gas
5,dfb11e6c1c058534612767a680637a9e2427b605,2019-01-20 17:15:00 UTC,auto,hold,642,750,664,LA,Baton Rouge,28,False,False,False,Gas
6,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2019-01-06 13:45:00 UTC,auto,hold,708,783,713,LA,New Orleans,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
452544,317355f77acab7c643125a0ad60a681803675b66,2019-01-24 19:00:00 UTC,auto,auto,728,790,740,LA,New Orleans,120,False,False,False,Gas
452545,317355f77acab7c643125a0ad60a681803675b66,2019-01-06 16:15:00 UTC,auto,auto,741,790,740,LA,New Orleans,120,False,False,False,Gas
452546,81640304f6d55978ea76eafcccd97a08c13fb89d,2019-01-06 13:20:00 UTC,auto,auto,736,790,740,LA,New Orleans,120,False,False,False,Gas
452547,81640304f6d55978ea76eafcccd97a08c13fb89d,2019-01-13 14:50:00 UTC,auto,auto,737,790,740,LA,New Orleans,120,False,False,False,Gas


In [16]:
# Tag every row with its year and month; both are used as group keys in the aggregation
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
jan_2019_ave.to_csv(
    path_or_buf="data/day/LA/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the January 2020 "day" export for LA
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2020-jan-day-LA.csv")

In [21]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
jan_2020 = jan_2020.loc[~is_outlier].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-01-19 19:30:00 UTC,auto,hold,730,785,735,LA,Baton Rouge,99,False,False,False,Gas
1,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2020-01-21 16:10:00 UTC,heat,auto,615,650,648,LA,Lafayette,70,True,False,True,Electric
2,7445735f5e05d141f97c02791af735f7d1942e27,2020-01-25 16:35:00 UTC,auto,hold,720,781,723,LA,New Orleans,99,False,False,False,Gas
3,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-01-19 19:35:00 UTC,auto,hold,724,785,735,LA,Baton Rouge,99,False,False,False,Gas
4,7445735f5e05d141f97c02791af735f7d1942e27,2020-01-15 19:10:00 UTC,auto,hold,757,753,703,LA,New Orleans,99,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
491903,317355f77acab7c643125a0ad60a681803675b66,2020-01-29 17:45:00 UTC,heat,auto,751,740,740,LA,New Orleans,120,False,False,False,Gas
491904,81640304f6d55978ea76eafcccd97a08c13fb89d,2020-01-25 16:10:00 UTC,heat,auto,749,740,740,LA,New Orleans,120,False,False,False,Gas
491905,81640304f6d55978ea76eafcccd97a08c13fb89d,2020-01-23 19:15:00 UTC,heat,auto,736,740,740,LA,New Orleans,120,False,False,False,Gas
491906,81640304f6d55978ea76eafcccd97a08c13fb89d,2020-01-23 16:40:00 UTC,heat,auto,750,740,740,LA,New Orleans,120,False,False,False,Gas


In [22]:
# Tag every row with its year and month; both are used as group keys in the aggregation
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
jan_2020_ave.to_csv(
    path_or_buf="data/day/LA/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the January 2021 "day" export for LA
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2021-jan-day-LA.csv")

In [27]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
jan_2021 = jan_2021.loc[~is_outlier].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,329373c9083e6c134f988acf99b81c2a24087a1a,2021-01-28 13:20:00 UTC,heat,hold,668,680,668,LA,New Orleans,88,False,False,False,Gas
1,dfb11e6c1c058534612767a680637a9e2427b605,2021-01-23 18:45:00 UTC,auto,hold,697,749,699,LA,Baton Rouge,28,False,False,False,Gas
2,84f63f709a4ff07fc5643dd033d4b177b9fc6057,2021-01-04 18:45:00 UTC,cool,hold,707,705,655,LA,Deridder,77,False,False,False,Gas
3,84f63f709a4ff07fc5643dd033d4b177b9fc6057,2021-01-04 17:55:00 UTC,cool,hold,699,705,655,LA,Deridder,77,False,False,False,Gas
4,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2021-01-29 14:55:00 UTC,auto,hold,679,749,667,LA,New Orleans,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
312708,491b4d1c4862a8e581f0d82389a32d97ddd56714,2021-01-12 17:55:00 UTC,auto,hold,713,770,720,LA,New Orleans,120,False,False,False,Gas
312709,491b4d1c4862a8e581f0d82389a32d97ddd56714,2021-01-10 17:45:00 UTC,auto,hold,709,780,720,LA,New Orleans,120,False,False,False,Gas
312710,491b4d1c4862a8e581f0d82389a32d97ddd56714,2021-01-11 19:50:00 UTC,auto,hold,713,770,720,LA,New Orleans,120,False,False,False,Gas
312711,491b4d1c4862a8e581f0d82389a32d97ddd56714,2021-01-12 14:40:00 UTC,auto,hold,693,770,720,LA,New Orleans,120,False,False,False,Gas


In [28]:
# Tag every row with its year and month; both are used as group keys in the aggregation
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
jan_2021_ave.to_csv(
    path_or_buf="data/day/LA/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year aggregate CSVs in the January output folder.
# sorted() because os.listdir returns names in arbitrary order, which would make
# the row order of the combined file vary between runs and machines.
# NOTE(review): every .csv in this folder is picked up, including stale files.
files = sorted(f for f in os.listdir("data/day/LA/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year CSV, then stack them with a single concat; concatenating
# inside the loop re-copies the growing frame on every pass.
frames = [pd.read_csv("data/day/LA/jan/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame as before.
# Row labels restart for each file; they are not written on export (index=False).
LA_jan = pd.concat(frames) if frames else pd.DataFrame()

LA_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,auto,auto,Pearl River,715.264331,755.300955,703.503185,5.0,False,False,False
1,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,auto,hold,Pearl River,706.764706,748.305882,697.552941,5.0,False,False,False
2,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,cool,auto,Pearl River,724.530159,739.060317,719.879365,5.0,False,False,False
3,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,cool,hold,Pearl River,718.092784,717.484536,715.783505,5.0,False,False,False
4,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,Jan,2017,heat,auto,Pearl River,718.740458,737.858779,718.889313,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
561,fe284678f060eac820ce5928292bb3bdede08f9c,Jan,2021,heat,hold,New Orleans,638.115607,666.404624,648.005780,15.0,False,False,False
562,fe66b30e171f94a05f6059cd6b94528c71106f2f,Jan,2021,auto,hold,New Orleans,699.833333,770.000000,690.000000,60.0,False,False,False
563,ff4033ac05e748f162f68652c332b76627d948c6,Jan,2021,cool,hold,Lake Charles,705.112745,714.191176,714.191176,10.0,False,False,False
564,ff4033ac05e748f162f68652c332b76627d948c6,Jan,2021,heat,hold,Lake Charles,702.476942,699.015777,699.015777,10.0,False,False,False


In [34]:
# Save the combined January table; row labels are left out of the file
LA_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA/LA_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 "day" export for LA
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2017-feb-day-LA.csv")

In [36]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
feb_2017 = feb_2017.loc[~is_outlier].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a6796e026672f7806aa34e07d872a289f54cf5e,2017-02-25 18:00:00 UTC,auto,auto,700,700,640,LA,Denham Springs,16,False,False,False,Gas
1,9a6796e026672f7806aa34e07d872a289f54cf5e,2017-02-25 17:25:00 UTC,auto,auto,701,700,640,LA,Denham Springs,16,False,False,False,Gas
2,9a6796e026672f7806aa34e07d872a289f54cf5e,2017-02-25 19:00:00 UTC,auto,auto,698,700,640,LA,Denham Springs,16,False,False,False,Gas
3,6f55642311224b998b39d4b6f62bf1440b03f046,2017-02-08 16:10:00 UTC,auto,hold,700,700,650,LA,Lafayette,6,False,False,False,Gas
4,088afc790d7b9e119bb67470c35d985fdb5afe22,2017-02-11 12:55:00 UTC,cool,auto,676,680,650,LA,Baton Rouge,1,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83962,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-02-06 12:30:00 UTC,auto,hold,701,755,705,LA,New Orleans,105,False,False,False,Gas
83963,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-02-05 13:00:00 UTC,auto,hold,698,755,705,LA,New Orleans,105,False,False,False,Gas
83964,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-02-19 18:30:00 UTC,auto,hold,747,755,705,LA,New Orleans,105,False,False,False,Gas
83965,438ad9028fceaf017e96c2e00bd8ad26955e7ed1,2017-02-22 13:10:00 UTC,auto,hold,709,755,705,LA,New Orleans,105,False,False,False,Gas


In [37]:
# Tag every row with its year and month; both are used as group keys in the aggregation.
# NOTE(review): the January cells label the month "Jan" (capitalised) while this
# uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
feb_2017_ave.to_csv(
    path_or_buf="data/day/LA/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the February 2018 "day" export for LA
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2018-feb-day-LA.csv")

In [42]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
feb_2018 = feb_2018.loc[~is_outlier].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-02-02 13:50:00 UTC,auto,hold,716,815,725,LA,New Orleans,117,False,False,False,Gas
3,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-02-02 18:30:00 UTC,auto,hold,717,815,725,LA,New Orleans,117,False,False,False,Gas
4,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-02-03 13:55:00 UTC,auto,hold,724,815,735,LA,New Orleans,117,False,False,False,Gas
5,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-02-03 12:35:00 UTC,auto,hold,718,815,725,LA,New Orleans,117,False,False,False,Gas
6,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-02-17 14:25:00 UTC,cool,hold,721,736,664,LA,Lafayette,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285201,317355f77acab7c643125a0ad60a681803675b66,2018-02-13 15:50:00 UTC,auto,auto,727,790,730,LA,New Orleans,120,False,False,False,Gas
285202,81640304f6d55978ea76eafcccd97a08c13fb89d,2018-02-03 14:00:00 UTC,auto,hold,734,780,730,LA,New Orleans,120,False,False,False,Gas
285203,81640304f6d55978ea76eafcccd97a08c13fb89d,2018-02-03 14:05:00 UTC,auto,hold,734,780,730,LA,New Orleans,120,False,False,False,Gas
285204,317355f77acab7c643125a0ad60a681803675b66,2018-02-14 19:50:00 UTC,auto,auto,746,790,730,LA,New Orleans,120,False,False,False,Gas


In [43]:
# Tag every row with its year and month; both are used as group keys in the aggregation.
# NOTE(review): the January cells label the month "Jan" (capitalised) while this
# uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
feb_2018_ave.to_csv(
    path_or_buf="data/day/LA/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the February 2019 "day" export for LA
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2019-feb-day-LA.csv")

In [48]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
feb_2019 = feb_2019.loc[~is_outlier].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,329373c9083e6c134f988acf99b81c2a24087a1a,2019-02-01 13:15:00 UTC,heat,hold,646,775,651,LA,New Orleans,88,False,False,False,Gas
1,b30836d5193c063d586905aa9551d15c438282c8,2019-02-16 15:45:00 UTC,auto,auto,694,696,643,LA,New Orleans,57,False,False,False,Gas
2,329373c9083e6c134f988acf99b81c2a24087a1a,2019-02-02 14:50:00 UTC,heat,hold,662,680,657,LA,New Orleans,88,False,False,False,Gas
3,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2019-02-26 14:45:00 UTC,auto,hold,715,820,709,LA,New Orleans,117,False,False,False,Gas
4,d5082ef9335a7f4cb281d4b6231383a69e737e56,2019-02-18 13:45:00 UTC,auto,auto,690,710,630,LA,New Orleans,110,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
290428,317355f77acab7c643125a0ad60a681803675b66,2019-02-04 19:40:00 UTC,auto,auto,729,780,730,LA,New Orleans,120,False,False,False,Gas
290429,81640304f6d55978ea76eafcccd97a08c13fb89d,2019-02-14 19:40:00 UTC,auto,hold,731,780,730,LA,New Orleans,120,False,False,False,Gas
290430,317355f77acab7c643125a0ad60a681803675b66,2019-02-13 19:10:00 UTC,auto,auto,733,780,730,LA,New Orleans,120,False,False,False,Gas
290431,317355f77acab7c643125a0ad60a681803675b66,2019-02-10 17:00:00 UTC,auto,auto,739,790,740,LA,New Orleans,120,False,False,False,Gas


In [49]:
# Tag every row with its year and month; both are used as group keys in the aggregation.
# NOTE(review): the January cells label the month "Jan" (capitalised) while this
# uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
feb_2019_ave.to_csv(
    path_or_buf="data/day/LA/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the February 2020 "day" export for LA
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2020-feb-day-LA.csv")

In [54]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
feb_2020 = feb_2020.loc[~is_outlier].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-02-21 09:40:00 UTC,auto,hold,721,785,735,LA,Baton Rouge,99,False,False,False,Gas
1,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-02-21 15:15:00 UTC,auto,hold,723,785,735,LA,Baton Rouge,99,False,False,False,Gas
2,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-02-14 07:20:00 UTC,auto,hold,718,775,725,LA,Baton Rouge,99,False,False,False,Gas
3,c4b5bcf8b64c051d59c4bf2ebd7b6f68a8ee9fa3,2020-02-21 18:30:00 UTC,auto,auto,675,790,736,LA,baton rouge,99,True,False,False,Gas
4,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-02-21 07:20:00 UTC,auto,hold,727,785,735,LA,Baton Rouge,99,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
435229,05d6742631d44cf568e468696c42a95d1f7466da,2020-02-12 12:15:00 UTC,cool,hold,744,750,750,LA,New Orleans,120,True,False,False,Gas
435230,05d6742631d44cf568e468696c42a95d1f7466da,2020-02-12 13:30:00 UTC,cool,hold,743,750,750,LA,New Orleans,120,True,False,False,Gas
435231,05d6742631d44cf568e468696c42a95d1f7466da,2020-02-12 12:00:00 UTC,cool,hold,743,750,750,LA,New Orleans,120,True,False,False,Gas
435232,05d6742631d44cf568e468696c42a95d1f7466da,2020-02-12 13:20:00 UTC,cool,hold,743,750,750,LA,New Orleans,120,True,False,False,Gas


In [55]:
# Tag every row with its year and month; both are used as group keys in the aggregation.
# NOTE(review): the January cells label the month "Jan" (capitalised) while this
# uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
feb_2020_ave.to_csv(
    path_or_buf="data/day/LA/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the February 2021 "day" export for LA
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2021-feb-day-LA.csv")

In [60]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with missing setpoints are kept).
# NOTE(review): setpoints look like tenths of a degree (e.g. 710) — confirm units.
setpoints = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# One boolean-mask pass instead of four in-place drops; .copy() gives an
# independent frame so later column assignments do not warn.
feb_2021 = feb_2021.loc[~is_outlier].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,329373c9083e6c134f988acf99b81c2a24087a1a,2021-02-13 15:05:00 UTC,heat,hold,639,680,656,LA,New Orleans,88,False,False,False,Gas
1,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2021-02-02 18:25:00 UTC,auto,hold,674,752,678,LA,New Orleans,117,False,False,False,Gas
2,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2021-02-04 14:50:00 UTC,auto,hold,671,752,678,LA,New Orleans,117,False,False,False,Gas
3,cd655a51d2b4d76fcce18b863529fc3ec2167dd3,2021-02-17 13:55:00 UTC,auto,hold,714,764,717,LA,Shreveport,79,True,False,False,Gas
4,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2021-02-04 14:20:00 UTC,auto,hold,675,752,678,LA,New Orleans,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
284188,86596de2451ab672599e1a621be703e098ef0157,2021-02-06 18:10:00 UTC,auto,hold,714,780,720,LA,New Orleans,120,False,False,False,Gas
284189,317355f77acab7c643125a0ad60a681803675b66,2021-02-12 18:55:00 UTC,auto,hold,709,770,720,LA,New Orleans,120,False,False,False,Gas
284190,86596de2451ab672599e1a621be703e098ef0157,2021-02-03 13:30:00 UTC,auto,hold,717,780,720,LA,New Orleans,120,False,False,False,Gas
284191,86596de2451ab672599e1a621be703e098ef0157,2021-02-10 12:50:00 UTC,auto,hold,709,780,720,LA,New Orleans,120,False,False,False,Gas


In [61]:
# Tag every row with its year and month; both are used as group keys in the aggregation.
# NOTE(review): the January cells label the month "Jan" (capitalised) while this
# uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" ahead of the averaging step
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Write the aggregate to disk; index=True stores the group keys as leading columns
feb_2021_ave.to_csv(
    path_or_buf="data/day/LA/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year aggregate CSVs in the February output folder.
# sorted() because os.listdir returns names in arbitrary order, which would make
# the row order of the combined file vary between runs and machines.
# NOTE(review): every .csv in this folder is picked up, including stale files.
files = sorted(f for f in os.listdir("data/day/LA/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year CSV, then stack them with a single concat; concatenating
# inside the loop re-copies the growing frame on every pass.
frames = [pd.read_csv("data/day/LA/feb/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame as before.
# Row labels restart for each file; they are not written on export (index=False).
LA_feb = pd.concat(frames) if frames else pd.DataFrame()

LA_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,feb,2017,auto,auto,Pearl River,708.641235,749.793824,698.002725,5.0,False,False,False
1,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,feb,2017,auto,hold,Pearl River,715.428571,717.500000,667.333333,5.0,False,False,False
2,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,feb,2017,cool,auto,Pearl River,722.053097,739.725664,719.920354,5.0,False,False,False
3,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,feb,2017,cool,hold,Pearl River,719.543860,717.456140,717.456140,5.0,False,False,False
4,052e3fc7cea46bc5fc812a70646c8e1fc8073ac1,feb,2017,heat,auto,Pearl River,716.514019,740.000000,720.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
577,ff4033ac05e748f162f68652c332b76627d948c6,feb,2021,cool,hold,Lake Charles,715.443478,722.452174,722.278261,10.0,False,False,False
578,ff4033ac05e748f162f68652c332b76627d948c6,feb,2021,heat,hold,Lake Charles,705.524683,707.826516,707.826516,10.0,False,False,False
579,ff6ef6f57a627a2ddf34ff918c56a0b2315c9b40,feb,2021,auto,hold,Bossier City,676.357143,730.000000,680.000000,75.0,False,False,False
580,ff6ef6f57a627a2ddf34ff918c56a0b2315c9b40,feb,2021,cool,hold,Bossier City,697.190476,708.095238,708.095238,75.0,False,False,False


In [67]:
# Save the combined February table; index=False leaves out the row index.
LA_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA/LA_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 day-time readings for LA.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2017-jun-day-LA.csv")

In [69]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2017.drop(index=jun_2017[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-06-01 11:20:00 UTC,auto,auto,745,736,684,LA,New Orleans,116,False,False,False,Gas
1,9a6796e026672f7806aa34e07d872a289f54cf5e,2017-06-23 12:55:00 UTC,auto,auto,704,718,617,LA,Denham Springs,16,False,False,False,Gas
2,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-06-02 13:35:00 UTC,auto,auto,720,745,681,LA,New Orleans,116,False,False,False,Gas
3,7cdd706bb099ffd64f98b0921a0d7f4347a4a8d8,2017-06-05 18:05:00 UTC,cool,hold,765,790,770,LA,baton rouge,75,True,False,False,Gas
4,7cdd706bb099ffd64f98b0921a0d7f4347a4a8d8,2017-06-12 18:20:00 UTC,cool,hold,756,770,770,LA,baton rouge,75,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
151047,172883c40a35466edfe9b17b09f5f1eb59248dc6,2017-06-06 13:00:00 UTC,cool,auto,715,720,680,LA,New Orleans,105,False,False,False,Gas
151048,172883c40a35466edfe9b17b09f5f1eb59248dc6,2017-06-06 10:30:00 UTC,cool,auto,724,720,680,LA,New Orleans,105,False,False,False,Gas
151049,172883c40a35466edfe9b17b09f5f1eb59248dc6,2017-06-06 12:35:00 UTC,cool,auto,720,720,680,LA,New Orleans,105,False,False,False,Gas
151050,172883c40a35466edfe9b17b09f5f1eb59248dc6,2017-06-06 12:15:00 UTC,cool,auto,702,720,680,LA,New Orleans,105,False,False,False,Gas


In [70]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the June 2017 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jun_2017_ave.to_csv(
    path_or_buf="data/day/LA/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the June 2018 day-time readings for LA.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2018-jun-day-LA.csv")

In [75]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2018.drop(index=jun_2018[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6c85cef71b099ffa77b9ff075278000c8d09dbfa,2018-06-24 14:40:00 UTC,auto,auto,713,714,682,LA,Baton Rouge,70,False,False,False,Gas
1,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-06-20 12:05:00 UTC,cool,hold,707,698,698,LA,Lafayette,70,True,False,True,Electric
2,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-06-20 12:50:00 UTC,cool,hold,715,698,698,LA,Lafayette,70,True,False,True,Electric
3,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-06-23 14:40:00 UTC,cool,hold,724,706,692,LA,Lafayette,70,True,False,True,Electric
4,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-06-20 12:10:00 UTC,cool,hold,708,698,698,LA,Lafayette,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
367139,317355f77acab7c643125a0ad60a681803675b66,2018-06-25 13:25:00 UTC,auto,auto,744,740,680,LA,New Orleans,120,False,False,False,Gas
367140,86596de2451ab672599e1a621be703e098ef0157,2018-06-19 10:35:00 UTC,auto,hold,736,740,680,LA,New Orleans,120,False,False,False,Gas
367141,317355f77acab7c643125a0ad60a681803675b66,2018-06-16 18:00:00 UTC,auto,auto,735,735,685,LA,New Orleans,120,False,False,False,Gas
367142,317355f77acab7c643125a0ad60a681803675b66,2018-06-16 15:55:00 UTC,auto,auto,736,735,685,LA,New Orleans,120,False,False,False,Gas


In [76]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the June 2018 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jun_2018_ave.to_csv(
    path_or_buf="data/day/LA/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the June 2019 day-time readings for LA.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2019-jun-day-LA.csv")

In [81]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2019.drop(index=jun_2019[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b1280e0aae63c840682d6819253ce21bac3ddae8,2019-06-24 16:20:00 UTC,cool,auto,743,740,697,LA,Pearl River,15,True,False,False,Gas
1,b1280e0aae63c840682d6819253ce21bac3ddae8,2019-06-14 18:10:00 UTC,cool,hold,755,740,697,LA,Pearl River,15,True,False,False,Gas
2,b1280e0aae63c840682d6819253ce21bac3ddae8,2019-06-16 14:45:00 UTC,cool,hold,751,740,697,LA,Pearl River,15,True,False,False,Gas
3,e67277222bd56c51a4e581f00adaefc8f9fb2ffd,2019-06-27 11:05:00 UTC,cool,hold,687,687,687,LA,Walker,15,False,False,False,Gas
4,ac2e203f3443aff2024d745a04f021bd2ed5dfd0,2019-06-06 19:50:00 UTC,cool,hold,769,770,770,LA,Geismar,15,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
554736,05d6742631d44cf568e468696c42a95d1f7466da,2019-06-11 19:20:00 UTC,cool,hold,759,760,760,LA,New Orleans,120,True,False,False,Gas
554737,05d6742631d44cf568e468696c42a95d1f7466da,2019-06-08 17:05:00 UTC,cool,hold,758,760,760,LA,New Orleans,120,True,False,False,Gas
554738,05d6742631d44cf568e468696c42a95d1f7466da,2019-06-13 18:45:00 UTC,cool,hold,763,760,760,LA,New Orleans,120,True,False,False,Gas
554739,05d6742631d44cf568e468696c42a95d1f7466da,2019-06-29 17:20:00 UTC,cool,hold,763,760,760,LA,New Orleans,120,True,False,False,Gas


In [82]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the June 2019 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jun_2019_ave.to_csv(
    path_or_buf="data/day/LA/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the June 2020 day-time readings for LA.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2020-jun-day-LA.csv")

In [87]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2020.drop(index=jun_2020[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b35030454a1d5f95dca62db4f20f4ec0c070638f,2020-06-06 14:50:00 UTC,auto,hold,724,722,672,LA,Geismar,15,False,False,False,Gas
1,b35030454a1d5f95dca62db4f20f4ec0c070638f,2020-06-01 14:45:00 UTC,auto,hold,721,722,652,LA,Geismar,15,False,False,False,Gas
2,b35030454a1d5f95dca62db4f20f4ec0c070638f,2020-06-09 15:35:00 UTC,auto,hold,726,722,672,LA,Geismar,15,False,False,False,Gas
3,b35030454a1d5f95dca62db4f20f4ec0c070638f,2020-06-24 14:35:00 UTC,auto,hold,719,722,672,LA,Geismar,15,False,False,False,Gas
4,b35030454a1d5f95dca62db4f20f4ec0c070638f,2020-06-20 16:35:00 UTC,auto,hold,728,722,672,LA,Geismar,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
555852,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-06-12 19:55:00 UTC,cool,hold,750,750,730,LA,New Orleans,120,False,False,False,Gas
555853,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-06-07 16:25:00 UTC,cool,auto,729,730,730,LA,New Orleans,120,False,False,False,Gas
555854,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-06-06 15:35:00 UTC,cool,hold,751,750,730,LA,New Orleans,120,False,False,False,Gas
555855,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-06-24 19:45:00 UTC,cool,hold,747,750,730,LA,New Orleans,120,False,False,False,Gas


In [88]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the June 2020 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jun_2020_ave.to_csv(
    path_or_buf="data/day/LA/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the June 2021 day-time readings for LA.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2021-jun-day-LA.csv")

In [93]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jun_2021.drop(index=jun_2021[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fb4e7794b973f4863edf5c56973170680276c983,2021-06-13 15:50:00 UTC,cool,hold,784,775,725,LA,Baton Rouge,75,True,False,False,Gas
1,fb4e7794b973f4863edf5c56973170680276c983,2021-06-20 07:30:00 UTC,cool,hold,758,765,765,LA,Baton Rouge,75,True,False,False,Gas
2,fb4e7794b973f4863edf5c56973170680276c983,2021-06-23 19:00:00 UTC,cool,hold,753,745,745,LA,Baton Rouge,75,True,False,False,Gas
3,fb4e7794b973f4863edf5c56973170680276c983,2021-06-17 19:25:00 UTC,cool,hold,763,765,765,LA,Baton Rouge,75,True,False,False,Gas
4,fb4e7794b973f4863edf5c56973170680276c983,2021-06-15 18:55:00 UTC,cool,hold,765,765,765,LA,Baton Rouge,75,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316980,05d6742631d44cf568e468696c42a95d1f7466da,2021-06-10 15:40:00 UTC,cool,hold,755,750,750,LA,New Orleans,120,True,False,False,Gas
316981,05d6742631d44cf568e468696c42a95d1f7466da,2021-06-17 17:40:00 UTC,cool,hold,754,750,750,LA,New Orleans,120,True,False,False,Gas
316982,05d6742631d44cf568e468696c42a95d1f7466da,2021-06-09 17:55:00 UTC,cool,hold,757,750,750,LA,New Orleans,120,True,False,False,Gas
316983,05d6742631d44cf568e468696c42a95d1f7466da,2021-06-08 15:45:00 UTC,cool,hold,757,750,750,LA,New Orleans,120,True,False,False,Gas


In [94]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the June 2021 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jun_2021_ave.to_csv(
    path_or_buf="data/day/LA/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export them as one combined CSV

In [98]:
# List the per-year average CSVs written for June. os.listdir returns
# names in arbitrary order, so sort them to keep the combined output
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/LA/jun/") if f.endswith(".csv"))

In [99]:
# Adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then concatenate once at the end: calling pd.concat
# inside the loop re-copies every previously loaded row on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/LA/jun/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame when the
# folder contains no CSV files.
LA_jun = pd.concat(frames) if frames else pd.DataFrame()

LA_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01231b3659604ae515f75ef6288754f18e167b01,jun,2017,cool,auto,Covington,744.400347,744.320624,725.542461,50.0,False,False,False
1,01231b3659604ae515f75ef6288754f18e167b01,jun,2017,cool,hold,Covington,745.036389,747.524843,747.501050,50.0,False,False,False
2,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,jun,2017,cool,auto,Luling,738.509804,740.000000,720.000000,0.0,False,False,False
3,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,jun,2017,cool,hold,Luling,740.176923,742.050000,741.730769,0.0,False,False,False
4,049b5df73fed19a6c5084ad5433f112a0849f724,jun,2017,cool,auto,Gonzales,760.752451,758.801471,649.367647,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
384,fd6b5be819774676538d6ebe4bde298eba7853c4,jun,2021,auto,hold,Shreveport,732.664495,730.635179,629.915309,10.0,True,False,False
385,fe284678f060eac820ce5928292bb3bdede08f9c,jun,2021,cool,hold,New Orleans,747.503526,767.414669,767.416079,15.0,False,False,False
386,fe66b30e171f94a05f6059cd6b94528c71106f2f,jun,2021,auto,hold,New Orleans,760.958333,760.208333,690.000000,60.0,False,False,False
387,ff4033ac05e748f162f68652c332b76627d948c6,jun,2021,cool,hold,Lake Charles,732.744220,736.656069,736.554913,10.0,False,False,False


In [100]:
# Save the combined June table; index=False leaves out the row index.
LA_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA/LA_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 day-time readings for LA.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2017-jul-day-LA.csv")

In [102]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2017.drop(index=jul_2017[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-07-15 11:55:00 UTC,auto,auto,733,727,707,LA,New Orleans,116,False,False,False,Gas
1,b30836d5193c063d586905aa9551d15c438282c8,2017-07-29 16:45:00 UTC,cool,auto,733,736,736,LA,New Orleans,57,False,False,False,Gas
2,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-07-03 12:55:00 UTC,auto,auto,745,747,692,LA,New Orleans,116,False,False,False,Gas
3,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-07-02 14:50:00 UTC,auto,auto,727,731,708,LA,New Orleans,116,False,False,False,Gas
4,e6ee21f5bf9911d11686d94c76f53300d83ba051,2017-07-15 12:15:00 UTC,cool,hold,764,770,770,LA,New Orleans,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189154,317355f77acab7c643125a0ad60a681803675b66,2017-07-20 13:45:00 UTC,auto,auto,776,776,700,LA,New Orleans,120,False,False,False,Gas
189155,317355f77acab7c643125a0ad60a681803675b66,2017-07-15 17:40:00 UTC,auto,auto,754,760,700,LA,New Orleans,120,False,False,False,Gas
189156,317355f77acab7c643125a0ad60a681803675b66,2017-07-24 14:45:00 UTC,auto,auto,759,768,700,LA,New Orleans,120,False,False,False,Gas
189157,317355f77acab7c643125a0ad60a681803675b66,2017-07-20 13:05:00 UTC,auto,auto,772,776,700,LA,New Orleans,120,False,False,False,Gas


In [103]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the July 2017 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jul_2017_ave.to_csv(
    path_or_buf="data/day/LA/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the July 2018 day-time readings for LA.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2018-jul-day-LA.csv")

In [108]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2018.drop(index=jul_2018[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-07-02 16:30:00 UTC,cool,hold,749,732,732,LA,Lafayette,70,True,False,True,Electric
1,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-07-03 11:30:00 UTC,cool,hold,702,702,702,LA,Lafayette,70,True,False,True,Electric
2,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-07-29 17:55:00 UTC,cool,hold,764,761,761,LA,Lafayette,70,True,False,True,Electric
3,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-07-29 17:35:00 UTC,cool,hold,780,761,761,LA,Lafayette,70,True,False,True,Electric
4,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2018-07-02 16:05:00 UTC,cool,hold,750,732,732,LA,Lafayette,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
430975,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-07-08 10:00:00 UTC,auto,auto,776,780,700,LA,New Orleans,120,True,False,False,Gas
430976,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-07-16 11:55:00 UTC,auto,hold,766,750,700,LA,New Orleans,120,True,False,False,Gas
430977,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-07-04 14:40:00 UTC,auto,auto,748,770,700,LA,New Orleans,120,True,False,False,Gas
430978,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-07-04 13:10:00 UTC,auto,auto,757,770,700,LA,New Orleans,120,True,False,False,Gas


In [109]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the July 2018 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jul_2018_ave.to_csv(
    path_or_buf="data/day/LA/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the July 2019 day-time readings for LA.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2019-jul-day-LA.csv")

In [114]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2019.drop(index=jul_2019[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-07-20 19:20:00 UTC,cool,hold,764,730,701,LA,Lafayette,70,True,False,True,Electric
3,724262bded6dff7619c0dd04f8d4417f1c547c8e,2019-07-08 14:20:00 UTC,auto,auto,719,715,652,LA,Larose,69,True,False,False,Gas
4,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-07-26 14:25:00 UTC,cool,auto,754,740,701,LA,Lafayette,70,True,False,True,Electric
7,495fafed3adece8bcd0d29d95ca849729540532f,2019-07-26 17:45:00 UTC,auto,auto,754,751,662,LA,Baton Rouge,28,False,False,False,Gas
8,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-07-21 17:25:00 UTC,cool,auto,737,730,701,LA,Lafayette,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
556056,05d6742631d44cf568e468696c42a95d1f7466da,2019-07-16 11:30:00 UTC,cool,hold,758,760,760,LA,New Orleans,120,True,False,False,Gas
556057,05d6742631d44cf568e468696c42a95d1f7466da,2019-07-19 18:05:00 UTC,cool,hold,766,760,760,LA,New Orleans,120,True,False,False,Gas
556058,05d6742631d44cf568e468696c42a95d1f7466da,2019-07-24 13:45:00 UTC,cool,hold,751,760,760,LA,New Orleans,120,True,False,False,Gas
556059,05d6742631d44cf568e468696c42a95d1f7466da,2019-07-28 16:05:00 UTC,cool,hold,756,760,760,LA,New Orleans,120,True,False,False,Gas


In [115]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the July 2019 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jul_2019_ave.to_csv(
    path_or_buf="data/day/LA/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the July 2020 day-time readings for LA.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2020-jul-day-LA.csv")

In [120]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2020.drop(index=jul_2020[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,329373c9083e6c134f988acf99b81c2a24087a1a,2020-07-11 14:10:00 UTC,cool,auto,781,797,687,LA,New Orleans,88,False,False,False,Gas
2,088afc790d7b9e119bb67470c35d985fdb5afe22,2020-07-19 17:35:00 UTC,cool,auto,715,701,701,LA,Baton Rouge,1,False,False,False,Gas
34,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-07-18 08:45:00 UTC,cool,hold,769,770,770,LA,Baton Rouge,99,False,False,False,Gas
35,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-07-18 09:30:00 UTC,cool,hold,770,770,770,LA,Baton Rouge,99,False,False,False,Gas
36,f3b4d87cb4940a008823292bb3fcb24f49b03df1,2020-07-18 07:05:00 UTC,cool,hold,776,770,770,LA,Baton Rouge,99,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
569817,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-07-31 18:55:00 UTC,cool,auto,746,740,730,LA,New Orleans,120,False,False,False,Gas
569818,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-07-25 15:30:00 UTC,cool,auto,746,740,730,LA,New Orleans,120,False,False,False,Gas
569819,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-07-17 19:50:00 UTC,cool,hold,753,740,730,LA,New Orleans,120,False,False,False,Gas
569820,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-07-30 15:15:00 UTC,cool,auto,736,730,730,LA,New Orleans,120,False,False,False,Gas


In [121]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the July 2020 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jul_2020_ave.to_csv(
    path_or_buf="data/day/LA/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the July 2021 day-time readings for LA.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/LA-day/2021-jul-day-LA.csv")

In [126]:
# Discard predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
setpoints = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
jul_2021.drop(index=jul_2021[is_outlier].index, inplace=True)

# Show the rows that survived the filter.
jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bc52a4fd4d7a2c7481f9939815455660da55064f,2021-07-26 13:05:00 UTC,cool,hold,710,739,739,LA,Denham Springs,36,False,False,False,Gas
1,fb4e7794b973f4863edf5c56973170680276c983,2021-07-01 07:30:00 UTC,cool,hold,723,735,735,LA,Baton Rouge,75,True,False,False,Gas
2,bc52a4fd4d7a2c7481f9939815455660da55064f,2021-07-18 12:10:00 UTC,cool,hold,725,733,733,LA,Denham Springs,36,False,False,False,Gas
3,bc52a4fd4d7a2c7481f9939815455660da55064f,2021-07-13 12:45:00 UTC,cool,hold,697,732,732,LA,Denham Springs,36,False,False,False,Gas
4,fb4e7794b973f4863edf5c56973170680276c983,2021-07-01 07:20:00 UTC,cool,hold,732,735,735,LA,Baton Rouge,75,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309162,05d6742631d44cf568e468696c42a95d1f7466da,2021-07-25 17:55:00 UTC,cool,hold,732,730,730,LA,New Orleans,120,True,False,False,Gas
309163,05d6742631d44cf568e468696c42a95d1f7466da,2021-07-24 19:50:00 UTC,cool,hold,763,750,750,LA,New Orleans,120,True,False,False,Gas
309164,05d6742631d44cf568e468696c42a95d1f7466da,2021-07-25 11:25:00 UTC,cool,hold,751,750,750,LA,New Orleans,120,True,False,False,Gas
309165,05d6742631d44cf568e468696c42a95d1f7466da,2021-07-25 10:50:00 UTC,cool,hold,749,750,750,LA,New Orleans,120,True,False,False,Gas


In [127]:
# Tag every row with its source year and month (both kept as strings) so
# they can be used as grouping keys when the data is aggregated.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave": once grouped and averaged
# they hold monthly means rather than individual readings.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns for every thermostat / month / year / HVAC mode /
# calendar event / city combination. numeric_only=True excludes the text
# columns (e.g. date_time, ProvinceState) explicitly; pandas >= 2.0 raises a
# TypeError when asked to average them instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the July 2021 averages to disk; index=True keeps the group-by
# keys (held in the index) as the leading columns of the CSV.
jul_2021_ave.to_csv(
    path_or_buf="data/day/LA/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export them as one combined CSV

In [131]:
# List the per-year average CSVs written for July. os.listdir returns
# names in arbitrary order, so sort them to keep the combined output
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/LA/jul/") if f.endswith(".csv"))

In [132]:
# Adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file, then concatenate once at the end: calling pd.concat
# inside the loop re-copies every previously loaded row on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/LA/jul/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame when the
# folder contains no CSV files.
LA_jul = pd.concat(frames) if frames else pd.DataFrame()

LA_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01231b3659604ae515f75ef6288754f18e167b01,jul,2017,cool,auto,Covington,753.508738,755.986408,755.431068,50.0,False,False,False
1,01231b3659604ae515f75ef6288754f18e167b01,jul,2017,cool,hold,Covington,747.984451,747.361516,747.674441,50.0,False,False,False
2,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,jul,2017,cool,hold,Luling,729.472501,729.947411,729.936170,0.0,False,False,False
3,049b5df73fed19a6c5084ad5433f112a0849f724,jul,2017,cool,auto,Gonzales,760.054581,756.543860,655.951267,25.0,False,False,False
4,049b5df73fed19a6c5084ad5433f112a0849f724,jul,2017,cool,hold,Gonzales,765.681979,767.597173,766.749117,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
370,fd421852d551812c7bd6c80507e0abcc4868c112,jul,2021,auto,hold,Boyce,747.333333,700.000000,650.000000,10.0,True,False,False
371,fe284678f060eac820ce5928292bb3bdede08f9c,jul,2021,cool,hold,New Orleans,737.904139,756.771242,756.771242,15.0,False,False,False
372,fe66b30e171f94a05f6059cd6b94528c71106f2f,jul,2021,auto,hold,New Orleans,760.829787,758.645390,690.000000,60.0,False,False,False
373,ff4033ac05e748f162f68652c332b76627d948c6,jul,2021,cool,hold,Lake Charles,715.080153,718.118321,717.992366,10.0,False,False,False


In [133]:
# Write the combined July frame without its row index
LA_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA/LA_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August day CSV for LA
aug_2017_source = "../data_large/LA-day/2017-aug-day-LA.csv"
aug_2017 = pd.read_csv(aug_2017_source)

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = aug_2017['TemperatureExpectedCool']
heat = aug_2017['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
aug_2017.drop(index=aug_2017[too_high | too_low].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-08-11 12:55:00 UTC,cool,hold,748,744,723,LA,New Orleans,116,False,False,False,Gas
1,8b7858f4de3979cd406fc2e49e2083fa9e7202d8,2017-08-19 15:20:00 UTC,cool,hold,709,701,707,LA,Holden,27,True,False,True,Electric
2,c860132b3fec8cb13c5f596228150dc21955ca1f,2017-08-11 16:30:00 UTC,auto,auto,757,765,745,LA,New Orleans,116,False,False,False,Gas
3,9a6796e026672f7806aa34e07d872a289f54cf5e,2017-08-17 12:55:00 UTC,auto,auto,702,808,602,LA,Denham Springs,16,False,False,False,Gas
4,b30836d5193c063d586905aa9551d15c438282c8,2017-08-13 15:25:00 UTC,cool,auto,730,724,713,LA,New Orleans,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
207436,317355f77acab7c643125a0ad60a681803675b66,2017-08-26 16:00:00 UTC,auto,auto,775,770,700,LA,New Orleans,120,False,False,False,Gas
207437,81640304f6d55978ea76eafcccd97a08c13fb89d,2017-08-30 11:45:00 UTC,auto,auto,747,760,700,LA,New Orleans,120,False,False,False,Gas
207438,317355f77acab7c643125a0ad60a681803675b66,2017-08-27 11:50:00 UTC,auto,auto,772,770,700,LA,New Orleans,120,False,False,False,Gas
207439,317355f77acab7c643125a0ad60a681803675b66,2017-08-25 13:45:00 UTC,auto,auto,777,793,700,LA,New Orleans,120,False,False,False,Gas


In [136]:
# Stamp every row with its year and month (both stored as strings)
aug_2017["Year"], aug_2017["Month"] = "2017", "aug"

In [137]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_column_names)

In [138]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2017_ave = aug_2017.groupby(group_keys).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file; the index is written out because it holds the group keys
aug_2017_ave.to_csv(
    path_or_buf="data/day/LA/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the 2018 August day CSV for LA
aug_2018_source = "../data_large/LA-day/2018-aug-day-LA.csv"
aug_2018 = pd.read_csv(aug_2018_source)

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = aug_2018['TemperatureExpectedCool']
heat = aug_2018['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
aug_2018.drop(index=aug_2018[too_high | too_low].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dfb11e6c1c058534612767a680637a9e2427b605,2018-08-19 17:40:00 UTC,auto,hold,791,795,725,LA,Baton Rouge,28,False,False,False,Gas
1,fc638274b9a8f9c934872a29341dd2bb96bf77f5,2018-08-14 17:50:00 UTC,auto,auto,739,733,703,LA,New Orleans,88,False,False,False,Gas
2,dfb11e6c1c058534612767a680637a9e2427b605,2018-08-19 17:25:00 UTC,auto,hold,799,795,725,LA,Baton Rouge,28,False,False,False,Gas
3,dfb11e6c1c058534612767a680637a9e2427b605,2018-08-19 19:25:00 UTC,auto,hold,794,795,725,LA,Baton Rouge,28,False,False,False,Gas
4,d99472a41eeb8f0263c6bf73814aec3d52a76243,2018-08-04 12:00:00 UTC,auto,hold,714,732,668,LA,Shreveport,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404559,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-08-21 12:05:00 UTC,auto,auto,757,750,700,LA,New Orleans,120,True,False,False,Gas
404560,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-08-24 14:05:00 UTC,auto,auto,777,780,700,LA,New Orleans,120,True,False,False,Gas
404561,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-08-28 12:50:00 UTC,auto,hold,745,750,700,LA,New Orleans,120,True,False,False,Gas
404562,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-08-17 12:15:00 UTC,auto,hold,749,750,700,LA,New Orleans,120,True,False,False,Gas


In [142]:
# Stamp every row with its year and month (both stored as strings)
aug_2018["Year"], aug_2018["Month"] = "2018", "aug"

In [143]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_column_names)

In [144]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2018_ave = aug_2018.groupby(group_keys).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file; the index is written out because it holds the group keys
aug_2018_ave.to_csv(
    path_or_buf="data/day/LA/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the 2019 August day CSV for LA
aug_2019_source = "../data_large/LA-day/2019-aug-day-LA.csv"
aug_2019 = pd.read_csv(aug_2019_source)

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = aug_2019['TemperatureExpectedCool']
heat = aug_2019['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
aug_2019.drop(index=aug_2019[too_high | too_low].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-08-17 18:50:00 UTC,cool,hold,761,750,701,LA,Lafayette,70,True,False,True,Electric
3,dfb11e6c1c058534612767a680637a9e2427b605,2019-08-12 11:50:00 UTC,auto,auto,776,769,662,LA,Baton Rouge,28,False,False,False,Gas
8,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-08-16 19:30:00 UTC,cool,hold,754,750,701,LA,Lafayette,70,True,False,True,Electric
9,329373c9083e6c134f988acf99b81c2a24087a1a,2019-08-04 15:25:00 UTC,cool,hold,779,801,688,LA,New Orleans,88,False,False,False,Gas
11,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-08-08 16:25:00 UTC,cool,auto,773,770,701,LA,Lafayette,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
549556,05d6742631d44cf568e468696c42a95d1f7466da,2019-08-06 12:15:00 UTC,cool,hold,745,760,760,LA,New Orleans,120,True,False,False,Gas
549557,05d6742631d44cf568e468696c42a95d1f7466da,2019-08-29 17:00:00 UTC,cool,hold,758,760,760,LA,New Orleans,120,True,False,False,Gas
549558,05d6742631d44cf568e468696c42a95d1f7466da,2019-08-21 19:20:00 UTC,cool,hold,760,760,760,LA,New Orleans,120,True,False,False,Gas
549559,05d6742631d44cf568e468696c42a95d1f7466da,2019-08-19 17:25:00 UTC,cool,hold,757,760,760,LA,New Orleans,120,True,False,False,Gas


In [148]:
# Stamp every row with its year and month (both stored as strings)
aug_2019["Year"], aug_2019["Month"] = "2019", "aug"

In [149]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_column_names)

In [150]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2019_ave = aug_2019.groupby(group_keys).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file; the index is written out because it holds the group keys
aug_2019_ave.to_csv(
    path_or_buf="data/day/LA/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the 2020 August day CSV for LA
aug_2020_source = "../data_large/LA-day/2020-aug-day-LA.csv"
aug_2020 = pd.read_csv(aug_2020_source)

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = aug_2020['TemperatureExpectedCool']
heat = aug_2020['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
aug_2020.drop(index=aug_2020[too_high | too_low].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,bc52a4fd4d7a2c7481f9939815455660da55064f,2020-08-14 13:40:00 UTC,cool,hold,723,744,736,LA,Denham Springs,36,False,False,False,Gas
7,bc52a4fd4d7a2c7481f9939815455660da55064f,2020-08-02 18:00:00 UTC,cool,hold,741,745,745,LA,Denham Springs,36,False,False,False,Gas
10,803186f1dd6b30f81edad384f78074e732e2a3bc,2020-08-26 07:45:00 UTC,cool,auto,745,760,753,LA,Baton Rouge,36,False,False,False,Gas
11,803186f1dd6b30f81edad384f78074e732e2a3bc,2020-08-26 14:15:00 UTC,cool,auto,759,760,753,LA,Baton Rouge,36,False,False,False,Gas
14,803186f1dd6b30f81edad384f78074e732e2a3bc,2020-08-26 10:35:00 UTC,cool,auto,751,760,753,LA,Baton Rouge,36,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
530526,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-08-17 17:20:00 UTC,cool,auto,743,740,730,LA,New Orleans,120,False,False,False,Gas
530527,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-08-04 18:40:00 UTC,cool,auto,751,740,730,LA,New Orleans,120,False,False,False,Gas
530528,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-08-02 18:45:00 UTC,cool,auto,735,730,730,LA,New Orleans,120,False,False,False,Gas
530529,491b4d1c4862a8e581f0d82389a32d97ddd56714,2020-08-01 19:30:00 UTC,cool,hold,752,740,730,LA,New Orleans,120,False,False,False,Gas


In [154]:
# Stamp every row with its year and month (both stored as strings)
aug_2020["Year"], aug_2020["Month"] = "2020", "aug"

In [155]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_column_names)

In [156]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
aug_2020_ave = aug_2020.groupby(group_keys).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file; the index is written out because it holds the group keys
aug_2020_ave.to_csv(
    path_or_buf="data/day/LA/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [158]:
# Collect the CSV files in the month folder.
# os.listdir() returns entries in arbitrary order, so sort the names to make the
# row order of the combined frame reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/LA/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate in a single call. Growing the frame with
# pd.concat inside the loop re-copies every accumulated row on each pass and
# seeds the result from an empty DataFrame, which newer pandas versions deprecate.
monthly_frames = []
for file in files:
    df = pd.read_csv("data/day/LA/aug/" + file)
    monthly_frames.append(df)

# Each file keeps its own row index; an empty folder still yields an empty frame
LA_aug = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

LA_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01231b3659604ae515f75ef6288754f18e167b01,aug,2017,cool,auto,Covington,751.940086,751.285307,750.741797,50.0,False,False,False
1,01231b3659604ae515f75ef6288754f18e167b01,aug,2017,cool,hold,Covington,749.558505,748.932153,748.954769,50.0,False,False,False
2,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,aug,2017,cool,auto,Luling,735.661980,737.518337,723.728606,0.0,False,False,False
3,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,aug,2017,cool,hold,Luling,722.003974,723.381457,723.377483,0.0,False,False,False
4,049b5df73fed19a6c5084ad5433f112a0849f724,aug,2017,cool,auto,Gonzales,758.437340,756.312020,653.792839,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
743,fe284678f060eac820ce5928292bb3bdede08f9c,aug,2020,cool,hold,New Orleans,756.038130,765.649446,765.649446,15.0,False,False,False
744,fe66b30e171f94a05f6059cd6b94528c71106f2f,aug,2020,auto,auto,New Orleans,760.583333,760.208333,710.000000,60.0,False,False,False
745,fe66b30e171f94a05f6059cd6b94528c71106f2f,aug,2020,auto,hold,New Orleans,769.666667,760.000000,710.000000,60.0,False,False,False
746,ff4033ac05e748f162f68652c332b76627d948c6,aug,2020,cool,auto,Lake Charles,710.615385,710.461538,710.461538,10.0,False,False,False


In [160]:
# Write the combined August frame without its row index
LA_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA/LA_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December day CSV for LA
dec_2017_source = "../data_large/LA-day/2017-dec-day-LA.csv"
dec_2017 = pd.read_csv(dec_2017_source)

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = dec_2017['TemperatureExpectedCool']
heat = dec_2017['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
dec_2017.drop(index=dec_2017[too_high | too_low].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,088afc790d7b9e119bb67470c35d985fdb5afe22,2017-12-10 17:20:00 UTC,heat,auto,705,780,709,LA,Baton Rouge,1,False,False,False,Gas
1,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2017-12-06 15:05:00 UTC,auto,hold,715,775,725,LA,New Orleans,117,False,False,False,Gas
2,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2017-12-06 13:45:00 UTC,auto,hold,716,775,725,LA,New Orleans,117,False,False,False,Gas
3,8fa13e7e50301f8a651baabde86f373451ec5ce5,2017-12-28 16:45:00 UTC,heat,auto,692,706,706,LA,New Orleans,110,True,False,False,Gas
4,b30836d5193c063d586905aa9551d15c438282c8,2017-12-10 14:20:00 UTC,heat,hold,654,654,636,LA,New Orleans,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257511,81640304f6d55978ea76eafcccd97a08c13fb89d,2017-12-16 15:10:00 UTC,auto,hold,747,790,740,LA,New Orleans,120,False,False,False,Gas
257512,81640304f6d55978ea76eafcccd97a08c13fb89d,2017-12-14 15:55:00 UTC,auto,hold,736,790,740,LA,New Orleans,120,False,False,False,Gas
257513,81640304f6d55978ea76eafcccd97a08c13fb89d,2017-12-18 16:05:00 UTC,auto,hold,756,790,740,LA,New Orleans,120,False,False,False,Gas
257514,81640304f6d55978ea76eafcccd97a08c13fb89d,2017-12-15 19:45:00 UTC,auto,hold,739,790,740,LA,New Orleans,120,False,False,False,Gas


In [163]:
# Stamp every row with its year and month (both stored as strings)
dec_2017["Year"], dec_2017["Month"] = "2017", "dec"

In [164]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_column_names)

In [165]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2017_ave = dec_2017.groupby(group_keys).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file; the index is written out because it holds the group keys
dec_2017_ave.to_csv(
    path_or_buf="data/day/LA/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the 2018 December day CSV for LA
dec_2018_source = "../data_large/LA-day/2018-dec-day-LA.csv"
dec_2018 = pd.read_csv(dec_2018_source)

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = dec_2018['TemperatureExpectedCool']
heat = dec_2018['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
dec_2018.drop(index=dec_2018[too_high | too_low].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dfb11e6c1c058534612767a680637a9e2427b605,2018-12-04 12:45:00 UTC,auto,hold,693,785,687,LA,Baton Rouge,28,False,False,False,Gas
1,8fa13e7e50301f8a651baabde86f373451ec5ce5,2018-12-18 16:00:00 UTC,heat,hold,694,760,709,LA,New Orleans,110,True,False,False,Gas
2,8fa13e7e50301f8a651baabde86f373451ec5ce5,2018-12-18 19:00:00 UTC,heat,hold,723,760,688,LA,New Orleans,110,True,False,False,Gas
3,8fa13e7e50301f8a651baabde86f373451ec5ce5,2018-12-25 19:05:00 UTC,heat,auto,699,760,703,LA,New Orleans,110,True,False,False,Gas
4,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2018-12-31 13:35:00 UTC,auto,hold,721,783,703,LA,New Orleans,117,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
404781,86596de2451ab672599e1a621be703e098ef0157,2018-12-28 14:05:00 UTC,auto,auto,747,800,750,LA,New Orleans,120,False,False,False,Gas
404782,317355f77acab7c643125a0ad60a681803675b66,2018-12-12 16:40:00 UTC,auto,auto,749,810,750,LA,New Orleans,120,False,False,False,Gas
404783,8621d1da423b509f94faf9cc5aed3862e3c10c1a,2018-12-22 09:45:00 UTC,auto,auto,747,800,750,LA,New Orleans,120,True,False,False,Gas
404784,86596de2451ab672599e1a621be703e098ef0157,2018-12-28 15:30:00 UTC,auto,auto,755,800,750,LA,New Orleans,120,False,False,False,Gas


In [169]:
# Stamp every row with its year and month (both stored as strings)
dec_2018["Year"], dec_2018["Month"] = "2018", "dec"

In [170]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_column_names)

In [171]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2018_ave = dec_2018.groupby(group_keys).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file; the index is written out because it holds the group keys
dec_2018_ave.to_csv(
    path_or_buf="data/day/LA/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the 2019 December day CSV for LA
dec_2019_source = "../data_large/LA-day/2019-dec-day-LA.csv"
dec_2019 = pd.read_csv(dec_2019_source)

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = dec_2019['TemperatureExpectedCool']
heat = dec_2019['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
dec_2019.drop(index=dec_2019[too_high | too_low].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7445735f5e05d141f97c02791af735f7d1942e27,2019-12-30 18:15:00 UTC,auto,hold,720,773,723,LA,New Orleans,99,False,False,False,Gas
1,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-12-04 16:55:00 UTC,heat,auto,625,650,648,LA,Lafayette,70,True,False,True,Electric
2,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-12-11 19:35:00 UTC,heat,hold,630,650,648,LA,Lafayette,70,True,False,True,Electric
3,15c153beeaf8b031ee6b6d65ee2bd838ac39439d,2019-12-01 15:30:00 UTC,auto,hold,744,771,721,LA,New Orleans,117,False,False,False,Gas
4,9765d8a021ce77d5d4adb56c5ed19f1b58362321,2019-12-04 16:30:00 UTC,heat,auto,626,650,648,LA,Lafayette,70,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
491963,317355f77acab7c643125a0ad60a681803675b66,2019-12-14 19:00:00 UTC,auto,auto,723,780,730,LA,New Orleans,120,False,False,False,Gas
491964,05d6742631d44cf568e468696c42a95d1f7466da,2019-12-01 15:05:00 UTC,cool,auto,714,750,750,LA,New Orleans,120,True,False,False,Gas
491965,05d6742631d44cf568e468696c42a95d1f7466da,2019-12-01 15:25:00 UTC,cool,auto,712,750,750,LA,New Orleans,120,True,False,False,Gas
491966,05d6742631d44cf568e468696c42a95d1f7466da,2019-12-01 16:20:00 UTC,cool,auto,709,750,750,LA,New Orleans,120,True,False,False,Gas


In [175]:
# Stamp every row with its year and month (both stored as strings)
dec_2019["Year"], dec_2019["Month"] = "2019", "dec"

In [176]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_column_names)

In [177]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2019_ave = dec_2019.groupby(group_keys).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file; the index is written out because it holds the group keys
dec_2019_ave.to_csv(
    path_or_buf="data/day/LA/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the 2020 December day CSV for LA
dec_2020_source = "../data_large/LA-day/2020-dec-day-LA.csv"
dec_2020 = pd.read_csv(dec_2020_source)

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is discarded when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
cool = dec_2020['TemperatureExpectedCool']
heat = dec_2020['TemperatureExpectedHeat']
too_high = (cool >= 850) | (heat >= 850)
too_low = (cool <= 600) | (heat <= 600)

# Drop in place so the same frame object carries on to the following cells
dec_2020.drop(index=dec_2020[too_high | too_low].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,329373c9083e6c134f988acf99b81c2a24087a1a,2020-12-02 13:50:00 UTC,heat,auto,673,684,652,LA,New Orleans,88,False,False,False,Gas
1,d99472a41eeb8f0263c6bf73814aec3d52a76243,2020-12-16 12:55:00 UTC,auto,hold,690,752,702,LA,Shreveport,47,False,False,False,Gas
2,d5082ef9335a7f4cb281d4b6231383a69e737e56,2020-12-11 13:10:00 UTC,auto,hold,708,754,704,LA,New Orleans,110,False,False,False,Gas
3,9e0ebe61c571e1c0e5e4740dca1bb786f2ae78de,2020-12-26 17:20:00 UTC,auto,hold,627,739,629,LA,New Orleans,70,False,False,False,Gas
4,d99472a41eeb8f0263c6bf73814aec3d52a76243,2020-12-16 11:35:00 UTC,auto,hold,693,752,702,LA,Shreveport,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429807,81640304f6d55978ea76eafcccd97a08c13fb89d,2020-12-19 16:10:00 UTC,auto,auto,728,780,730,LA,New Orleans,120,False,False,False,Gas
429808,317355f77acab7c643125a0ad60a681803675b66,2020-12-19 17:50:00 UTC,auto,auto,726,780,730,LA,New Orleans,120,False,False,False,Gas
429809,81640304f6d55978ea76eafcccd97a08c13fb89d,2020-12-26 14:15:00 UTC,auto,hold,723,780,730,LA,New Orleans,120,False,False,False,Gas
429810,81640304f6d55978ea76eafcccd97a08c13fb89d,2020-12-17 15:25:00 UTC,auto,auto,722,780,730,LA,New Orleans,120,False,False,False,Gas


In [181]:
# Stamp every row with its year and month (both stored as strings)
dec_2020["Year"], dec_2020["Month"] = "2020", "dec"

In [182]:
# Give the three temperature columns an "_ave" suffix to label the aggregates
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_column_names)

In [183]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True keeps the text columns (e.g. date_time, ProvinceState) out of
# the mean: pandas 2.0 changed the default to numeric_only=False, so the bare
# .mean() call raises TypeError on them, while older versions dropped them silently.
group_keys = ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
dec_2020_ave = dec_2020.groupby(group_keys).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file; the index is written out because it holds the group keys
dec_2020_ave.to_csv(
    path_or_buf="data/day/LA/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [185]:
# Collect the CSV files in the month folder.
# os.listdir() returns entries in arbitrary order, so sort the names to make the
# row order of the combined frame reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/LA/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate in a single call. Growing the frame with
# pd.concat inside the loop re-copies every accumulated row on each pass and
# seeds the result from an empty DataFrame, which newer pandas versions deprecate.
monthly_frames = []
for file in files:
    df = pd.read_csv("data/day/LA/dec/" + file)
    monthly_frames.append(df)

# Each file keeps its own row index; an empty folder still yields an empty frame
LA_dec = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

LA_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,008f4e8d0b1ee2d5311b860fe53fcf0935d191b3,dec,2017,heat,hold,Kenner,722.314815,723.259259,723.259259,37.0,False,False,False
1,01231b3659604ae515f75ef6288754f18e167b01,dec,2017,auto,auto,Covington,698.733333,729.720000,673.653333,50.0,False,False,False
2,01231b3659604ae515f75ef6288754f18e167b01,dec,2017,auto,hold,Covington,724.300245,773.112745,715.392157,50.0,False,False,False
3,01231b3659604ae515f75ef6288754f18e167b01,dec,2017,cool,hold,Covington,720.858696,783.978261,783.565217,50.0,False,False,False
4,01231b3659604ae515f75ef6288754f18e167b01,dec,2017,heat,auto,Covington,735.121429,737.485714,737.485714,50.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
955,ff4033ac05e748f162f68652c332b76627d948c6,dec,2020,cool,hold,Lake Charles,705.712418,726.124183,726.124183,10.0,False,False,False
956,ff4033ac05e748f162f68652c332b76627d948c6,dec,2020,heat,auto,Lake Charles,695.362745,700.000000,700.000000,10.0,False,False,False
957,ff4033ac05e748f162f68652c332b76627d948c6,dec,2020,heat,hold,Lake Charles,705.034483,707.113300,707.113300,10.0,False,False,False
958,ff6ef6f57a627a2ddf34ff918c56a0b2315c9b40,dec,2020,auto,auto,Bossier City,725.080357,781.250000,731.250000,75.0,False,False,False


In [187]:
# Write the combined December frame without its row index
LA_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA/LA_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the combined month CSV files in the state folder.
# os.listdir() returns entries in arbitrary order, so sort the names to make the
# row order of the combined frame reproducible from run to run.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/LA/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate in a single call. Growing the frame with
# pd.concat inside the loop re-copies every accumulated row on each pass and
# seeds the result from an empty DataFrame, which newer pandas versions deprecate.
month_frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/LA/" + file)
    month_frames.append(df)

# Each file keeps its own row index; an empty folder still yields an empty frame
LA_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

LA_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01231b3659604ae515f75ef6288754f18e167b01,aug,2017,cool,auto,Covington,751.940086,751.285307,750.741797,50.0,False,False,False
1,01231b3659604ae515f75ef6288754f18e167b01,aug,2017,cool,hold,Covington,749.558505,748.932153,748.954769,50.0,False,False,False
2,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,aug,2017,cool,auto,Luling,735.661980,737.518337,723.728606,0.0,False,False,False
3,026f9cb5ea5ea90f77c5e550fa2e827561cddef7,aug,2017,cool,hold,Luling,722.003974,723.381457,723.377483,0.0,False,False,False
4,049b5df73fed19a6c5084ad5433f112a0849f724,aug,2017,cool,auto,Gonzales,758.437340,756.312020,653.792839,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2790,fd6b5be819774676538d6ebe4bde298eba7853c4,jun,2021,auto,hold,Shreveport,732.664495,730.635179,629.915309,10.0,True,False,False
2791,fe284678f060eac820ce5928292bb3bdede08f9c,jun,2021,cool,hold,New Orleans,747.503526,767.414669,767.416079,15.0,False,False,False
2792,fe66b30e171f94a05f6059cd6b94528c71106f2f,jun,2021,auto,hold,New Orleans,760.958333,760.208333,690.000000,60.0,False,False,False
2793,ff4033ac05e748f162f68652c332b76627d948c6,jun,2021,cool,hold,Lake Charles,732.744220,736.656069,736.554913,10.0,False,False,False


In [190]:
# Write the all-months frame for the state without its row index
LA_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/LA_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
frames_to_check = (
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    # August and December frames are only loaded for 2017-2020
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
)

for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['LA']
Unique jan_2018: ['LA']
Unique jan_2019: ['LA']
Unique jan_2020: ['LA']
Unique jan_2021: ['LA']
Unique feb_2017: ['LA']
Unique feb_2018: ['LA']
Unique feb_2019: ['LA']
Unique feb_2020: ['LA']
Unique feb_2021: ['LA']
Unique jun_2017: ['LA']
Unique jun_2018: ['LA']
Unique jun_2019: ['LA']
Unique jun_2020: ['LA']
Unique jun_2021: ['LA']
Unique jul_2017: ['LA']
Unique jul_2018: ['LA']
Unique jul_2019: ['LA']
Unique jul_2020: ['LA']
Unique jul_2021: ['LA']
Unique aug_2017: ['LA']
Unique aug_2018: ['LA']
Unique aug_2019: ['LA']
Unique aug_2020: ['LA']
Unique dec_2017: ['LA']
Unique dec_2018: ['LA']
Unique dec_2019: ['LA']
Unique dec_2020: ['LA']
