# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 FL "day" CSV of raw thermostat readings into a DataFrame.
jan_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2017-jan-day-FL.csv",
)

In [3]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    jan_2017['TemperatureExpectedCool'].ge(850)
    | jan_2017['TemperatureExpectedHeat'].ge(850)
    | jan_2017['TemperatureExpectedCool'].le(600)
    | jan_2017['TemperatureExpectedHeat'].le(600)
)
jan_2017.drop(index=jan_2017.index[outlier_rows.to_numpy()], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f7010700c59175433a883eba80cfcf5fa2edad2f,2017-01-22 17:15:00 UTC,auto,hold,748,800,750,FL,Newberry,10,False,False,True,Electric
1,fbc4bb2d5762500329e74ad73cf523fc50df7f3d,2017-01-30 18:55:00 UTC,auto,auto,720,760,700,FL,Orlando,15,False,False,True,Electric
2,e20a3e765cdf2b7a2626a87af28bd4f0b6587df8,2017-01-01 12:35:00 UTC,auto,hold,725,770,720,FL,South Pasadena,50,True,False,False,Gas
3,ade94a0e1eb1ff8d488cb6e6036362a8d2391975,2017-01-28 17:45:00 UTC,auto,hold,680,730,650,FL,Riverview,5,False,False,True,Electric
4,ddda85d74ec02ede30a53c68043b86a3e0ee52be,2017-01-14 18:20:00 UTC,auto,hold,755,760,710,FL,Hialeah,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
915550,805b979c88c6beb513fb8be8963da99c2cb05798,2017-01-01 14:00:00 UTC,auto,auto,627,800,640,FL,Green Cover Springs,5,True,False,True,Electric
915551,805b979c88c6beb513fb8be8963da99c2cb05798,2017-01-13 16:45:00 UTC,auto,auto,657,800,640,FL,Green Cover Springs,5,True,False,True,Electric
915552,430c429dc9875cf26ef69755a5b31c4b60e4ea75,2017-01-01 19:05:00 UTC,auto,hold,748,800,650,FL,Green Cover Springs,5,True,False,True,Electric
915553,430c429dc9875cf26ef69755a5b31c4b60e4ea75,2017-01-10 12:25:00 UTC,auto,hold,665,800,650,FL,Green Cover Springs,5,True,False,True,Electric


In [4]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2017 = jan_2017.rename(columns=ave_labels)

In [6]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
jan_2017_ave = (
    jan_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00020f16d85e6d8c70fc67725f03ab06b116fead,Jan,2017,auto,auto,Jacksonville,713.266667,760.000000,670.000000,5.0,False,False,True
0024f0f900a4ff6923e95a2a20ee94c699ecf167,Jan,2017,cool,auto,Miami,742.527842,800.000000,780.939675,0.0,False,False,False
0024f0f900a4ff6923e95a2a20ee94c699ecf167,Jan,2017,cool,hold,Miami,710.424242,754.848485,754.848485,0.0,False,False,False
01047c4cced05cfd81b6a7c9c263239317203975,Jan,2017,auto,auto,Oviedo,743.220588,765.000000,715.000000,0.0,False,False,True
01047c4cced05cfd81b6a7c9c263239317203975,Jan,2017,cool,auto,Oviedo,746.829452,760.395943,713.435011,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
fed8bcb67d870524eabe5fe9d83721a808419d6e,Jan,2017,heat,auto,Sebastian,731.666667,739.416667,739.416667,25.0,False,False,False
fed8bcb67d870524eabe5fe9d83721a808419d6e,Jan,2017,heat,hold,Sebastian,726.666667,720.000000,720.000000,25.0,False,False,False
feea0a3dc64015ecc77007c21d048b31346d3261,Jan,2017,auto,auto,Sunrise,753.476190,773.909254,682.776280,6.0,False,False,False
feea0a3dc64015ecc77007c21d048b31346d3261,Jan,2017,auto,hold,Sunrise,746.239939,793.655120,688.470199,6.0,False,False,False


In [7]:
# Save the January 2017 averages; the index is written because it holds the
# group keys.
jan_2017_ave.to_csv(
    path_or_buf="data/day/FL/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the January 2018 FL "day" CSV of raw thermostat readings into a DataFrame.
jan_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2018-jan-day-FL.csv",
)

In [9]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    jan_2018['TemperatureExpectedCool'].ge(850)
    | jan_2018['TemperatureExpectedHeat'].ge(850)
    | jan_2018['TemperatureExpectedCool'].le(600)
    | jan_2018['TemperatureExpectedHeat'].le(600)
)
jan_2018.drop(index=jan_2018.index[outlier_rows.to_numpy()], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6b47f19e2ac3de5a5c088d2941ae61bb91f316b0,2018-01-06 15:10:00 UTC,heat,hold,673,709,698,FL,St Petersburg,55,True,False,True,Electric
2,6858313966d2545da544ff439290f5bf27f4fa40,2018-01-11 14:45:00 UTC,auto,hold,735,795,725,FL,New Smyrna Beach,10,True,False,True,Electric
3,c9b4a1b8b7fddea51aa62557d1e0077f59c114dc,2018-01-03 07:05:00 UTC,heat,hold,693,695,695,FL,-,17,False,False,True,Electric
4,0d0fad2a370ee198dc9656c834c57253c8fa1a2f,2018-01-26 15:35:00 UTC,cool,auto,734,752,690,FL,Miami Beach,79,False,False,False,Gas
5,e478562afbe7db9cd8aa42235cb8fb37cc134b92,2018-01-07 16:30:00 UTC,heat,hold,714,695,695,FL,wilton manors,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2713878,cfbff081e413476780f609e5c7dcf2020c0677b3,2018-01-19 15:25:00 UTC,auto,hold,713,765,715,FL,Dania Beach,10,False,False,False,Gas
2713879,0f010f452f9f5edaac659b79ea2e3c9701813846,2018-01-05 18:10:00 UTC,heat,hold,766,765,765,FL,Longwood,40,True,False,True,Electric
2713880,f1c94a3ce3513081ef44beb8de5e0e10f522f917,2018-01-24 12:30:00 UTC,auto,auto,692,765,715,FL,Jacksonville,26,False,False,True,Electric
2713881,db1f63574972d52866f2b1c108a4b1f6735886d2,2018-01-17 15:00:00 UTC,auto,hold,718,765,715,FL,Pembroke pines,50,False,False,False,Gas


In [10]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2018 = jan_2018.rename(columns=ave_labels)

In [12]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
jan_2018_ave = (
    jan_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [13]:
# Save the January 2018 averages; the index is written because it holds the
# group keys.
jan_2018_ave.to_csv(
    path_or_buf="data/day/FL/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the January 2019 FL "day" CSV of raw thermostat readings into a DataFrame.
jan_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2019-jan-day-FL.csv",
)

In [15]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    jan_2019['TemperatureExpectedCool'].ge(850)
    | jan_2019['TemperatureExpectedHeat'].ge(850)
    | jan_2019['TemperatureExpectedCool'].le(600)
    | jan_2019['TemperatureExpectedHeat'].le(600)
)
jan_2019.drop(index=jan_2019.index[outlier_rows.to_numpy()], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,531ef970345a9647e7bbe147e8de34e458f7c47b,2019-01-23 13:50:00 UTC,heat,auto,756,760,760,FL,Valrico,20,False,False,True,Electric
1,4053afa5c9936c8c8ba088d44f87bfc2fb25bbcc,2019-01-17 16:25:00 UTC,auto,auto,684,720,670,FL,Lutz,0,True,False,True,Electric
2,c1d5949ab8e0c06eb99b45864ee18c529b3b9ccd,2019-01-19 13:45:00 UTC,auto,hold,703,740,690,FL,Orlando,8,False,False,True,Electric
3,7173c493c97ba3c84ef17be09fc2d2a14a137228,2019-01-19 15:15:00 UTC,auto,hold,658,750,650,FL,Orlando,50,False,False,True,Electric
4,56b9dae26f97a5ca2de4f82cb113fb73ecc0e036,2019-01-28 13:10:00 UTC,cool,auto,653,820,650,FL,Kissimmee,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4217117,78c544d449e0387edcff821adcda02ed6712c8ea,2019-01-11 18:25:00 UTC,cool,hold,668,760,680,FL,Venice,20,True,False,True,Electric
4217118,fbb4175d69b11497cf8db1965cd6947ffd9e659a,2019-01-28 12:05:00 UTC,auto,auto,718,770,720,FL,West Palm Beach,15,False,False,False,Gas
4217119,6b55ca21b381b89f7da9b98d933d4a31636e4769,2019-01-27 16:50:00 UTC,heat,auto,696,720,720,FL,Navarre,0,False,False,True,Electric
4217120,4d0b2b869cd1299c830b5fd3b09f9a3aa42da99c,2019-01-13 16:20:00 UTC,cool,hold,765,790,690,FL,Tamarac,0,False,False,False,Gas


In [16]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2019 = jan_2019.rename(columns=ave_labels)

In [18]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
jan_2019_ave = (
    jan_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [19]:
# Save the January 2019 averages; the index is written because it holds the
# group keys.
jan_2019_ave.to_csv(
    path_or_buf="data/day/FL/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the January 2020 FL "day" CSV of raw thermostat readings into a DataFrame.
jan_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2020-jan-day-FL.csv",
)

In [21]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    jan_2020['TemperatureExpectedCool'].ge(850)
    | jan_2020['TemperatureExpectedHeat'].ge(850)
    | jan_2020['TemperatureExpectedCool'].le(600)
    | jan_2020['TemperatureExpectedHeat'].le(600)
)
jan_2020.drop(index=jan_2020.index[outlier_rows.to_numpy()], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3b09c6048ef138b5049f3ee160c5e9254dda559e,2020-01-14 19:05:00 UTC,auto,auto,767,760,710,FL,Pinellas Park,20,False,False,True,Electric
1,f87f3bd99a20509a3bc203c71cf6e3ffd1cf76f6,2020-01-26 16:10:00 UTC,auto,auto,708,725,675,FL,Homestead,10,False,False,True,Electric
2,fc80700b84027c9e1b092d52b83a5dd558f2e33f,2020-01-01 14:50:00 UTC,heat,hold,710,720,720,FL,destin,30,False,False,True,Electric
3,32ec01b99b2b667fd1ed548a98ce4c0edc5cff38,2020-01-14 15:00:00 UTC,auto,hold,718,730,702,FL,Coral Springs,20,False,False,False,Gas
4,159a3848a6de3d32fa9fcb91bb122a1e1a5d6749,2020-01-18 14:40:00 UTC,cool,auto,712,710,710,FL,Atlantis,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4425109,a708992f20f4f7765e4ae0300ed2c3514a6a2444,2020-01-19 16:15:00 UTC,cool,hold,740,742,742,FL,Boca Raton,0,False,False,False,Gas
4425110,c30325a73b93609a409be5b926382b085930d958,2020-01-05 13:10:00 UTC,heat,hold,697,700,700,FL,Lake Mary,5,False,False,True,Electric
4425111,4a5663d80ac77cbb9922275581c4c44696a6fcd7,2020-01-07 15:45:00 UTC,cool,auto,693,710,710,FL,Melbourne,25,False,False,False,Gas
4425112,e949cb54d57ee86d897c0a539f25756e5d2dd50f,2020-01-03 14:05:00 UTC,auto,auto,732,740,680,FL,DeBary,10,True,False,True,Electric


In [22]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2020 = jan_2020.rename(columns=ave_labels)

In [24]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
jan_2020_ave = (
    jan_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [25]:
# Save the January 2020 averages; the index is written because it holds the
# group keys.
jan_2020_ave.to_csv(
    path_or_buf="data/day/FL/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the January 2021 FL "day" CSV of raw thermostat readings into a DataFrame.
jan_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2021-jan-day-FL.csv",
)

In [27]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    jan_2021['TemperatureExpectedCool'].ge(850)
    | jan_2021['TemperatureExpectedHeat'].ge(850)
    | jan_2021['TemperatureExpectedCool'].le(600)
    | jan_2021['TemperatureExpectedHeat'].le(600)
)
jan_2021.drop(index=jan_2021.index[outlier_rows.to_numpy()], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9c2eaee56dd3dc278aadd1b505f80555e34aabf7,2021-01-03 12:25:00 UTC,cool,hold,756,757,757,FL,Oakland Park,20,False,False,False,Gas
1,39bbeab58bc1578fb593378a6fc8a1c2e37a2ae6,2021-01-20 17:20:00 UTC,auto,hold,731,775,645,FL,Wellington,30,False,False,False,Gas
2,ee1aa1f9de7208a24ef1d093c9cd33fc8aed8193,2021-01-09 17:40:00 UTC,cool,hold,734,755,755,FL,Palmetto,5,True,False,False,Gas
3,e50c55bfd0e580222d9d0bac58312254a443f7ab,2021-01-02 09:10:00 UTC,cool,hold,645,703,703,FL,Panama City Beach,0,False,False,True,Electric
4,dedce87a13dc4cceeb5cded9a4410d594523ac72,2021-01-28 18:00:00 UTC,cool,hold,697,697,697,FL,Miami,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2541669,a9e0c1277d7c419c9c53a177622342ee019c3418,2021-01-25 18:00:00 UTC,heat,hold,799,770,760,FL,Saint Cloud,5,False,False,True,Electric
2541670,e1f04e7283ec625fd77870c409b6849ff6a65b73,2021-01-24 12:30:00 UTC,cool,hold,719,760,760,FL,Saint Petersburg,77,False,False,True,Electric
2541671,522e8de47bb38ecea118c6a329ed93dda0aae3dc,2021-01-22 18:10:00 UTC,cool,hold,716,760,760,FL,Apollo Beach,0,True,False,False,Gas
2541672,1d60ecf4c1cbf82437bbb548de8417f9e7a853c4,2021-01-21 16:00:00 UTC,cool,hold,689,760,760,FL,Orlando,0,True,False,True,Electric


In [28]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jan_2021 = jan_2021.rename(columns=ave_labels)

In [30]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
jan_2021_ave = (
    jan_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [31]:
# Save the January 2021 averages; the index is written because it holds the
# group keys.
jan_2021_ave.to_csv(
    path_or_buf="data/day/FL/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the per-year January CSV exports found in the output directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# give the combined frame a reproducible row order.
files = sorted(f for f in os.listdir("data/day/FL/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year January file, then concatenate once. Growing a frame
# with pd.concat inside the loop recopies all earlier rows on each iteration.
frames = [pd.read_csv("data/day/FL/jan/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to the empty
# frame that the original loop produced when no files were found.
FL_jan = pd.concat(frames) if frames else pd.DataFrame()

FL_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,Jan,2017,auto,auto,Jacksonville,713.266667,760.000000,670.000000,5.0,False,False,True
1,0024f0f900a4ff6923e95a2a20ee94c699ecf167,Jan,2017,cool,auto,Miami,742.527842,800.000000,780.939675,0.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,Jan,2017,cool,hold,Miami,710.424242,754.848485,754.848485,0.0,False,False,False
3,01047c4cced05cfd81b6a7c9c263239317203975,Jan,2017,auto,auto,Oviedo,743.220588,765.000000,715.000000,0.0,False,False,True
4,01047c4cced05cfd81b6a7c9c263239317203975,Jan,2017,cool,auto,Oviedo,746.829452,760.395943,713.435011,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3897,ffa4468a31a8e35dd633f0022ea0b855501c5ba9,Jan,2021,heat,hold,Palm Coast,726.165608,725.971416,724.547641,10.0,False,False,True
3898,ffcf942aa8736eaa25440e6817dea4d9efc3c67f,Jan,2021,auto,hold,Oakland Park,721.237500,760.000000,699.350000,30.0,False,False,False
3899,fffcd8322781a8483039a10c259725e9da9cfdec,Jan,2021,cool,hold,North Miami,719.427136,717.492462,716.552764,70.0,False,False,False
3900,ffff46e404a9831c28d8be1a3e427a4377690c6b,Jan,2021,auto,hold,Parrish,753.402299,775.126437,710.896552,0.0,False,False,True


In [34]:
# Write the combined January frame for FL; the row index is left out of the file.
FL_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/FL/FL_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 FL "day" CSV of raw thermostat readings into a DataFrame.
feb_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2017-feb-day-FL.csv",
)

In [36]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    feb_2017['TemperatureExpectedCool'].ge(850)
    | feb_2017['TemperatureExpectedHeat'].ge(850)
    | feb_2017['TemperatureExpectedCool'].le(600)
    | feb_2017['TemperatureExpectedHeat'].le(600)
)
feb_2017.drop(index=feb_2017.index[outlier_rows.to_numpy()], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7e690786f4d499ba5a182bc9bf5f8299b40fd99c,2017-02-26 13:35:00 UTC,cool,hold,770,770,770,FL,Coconut Creek,0,False,False,False,Gas
2,754714343fa0ebaf6d54291c29f4b445680d4a73,2017-02-04 12:45:00 UTC,cool,auto,733,760,670,FL,Kissimmee,25,False,False,True,Electric
3,feea0a3dc64015ecc77007c21d048b31346d3261,2017-02-14 18:50:00 UTC,auto,auto,773,760,630,FL,Sunrise,6,False,False,False,Gas
4,feea0a3dc64015ecc77007c21d048b31346d3261,2017-02-05 18:05:00 UTC,auto,auto,751,780,670,FL,Sunrise,6,False,False,False,Gas
5,ad6110c12ed85aeff51dd158dfe57c48bd9e9b8c,2017-02-12 17:15:00 UTC,cool,hold,732,730,730,FL,Wellington,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
827807,805b979c88c6beb513fb8be8963da99c2cb05798,2017-02-14 13:50:00 UTC,auto,auto,633,800,640,FL,Green Cover Springs,5,True,False,True,Electric
827808,805b979c88c6beb513fb8be8963da99c2cb05798,2017-02-26 18:50:00 UTC,auto,auto,676,800,640,FL,Green Cover Springs,5,True,False,True,Electric
827809,805b979c88c6beb513fb8be8963da99c2cb05798,2017-02-11 12:00:00 UTC,auto,auto,624,800,640,FL,Green Cover Springs,5,True,False,True,Electric
827810,805b979c88c6beb513fb8be8963da99c2cb05798,2017-02-02 14:05:00 UTC,auto,auto,635,800,640,FL,Green Cover Springs,5,True,False,True,Electric


In [37]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this one uses lowercase "feb" — confirm the intended casing before months
# are combined.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2017 = feb_2017.rename(columns=ave_labels)

In [39]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
feb_2017_ave = (
    feb_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [40]:
# Save the February 2017 averages; the index is written because it holds the
# group keys.
feb_2017_ave.to_csv(
    path_or_buf="data/day/FL/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the February 2018 FL "day" CSV of raw thermostat readings into a DataFrame.
feb_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2018-feb-day-FL.csv",
)

In [42]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    feb_2018['TemperatureExpectedCool'].ge(850)
    | feb_2018['TemperatureExpectedHeat'].ge(850)
    | feb_2018['TemperatureExpectedCool'].le(600)
    | feb_2018['TemperatureExpectedHeat'].le(600)
)
feb_2018.drop(index=feb_2018.index[outlier_rows.to_numpy()], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3c2330d42e6e3d81b417b1a03edb0024793e6bbb,2018-02-22 18:40:00 UTC,cool,hold,805,800,800,FL,Homestead,20,False,False,False,Gas
1,a38c9e0c538e17c8d4f31a7acd6095bda8d36855,2018-02-04 16:50:00 UTC,auto,hold,705,705,655,FL,Ponte Vedra,0,False,False,True,Electric
2,4227c3c3ce5b52a02f547d6a8f267836b6e7c6db,2018-02-11 11:30:00 UTC,auto,auto,759,785,735,FL,Ocoee,37,False,False,False,Gas
3,9c6ff4ffc41ab995ea7860bfdf9dfff3a3f209f3,2018-02-06 18:40:00 UTC,cool,hold,740,735,735,FL,The Villages,15,False,False,False,Gas
4,9d9804ca59adb6c9bcf875fafd4195177c1f42bb,2018-02-12 16:30:00 UTC,auto,hold,751,765,667,FL,Belleview,36,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2600953,3216be7ce3e33e00991a0c0a24aa28e14b9de11e,2018-02-28 18:45:00 UTC,cool,hold,766,760,760,FL,Cape Ciral,8,False,False,False,Gas
2600954,a568ba12ae64af9f892bdae9c3c797b4d0bd8a3f,2018-02-17 12:10:00 UTC,cool,hold,740,760,760,FL,bradenton,5,False,False,True,Electric
2600955,9b4541c54b4c824f2fdb592f8a1ed0ff6a7cf737,2018-02-27 19:05:00 UTC,cool,hold,772,760,760,FL,Winter Garden,5,True,False,True,Electric
2600956,db09654404a4bb54e8c19121626a336237a6e28e,2018-02-23 15:20:00 UTC,cool,hold,762,760,760,FL,Cape Coral,0,True,False,False,Gas


In [43]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this one uses lowercase "feb" — confirm the intended casing before months
# are combined.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2018 = feb_2018.rename(columns=ave_labels)

In [45]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
feb_2018_ave = (
    feb_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [46]:
# Save the February 2018 averages; the index is written because it holds the
# group keys.
feb_2018_ave.to_csv(
    path_or_buf="data/day/FL/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the February 2019 FL "day" CSV of raw thermostat readings into a DataFrame.
feb_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2019-feb-day-FL.csv",
)

In [48]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    feb_2019['TemperatureExpectedCool'].ge(850)
    | feb_2019['TemperatureExpectedHeat'].ge(850)
    | feb_2019['TemperatureExpectedCool'].le(600)
    | feb_2019['TemperatureExpectedHeat'].le(600)
)
feb_2019.drop(index=feb_2019.index[outlier_rows.to_numpy()], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,49304c734512b21c6ad58cd7c16fe605020d5824,2019-02-20 13:40:00 UTC,cool,auto,759,779,779,FL,Orlando,10,True,False,False,Gas
2,ddfcfffed33adf93a7d436ceb2e61ae7e8d8fda7,2019-02-18 16:30:00 UTC,cool,hold,722,722,722,FL,Naples,9,True,False,False,Gas
3,9803938f15ad5bfc82916a352ac6cc1e8d600249,2019-02-06 15:05:00 UTC,auto,hold,690,725,669,FL,Longwood,40,False,False,False,Gas
4,1e3f36227b6d194d253f7093024dbc0dd70549df,2019-02-28 10:40:00 UTC,auto,auto,733,755,705,FL,Lake Worth,0,False,False,False,Gas
7,cad60670d975832582759010db9533090231e9ef,2019-02-02 12:25:00 UTC,heat,hold,728,723,723,FL,Clermont,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2907088,3216be7ce3e33e00991a0c0a24aa28e14b9de11e,2019-02-25 18:45:00 UTC,cool,hold,759,760,760,FL,Cape Ciral,8,False,False,False,Gas
2907089,b0c1b3e03e56da92f7385c04f1e316f0e05b55b5,2019-02-25 13:05:00 UTC,cool,auto,753,760,760,FL,Lakeland,0,False,False,True,Electric
2907090,ab1bf48ba470aeef94f81b11565dc886061a2871,2019-02-01 15:25:00 UTC,cool,auto,739,760,760,FL,Margate,50,False,False,False,Gas
2907091,74fb118d6edaedab3da1f29e297ded25ec319791,2019-02-23 17:10:00 UTC,cool,hold,755,760,760,FL,Pompano Beach,0,False,False,False,Gas


In [49]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this one uses lowercase "feb" — confirm the intended casing before months
# are combined.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2019 = feb_2019.rename(columns=ave_labels)

In [51]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
feb_2019_ave = (
    feb_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [52]:
# Save the February 2019 averages; the index is written because it holds the
# group keys.
feb_2019_ave.to_csv(
    path_or_buf="data/day/FL/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the February 2020 FL "day" CSV of raw thermostat readings into a DataFrame.
feb_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2020-feb-day-FL.csv",
)

In [54]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    feb_2020['TemperatureExpectedCool'].ge(850)
    | feb_2020['TemperatureExpectedHeat'].ge(850)
    | feb_2020['TemperatureExpectedCool'].le(600)
    | feb_2020['TemperatureExpectedHeat'].le(600)
)
feb_2020.drop(index=feb_2020.index[outlier_rows.to_numpy()], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,19078efc342b5dd095f36c5b2958a3558a39613c,2020-02-19 16:10:00 UTC,cool,auto,703,700,750,FL,Winter Haven,0,False,False,True,Electric
1,44fa42a91646206f8047908b2359763cbb2bebdd,2020-02-01 18:00:00 UTC,auto,hold,721,719,659,FL,Kissimmee,0,False,False,False,Gas
2,805b979c88c6beb513fb8be8963da99c2cb05798,2020-02-11 13:20:00 UTC,auto,auto,702,720,700,FL,Green Cover Springs,5,True,False,True,Electric
3,c0472acb00956145e2b27225459610c22c636e98,2020-02-09 12:15:00 UTC,auto,auto,721,740,680,FL,Tierra Verde,30,True,False,False,Gas
4,49c4c11b8b27e2b888a6a92ae01f4741d4ac69e3,2020-02-15 12:35:00 UTC,cool,auto,703,700,700,FL,Miami Beach,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3998898,63a9630ed3ea7381f7a6f9aee56c6c5d25c39d8d,2020-02-23 18:10:00 UTC,auto,auto,678,735,685,FL,Holmes Beach,30,False,False,False,Gas
3998899,75774d2daf04754cdd5ebdcdc74ab62c7dc883c7,2020-02-09 15:25:00 UTC,cool,hold,731,750,730,FL,Lehigh Acres,19,True,False,False,Gas
3998900,f79bdd3708b2e9b9adbf35a895b6b6929351aa31,2020-02-22 14:15:00 UTC,heat,auto,743,750,750,FL,Tampa,35,True,False,True,Electric
3998901,630ea436ad4ed1e37802d2cc3d2f92c0bc9777f8,2020-02-02 15:30:00 UTC,cool,hold,683,680,680,FL,Pompano Beach,40,False,False,False,Gas


In [55]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this one uses lowercase "feb" — confirm the intended casing before months
# are combined.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2020 = feb_2020.rename(columns=ave_labels)

In [57]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
feb_2020_ave = (
    feb_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [58]:
# Save the February 2020 averages; the index is written because it holds the
# group keys.
feb_2020_ave.to_csv(
    path_or_buf="data/day/FL/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the February 2021 FL "day" CSV of raw thermostat readings into a DataFrame.
feb_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/FL-day/2021-feb-day-FL.csv",
)

In [60]:
# Remove predetermined setpoint outliers before aggregating: a row is dropped
# when either expected setpoint (cool or heat) is >= 850 or <= 600.
outlier_rows = (
    feb_2021['TemperatureExpectedCool'].ge(850)
    | feb_2021['TemperatureExpectedHeat'].ge(850)
    | feb_2021['TemperatureExpectedCool'].le(600)
    | feb_2021['TemperatureExpectedHeat'].le(600)
)
feb_2021.drop(index=feb_2021.index[outlier_rows.to_numpy()], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2e21444d04b9963cfc5948da4e91962c3ae122b8,2021-02-28 19:25:00 UTC,cool,hold,725,766,766,FL,Panama City,15,False,False,True,Electric
2,bc569bfaf3396e0ecca46e88a333f31a5f984d24,2021-02-11 16:20:00 UTC,cool,hold,749,762,762,FL,Homestead,10,False,False,False,Gas
3,78b50b5a677d87e7551186383ecf31fd1e014b45,2021-02-18 15:15:00 UTC,auto,hold,749,808,708,FL,Tarpon Springs,10,False,False,True,Electric
4,87a9a530caa8878f9628838d182ae0afbced2f90,2021-02-27 17:10:00 UTC,cool,hold,755,743,608,FL,Coral Springs,35,False,False,False,Gas
5,179bf8ccb2198aee1c0053d4a79ad13116630d34,2021-02-07 17:45:00 UTC,cool,hold,765,760,602,FL,Naples,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2331063,d30b1602fbdca6b250f1ff278fc389f84ae44493,2021-02-02 13:45:00 UTC,heat,hold,758,760,760,FL,Windermere,10,False,False,False,Gas
2331064,a9e0c1277d7c419c9c53a177622342ee019c3418,2021-02-15 17:20:00 UTC,cool,hold,764,770,760,FL,Saint Cloud,5,False,False,True,Electric
2331065,38a68c85ecd6047a0c8823b7787e7d68e320a234,2021-02-25 17:30:00 UTC,cool,hold,760,760,760,FL,Boca Raton,40,False,False,False,Gas
2331066,13ab23a3b0b55d9169ad1d4441a7f4f1585d1fb3,2021-02-10 19:40:00 UTC,cool,hold,764,760,760,FL,Pompano Beach,0,False,False,False,Gas


In [61]:
# Label every row with its year and month; both columns are group keys in the
# aggregation cell.
# NOTE(review): the January cells label the month "Jan" (capitalized) while
# this one uses lowercase "feb" — confirm the intended casing before months
# are combined.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
feb_2021 = feb_2021.rename(columns=ave_labels)

In [63]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops text
# columns (date_time, ProvinceState, ...) silently and raises TypeError instead.
feb_2021_ave = (
    feb_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [64]:
# Save the February 2021 averages; the index is written because it holds the
# group keys.
feb_2021_ave.to_csv(
    path_or_buf="data/day/FL/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the per-year February CSV exports found in the output directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# give the combined frame a reproducible row order.
files = sorted(f for f in os.listdir("data/day/FL/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year February file, then concatenate once. Growing a frame
# with pd.concat inside the loop recopies all earlier rows on each iteration.
frames = [pd.read_csv("data/day/FL/feb/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to the empty
# frame that the original loop produced when no files were found.
FL_feb = pd.concat(frames) if frames else pd.DataFrame()

FL_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,feb,2017,auto,auto,Jacksonville,722.084507,760.000000,670.000000,5.0,False,False,True
1,0024f0f900a4ff6923e95a2a20ee94c699ecf167,feb,2017,cool,auto,Miami,767.633484,799.990950,760.009050,0.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,feb,2017,cool,hold,Miami,749.417382,774.985515,774.975858,0.0,False,False,False
3,01047c4cced05cfd81b6a7c9c263239317203975,feb,2017,cool,auto,Oviedo,747.507784,761.637126,694.265868,0.0,False,False,True
4,01047c4cced05cfd81b6a7c9c263239317203975,feb,2017,cool,hold,Oviedo,751.039216,760.000000,760.000000,0.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3826,ffcf942aa8736eaa25440e6817dea4d9efc3c67f,feb,2021,auto,hold,Oakland Park,709.054054,760.000000,680.000000,30.0,False,False,False
3827,ffcf942aa8736eaa25440e6817dea4d9efc3c67f,feb,2021,cool,hold,Oakland Park,756.978571,759.995238,759.980952,30.0,False,False,False
3828,fffcd8322781a8483039a10c259725e9da9cfdec,feb,2021,cool,hold,North Miami,723.718160,725.439858,724.521226,70.0,False,False,False
3829,ffff46e404a9831c28d8be1a3e427a4377690c6b,feb,2021,auto,hold,Parrish,729.333333,780.000000,730.000000,0.0,False,False,True


In [67]:
# Write the combined month table; index=False leaves out the repeating per-file row numbers
FL_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/FL/FL_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 Florida readings
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2017-jun-day-FL.csv")

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jun_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jun_2017 = jun_2017.drop(index=jun_2017.index[is_outlier])

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,12a37145fd7cee5f696f62c41b25b77e7884801a,2017-06-15 12:25:00 UTC,auto,auto,762,780,700,FL,Coral Springs,0,False,False,False,Gas
1,6fe42f2a5734babc53ae9e179fb5c61dd5aa2bce,2017-06-05 19:35:00 UTC,cool,auto,773,770,700,FL,Daytona Beach,5,False,False,True,Electric
2,1d71d6b47aa76f9be2339f37a8a4d984bab6f050,2017-06-09 16:45:00 UTC,cool,hold,755,750,750,FL,Casselberry,30,False,False,True,Electric
3,2dbdc0c5b9ee64661a516a3fb359cc277316a415,2017-06-01 14:35:00 UTC,cool,auto,732,760,710,FL,Destin,20,False,False,True,Electric
4,2616da840190b037029be47d0bddbd80e2dd29e9,2017-06-27 19:30:00 UTC,auto,auto,790,760,680,FL,Altamonte Springs,27,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1640739,b14f9eab461a06d27bcaf2bb3103d63fc9aefbdd,2017-06-11 18:00:00 UTC,cool,hold,743,740,740,FL,Palm Beach Gardens,0,False,False,False,Gas
1640740,b14f9eab461a06d27bcaf2bb3103d63fc9aefbdd,2017-06-06 14:00:00 UTC,cool,hold,744,740,740,FL,Palm Beach Gardens,0,False,False,False,Gas
1640741,b68773762e82ce89bfde709432c259f719afb7d0,2017-06-12 12:15:00 UTC,auto,hold,764,760,620,FL,Palm Beach Gardens,5,True,False,False,Gas
1640742,b14f9eab461a06d27bcaf2bb3103d63fc9aefbdd,2017-06-28 17:15:00 UTC,cool,auto,739,730,740,FL,Palm Beach Gardens,0,False,False,False,Gas


In [70]:
# Tag every row with its source year and month (both stored as strings)
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jun_2017 = jun_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jun_2017_ave.to_csv(path_or_buf="data/day/FL/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the raw June 2018 Florida readings
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2018-jun-day-FL.csv")

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jun_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jun_2018 = jun_2018.drop(index=jun_2018.index[is_outlier])

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d3fda431d007897e050b6423e569cf772d1eb6aa,2018-06-16 17:35:00 UTC,auto,hold,746,745,635,FL,Gainesville,30,False,False,False,Gas
1,b4cfc4cc0bc94157625d11f8644ce9dd6e4fb311,2018-06-01 17:15:00 UTC,auto,hold,777,770,702,FL,Miami,57,False,False,False,Gas
2,ea12225706ab26195d1b58fb72e36724a6215482,2018-06-21 15:00:00 UTC,cool,hold,717,705,705,FL,Coral Springs,29,True,False,False,Gas
3,c42808e540740a1918cc4a82c53ec5b93f3af64c,2018-06-25 17:30:00 UTC,auto,auto,776,772,722,FL,Middleburg,5,True,False,True,Electric
4,2ae3555512164c3351067061f9a9525ade404bee,2018-06-16 12:20:00 UTC,cool,hold,694,689,689,FL,Bal Harbour,50,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3723115,b587ccf0e72a372eb7e08a4abc1ea0d9871f52c4,2018-06-19 13:30:00 UTC,cool,auto,741,760,760,FL,Doral,5,True,False,False,Gas
3723116,dbcf059b3900ee79bc9e7ec66067b3c5f417d173,2018-06-25 17:15:00 UTC,cool,auto,771,760,760,FL,Miami,38,False,False,False,Gas
3723117,a59f6ea94a6f1bde3105f36b849b7219b22bdce9,2018-06-30 10:45:00 UTC,cool,hold,760,760,760,FL,Clermont,20,False,False,False,Gas
3723118,29f4e7036c69a1829157ac29c4fd3f7c234ed3c3,2018-06-25 18:25:00 UTC,cool,hold,758,760,760,FL,North Palm Beach,67,False,False,False,Gas


In [76]:
# Tag every row with its source year and month (both stored as strings)
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jun_2018 = jun_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jun_2018_ave.to_csv(path_or_buf="data/day/FL/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the raw June 2019 Florida readings
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2019-jun-day-FL.csv")

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jun_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jun_2019 = jun_2019.drop(index=jun_2019.index[is_outlier])

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b2f11a2b83b3051b1bd11e541e2952dbd58acfe9,2019-06-22 10:40:00 UTC,auto,auto,724,720,650,FL,Tampa,5,False,False,True,Electric
1,d6c2e44b80e82e437e07bc818b818327a926c101,2019-06-10 10:55:00 UTC,auto,auto,779,780,710,FL,Homestead,5,False,False,False,Gas
3,4c2630ecafeab7d1b1c1ba43ea74b6824b73710a,2019-06-26 17:55:00 UTC,cool,auto,778,780,780,FL,Riviera Beach,9,True,False,False,Gas
4,3afd00dfb040a8ee659232ba6cffa6fb62e761c2,2019-06-08 13:40:00 UTC,auto,hold,762,760,620,FL,Eustis,14,True,False,True,Electric
5,d1e41a9099613d8dfb3f4b2f09d9391594040ed8,2019-06-26 18:45:00 UTC,cool,auto,732,730,730,FL,Gainesville,9,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5338418,8adbf7ff551bba4f616e7317bb628efea3ada6ae,2019-06-20 15:00:00 UTC,cool,hold,775,770,770,FL,Tampa,15,True,False,True,Electric
5338419,3585c81d9c8e352062dd4b76e4ab5bc153f60a0c,2019-06-17 11:30:00 UTC,cool,auto,686,710,710,FL,Davie,70,False,False,False,Gas
5338420,9a2a5817132918aa8b4f7d83b5daf5e532b28603,2019-06-25 13:25:00 UTC,cool,hold,730,730,730,FL,Pinecrest,0,False,False,False,Gas
5338421,36b5f816c2bd952f875b0691d0d94aa7b2376b1f,2019-06-03 10:45:00 UTC,auto,auto,731,730,680,FL,Safety Harbor,30,False,False,False,Gas


In [82]:
# Tag every row with its source year and month (both stored as strings)
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jun_2019 = jun_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jun_2019_ave.to_csv(path_or_buf="data/day/FL/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the raw June 2020 Florida readings
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2020-jun-day-FL.csv")

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jun_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jun_2020 = jun_2020.drop(index=jun_2020.index[is_outlier])

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,90784eeb53e8b33dbc4b94a9cf2cca66be14b0c7,2020-06-28 10:50:00 UTC,cool,auto,727,750,750,FL,Brandon,55,False,False,False,Gas
2,03849d3f5ead7bde83ca3a9452958a62e7b2ae7b,2020-06-02 17:25:00 UTC,cool,hold,744,740,740,FL,Lakeland,39,False,False,True,Electric
3,dc285fea74057193e34044a37250b5c27be6e2dd,2020-06-25 13:35:00 UTC,auto,auto,739,740,670,FL,Safety Harbor,39,False,False,True,Electric
4,49bc8e207e437c504d2c3a3603bdbf0878016e17,2020-06-26 14:00:00 UTC,cool,auto,703,675,645,FL,Destin,0,False,False,True,Electric
5,7e6178e51ea702589f93017496790afe2dfc93b0,2020-06-03 19:25:00 UTC,auto,hold,778,780,680,FL,Kissimmee,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4828512,7afb3cb57a50d2aec2e614fa2f58dc0967534384,2020-06-16 19:25:00 UTC,cool,hold,718,720,720,FL,Saint Augustine,9,True,False,True,Electric
4828513,31fcf11dc1c1977e1e92f996eb47f5b7c4925437,2020-06-02 19:45:00 UTC,auto,hold,740,740,660,FL,Windermere,0,True,False,True,Electric
4828514,5e6b54a039acb6f90d2fcaaf2461d3dfc5e29dfe,2020-06-07 10:15:00 UTC,auto,hold,765,770,680,FL,Palm Harbor,20,False,False,True,Electric
4828515,f234070440bf10761f0aa33313d1456ffe7eb73c,2020-06-30 11:20:00 UTC,auto,hold,780,780,710,FL,DeBary,10,False,False,True,Electric


In [88]:
# Tag every row with its source year and month (both stored as strings)
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jun_2020 = jun_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jun_2020_ave.to_csv(path_or_buf="data/day/FL/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the raw June 2021 Florida readings
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2021-jun-day-FL.csv")

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jun_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jun_2021 = jun_2021.drop(index=jun_2021.index[is_outlier])

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,034922a0328fa08cd6e02d903796ae49a76310d3,2021-06-23 17:50:00 UTC,cool,hold,751,739,739,FL,Fort Lauderdale,40,False,False,False,Gas
1,ef503d0d2eea377b740af158f5dbf2580b771fc9,2021-06-22 16:45:00 UTC,auto,hold,752,749,649,FL,Wellington,10,False,False,False,Gas
2,179bf8ccb2198aee1c0053d4a79ad13116630d34,2021-06-27 15:25:00 UTC,cool,hold,771,770,602,FL,Naples,0,True,False,False,Gas
3,2d6b9c2877603139fc936a1dca5018f61e90dc4d,2021-06-12 16:05:00 UTC,cool,hold,772,770,768,FL,Fort Myers,25,True,False,False,Gas
4,0224817a4b93ae69c26f2532615f7d7ab28201a9,2021-06-10 16:20:00 UTC,cool,hold,718,712,712,FL,Sunrise,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2880560,1173246850f4de7086b3d1ce621458f7846f368b,2021-06-13 18:45:00 UTC,cool,hold,760,760,760,FL,Coral Gables,60,False,False,False,Gas
2880561,7da30854b2387f9ae355e3d02197145074a5b5c9,2021-06-09 08:35:00 UTC,cool,hold,761,760,760,FL,Melbourne,67,False,False,False,Gas
2880562,0f06f312e027e0b6d3b572ef5dd2aa01c9dd0879,2021-06-04 19:10:00 UTC,cool,hold,761,760,760,FL,Naples,0,False,False,False,Gas
2880563,de8254a96f114f8f1dbaa7c92d56196276e40b86,2021-06-30 14:45:00 UTC,cool,hold,754,760,760,FL,west palm beach,60,True,False,False,Gas


In [94]:
# Tag every row with its source year and month (both stored as strings)
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jun_2021 = jun_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jun_2021_ave.to_csv(path_or_buf="data/day/FL/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this month's folder
2. Export as combined CSV

In [98]:
# Names of the per-year aggregate CSVs for this month.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/FL/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once. Concatenating inside the
# loop re-copies the accumulated rows on every pass, and seeding the result with
# an empty DataFrame can emit a FutureWarning in recent pandas.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/FL/jun/" + file)
    yearly_frames.append(df)

# pd.concat raises ValueError on an empty list; keep the old "empty frame" result instead
FL_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

FL_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,jun,2017,auto,auto,Jacksonville,775.492308,773.423077,667.163462,5.0,False,False,True
1,0024f0f900a4ff6923e95a2a20ee94c699ecf167,jun,2017,cool,auto,Miami,799.983353,804.537455,788.787158,0.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,jun,2017,cool,hold,Miami,796.289140,806.028507,790.000000,0.0,False,False,False
3,0055f438d1ed6195816124e1814d00fe22894e77,jun,2017,cool,hold,Tampa,753.254545,750.000000,750.000000,25.0,False,False,True
4,008b53c11b3b4c56fac3ef938ccd9a34d7755d30,jun,2017,cool,auto,Davie,739.529412,740.000000,690.000000,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,ff5c81c4eacfb17ff63cecf31c131332abc79ddc,jun,2021,cool,hold,Fort Myers,743.019139,739.555024,652.000000,10.0,False,False,False
2876,ffa4468a31a8e35dd633f0022ea0b855501c5ba9,jun,2021,cool,hold,Palm Coast,760.596356,761.847160,758.819721,10.0,False,False,True
2877,ffcf942aa8736eaa25440e6817dea4d9efc3c67f,jun,2021,cool,hold,Oakland Park,737.767442,730.038760,729.782946,30.0,False,False,False
2878,fffcd8322781a8483039a10c259725e9da9cfdec,jun,2021,cool,hold,North Miami,728.397478,724.251096,723.402412,70.0,False,False,False


In [100]:
# Write the combined month table; index=False leaves out the repeating per-file row numbers
FL_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/FL/FL_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 Florida readings
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2017-jul-day-FL.csv")

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jul_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jul_2017 = jul_2017.drop(index=jul_2017.index[is_outlier])

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,73f0a3c5962182f20265966172e684d09fb8f462,2017-07-16 16:55:00 UTC,auto,auto,747,750,650,FL,Sunrise,50,False,False,False,Gas
1,1412e97d0de077f4ea9ba1ddcf5fc5b6a908c57a,2017-07-18 18:15:00 UTC,cool,auto,799,800,720,FL,sunrise,40,True,False,False,Gas
2,3b02ebbb3b2f2c288629046a9e1e644b523703ac,2017-07-21 16:45:00 UTC,cool,auto,783,780,760,FL,Fort myers,7,False,False,False,Gas
3,22a792ab3133c33c7d1606a3d34a71fe122ab43b,2017-07-17 11:00:00 UTC,cool,auto,783,790,680,FL,Zephyrhills,35,False,False,False,Gas
4,ce21d76a1d4bb9340435aa4d9d28a3a1c786d979,2017-07-18 14:15:00 UTC,cool,hold,769,772,772,FL,Edgewood,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1947958,ba205797d0b02efeebc04a38559eff425fd64f11,2017-07-10 18:25:00 UTC,cool,hold,774,770,770,FL,Palm Beach Gardens,16,False,False,False,Gas
1947959,ba205797d0b02efeebc04a38559eff425fd64f11,2017-07-18 19:40:00 UTC,cool,hold,765,770,770,FL,Palm Beach Gardens,16,False,False,False,Gas
1947960,9a9c02c01d1723f206201e4967e96eac9533c6eb,2017-07-26 13:20:00 UTC,cool,auto,758,760,760,FL,Palm Beach Gardens,15,False,False,False,Gas
1947961,b14f9eab461a06d27bcaf2bb3103d63fc9aefbdd,2017-07-21 15:30:00 UTC,cool,hold,724,755,755,FL,Palm Beach Gardens,0,False,False,False,Gas


In [103]:
# Tag every row with its source year and month (both stored as strings)
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jul_2017 = jul_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jul_2017_ave.to_csv(path_or_buf="data/day/FL/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the raw July 2018 Florida readings
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2018-jul-day-FL.csv")

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jul_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jul_2018 = jul_2018.drop(index=jul_2018.index[is_outlier])

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2fb7284f2f93c5d760a0dac3c99d3bffb377ef81,2018-07-09 16:05:00 UTC,cool,hold,802,810,720,FL,Green Cove Springs,45,False,False,True,Electric
2,c8f7ca38deb29189a384d1257d26fe9e26e7ac34,2018-07-28 13:35:00 UTC,cool,auto,758,760,760,FL,Orlando,0,False,False,True,Electric
3,122666318d2b0c22e41ec1d2371ebfa8426c1701,2018-07-12 13:25:00 UTC,cool,hold,757,770,770,FL,Orlando,70,False,False,False,Gas
4,91e8d75c9dfae8004f66f6dfac4e1a1a8b1e5c02,2018-07-15 10:25:00 UTC,cool,auto,698,700,700,FL,Miami,40,False,False,False,Gas
5,de3ef8dc674989be2d2b4a80c74ee9a05b35e09a,2018-07-19 12:35:00 UTC,auto,hold,725,750,640,FL,West Miami,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4213564,fc80700b84027c9e1b092d52b83a5dd558f2e33f,2018-07-25 07:25:00 UTC,cool,hold,725,720,720,FL,destin,30,False,False,True,Electric
4213565,3b6ce41439420869b05344a7b300caa95a23b08b,2018-07-04 11:05:00 UTC,cool,hold,707,710,710,FL,2381 oak leaf lane,47,False,False,True,Electric
4213566,151dd2d88561ecf24eab86c85c78d698ff513d71,2018-07-18 19:20:00 UTC,auto,hold,754,750,640,FL,West Palm Beach,40,False,False,False,Gas
4213567,b17d8e97a51cd789660aa23ff62b568ef73700b8,2018-07-13 19:15:00 UTC,cool,hold,716,700,700,FL,Naples,0,False,False,False,Gas


In [109]:
# Tag every row with its source year and month (both stored as strings)
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jul_2018 = jul_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jul_2018_ave.to_csv(path_or_buf="data/day/FL/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the raw July 2019 Florida readings
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2019-jul-day-FL.csv")

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jul_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jul_2019 = jul_2019.drop(index=jul_2019.index[is_outlier])

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be78c32c6828006e4dd1af96d3a17320191c4ece,2019-07-19 15:40:00 UTC,auto,auto,816,800,640,FL,Orlando,10,False,False,False,Gas
1,2ba34551f430515a665a2f51d698dc35fcd7d99f,2019-07-20 13:05:00 UTC,cool,hold,782,780,780,FL,Saint Johns,9,False,False,True,Electric
2,d39fe584087f62be43472f0bf49573073a6d0b32,2019-07-21 14:00:00 UTC,auto,hold,759,755,705,FL,Naples,10,False,False,False,Gas
3,54766be75907ae230c2ff887356cc08db51a3db0,2019-07-17 13:20:00 UTC,cool,hold,754,750,750,FL,Panama City,0,True,False,True,Electric
4,1dc854ef4b2dac72dcb199a2b6a5d91a2b016ca5,2019-07-05 12:20:00 UTC,cool,auto,736,730,730,FL,Miami,10,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5588636,8c4c6ee89b51e3c97e253d86a0ef2d94d12f801d,2019-07-27 08:30:00 UTC,cool,auto,801,800,760,FL,Orlando,5,True,False,True,Electric
5588637,b551ab618fa40c908a11efbac2dd6d8f419e65f2,2019-07-04 10:30:00 UTC,cool,auto,754,750,750,FL,Edgewater,47,False,False,True,Electric
5588638,97188f87a5a801b3f28c4b2a46432229c808beb5,2019-07-15 17:10:00 UTC,cool,hold,757,750,710,FL,North Port,15,False,False,True,Electric
5588639,506127c6f61c2a969ae8a370b846c3d509b22948,2019-07-10 12:05:00 UTC,cool,hold,727,720,700,FL,Ponte Vedra,0,True,False,True,Electric


In [115]:
# Tag every row with its source year and month (both stored as strings)
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jul_2019 = jul_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jul_2019_ave.to_csv(path_or_buf="data/day/FL/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the raw July 2020 Florida readings
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2020-jul-day-FL.csv")

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jul_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops. Rows with a missing
# setpoint compare False on every test and are therefore kept, exactly as before.
jul_2020 = jul_2020.drop(index=jul_2020.index[is_outlier])

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d941abd96271a0b25eb8e71b53c720f1fb694708,2020-07-17 07:45:00 UTC,cool,hold,696,690,690,FL,Santa Rosa Beach,30,True,False,False,Gas
1,7fb08652ae3c5e0f6736959f05403979a9eb64b9,2020-07-13 14:55:00 UTC,cool,hold,779,775,740,FL,Cory Lake Isles,15,False,False,False,Gas
2,d4e158fdd9053d5209bebd6900644018bf2d49d1,2020-07-23 19:00:00 UTC,auto,auto,769,770,670,FL,Miami,10,True,False,False,Gas
3,266c3f9ff6582c41af2ea09fda79ae92dfe9d75d,2020-07-07 16:20:00 UTC,cool,hold,745,750,750,FL,Winter Haven,99,True,False,True,Electric
4,5b91753ef08f8ba06a84c2a73b77b16b43f3adad,2020-07-13 13:10:00 UTC,cool,auto,739,780,780,FL,Palm Beach Gardens,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4934672,f09a37802504f385407c8c15709060486d2e236e,2020-07-07 18:55:00 UTC,cool,auto,729,730,730,FL,Tampa,39,True,False,True,Electric
4934673,5eada567967bc0eeab469ffb6448b096ab36916e,2020-07-29 18:50:00 UTC,cool,hold,702,700,700,FL,Saint Petersburg,57,False,False,False,Gas
4934674,c0472acb00956145e2b27225459610c22c636e98,2020-07-20 11:05:00 UTC,auto,auto,756,755,705,FL,Tierra Verde,30,True,False,False,Gas
4934675,973dc5326b1caaa1f5e11c59c7d0d8f32643cb8a,2020-07-31 19:55:00 UTC,cool,hold,712,720,720,FL,Bradenton,0,True,False,False,Gas


In [121]:
# Tag every row with its source year and month (both stored as strings)
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jul_2020 = jul_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jul_2020_ave.to_csv(path_or_buf="data/day/FL/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the raw July 2021 Florida readings
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/FL-day/2021-jul-day-FL.csv")

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when EITHER
# the cooling or the heating setpoint is >= 850 or <= 600
# (values look like tenths of a degree F -- TODO confirm units).
setpoints = jul_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
del setpoints  # release the two-column copy of the unfiltered data

# One combined mask replaces four sequential in-place drops (whose Heat/Cool order
# differed from the other months here without changing the result). Rows with a
# missing setpoint compare False on every test and are therefore kept, as before.
jul_2021 = jul_2021.drop(index=jul_2021.index[is_outlier])

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,05a76d6c54bfa59df08600271401ebb1d31190b3,2021-07-08 18:50:00 UTC,cool,hold,669,660,702,FL,Miami,0,False,False,False,Gas
1,40cc45fee6a7b9fcc56d490324cb9dd6c1432e71,2021-07-02 11:00:00 UTC,auto,hold,752,755,705,FL,Palm Coast,27,True,False,True,Electric
2,39bbeab58bc1578fb593378a6fc8a1c2e37a2ae6,2021-07-17 10:20:00 UTC,auto,hold,799,795,615,FL,Wellington,30,False,False,False,Gas
3,61ed265e3dfe33e5c80c3db726dacce4eb68200b,2021-07-15 16:50:00 UTC,auto,hold,770,771,721,FL,Orlando,5,False,False,False,Gas
4,831d0c3a02456dcd3e7a4df74c3a8cfccf57ec23,2021-07-07 15:55:00 UTC,cool,hold,716,699,699,FL,Coral Springs,9,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2712525,f7e613e6e3201075b5d6012f67b0b199524dc14b,2021-07-24 10:50:00 UTC,cool,hold,754,760,760,FL,West Palm Beach,19,False,False,False,Gas
2712526,1a815f301521c8455867939a47b48a642409be3e,2021-07-25 17:20:00 UTC,cool,hold,755,760,760,FL,Miramar,20,False,False,False,Gas
2712527,7b9589fae3891f462eb1004a47bc337193fcb11f,2021-07-17 15:30:00 UTC,cool,hold,753,760,760,FL,Coral Gables,60,False,False,False,Gas
2712528,a49c183933e6fbb8785004747fd2fa0a63cce070,2021-07-09 13:55:00 UTC,cool,hold,771,760,760,FL,Miami,45,True,False,False,Gas


In [127]:
# Tag every row with its source year and month (both stored as strings)
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave": after the groupby they hold averages
jul_2021 = jul_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric/boolean columns per thermostat, mode, calendar event and city.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops string columns
# (date_time, ProvinceState, ...) silently and raises TypeError instead.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Save the aggregate; index=True writes the group keys as the leading CSV columns
jul_2021_ave.to_csv(path_or_buf="data/day/FL/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this month's folder
2. Export as combined CSV

In [131]:
# Names of the per-year aggregate CSVs for this month.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/FL/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once. Concatenating inside the
# loop re-copies the accumulated rows on every pass, and seeding the result with
# an empty DataFrame can emit a FutureWarning in recent pandas.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/FL/jul/" + file)
    yearly_frames.append(df)

# pd.concat raises ValueError on an empty list; keep the old "empty frame" result instead
FL_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

FL_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,jul,2017,auto,auto,Jacksonville,784.911854,777.190729,677.597264,5.0,False,False,True
1,0024f0f900a4ff6923e95a2a20ee94c699ecf167,jul,2017,cool,auto,Miami,814.774595,815.139465,782.276668,0.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,jul,2017,cool,hold,Miami,796.202381,796.194444,788.253968,0.0,False,False,False
3,0055f438d1ed6195816124e1814d00fe22894e77,jul,2017,cool,hold,Tampa,745.952381,740.000000,740.000000,25.0,False,False,True
4,008b53c11b3b4c56fac3ef938ccd9a34d7755d30,jul,2017,cool,hold,Davie,752.895141,735.759591,735.759591,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2821,ff5c81c4eacfb17ff63cecf31c131332abc79ddc,jul,2021,cool,hold,Fort Myers,742.608025,738.635802,651.953704,10.0,False,False,False
2822,ffa4468a31a8e35dd633f0022ea0b855501c5ba9,jul,2021,cool,hold,Palm Coast,760.157003,760.793128,758.903842,10.0,False,False,True
2823,ffcf942aa8736eaa25440e6817dea4d9efc3c67f,jul,2021,cool,hold,Oakland Park,755.528256,751.933661,751.859951,30.0,False,False,False
2824,fffcd8322781a8483039a10c259725e9da9cfdec,jul,2021,cool,hold,North Miami,731.895143,728.280648,727.478026,70.0,False,False,False


In [133]:
# Export the combined month; index=False leaves the per-file row numbers out.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day/FL", exist_ok=True)
FL_jul.to_csv("Scraper_Output/State_Month_Day/FL/FL_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state
# FL, August 2017 (per the file name).
aug_2017 = pd.read_csv(Path("../data_large/FL-day/2017-aug-day-FL.csv"))

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
aug_2017.drop(
    aug_2017[
        (aug_2017['TemperatureExpectedCool'] >= 850)
        | (aug_2017['TemperatureExpectedHeat'] >= 850)
        | (aug_2017['TemperatureExpectedCool'] <= 600)
        | (aug_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a5ea43460c16b99656551a6e5781d51bf1b58973,2017-08-28 19:25:00 UTC,cool,auto,793,790,790,FL,Apollo Beach,0,False,False,True,Electric
1,f6a898aff62ba73d411e8837e264820628af98bd,2017-08-05 17:25:00 UTC,cool,auto,776,740,620,FL,Hialeah,36,False,False,False,Gas
2,4239552ec055619245ecff172cb201b0b9a92a7f,2017-08-21 12:20:00 UTC,auto,hold,723,750,700,FL,Lynn Haven,10,False,False,False,Gas
3,2aea9292b9344bb7ab3df64553aee723f78138f3,2017-08-26 18:00:00 UTC,cool,hold,783,780,780,FL,Deerfield Beach,25,False,False,False,Gas
4,22a792ab3133c33c7d1606a3d34a71fe122ab43b,2017-08-17 13:10:00 UTC,cool,auto,781,790,680,FL,Zephyrhills,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2062789,b14f9eab461a06d27bcaf2bb3103d63fc9aefbdd,2017-08-02 15:30:00 UTC,cool,hold,743,740,740,FL,Palm Beach Gardens,0,False,False,False,Gas
2062790,ba205797d0b02efeebc04a38559eff425fd64f11,2017-08-29 14:10:00 UTC,cool,hold,765,770,770,FL,Palm Beach Gardens,16,False,False,False,Gas
2062791,ba205797d0b02efeebc04a38559eff425fd64f11,2017-08-18 19:45:00 UTC,cool,hold,784,780,780,FL,Palm Beach Gardens,16,False,False,False,Gas
2062792,9a9c02c01d1723f206201e4967e96eac9533c6eb,2017-08-20 18:00:00 UTC,cool,auto,751,750,740,FL,Palm Beach Gardens,15,False,False,False,Gas


In [136]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
aug_2017["Year"], aug_2017["Month"] = "2017", "aug"

In [137]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
aug_2017 = aug_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/aug", exist_ok=True)
aug_2017_ave.to_csv("data/day/FL/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Read in month csv for state
# FL, August 2018 (per the file name).
aug_2018 = pd.read_csv(Path("../data_large/FL-day/2018-aug-day-FL.csv"))

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
aug_2018.drop(
    aug_2018[
        (aug_2018['TemperatureExpectedCool'] >= 850)
        | (aug_2018['TemperatureExpectedHeat'] >= 850)
        | (aug_2018['TemperatureExpectedCool'] <= 600)
        | (aug_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d2c07331cdcbf3f31657d4c870f6fb57f742e637,2018-08-24 17:35:00 UTC,cool,hold,743,740,740,FL,Tampa,5,True,False,True,Electric
1,e478562afbe7db9cd8aa42235cb8fb37cc134b92,2018-08-06 10:30:00 UTC,cool,hold,736,760,760,FL,wilton manors,9,True,False,False,Gas
2,50acae4c25caed8fa44fc2db5e7d534551094247,2018-08-26 10:45:00 UTC,cool,auto,762,760,760,FL,Kissimmee,10,True,False,True,Electric
3,04c61c67ac8e504d333835cb46d9fddfea22b6af,2018-08-24 16:10:00 UTC,cool,hold,720,720,720,FL,Panama City,20,False,False,True,Electric
4,8dce0f3d0feffc4c57c1b1ca09e56e072325615d,2018-08-17 17:55:00 UTC,cool,auto,777,780,760,FL,New Port Richey,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4200342,a91f348d1f2b7641556a9020c83b32f6c524951b,2018-08-31 18:15:00 UTC,cool,auto,766,760,760,FL,Port Saint Lucie,30,False,False,False,Gas
4200343,8177357efd7422d3168925039c70508e0d2f6ffb,2018-08-09 19:25:00 UTC,auto,hold,753,750,660,FL,Lehigh Acres,15,False,False,False,Gas
4200345,829150865af973ebe56f195195814c2c929ac4bc,2018-08-01 18:25:00 UTC,cool,hold,712,710,710,FL,Lake Clarke Shores,20,False,False,False,Gas
4200346,49304c734512b21c6ad58cd7c16fe605020d5824,2018-08-08 19:30:00 UTC,cool,hold,847,842,790,FL,Orlando,10,True,False,False,Gas


In [142]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
aug_2018["Year"], aug_2018["Month"] = "2018", "aug"

In [143]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
aug_2018 = aug_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/aug", exist_ok=True)
aug_2018_ave.to_csv("data/day/FL/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Read in month csv for state
# FL, August 2019 (per the file name).
aug_2019 = pd.read_csv(Path("../data_large/FL-day/2019-aug-day-FL.csv"))

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
aug_2019.drop(
    aug_2019[
        (aug_2019['TemperatureExpectedCool'] >= 850)
        | (aug_2019['TemperatureExpectedHeat'] >= 850)
        | (aug_2019['TemperatureExpectedCool'] <= 600)
        | (aug_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fe1ed9cc5a1195b930b7a5a4e7b036f23a1d9f9f,2019-08-07 19:15:00 UTC,cool,hold,717,740,740,FL,Naples,20,True,False,False,Gas
1,8aa0adcddcac9c1048e4b25d6571ce91e589b232,2019-08-05 14:10:00 UTC,cool,auto,771,788,788,FL,Weston,15,False,False,False,Gas
2,45ff9e901fb365c4b300c5b3b70b588f89efa3bd,2019-08-24 17:05:00 UTC,cool,auto,804,800,790,FL,Brooksville,20,False,False,True,Electric
3,77c062ad98bf705476beb44304454bf06541241c,2019-08-02 13:25:00 UTC,cool,auto,742,760,760,FL,Tampa,39,False,False,False,Gas
4,2ab4fe0bece07e7db7efe65b382dc0d4d32adc34,2019-08-07 12:15:00 UTC,auto,auto,746,750,650,FL,Pompano Beach,9,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5540518,5137642ba2cfc0e1868c3ab0f870bfdc51c5acf4,2019-08-04 12:10:00 UTC,auto,hold,756,770,680,FL,DeBary,28,True,False,True,Electric
5540519,ac34951fa83804c0ba5fedbfc8b7232799b734fa,2019-08-21 12:05:00 UTC,auto,hold,722,730,680,FL,Wellington,25,False,False,False,Gas
5540520,abc4c060958b64613d404eebe095083dd33a5455,2019-08-26 18:35:00 UTC,cool,auto,751,750,620,FL,Miami,0,True,False,False,Gas
5540521,30a8c18e1ef2aaddfda13e7efe137e5679801ecc,2019-08-23 13:50:00 UTC,cool,hold,714,715,715,FL,Panama City Beach,19,True,False,False,Gas


In [148]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
aug_2019["Year"], aug_2019["Month"] = "2019", "aug"

In [149]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
aug_2019 = aug_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/aug", exist_ok=True)
aug_2019_ave.to_csv("data/day/FL/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Read in month csv for state
# FL, August 2020 (per the file name).
aug_2020 = pd.read_csv(Path("../data_large/FL-day/2020-aug-day-FL.csv"))

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
aug_2020.drop(
    aug_2020[
        (aug_2020['TemperatureExpectedCool'] >= 850)
        | (aug_2020['TemperatureExpectedHeat'] >= 850)
        | (aug_2020['TemperatureExpectedCool'] <= 600)
        | (aug_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d4c177a4503855591f0adc0bab3ed9e4b30d6d1c,2020-08-29 17:25:00 UTC,cool,hold,764,760,760,FL,Port St Lucie,15,False,False,False,Gas
1,896739e4214ffa8a8d39fa7de7077d95a1f503d1,2020-08-21 17:50:00 UTC,cool,auto,718,680,680,FL,Royal Palm Beach,0,True,False,False,Gas
2,8744d57ebb50663b3c41af3614e8c445044d844a,2020-08-03 09:20:00 UTC,cool,hold,727,730,730,FL,Green Cove Springs,0,True,False,True,Electric
3,d6c2e44b80e82e437e07bc818b818327a926c101,2020-08-20 18:40:00 UTC,auto,hold,783,780,720,FL,Homestead,5,False,False,False,Gas
4,3dd58b7226a0e7f9bd4ed9f1b1ca976b5388d60d,2020-08-27 13:35:00 UTC,cool,auto,725,720,740,FL,Doram,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995721,2bab5529487e7ac80bf4d34f169a9d908e6ec7b2,2020-08-12 12:10:00 UTC,cool,auto,674,680,680,FL,Freeport,0,False,False,True,Electric
4995722,7603fd4aa87d06e1e13680bdc2ed797c95d1776c,2020-08-22 14:30:00 UTC,cool,auto,820,820,790,FL,Delray Beach,40,False,False,False,Gas
4995723,79ebc57aaa45e453673eeb53cc3d55e6395937ac,2020-08-23 17:50:00 UTC,auto,hold,740,740,670,FL,Delray Beach,0,False,False,False,Gas
4995724,a8b12ab27e5fd886a42b40f7b865429b895ef384,2020-08-04 07:30:00 UTC,cool,auto,741,740,740,FL,Winter Park,60,False,False,False,Gas


In [154]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
aug_2020["Year"], aug_2020["Month"] = "2020", "aug"

In [155]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
aug_2020 = aug_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/aug", exist_ok=True)
aug_2020_ave.to_csv("data/day/FL/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder
2. Export as combined CSV

In [158]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined CSV's row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/FL/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once. This avoids
# re-copying the growing frame on each iteration and seeding the result
# with an empty DataFrame.
yearly_frames = [pd.read_csv("data/day/FL/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame.
FL_aug = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

FL_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,aug,2017,auto,auto,Jacksonville,782.218335,780.000000,702.132690,5.0,False,False,True
1,0024f0f900a4ff6923e95a2a20ee94c699ecf167,aug,2017,cool,auto,Miami,816.357640,817.450225,784.571897,0.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,aug,2017,cool,hold,Miami,802.366295,803.643454,790.000000,0.0,False,False,False
3,0055f438d1ed6195816124e1814d00fe22894e77,aug,2017,cool,auto,Tampa,735.250000,734.833333,660.583333,25.0,False,False,True
4,0055f438d1ed6195816124e1814d00fe22894e77,aug,2017,cool,hold,Tampa,750.834862,742.440367,741.862385,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5391,ff65db3b80b5965987eb40d20a79a3a0b8045a07,aug,2020,cool,auto,Ponte Vedra Beach,758.774194,760.129032,640.000000,0.0,True,False,True
5392,fffcd8322781a8483039a10c259725e9da9cfdec,aug,2020,cool,auto,North Miami,728.743682,713.379061,712.660650,70.0,False,False,False
5393,fffcd8322781a8483039a10c259725e9da9cfdec,aug,2020,cool,hold,North Miami,725.470588,714.188971,713.086029,70.0,False,False,False
5394,ffff46e404a9831c28d8be1a3e427a4377690c6b,aug,2020,cool,auto,Parrish,752.978723,750.085106,749.716312,0.0,False,False,True


In [160]:
# Export the combined month; index=False leaves the per-file row numbers out.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day/FL", exist_ok=True)
FL_aug.to_csv("Scraper_Output/State_Month_Day/FL/FL_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state
# FL, December 2017 (per the file name).
dec_2017 = pd.read_csv(Path("../data_large/FL-day/2017-dec-day-FL.csv"))

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
dec_2017.drop(
    dec_2017[
        (dec_2017['TemperatureExpectedCool'] >= 850)
        | (dec_2017['TemperatureExpectedHeat'] >= 850)
        | (dec_2017['TemperatureExpectedCool'] <= 600)
        | (dec_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2aca1298389867e8ccfeea99c00820cdabe5efaf,2017-12-03 16:50:00 UTC,cool,hold,734,729,729,FL,Jacksonville,66,False,False,True,Electric
1,4b5da78a3b30b5e6d3aaa6bf5d2cd9969553788e,2017-12-19 08:15:00 UTC,cool,hold,722,725,725,FL,Cape Coral,15,False,False,False,Gas
2,91385f63c632a2f4c17d0ebbc8cbae4099adfaaa,2017-12-07 17:05:00 UTC,cool,hold,752,755,755,FL,Clearwater,9,False,False,True,Electric
3,7590e23eefb20bb153a55276567d6e6e9c59b3a8,2017-12-29 17:40:00 UTC,cool,auto,715,770,749,FL,Largo,16,False,False,True,Electric
4,2bdd8511d93f1f0d9847002b03864e279b8927b9,2017-12-14 18:55:00 UTC,auto,hold,741,775,725,FL,Kissimmee,5,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2532124,9803f7459c87eefe0f767aa55ba0e50bbc433d4f,2017-12-05 19:05:00 UTC,cool,hold,764,760,760,FL,Surfside,67,False,False,False,Gas
2532125,0e57c47d25177db6398cd2261703eae74453527b,2017-12-26 12:20:00 UTC,cool,hold,702,760,760,FL,Saint Petersburg,60,False,False,False,Gas
2532126,357f5810804e474cf533893fbfadd15eae784ab3,2017-12-23 14:25:00 UTC,cool,auto,777,780,760,FL,Fort Lauderdale,10,False,False,False,Gas
2532127,8a188da39be9d3c57e82aee48ec5d5b3f6384c72,2017-12-18 13:10:00 UTC,cool,hold,733,760,760,FL,Oakland Park,55,False,False,False,Gas


In [163]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
dec_2017["Year"], dec_2017["Month"] = "2017", "dec"

In [164]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
dec_2017 = dec_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/dec", exist_ok=True)
dec_2017_ave.to_csv("data/day/FL/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Read in month csv for state
# FL, December 2018 (per the file name).
dec_2018 = pd.read_csv(Path("../data_large/FL-day/2018-dec-day-FL.csv"))

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
dec_2018.drop(
    dec_2018[
        (dec_2018['TemperatureExpectedCool'] >= 850)
        | (dec_2018['TemperatureExpectedHeat'] >= 850)
        | (dec_2018['TemperatureExpectedCool'] <= 600)
        | (dec_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9d9804ca59adb6c9bcf875fafd4195177c1f42bb,2018-12-22 12:25:00 UTC,auto,hold,681,760,680,FL,Belleview,36,True,False,True,Electric
1,8345461325a6e244d4ee3a0dbec3b2b6c1876177,2018-12-07 16:35:00 UTC,auto,hold,772,790,630,FL,Miami,10,False,False,False,Gas
2,e20a3e765cdf2b7a2626a87af28bd4f0b6587df8,2018-12-07 11:35:00 UTC,auto,hold,679,770,660,FL,South Pasadena,50,True,False,False,Gas
3,13786be965ee366346ca6109b31adf39fffef7e8,2018-12-05 11:25:00 UTC,cool,auto,706,730,730,FL,Apopka,40,True,False,True,Electric
4,f785c2313faacc4c9b689c0008adcedd462c51b9,2018-12-10 17:30:00 UTC,auto,auto,717,780,730,FL,Winter Park,60,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4018653,b4fb7fc34719d3ac281b6bc35f25cb59601be3cb,2018-12-07 19:30:00 UTC,cool,hold,717,770,770,FL,Orlando,0,True,False,True,Electric
4018654,e0c573fc0956caee9be6df81fadeb66d0685f520,2018-12-04 13:55:00 UTC,auto,hold,733,735,705,FL,Jacksonville,0,False,False,True,Electric
4018655,aa358226200f2f58c623ffc8beb3f5f9901817a8,2018-12-19 19:15:00 UTC,heat,auto,697,700,700,FL,Gulf Breeze,40,True,False,True,Electric
4018657,03e91d495e89a9684d399da6baa0890992bf6aa3,2018-12-08 16:35:00 UTC,heat,hold,719,700,700,FL,Tampa,0,True,False,True,Electric


In [169]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
dec_2018["Year"], dec_2018["Month"] = "2018", "dec"

In [170]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
dec_2018 = dec_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/dec", exist_ok=True)
dec_2018_ave.to_csv("data/day/FL/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Read in month csv for state
# FL, December 2019 (per the file name).
dec_2019 = pd.read_csv(Path("../data_large/FL-day/2019-dec-day-FL.csv"))

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
dec_2019.drop(
    dec_2019[
        (dec_2019['TemperatureExpectedCool'] >= 850)
        | (dec_2019['TemperatureExpectedHeat'] >= 850)
        | (dec_2019['TemperatureExpectedCool'] <= 600)
        | (dec_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9b9ad5b92fe32843968fdcbf0ae83f786bdcb2a2,2019-12-26 12:50:00 UTC,auto,hold,695,760,680,FL,Jacksonville,0,False,False,True,Electric
1,0091af859415cf1a74a3666695db239ecfa92c55,2019-12-02 18:50:00 UTC,cool,auto,701,720,700,FL,Altamonte Springs,0,True,False,True,Electric
2,43556c904d05d3fe19d094253d7a09acc42ad61d,2019-12-21 14:10:00 UTC,cool,auto,664,770,770,FL,Miramar Beach,20,True,False,True,Electric
3,dda8987a7ceee737a30cd6f25c81e755320f1272,2019-12-27 17:20:00 UTC,auto,auto,729,730,670,FL,Orlando,20,False,False,True,Electric
4,f67a59ac9a3fd1b5ac966b3dd6f77593911b0207,2019-12-07 16:00:00 UTC,cool,hold,708,700,700,FL,Palm City,35,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4415886,7e13a89c5f749bd9972324c45b0ad38f23c992ec,2019-12-08 13:45:00 UTC,cool,auto,704,820,790,FL,Port Charlotte,40,False,False,False,Gas
4415887,91c39be38e40567a1043c1c3c00c3ac6a948f5f1,2019-12-25 18:45:00 UTC,cool,hold,743,750,750,FL,Miami,0,False,False,False,Gas
4415888,28891111b428da1b8a00ffbedee22c44503e44fc,2019-12-15 17:45:00 UTC,auto,hold,699,745,685,FL,Destin,45,False,False,False,Gas
4415890,2c378c79ac67770fe44c51241998f7da174f22f4,2019-12-16 09:15:00 UTC,cool,hold,751,751,751,FL,Wellington,10,False,False,False,Gas


In [175]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
dec_2019["Year"], dec_2019["Month"] = "2019", "dec"

In [176]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
dec_2019 = dec_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/dec", exist_ok=True)
dec_2019_ave.to_csv("data/day/FL/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Read in month csv for state
# FL, December 2020 (per the file name).
dec_2020 = pd.read_csv(Path("../data_large/FL-day/2020-dec-day-FL.csv"))

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected-temperature column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600;
# all four checks are combined into one mask and dropped in place.
dec_2020.drop(
    dec_2020[
        (dec_2020['TemperatureExpectedCool'] >= 850)
        | (dec_2020['TemperatureExpectedHeat'] >= 850)
        | (dec_2020['TemperatureExpectedCool'] <= 600)
        | (dec_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace = True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,738e8e6953cc4235c6d459106399d1eda0ea2a36,2020-12-27 14:35:00 UTC,heat,hold,663,668,668,FL,Niceville,29,False,False,False,Gas
1,9c2eaee56dd3dc278aadd1b505f80555e34aabf7,2020-12-22 17:05:00 UTC,cool,hold,758,757,757,FL,Oakland Park,20,False,False,False,Gas
2,f7fda93a084c48fc27fb866d97a035740ab63bb8,2020-12-23 19:00:00 UTC,cool,hold,728,739,739,FL,Altamonte Springs,15,False,False,True,Electric
3,72ead96fa5e75f9860b885ccc10248ff4d4abcd3,2020-12-01 17:00:00 UTC,cool,auto,665,840,739,FL,Apopka,10,True,False,True,Electric
5,b07d91b813253c89b2d83b35f3778f999ba87915,2020-12-06 18:40:00 UTC,auto,hold,717,716,656,FL,Miami Springs,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3523556,e1f04e7283ec625fd77870c409b6849ff6a65b73,2020-12-20 17:15:00 UTC,cool,auto,676,760,760,FL,Saint Petersburg,77,False,False,True,Electric
3523557,30cde2844d0dec8628b457fb1de79230a8f0effa,2020-12-15 15:50:00 UTC,cool,auto,736,760,760,FL,Boynton Beach,0,False,False,False,Gas
3523558,42464971f8783480212e38851682da1aac7bb996,2020-12-01 10:05:00 UTC,cool,auto,742,760,760,FL,Naples,9,True,False,False,Gas
3523559,d6af9ee92932a616a159ebe38d277692fb8130b4,2020-12-12 19:05:00 UTC,cool,auto,718,760,760,FL,Dunedin,19,False,False,True,Electric


In [181]:
# Add year and month
# Both constants are used as group keys in the aggregation cell below.
dec_2020["Year"], dec_2020["Month"] = "2020", "dec"

In [182]:
# Rename columns to label the aggregates
# Only the three temperature columns get the "_ave" suffix.
dec_2020 = dec_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0 raises TypeError on
# them instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Export CSV file
# index=True keeps the group keys, which live in the index after the groupby.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("data/day/FL/dec", exist_ok=True)
dec_2020_ave.to_csv("data/day/FL/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from this month's folder
2. Export as combined CSV

In [185]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined CSV's row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/FL/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once. This avoids
# re-copying the growing frame on each iteration and seeding the result
# with an empty DataFrame.
yearly_frames = [pd.read_csv("data/day/FL/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame.
FL_dec = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

FL_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,dec,2017,auto,auto,Jacksonville,713.775000,760.000000,660.000000,5.0,False,False,True
1,000901820a1c537e3432a15731cf77e975629e0e,dec,2017,auto,auto,Tampa,727.890756,750.000000,700.000000,17.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,dec,2017,cool,auto,Miami,774.085502,799.877323,780.029740,0.0,False,False,False
3,0024f0f900a4ff6923e95a2a20ee94c699ecf167,dec,2017,cool,hold,Miami,747.778309,796.191786,789.730064,0.0,False,False,False
4,0055f438d1ed6195816124e1814d00fe22894e77,dec,2017,auto,hold,Tampa,621.000000,750.000000,602.000000,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6800,ffa4468a31a8e35dd633f0022ea0b855501c5ba9,dec,2020,heat,hold,Palm Coast,718.169048,719.428571,719.392857,10.0,False,False,True
6801,fffcd8322781a8483039a10c259725e9da9cfdec,dec,2020,cool,hold,North Miami,736.558282,747.098160,745.914110,70.0,False,False,False
6802,ffff46e404a9831c28d8be1a3e427a4377690c6b,dec,2020,auto,auto,Parrish,728.545455,760.000000,710.000000,0.0,False,False,True
6803,ffff46e404a9831c28d8be1a3e427a4377690c6b,dec,2020,auto,hold,Parrish,738.437500,775.010417,713.031250,0.0,False,False,True


In [187]:
# Export the combined month; index=False leaves the per-file row numbers out.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day/FL", exist_ok=True)
FL_dec.to_csv("Scraper_Output/State_Month_Day/FL/FL_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined CSV's row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/FL/") if f.endswith(".csv"))

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every monthly file first, then concatenate once. This avoids
# re-copying the growing frame on each iteration and seeding the result
# with an empty DataFrame.
monthly_frames = [pd.read_csv("Scraper_Output/State_Month_Day/FL/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame.
FL_all = pd.concat(monthly_frames) if monthly_frames else pd.DataFrame()

FL_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00020f16d85e6d8c70fc67725f03ab06b116fead,aug,2017,auto,auto,Jacksonville,782.218335,780.000000,702.132690,5.0,False,False,True
1,0024f0f900a4ff6923e95a2a20ee94c699ecf167,aug,2017,cool,auto,Miami,816.357640,817.450225,784.571897,0.0,False,False,False
2,0024f0f900a4ff6923e95a2a20ee94c699ecf167,aug,2017,cool,hold,Miami,802.366295,803.643454,790.000000,0.0,False,False,False
3,0055f438d1ed6195816124e1814d00fe22894e77,aug,2017,cool,auto,Tampa,735.250000,734.833333,660.583333,25.0,False,False,True
4,0055f438d1ed6195816124e1814d00fe22894e77,aug,2017,cool,hold,Tampa,750.834862,742.440367,741.862385,25.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
21160,ff5c81c4eacfb17ff63cecf31c131332abc79ddc,jun,2021,cool,hold,Fort Myers,743.019139,739.555024,652.000000,10.0,False,False,False
21161,ffa4468a31a8e35dd633f0022ea0b855501c5ba9,jun,2021,cool,hold,Palm Coast,760.596356,761.847160,758.819721,10.0,False,False,True
21162,ffcf942aa8736eaa25440e6817dea4d9efc3c67f,jun,2021,cool,hold,Oakland Park,737.767442,730.038760,729.782946,30.0,False,False,False
21163,fffcd8322781a8483039a10c259725e9da9cfdec,jun,2021,cool,hold,North Miami,728.397478,724.251096,723.402412,70.0,False,False,False


In [190]:
# Export the combined state file; index=False leaves the per-file row numbers out.
# to_csv does not create missing folders, so make sure the target exists first.
os.makedirs("Scraper_Output/State_Month_Day", exist_ok=True)
FL_all.to_csv("Scraper_Output/State_Month_Day/FL_all_day.csv", header=True, index=False)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries
# Prints the distinct ProvinceState values of every raw monthly frame, one
# line per frame, in the same order and format as the individual prints did.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['FL']
Unique jan_2018: ['FL']
Unique jan_2019: ['FL']
Unique jan_2020: ['FL']
Unique jan_2021: ['FL']
Unique feb_2017: ['FL']
Unique feb_2018: ['FL']
Unique feb_2019: ['FL']
Unique feb_2020: ['FL']
Unique feb_2021: ['FL']
Unique jun_2017: ['FL']
Unique jun_2018: ['FL']
Unique jun_2019: ['FL']
Unique jun_2020: ['FL']
Unique jun_2021: ['FL']
Unique jul_2017: ['FL']
Unique jul_2018: ['FL']
Unique jul_2019: ['FL']
Unique jul_2020: ['FL']
Unique jul_2021: ['FL']
Unique aug_2017: ['FL']
Unique aug_2018: ['FL']
Unique aug_2019: ['FL']
Unique aug_2020: ['FL']
Unique dec_2017: ['FL']
Unique dec_2018: ['FL']
Unique dec_2019: ['FL']
Unique dec_2020: ['FL']
