# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" CSV for NJ.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2017-jan-day-NJ.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    jan_2017['TemperatureExpectedCool'].ge(850)
    | jan_2017['TemperatureExpectedHeat'].ge(850)
    | jan_2017['TemperatureExpectedCool'].le(600)
    | jan_2017['TemperatureExpectedHeat'].le(600)
)
jan_2017.drop(index=jan_2017.index[out_of_range], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fe013028a9185411235f2cb7ddcb7fdb687017f1,2017-01-18 18:50:00 UTC,heat,hold,685,690,690,NJ,Red Bank,100,False,False,False,Gas
1,330477b47f383a9b36a1a7906d462aec91634236,2017-01-19 19:30:00 UTC,heat,hold,679,670,670,NJ,Glassboro,55,False,False,False,Gas
2,955939ffe1d9fd44a65a91f88cf3416591100238,2017-01-25 15:35:00 UTC,heat,auto,712,710,710,NJ,Rutherford,0,False,False,False,Gas
3,e926e5589e6c8e150b37580a1570b0512ce041e3,2017-01-28 19:40:00 UTC,heat,hold,740,740,740,NJ,Berkeley Heights,0,False,False,False,Gas
5,abffbb0b23f18ab064c8390da5fbb932ccb6b293,2017-01-18 16:00:00 UTC,heat,auto,641,820,620,NJ,Budd Lake,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
401498,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-01-14 12:05:00 UTC,heat,hold,623,650,630,NJ,Englewood,0,False,False,False,Gas
401499,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-01-01 16:00:00 UTC,heat,hold,678,670,670,NJ,Englewood,0,False,False,False,Gas
401500,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-01-31 13:40:00 UTC,heat,hold,645,650,650,NJ,Englewood,0,False,False,False,Gas
401501,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-01-02 13:35:00 UTC,heat,hold,670,650,640,NJ,Englewood,0,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
jan_2017_ave = (
    jan_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00cfe51c25b825270bc4509dfadca8d4f50a4e99,Jan,2017,heat,hold,Medford,689.656934,698.226277,697.970803,0.0,False,False,False
01344d14a623a52997206e073b17517a488ddb6c,Jan,2017,heat,auto,Piscataway,696.955056,682.685393,682.393258,5.0,False,False,False
01344d14a623a52997206e073b17517a488ddb6c,Jan,2017,heat,hold,Piscataway,702.070866,705.047244,705.047244,5.0,False,False,False
0218f5e261862c4e29bc80b3fd87638a4dd80584,Jan,2017,auto,auto,Lindenwold,689.480000,751.224615,694.043077,10.0,True,False,False
0218f5e261862c4e29bc80b3fd87638a4dd80584,Jan,2017,auto,hold,Lindenwold,702.494949,748.262626,711.262626,10.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fe013028a9185411235f2cb7ddcb7fdb687017f1,Jan,2017,auto,hold,Red Bank,659.333333,661.851852,616.111111,100.0,False,False,False
fe013028a9185411235f2cb7ddcb7fdb687017f1,Jan,2017,heat,auto,Red Bank,676.682171,689.847545,681.832041,100.0,False,False,False
fe013028a9185411235f2cb7ddcb7fdb687017f1,Jan,2017,heat,hold,Red Bank,677.893921,683.466031,682.918951,100.0,False,False,False
fed532fe4fb9b0318ad6822647e83bf9f647877e,Jan,2017,heat,auto,Jersey City,761.935897,760.000000,760.000000,120.0,False,False,False


In [7]:
# Save the January 2017 averages; index=True writes the group keys as the
# leading columns of the CSV.
jan_2017_ave.to_csv(path_or_buf="data/day/NJ/jan/jan_2017_ave.csv", index=True, header=True)

### 2018 January Day

In [8]:
# Load the 2018 January "day" CSV for NJ.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2018-jan-day-NJ.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    jan_2018['TemperatureExpectedCool'].ge(850)
    | jan_2018['TemperatureExpectedHeat'].ge(850)
    | jan_2018['TemperatureExpectedCool'].le(600)
    | jan_2018['TemperatureExpectedHeat'].le(600)
)
jan_2018.drop(index=jan_2018.index[out_of_range], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,577d8d174977a3125a5f92ae94a22b2aaa79e5c7,2018-01-05 16:40:00 UTC,heat,hold,712,705,705,NJ,Toms River Township,20,False,False,False,Gas
1,d37109679aedec1fce5a81b6cc4ed9a7278e7c05,2018-01-24 18:15:00 UTC,heat,hold,732,728,728,NJ,Jackson,30,False,False,False,Gas
2,b597ef7a2b7435e18f0c833e161bad40e65d4085,2018-01-19 13:20:00 UTC,heat,hold,681,675,675,NJ,Tinton Falls,65,False,False,False,Gas
3,f0376c6b07664a352507d18d771b3afbe349aa77,2018-01-30 17:25:00 UTC,heat,hold,702,705,705,NJ,Fair Lawn,65,False,False,False,Gas
4,f07d67d15c361d501977b6d4e5500f2c321f2416,2018-01-27 17:40:00 UTC,auto,hold,723,840,690,NJ,Monmouth Junction,17,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036656,e712adec63b5ee578e75a6a293bfb048754d7515,2018-01-12 12:05:00 UTC,auto,auto,699,760,700,NJ,Saddle Brook,75,False,False,False,Gas
1036657,057331f5a151522c563bcf0efb49f0b37b733a7d,2018-01-13 11:30:00 UTC,auto,auto,700,760,700,NJ,Point Pleasant Beach,45,False,False,False,Gas
1036658,3feed3251c993ad584ca33481c115d25de9bad20,2018-01-17 19:20:00 UTC,heat,auto,741,760,760,NJ,Union City,90,True,False,False,Gas
1036659,275f12a74bb0b37af045dbd8b1b53d74c9f07946,2018-01-07 15:40:00 UTC,heat,auto,753,760,760,NJ,Evesham,16,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
jan_2018_ave = (
    jan_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [13]:
# Save the January 2018 averages; index=True writes the group keys as the
# leading columns of the CSV.
jan_2018_ave.to_csv(path_or_buf="data/day/NJ/jan/jan_2018_ave.csv", index=True, header=True)

### 2019 January Day

In [14]:
# Load the 2019 January "day" CSV for NJ.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2019-jan-day-NJ.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    jan_2019['TemperatureExpectedCool'].ge(850)
    | jan_2019['TemperatureExpectedHeat'].ge(850)
    | jan_2019['TemperatureExpectedCool'].le(600)
    | jan_2019['TemperatureExpectedHeat'].le(600)
)
jan_2019.drop(index=jan_2019.index[out_of_range], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a1348dbfed8425aee385aca041820e0deadadb82,2019-01-03 11:30:00 UTC,heat,hold,698,626,626,NJ,Ridgewood,60,False,False,False,Gas
1,5f0e2feee4d0ed9cccba0c60f7b3be843059bb4e,2019-01-20 18:55:00 UTC,heat,hold,693,712,712,NJ,Sea Girt,5,False,False,False,Gas
2,2eec69a6ea2000df3d4a08508e75b2691b0fb900,2019-01-26 18:40:00 UTC,auto,hold,645,745,645,NJ,Montville,48,False,False,False,Gas
3,61d5b67391246e1c46d918cff478154ebf4ae71b,2019-01-22 15:55:00 UTC,auto,hold,708,795,715,NJ,Green Brook,30,False,False,False,Gas
4,138f670a27f442a2620981e94b5b73e619629984,2019-01-31 18:50:00 UTC,heat,hold,729,734,734,NJ,toms river,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1622676,754a29221076050e925e6d09922eb1237a53e3db,2019-01-21 18:00:00 UTC,auto,auto,647,760,650,NJ,Woodstown,85,False,False,False,Gas
1622677,4cb39ac043209d58e79a06402eac00eecc6adbee,2019-01-27 16:15:00 UTC,auto,hold,706,760,710,NJ,Wayne,25,False,False,False,Gas
1622678,cec5effeb362e573c7488f6f5445d3f6f2339aea,2019-01-03 13:30:00 UTC,auto,hold,676,760,680,NJ,Montclair,0,False,False,False,Gas
1622679,a027131b5db9cc12c3c5e6117ab7d412ceddaa62,2019-01-12 17:05:00 UTC,auto,hold,701,760,700,NJ,Warren,17,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
jan_2019_ave = (
    jan_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [19]:
# Save the January 2019 averages; index=True writes the group keys as the
# leading columns of the CSV.
jan_2019_ave.to_csv(path_or_buf="data/day/NJ/jan/jan_2019_ave.csv", index=True, header=True)

### 2020 January Day

In [20]:
# Load the 2020 January "day" CSV for NJ.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2020-jan-day-NJ.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    jan_2020['TemperatureExpectedCool'].ge(850)
    | jan_2020['TemperatureExpectedHeat'].ge(850)
    | jan_2020['TemperatureExpectedCool'].le(600)
    | jan_2020['TemperatureExpectedHeat'].le(600)
)
jan_2020.drop(index=jan_2020.index[out_of_range], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,975e208dd09c056c95037ce654a74fdb8d8caf4d,2020-01-21 14:15:00 UTC,heat,auto,674,630,630,NJ,Freehold,25,False,False,False,Gas
1,fe4927c7c7792ea973b4840bbbcf76757cc61fe6,2020-01-14 13:15:00 UTC,auto,hold,705,745,695,NJ,Princeton,9,True,False,False,Gas
2,2b10c358c75983a3bd811c5645e19520c22cfbe5,2020-01-29 13:15:00 UTC,heat,hold,716,718,718,NJ,Mahwah,0,False,False,False,Gas
3,4dacf6559c4e2555cf463516914138ef093acab5,2020-01-08 13:20:00 UTC,heat,auto,662,672,670,NJ,Ringwood,30,True,False,False,Gas
4,c74773486a56f2b0f50c7d3e78e3e22b3a05ba53,2020-01-27 13:25:00 UTC,auto,hold,707,762,712,NJ,Bridgewater,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1860198,1e01e0692664bb154774669a1330cd15ecccb453,2020-01-12 12:05:00 UTC,heat,hold,758,760,760,NJ,Franklin Township,0,True,False,False,Gas
1860199,2406f1c08f3758a93961d01674e31daf625fab21,2020-01-04 12:50:00 UTC,auto,hold,654,760,640,NJ,Madison,90,False,False,False,Gas
1860200,d07c7da92a8a543a676bf4c3cf458f11ca8dba9e,2020-01-20 19:05:00 UTC,heat,hold,748,760,760,NJ,Fort Lee,99,False,False,False,Gas
1860202,0885a196a9c6a38baf5f352bbeefa6e75b5cf40c,2020-01-04 19:20:00 UTC,heat,hold,757,760,760,NJ,Point Pleasant Boro,0,True,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
jan_2020_ave = (
    jan_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [25]:
# Save the January 2020 averages; index=True writes the group keys as the
# leading columns of the CSV.
jan_2020_ave.to_csv(path_or_buf="data/day/NJ/jan/jan_2020_ave.csv", index=True, header=True)

### 2021 January Day

In [26]:
# Load the 2021 January "day" CSV for NJ.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2021-jan-day-NJ.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    jan_2021['TemperatureExpectedCool'].ge(850)
    | jan_2021['TemperatureExpectedHeat'].ge(850)
    | jan_2021['TemperatureExpectedCool'].le(600)
    | jan_2021['TemperatureExpectedHeat'].le(600)
)
jan_2021.drop(index=jan_2021.index[out_of_range], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6d9ae47183e5b4373e62c07c96f2abba847d3694,2021-01-10 17:40:00 UTC,heat,hold,719,709,709,NJ,Chesterfield,10,False,False,False,Gas
1,9a06657297ff3a5ae7889f50724c76bd6ab0ba1b,2021-01-18 10:10:00 UTC,heat,hold,703,711,711,NJ,Middletown,69,False,False,False,Gas
2,3734528620473667d6bb2e3b5e01bddb55a5a781,2021-01-31 11:50:00 UTC,heat,hold,727,729,729,NJ,Montclair,90,True,False,False,Gas
3,76b3c925ff149d91c908ee800517742bc3118821,2021-01-27 12:35:00 UTC,heat,hold,673,686,680,NJ,Lindenwold,69,True,False,False,Gas
4,9a06657297ff3a5ae7889f50724c76bd6ab0ba1b,2021-01-27 19:15:00 UTC,heat,hold,729,729,729,NJ,Middletown,69,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1089482,d2b4b0edff4d937154c31af3dbc4c54bbccb5179,2021-01-18 13:50:00 UTC,auto,hold,708,760,710,NJ,Evesham,9,True,False,False,Gas
1089483,82e64c33a0e15163337ea079b31c43d313ce3f90,2021-01-25 17:35:00 UTC,auto,hold,710,760,710,NJ,Manahawkin,0,False,False,False,Gas
1089484,0bbc5cd611212f85a36f79f4ab8994ddb2dcea95,2021-01-22 13:15:00 UTC,auto,hold,685,760,690,NJ,North Brunswick,35,False,False,False,Gas
1089485,d07c7da92a8a543a676bf4c3cf458f11ca8dba9e,2021-01-29 14:45:00 UTC,heat,hold,757,760,760,NJ,Fort Lee,99,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
jan_2021_ave = (
    jan_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [31]:
# Save the January 2021 averages; index=True writes the group keys as the
# leading columns of the CSV.
jan_2021_ave.to_csv(path_or_buf="data/day/NJ/jan/jan_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Names of the CSV files currently present in the January output folder.
files = list(filter(lambda name: name.endswith(".csv"), os.listdir("data/day/NJ/jan/")))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each January file and stack them into a single frame.
# - Files are visited in sorted name order because os.listdir makes no
#   ordering guarantee; sorting keeps the combined row order reproducible.
# - Frames are collected first and concatenated once, instead of re-copying
#   the growing result on every iteration.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/NJ/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the previous
# behaviour of yielding an empty frame when no files were found.
NJ_jan = pd.concat(frames) if frames else pd.DataFrame()

NJ_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00cfe51c25b825270bc4509dfadca8d4f50a4e99,Jan,2017,heat,hold,Medford,689.656934,698.226277,697.970803,0.0,False,False,False
1,01344d14a623a52997206e073b17517a488ddb6c,Jan,2017,heat,auto,Piscataway,696.955056,682.685393,682.393258,5.0,False,False,False
2,01344d14a623a52997206e073b17517a488ddb6c,Jan,2017,heat,hold,Piscataway,702.070866,705.047244,705.047244,5.0,False,False,False
3,0218f5e261862c4e29bc80b3fd87638a4dd80584,Jan,2017,auto,auto,Lindenwold,689.480000,751.224615,694.043077,10.0,True,False,False
4,0218f5e261862c4e29bc80b3fd87638a4dd80584,Jan,2017,auto,hold,Lindenwold,702.494949,748.262626,711.262626,10.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1275,fd608a31545a8a10a6cb5ed7a74d21e831339989,Jan,2021,heat,hold,Westfield,683.169317,690.681946,690.564079,50.0,False,False,False
1276,fe66c5051b7e13329c0c52a696a10794fa5efd7d,Jan,2021,heat,hold,Swedesboro,689.569444,690.819444,689.819444,10.0,True,False,False
1277,fe6af6e7fde79582ca0fa187198d1240b25652ee,Jan,2021,heat,hold,Phillipsburg,656.256098,660.000000,660.000000,50.0,False,False,False
1278,ff635b7556b71ecf409661242fa988bf1361c850,Jan,2021,heat,hold,newark,687.153084,693.581498,690.455947,120.0,False,False,False


In [34]:
# Write the combined January table; the row index (which restarts for each
# source file) is left out of the CSV.
NJ_jan.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NJ/NJ_jan.csv", index=False, header=True)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" CSV for NJ.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2017-feb-day-NJ.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    feb_2017['TemperatureExpectedCool'].ge(850)
    | feb_2017['TemperatureExpectedHeat'].ge(850)
    | feb_2017['TemperatureExpectedCool'].le(600)
    | feb_2017['TemperatureExpectedHeat'].le(600)
)
feb_2017.drop(index=feb_2017.index[out_of_range], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d3c6c4504a7501861b2f21c71016e8484a441958,2017-02-04 13:45:00 UTC,heat,hold,742,740,740,NJ,Mount Holly,50,False,False,False,Gas
1,63dfcb9581139a2d268e9495d886d661e162b1cd,2017-02-13 16:25:00 UTC,heat,auto,719,720,720,NJ,Highlands,0,False,False,False,Gas
2,8aea502962b5b263b484c9758ce83a542d441265,2017-02-05 15:15:00 UTC,heat,auto,664,650,630,NJ,Hamilton,30,False,False,False,Gas
3,16af22058149fc79833d767883046d2083b8ba64,2017-02-12 09:25:00 UTC,heat,hold,685,690,690,NJ,West Orange,25,False,False,False,Gas
4,d2535263ab073546e3bc0f3c6b92de5eeb960913,2017-02-05 19:30:00 UTC,heat,auto,766,680,680,NJ,Rutherford,96,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
359472,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-02-14 13:10:00 UTC,heat,hold,675,680,680,NJ,Englewood,0,False,False,False,Gas
359473,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-02-09 16:15:00 UTC,heat,hold,641,650,640,NJ,Englewood,0,False,False,False,Gas
359474,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-02-18 12:20:00 UTC,heat,hold,714,720,720,NJ,Englewood,0,False,False,False,Gas
359475,f5c547be91b176e3bb84401fc6658ac7fbc5f937,2017-02-28 12:35:00 UTC,heat,hold,655,660,660,NJ,Englewood,0,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one is lowercase "feb" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
feb_2017_ave = (
    feb_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [40]:
# Save the February 2017 averages; index=True writes the group keys as the
# leading columns of the CSV.
feb_2017_ave.to_csv(path_or_buf="data/day/NJ/feb/feb_2017_ave.csv", index=True, header=True)

### 2018 February Day

In [41]:
# Load the 2018 February "day" CSV for NJ.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2018-feb-day-NJ.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    feb_2018['TemperatureExpectedCool'].ge(850)
    | feb_2018['TemperatureExpectedHeat'].ge(850)
    | feb_2018['TemperatureExpectedCool'].le(600)
    | feb_2018['TemperatureExpectedHeat'].le(600)
)
feb_2018.drop(index=feb_2018.index[out_of_range], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8326662d3d25b26835ae230895324431d31ad774,2018-02-22 16:15:00 UTC,heat,hold,714,708,708,NJ,Leonia,90,False,False,False,Gas
1,dda598d15aed73aa5988163c7eda93f005c650b6,2018-02-19 11:50:00 UTC,heat,hold,671,675,675,NJ,Wall,0,False,False,False,Gas
2,bad2d0d1a1383feaedc836b267505beee1a40149,2018-02-23 16:05:00 UTC,auto,auto,712,765,715,NJ,Long Branch,0,True,False,False,Gas
3,8326662d3d25b26835ae230895324431d31ad774,2018-02-10 19:00:00 UTC,heat,hold,719,719,719,NJ,Leonia,90,False,False,False,Gas
4,0735fd45c22def7a595e22134db926c8bc02d5c9,2018-02-03 14:45:00 UTC,heat,auto,689,640,640,NJ,West Milford,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
959300,4c7daababb5aa316174d3155f664373420731e32,2018-02-18 18:25:00 UTC,auto,auto,688,760,670,NJ,Plumsted,20,False,False,False,Gas
959301,081b1ac4839127f89386f2192a9862aeca8176b2,2018-02-13 15:40:00 UTC,auto,auto,622,760,610,NJ,Paramus,20,False,False,False,Gas
959302,a138cf72954559db33a45516278b0c42b4c95d3c,2018-02-27 13:00:00 UTC,heat,auto,756,760,760,NJ,Trenton,30,False,False,False,Gas
959303,193be8b5b1d1c343b24f78aa1adc3f719c7d6484,2018-02-17 14:15:00 UTC,auto,hold,654,760,650,NJ,Scotch Plains,70,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one is lowercase "feb" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
feb_2018_ave = (
    feb_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [46]:
# Save the February 2018 averages; index=True writes the group keys as the
# leading columns of the CSV.
feb_2018_ave.to_csv(path_or_buf="data/day/NJ/feb/feb_2018_ave.csv", index=True, header=True)

### 2019 February Day

In [47]:
# Load the 2019 February "day" CSV for NJ.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2019-feb-day-NJ.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    feb_2019['TemperatureExpectedCool'].ge(850)
    | feb_2019['TemperatureExpectedHeat'].ge(850)
    | feb_2019['TemperatureExpectedCool'].le(600)
    | feb_2019['TemperatureExpectedHeat'].le(600)
)
feb_2019.drop(index=feb_2019.index[out_of_range], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8f21e67628d86d4d95831e3fa9eb69705f381d8c,2019-02-23 15:30:00 UTC,heat,auto,644,651,650,NJ,Middletown,30,True,False,False,Gas
1,4ec3904e4ecf43d0e7b465e6e1ea6a587e543ee2,2019-02-18 12:05:00 UTC,heat,hold,705,705,705,NJ,Sayreville,60,False,False,False,Gas
3,712b2b3bd64c28fda669dd129001570ce44b39af,2019-02-16 17:00:00 UTC,heat,hold,689,658,658,NJ,Elmwood Park,90,False,False,False,Gas
4,9cf1156c73de72dc595489619d33f81c3847ea96,2019-02-23 18:45:00 UTC,heat,hold,676,677,677,NJ,Trenton,50,False,False,False,Gas
5,2ae1dca646d7c84098b1c0ce89ae17c0f41c8b0c,2019-02-18 12:30:00 UTC,auto,auto,691,840,690,NJ,Mount Laurel,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1142197,9bd8656fb70ecba362d749a6f894dc56f82107ee,2019-02-16 13:55:00 UTC,heat,auto,699,760,700,NJ,Wenonah,70,False,False,False,Gas
1142198,d942a8853fab067ec80c18ad5c1c8f59ed79cef7,2019-02-03 12:50:00 UTC,heat,auto,706,760,710,NJ,Evesham Township,0,False,False,True,Electric
1142199,d01223e33a8e008d33116a5619e808ffee8154e9,2019-02-24 18:30:00 UTC,heat,auto,707,760,700,NJ,Glen Rock,80,False,False,False,Gas
1142200,1af2a6b1c8114d8cca0b140a5b59f06bc81d5899,2019-02-25 12:30:00 UTC,heat,auto,697,760,700,NJ,Cherry Hill,50,False,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one is lowercase "feb" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
feb_2019_ave = (
    feb_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [52]:
# Save the February 2019 averages; index=True writes the group keys as the
# leading columns of the CSV.
feb_2019_ave.to_csv(path_or_buf="data/day/NJ/feb/feb_2019_ave.csv", index=True, header=True)

### 2020 February Day

In [53]:
# Load the 2020 February "day" CSV for NJ.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2020-feb-day-NJ.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    feb_2020['TemperatureExpectedCool'].ge(850)
    | feb_2020['TemperatureExpectedHeat'].ge(850)
    | feb_2020['TemperatureExpectedCool'].le(600)
    | feb_2020['TemperatureExpectedHeat'].le(600)
)
feb_2020.drop(index=feb_2020.index[out_of_range], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fb8242e4344a8eb957aae8e71943bbc5641b5c87,2020-02-22 17:20:00 UTC,auto,hold,726,775,695,NJ,Sicklerville,10,False,False,False,Gas
1,0735fd45c22def7a595e22134db926c8bc02d5c9,2020-02-20 19:10:00 UTC,heat,auto,615,620,620,NJ,West Milford,45,False,False,False,Gas
2,eea162e43c8dd1f3950760333c8b73c940980e97,2020-02-04 17:50:00 UTC,heat,auto,696,698,698,NJ,West New York,0,True,False,True,Electric
3,268b60939773a9951f49a06e762db62a62422056,2020-02-02 15:05:00 UTC,heat,hold,729,715,715,NJ,Collegeville,0,False,False,False,Gas
4,4358ca81e74c20f993ce976c013caaf6ce2c2151,2020-02-27 17:40:00 UTC,auto,hold,754,830,760,NJ,Green Brook,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1644439,10b7ae2ac1367cf6022e3e98d43614ce1803b161,2020-02-05 14:00:00 UTC,heat,auto,675,760,660,NJ,Point Pleasant Boro,9,False,False,False,Gas
1644440,0dc05bbafb3d7026d8a8742ebdb0376c0cb723ea,2020-02-23 19:10:00 UTC,heat,hold,784,760,760,NJ,Glassboro,20,False,False,False,Gas
1644442,aa657e198b0561d5944ff2b8e6c31efdca34b2f1,2020-02-12 12:35:00 UTC,auto,auto,654,760,660,NJ,Egg Harbor Township,30,False,False,False,Gas
1644443,87a2141d6eaebde08c70b77d8286208d2a4c1ca3,2020-02-10 16:10:00 UTC,heat,auto,700,760,680,NJ,Old Bridge,30,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one is lowercase "feb" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
feb_2020_ave = (
    feb_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [58]:
# Save the February 2020 averages; index=True writes the group keys as the
# leading columns of the CSV.
feb_2020_ave.to_csv(path_or_buf="data/day/NJ/feb/feb_2020_ave.csv", index=True, header=True)

### 2021 February Day

In [59]:
# Load the 2021 February "day" CSV for NJ.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2021-feb-day-NJ.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against missing values are False, so such rows are retained.
out_of_range = (
    feb_2021['TemperatureExpectedCool'].ge(850)
    | feb_2021['TemperatureExpectedHeat'].ge(850)
    | feb_2021['TemperatureExpectedCool'].le(600)
    | feb_2021['TemperatureExpectedHeat'].le(600)
)
feb_2021.drop(index=feb_2021.index[out_of_range], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a7f535bb801820946a99ec2c3dac4202029a1ec3,2021-02-27 18:40:00 UTC,heat,hold,710,711,711,NJ,Cherry Hill,50,False,False,False,Gas
1,066419ee7f1366fd00952dc760e12b20ee033f68,2021-02-15 12:55:00 UTC,heat,hold,688,688,688,NJ,Readington,25,False,False,False,Gas
2,bba5d3aa4933a44e06c7a2b30ff876c6c25e3ccc,2021-02-13 16:15:00 UTC,auto,hold,738,812,762,NJ,Colts Neck,35,False,False,False,Gas
3,1260825fc41783d4513ad7b7e812f9d9d236e82f,2021-02-11 19:20:00 UTC,heat,hold,713,718,718,NJ,New Providence,0,False,False,False,Gas
4,a1348dbfed8425aee385aca041820e0deadadb82,2021-02-21 18:40:00 UTC,heat,hold,670,644,644,NJ,Ridgewood,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
947804,70818e164c5c9b317e1781cf4f976f246c923c5c,2021-02-03 14:20:00 UTC,heat,hold,747,760,760,NJ,Atlantic City,10,False,False,True,Electric
947805,82e64c33a0e15163337ea079b31c43d313ce3f90,2021-02-16 16:15:00 UTC,auto,hold,708,760,710,NJ,Manahawkin,0,False,False,False,Gas
947806,82e64c33a0e15163337ea079b31c43d313ce3f90,2021-02-28 13:55:00 UTC,auto,hold,708,760,710,NJ,Manahawkin,0,False,False,False,Gas
947807,41efecf9ac21e53d41762e2209a96cb5fa960706,2021-02-11 19:00:00 UTC,heat,hold,740,760,760,NJ,Florham Park,0,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings); these
# columns are used as grouping keys when the frame is aggregated.
# NOTE(review): the January cells use the capitalised label "Jan" while this
# one is lowercase "feb" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Give the three temperature columns an "_ave" suffix so the aggregated
# output is labelled as averages.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True is spelled out because
# the frame still carries text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises TypeError on them instead
# of silently dropping them as older releases did.
feb_2021_ave = (
    feb_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [64]:
# Save the February 2021 averages; index=True writes the group keys as the
# leading columns of the CSV.
feb_2021_ave.to_csv(path_or_buf="data/day/NJ/feb/feb_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Names of the CSV files currently present in the February output folder.
files = list(filter(lambda name: name.endswith(".csv"), os.listdir("data/day/NJ/feb/")))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each February file and stack them into a single frame.
# - Files are visited in sorted name order because os.listdir makes no
#   ordering guarantee; sorting keeps the combined row order reproducible.
# - Frames are collected first and concatenated once, instead of re-copying
#   the growing result on every iteration.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/NJ/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the previous
# behaviour of yielding an empty frame when no files were found.
NJ_feb = pd.concat(frames) if frames else pd.DataFrame()

NJ_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004f71a0c52a570bac8cebc13392ba54fc98da79,feb,2017,heat,auto,Millstone,665.956284,651.311475,641.759563,0.0,False,False,False
1,004f71a0c52a570bac8cebc13392ba54fc98da79,feb,2017,heat,hold,Millstone,706.992278,698.891892,698.084942,0.0,False,False,False
2,00cfe51c25b825270bc4509dfadca8d4f50a4e99,feb,2017,heat,auto,Medford,703.496815,706.242038,706.242038,0.0,False,False,False
3,00cfe51c25b825270bc4509dfadca8d4f50a4e99,feb,2017,heat,hold,Medford,713.112847,715.000000,715.000000,0.0,False,False,False
4,01344d14a623a52997206e073b17517a488ddb6c,feb,2017,auto,auto,Piscataway,747.782609,750.000000,680.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1171,fe765ce8408d615dc0bce6dbbc286288933313fd,feb,2021,heat,hold,Glen Rock,705.411765,707.882353,706.117647,60.0,False,False,False
1172,ff635b7556b71ecf409661242fa988bf1361c850,feb,2021,heat,hold,newark,696.583001,702.657371,700.134130,120.0,False,False,False
1173,ffacf5dda3b0257b3891ad670f2d1f42187e2277,feb,2021,heat,hold,Oak Ridge,668.722222,664.666667,658.444444,99.0,True,False,False
1174,ffb88d949cb4d05658d8445b5cc64eb853aea426,feb,2021,heat,hold,Livingston,716.250000,721.875000,719.708333,0.0,False,False,False


In [67]:
# Write the combined month file; index=False leaves the row labels out
NJ_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NJ/NJ_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw 2017 June "day" extract for NJ into a DataFrame
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2017-jun-day-NJ.csv")

In [69]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jun_2017[column] >= 850) | (jun_2017[column] <= 600)
    jun_2017.drop(index=jun_2017.loc[out_of_range].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6f05ed40009c4d2311ccd097b6b6b8563a7de7c6,2017-06-16 12:05:00 UTC,cool,hold,742,743,743,NJ,Edgewater,20,False,False,True,Electric
1,6c09865f6673a1f668d36eb5bfb2137545ef792f,2017-06-19 14:30:00 UTC,cool,hold,764,763,763,NJ,Mays Landing,30,False,False,False,Gas
2,ebf5db3a589e4589388886191ae1283d764af30d,2017-06-24 18:05:00 UTC,cool,auto,723,720,690,NJ,Harrington Park,0,False,False,False,Gas
3,ebf5db3a589e4589388886191ae1283d764af30d,2017-06-28 16:15:00 UTC,cool,auto,734,740,710,NJ,Harrington Park,0,False,False,False,Gas
4,7318e078f87e016d9ae71ff9ab408e4166d29457,2017-06-07 17:05:00 UTC,cool,auto,708,820,640,NJ,Hillside,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
523517,c0b793813c1d559990ce01ec9fb2c598e16f367d,2017-06-14 13:55:00 UTC,cool,auto,787,790,720,NJ,Egg Harbor Township,57,False,False,False,Gas
523518,feada91765d02a17397b41f77689a4129b6b57d1,2017-06-30 17:05:00 UTC,cool,auto,753,750,700,NJ,Egg Harbor Township,19,False,False,True,Electric
523519,2aa6414da36a2525d14a9558312566818161044a,2017-06-09 19:30:00 UTC,cool,auto,758,790,670,NJ,Egg Harbor Township,50,False,False,False,Gas
523520,2aa6414da36a2525d14a9558312566818161044a,2017-06-18 14:30:00 UTC,cool,auto,764,770,780,NJ,Egg Harbor Township,50,False,False,False,Gas


In [70]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2017"), ("Month", "jun")):
    jun_2017[label] = value

In [71]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=averaged_names)

In [72]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Persist the averages; index=True writes the group keys out as CSV columns
jun_2017_ave.to_csv(
    path_or_buf="data/day/NJ/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw 2018 June "day" extract for NJ into a DataFrame
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2018-jun-day-NJ.csv")

In [75]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jun_2018[column] >= 850) | (jun_2018[column] <= 600)
    jun_2018.drop(index=jun_2018.loc[out_of_range].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,c978fda9a1bb73a1a2376a3c821aea5fa80bfa59,2018-06-27 12:50:00 UTC,cool,hold,712,707,707,NJ,Westfield,5,False,False,False,Gas
2,e624929e3542550e6ec536a903858a5a06ce470c,2018-06-11 10:45:00 UTC,auto,auto,682,705,655,NJ,Chatham,70,False,False,False,Gas
3,c350527357f7149fcd95f6fa7706010bd6275a53,2018-06-13 11:00:00 UTC,cool,hold,728,726,726,NJ,Lyndhurst,5,True,False,True,Electric
4,b6465a892ec628096be7b4a7122b2da0e631cbea,2018-06-13 15:40:00 UTC,cool,hold,752,784,784,NJ,West Orange,25,False,False,False,Gas
5,f358bffffd7738fa2f72b8b4e5a116dd28558946,2018-06-04 19:15:00 UTC,cool,auto,716,712,682,NJ,Jersey City,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1159640,7f4e81372e5d5d60cec5fd02a0474985135dc8e2,2018-06-26 17:20:00 UTC,cool,hold,760,760,760,NJ,Salem,80,False,False,False,Gas
1159641,d91e5e7494c24a0ee29e528da6fafbd1d5e099be,2018-06-17 17:45:00 UTC,cool,hold,756,760,760,NJ,Hoboken,0,False,False,False,Gas
1159642,0735fd45c22def7a595e22134db926c8bc02d5c9,2018-06-26 11:35:00 UTC,cool,hold,710,760,760,NJ,West Milford,45,False,False,False,Gas
1159643,393c8adc3ca3535636e9220b92eade38bffa7853,2018-06-06 19:15:00 UTC,cool,auto,750,760,760,NJ,North Brunswick,15,False,False,False,Gas


In [76]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2018"), ("Month", "jun")):
    jun_2018[label] = value

In [77]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=averaged_names)

In [78]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Persist the averages; index=True writes the group keys out as CSV columns
jun_2018_ave.to_csv(
    path_or_buf="data/day/NJ/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw 2019 June "day" extract for NJ into a DataFrame
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2019-jun-day-NJ.csv")

In [81]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jun_2019[column] >= 850) | (jun_2019[column] <= 600)
    jun_2019.drop(index=jun_2019.loc[out_of_range].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bddd5cea65be1b55e157533cbc4f13c8f506154b,2019-06-01 14:20:00 UTC,cool,auto,724,680,657,NJ,Mount Laurel,0,True,False,False,Gas
1,2abf248171a5a0f1023ce7910e4e63cb6cd47c30,2019-06-26 12:10:00 UTC,cool,auto,816,705,655,NJ,N Wildwood,15,True,False,False,Gas
2,9cd0112e4c82cc0c3690ea5b0c60e1c3e838b024,2019-06-01 14:25:00 UTC,cool,hold,706,705,705,NJ,North Cape May,27,False,False,False,Gas
3,faf82d5780c46cd24e15d66fba2106fb21d2c115,2019-06-06 15:40:00 UTC,auto,hold,773,770,626,NJ,West New York,0,True,False,True,Electric
5,1f98c42a78f42e25e5b0c2dec6b5377e988dcf91,2019-06-11 14:15:00 UTC,cool,hold,705,726,726,NJ,Holmdel,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1756600,b330f8153b5c97be1d0460742f0cc5ed0711208e,2019-06-16 19:35:00 UTC,cool,hold,766,760,760,NJ,Vineland,19,True,False,False,Gas
1756601,d2b4b0edff4d937154c31af3dbc4c54bbccb5179,2019-06-10 12:55:00 UTC,cool,hold,720,760,760,NJ,Evesham,9,True,False,False,Gas
1756602,d02414f35a6a5293acd7e4826936afd4088d7f09,2019-06-23 18:55:00 UTC,cool,hold,760,760,760,NJ,Hoboken,10,False,False,False,Gas
1756603,d59a49513285b9ecf44b1e3774c0fff492c7c4a0,2019-06-30 15:05:00 UTC,cool,hold,775,760,760,NJ,Newton,40,False,False,False,Gas


In [82]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2019"), ("Month", "jun")):
    jun_2019[label] = value

In [83]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=averaged_names)

In [84]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Persist the averages; index=True writes the group keys out as CSV columns
jun_2019_ave.to_csv(
    path_or_buf="data/day/NJ/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw 2020 June "day" extract for NJ into a DataFrame
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2020-jun-day-NJ.csv")

In [87]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jun_2020[column] >= 850) | (jun_2020[column] <= 600)
    jun_2020.drop(index=jun_2020.loc[out_of_range].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2f7fd3de7c0ea58e68e58efae2493e3ab1b65e58,2020-06-09 13:45:00 UTC,cool,auto,709,670,677,NJ,Mt. Laurel,0,True,False,False,Gas
1,e1be0f8a7ad278c797f4715a492a1872cc015636,2020-06-17 19:55:00 UTC,cool,hold,680,677,677,NJ,Runnemede,80,False,False,False,Gas
2,3584d50be0c0ac7fa814da32446fca6c212a6aba,2020-06-09 13:55:00 UTC,auto,hold,681,695,645,NJ,Montclair,0,False,False,False,Gas
3,55fe049d27d78ce50052a06c5a2ec4313e00ff61,2020-06-16 11:00:00 UTC,cool,hold,737,752,752,NJ,Westfield,5,False,False,False,Gas
4,a584a8df86b73288dc9e904ca3d9491836f82abc,2020-06-26 12:35:00 UTC,auto,hold,764,761,617,NJ,Union,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1698140,f0943a6be8a30d9ae76dbeb5574b339273be31b1,2020-06-10 19:00:00 UTC,cool,auto,767,760,760,NJ,Cranford,99,True,False,False,Gas
1698141,84d188e0b9f8fe4036682ae0d5c2b152124b2274,2020-06-28 15:45:00 UTC,cool,hold,759,760,760,NJ,Ventnor City,9,False,False,False,Gas
1698142,57f42c5925331c03754ca37b6cb8071fd9295e07,2020-06-14 17:20:00 UTC,cool,auto,734,760,760,NJ,Freehold,10,False,False,False,Gas
1698143,287f02374ce923dfd3bb89e40c480c600a9520e7,2020-06-19 15:20:00 UTC,cool,auto,764,760,760,NJ,Franklin Park,118,True,False,False,Gas


In [88]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2020"), ("Month", "jun")):
    jun_2020[label] = value

In [89]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=averaged_names)

In [90]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Persist the averages; index=True writes the group keys out as CSV columns
jun_2020_ave.to_csv(
    path_or_buf="data/day/NJ/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw 2021 June "day" extract for NJ into a DataFrame
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2021-jun-day-NJ.csv")

In [93]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jun_2021[column] >= 850) | (jun_2021[column] <= 600)
    jun_2021.drop(index=jun_2021.loc[out_of_range].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,91ccec7bc792576ac0589fd5fd9261601671668f,2021-06-08 18:30:00 UTC,cool,hold,810,809,809,NJ,Leonia,90,False,False,False,Gas
2,06a03eaa78e22728cc2eb752b910457ed5b108ce,2021-06-08 14:40:00 UTC,cool,hold,697,694,694,NJ,Stewartsville,10,False,False,False,Gas
4,89053abc9571ee3f6824c4a9dee660a640b8d5c3,2021-06-06 12:40:00 UTC,cool,hold,727,687,687,NJ,Long Branch,0,False,False,False,Gas
6,887d3c611f098f2a969ddac52e32d42d4feea641,2021-06-03 17:50:00 UTC,auto,hold,695,692,632,NJ,Moorestown,16,False,False,False,Gas
7,bba5d3aa4933a44e06c7a2b30ff876c6c25e3ccc,2021-06-30 19:25:00 UTC,auto,hold,778,774,712,NJ,Colts Neck,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995956,166b48b5154d91187b73316eaa6e59154c1854c2,2021-06-07 10:30:00 UTC,cool,hold,756,760,760,NJ,Ridgewood,0,False,False,False,Gas
995957,3c13c0b947f008e20c7fe2bc7370f16a9130e1a0,2021-06-27 18:20:00 UTC,cool,hold,762,760,760,NJ,Ventnor City,9,True,False,False,Gas
995958,ba0791c82ba05dce044df8c5afb240523811e248,2021-06-24 16:45:00 UTC,cool,hold,751,760,760,NJ,East Brunswick,0,True,False,False,Gas
995959,99108fa4c6bead747a7cb820badbc28eab58b7d3,2021-06-11 12:05:00 UTC,cool,hold,743,760,760,NJ,Voorhees,30,False,False,False,Gas


In [94]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2021"), ("Month", "jun")):
    jun_2021[label] = value

In [95]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=averaged_names)

In [96]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Persist the averages; index=True writes the group keys out as CSV columns
jun_2021_ave.to_csv(
    path_or_buf="data/day/NJ/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the yearly average CSV files from this month's folder for the state
2. Export them as a single combined CSV

In [98]:
# List the per-year average CSVs for this month. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined file's row order
# reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NJ/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file first, then concatenate once; concatenating inside the
# loop re-copies all previously accumulated rows on each iteration.
yearly_frames = [pd.read_csv("data/day/NJ/jun/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (the previous result) when the folder holds no CSV files.
NJ_jun = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

NJ_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004f71a0c52a570bac8cebc13392ba54fc98da79,jun,2017,cool,auto,Millstone,762.734266,824.440559,692.370629,0.0,False,False,False
1,004f71a0c52a570bac8cebc13392ba54fc98da79,jun,2017,cool,hold,Millstone,766.899225,776.651163,775.478036,0.0,False,False,False
2,00cfe51c25b825270bc4509dfadca8d4f50a4e99,jun,2017,heat,auto,Medford,724.961538,720.000000,720.000000,0.0,False,False,False
3,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,jun,2017,auto,auto,Hillsborough,704.910543,703.948083,653.891374,5.0,False,False,False
4,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,jun,2017,auto,hold,Hillsborough,700.143667,729.039698,669.370510,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1459,fe66c5051b7e13329c0c52a696a10794fa5efd7d,jun,2021,cool,hold,Swedesboro,730.272727,721.806061,720.733333,10.0,True,False,False
1460,feada91765d02a17397b41f77689a4129b6b57d1,jun,2021,cool,hold,Egg Harbor Township,711.272727,685.303030,685.363636,19.0,False,False,True
1461,ff635b7556b71ecf409661242fa988bf1361c850,jun,2021,cool,hold,newark,751.298401,760.071048,759.863233,120.0,False,False,False
1462,ffb88d949cb4d05658d8445b5cc64eb853aea426,jun,2021,cool,hold,Livingston,774.692308,768.307692,768.307692,0.0,False,False,False


In [100]:
# Write the combined month file; index=False leaves the row labels out
NJ_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NJ/NJ_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw 2017 July "day" extract for NJ into a DataFrame
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2017-jul-day-NJ.csv")

In [102]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jul_2017[column] >= 850) | (jul_2017[column] <= 600)
    jul_2017.drop(index=jul_2017.loc[out_of_range].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3cb7d514907a4874c1a37060fcb43bd961ea7812,2017-07-26 14:35:00 UTC,cool,auto,789,800,800,NJ,Mullica Hill,25,True,False,False,Gas
1,c815289897954a071941d2d1a32759f483d695c1,2017-07-02 15:30:00 UTC,cool,auto,750,750,740,NJ,Willingboro,30,False,False,False,Gas
2,71e4ab391d61f4e94db93e360d6d075e8cfe0017,2017-07-13 13:10:00 UTC,cool,hold,759,750,750,NJ,North Bergen,0,False,False,False,Gas
3,936ada79ee18c1afe2eb969ead5ee8fca773b08c,2017-07-07 18:35:00 UTC,auto,auto,761,760,700,NJ,Secaucus,40,False,False,False,Gas
4,57b6772e7fab5a780378f4fe5f63d718e020ba5e,2017-07-28 10:40:00 UTC,cool,hold,699,700,700,NJ,River Vale,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
680574,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-07-14 17:55:00 UTC,cool,hold,778,770,770,NJ,Cape May Court House,0,False,False,False,Gas
680575,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-07-30 19:25:00 UTC,cool,hold,769,770,770,NJ,Cape May Court House,0,False,False,False,Gas
680576,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-07-20 12:55:00 UTC,cool,hold,759,760,760,NJ,Cape May Court House,0,False,False,False,Gas
680577,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-07-05 14:50:00 UTC,cool,hold,763,770,770,NJ,Cape May Court House,0,False,False,False,Gas


In [103]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2017"), ("Month", "jul")):
    jul_2017[label] = value

In [104]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=averaged_names)

In [105]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Persist the averages; index=True writes the group keys out as CSV columns
jul_2017_ave.to_csv(
    path_or_buf="data/day/NJ/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw 2018 July "day" extract for NJ into a DataFrame
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2018-jul-day-NJ.csv")

In [108]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jul_2018[column] >= 850) | (jul_2018[column] <= 600)
    jul_2018.drop(index=jul_2018.loc[out_of_range].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,aed225d4198c685deca8020969a789e1aa6d36cb,2018-07-05 15:35:00 UTC,cool,hold,686,684,684,NJ,Hawthorne,60,False,False,False,Gas
2,5e4f3de0c4aa6aea8e9da8ba07bf8e669a648b30,2018-07-22 17:30:00 UTC,cool,hold,701,708,684,NJ,Palisades Park,60,True,False,False,Gas
3,e7c2709c4db263deb49322593636f0d83aa7cedf,2018-07-22 16:45:00 UTC,heat,hold,792,650,635,NJ,Bernards,27,False,False,False,Gas
4,9e3bf300e5ed900af7c553d657a1dc3f2e84b55a,2018-07-30 19:45:00 UTC,cool,auto,790,788,788,NJ,Jersey City,60,False,False,False,Gas
5,97718cb4671bb743659fde0b29c83393c402bda4,2018-07-14 19:55:00 UTC,cool,hold,718,715,715,NJ,Howell,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1423994,dfbe11b04ab2af18f3e3e80e5b0725b80c1f681f,2018-07-22 14:35:00 UTC,cool,auto,713,720,760,NJ,Paramus,28,False,False,False,Gas
1423995,650cebbaad3dff0df534495c9623c480a27c7b72,2018-07-14 12:50:00 UTC,cool,hold,722,760,760,NJ,Clifton,0,False,False,False,Gas
1423996,14c2199c5f222f738c33922a0cdcdd6272737834,2018-07-30 17:40:00 UTC,cool,hold,763,760,760,NJ,Blackwood,40,False,False,False,Gas
1423997,0dc05bbafb3d7026d8a8742ebdb0376c0cb723ea,2018-07-28 19:40:00 UTC,cool,hold,761,760,760,NJ,Glassboro,20,False,False,False,Gas


In [109]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2018"), ("Month", "jul")):
    jul_2018[label] = value

In [110]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=averaged_names)

In [111]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Persist the averages; index=True writes the group keys out as CSV columns
jul_2018_ave.to_csv(
    path_or_buf="data/day/NJ/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw 2019 July "day" extract for NJ into a DataFrame
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2019-jul-day-NJ.csv")

In [114]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jul_2019[column] >= 850) | (jul_2019[column] <= 600)
    jul_2019.drop(index=jul_2019.loc[out_of_range].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,da73bce9ec82573f791be6ad0ddaa86f541f903a,2019-07-11 11:45:00 UTC,cool,auto,669,670,625,NJ,Bernards,7,False,False,False,Gas
1,471571826f5dc2062ce7b1694699a164f65b5428,2019-07-07 17:10:00 UTC,cool,hold,655,642,642,NJ,Hawthorne,9,True,False,False,Gas
2,0d36823a876cdc9cccd7a304aa1b83ba96340d75,2019-07-14 18:40:00 UTC,cool,hold,759,760,729,NJ,edison,5,False,False,False,Gas
3,e3a5276c1b757eeedbbe0f523c4601b67e7bd710,2019-07-13 13:00:00 UTC,cool,auto,757,810,749,NJ,Whiting,40,False,False,True,Electric
4,d93f6b92b171e1393fe1811bf6125081feab12fe,2019-07-06 17:05:00 UTC,auto,hold,728,725,615,NJ,Clarksboro,50,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2090512,d99dea0e4e63941ee49154886f66b2b512b3d244,2019-07-27 13:20:00 UTC,cool,hold,765,760,760,NJ,Lyndhurst Township,15,False,False,False,Gas
2090513,43bfcc8dca763e62e17c0649b4fa2302ba51b788,2019-07-29 17:45:00 UTC,cool,auto,760,760,760,NJ,Montclair,85,True,False,False,Gas
2090514,acae27e79f228517afb36247f0c33e6ee1554a70,2019-07-09 09:35:00 UTC,cool,hold,739,760,760,NJ,Jackson Township,0,False,False,False,Gas
2090515,393c8adc3ca3535636e9220b92eade38bffa7853,2019-07-11 11:50:00 UTC,cool,hold,760,760,760,NJ,North Brunswick,15,False,False,False,Gas


In [115]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2019"), ("Month", "jul")):
    jul_2019[label] = value

In [116]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=averaged_names)

In [117]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Persist the averages; index=True writes the group keys out as CSV columns
jul_2019_ave.to_csv(
    path_or_buf="data/day/NJ/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw 2020 July "day" extract for NJ into a DataFrame
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2020-jul-day-NJ.csv")

In [120]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jul_2020[column] >= 850) | (jul_2020[column] <= 600)
    jul_2020.drop(index=jul_2020.loc[out_of_range].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7d806f8c16b04b89c5050be6fe34fe46725284c1,2020-07-16 11:00:00 UTC,cool,auto,795,820,733,NJ,Ocean City,20,False,False,False,Gas
1,9f886ba821b603f7ee8b7743b1c67e74977f6427,2020-07-05 12:10:00 UTC,cool,hold,728,728,703,NJ,Bayonne,10,True,False,False,Gas
2,f4bf9e58556aef964b509d6cb8d16e4044df48ea,2020-07-12 14:15:00 UTC,cool,hold,757,761,761,NJ,Westfield,5,False,False,False,Gas
3,0b68615a3380664a93a5b796eb46132fa5ef699d,2020-07-10 17:50:00 UTC,cool,auto,742,752,752,NJ,Middletown,37,False,False,False,Gas
4,3370ca7e2d9ece26c820bf1b819fadb3cd46f1b1,2020-07-08 19:00:00 UTC,cool,auto,779,780,747,NJ,Chesterfield,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1937972,745f2411605d497ab6e2fc336f18573206e09db3,2020-07-07 16:05:00 UTC,cool,auto,771,760,760,NJ,North Brunswick,30,False,False,False,Gas
1937973,f99a051f8bea5fb3bc5db59c6266a699ac680f67,2020-07-24 18:50:00 UTC,cool,auto,764,760,760,NJ,Jersey City,17,False,False,False,Gas
1937974,9c9287511ca977dfd2ae1b2811e90e9c00c66485,2020-07-14 19:00:00 UTC,cool,auto,765,760,760,NJ,Hazlet,67,False,False,False,Gas
1937975,811e24c75b356dfee19d870838d2393f7962a8ce,2020-07-08 11:15:00 UTC,cool,hold,764,760,760,NJ,Ridgewood,0,False,False,False,Gas


In [121]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2020"), ("Month", "jul")):
    jul_2020[label] = value

In [122]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=averaged_names)

In [123]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Persist the averages; index=True writes the group keys out as CSV columns
jul_2020_ave.to_csv(
    path_or_buf="data/day/NJ/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw 2021 July "day" extract for NJ into a DataFrame
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2021-jul-day-NJ.csv")

In [126]:
# Remove predetermined outliers before aggregating: discard every reading whose
# expected cool or expected heat temperature is >= 850 or <= 600.
for column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (jul_2021[column] >= 850) | (jul_2021[column] <= 600)
    jul_2021.drop(index=jul_2021.loc[out_of_range].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,befc73d1fb35a7d9f2c67381c15bdd0e72d1cae7,2021-07-25 17:25:00 UTC,cool,hold,692,655,655,NJ,Newark,0,False,False,False,Gas
2,0a6713d7f3e6fde7f01b28b16dbc8b5233afc40e,2021-07-10 18:35:00 UTC,auto,hold,715,715,665,NJ,West Orange,99,True,False,False,Gas
3,6c7009e15f487b1a55bdb9b8c51edd3a99b8fc1d,2021-07-17 18:20:00 UTC,cool,hold,712,701,701,NJ,Piscataway Township,58,True,False,False,Gas
4,d5c59f9481aad97245fc0ae8c9e1bbb25f0962be,2021-07-20 18:50:00 UTC,cool,hold,762,744,744,NJ,Neptune,15,False,False,False,Gas
5,dced3c100abcc7f2f98e7ceb62e6639d1ff4edee,2021-07-28 13:40:00 UTC,cool,hold,686,688,688,NJ,Neshanic Station,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
980237,6911efa295d59fc278311170dd63ec7cb9344767,2021-07-17 18:05:00 UTC,cool,hold,762,760,760,NJ,Lumberton,0,True,False,False,Gas
980238,f18f830085ceaa87d28b51b1d7e35802033497e6,2021-07-09 17:25:00 UTC,cool,hold,763,760,760,NJ,Westfield,60,False,False,False,Gas
980239,281eb39a3c8307508e872aec94790c2673ccc3ee,2021-07-01 14:50:00 UTC,cool,hold,760,760,760,NJ,newark,0,False,False,False,Gas
980240,adb79a920d5b3858907d6460b67c70b4c6842d71,2021-07-07 13:15:00 UTC,cool,hold,759,760,760,NJ,Egg Harbor Township,20,False,False,False,Gas


In [127]:
# Tag every reading with its period; both values are stored as strings
for label, value in (("Year", "2021"), ("Month", "jul")):
    jul_2021[label] = value

In [128]:
# Suffix the three temperature columns with "_ave" ahead of the per-group mean
averaged_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=averaged_names)

In [129]:
# Average the readings per thermostat / period / mode / calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises TypeError on them instead.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Persist the averages; index=True writes the group keys out as CSV columns
jul_2021_ave.to_csv(
    path_or_buf="data/day/NJ/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the yearly average CSV files from this month's folder for the state
2. Export them as a single combined CSV

In [131]:
# List the per-year average CSVs for this month. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined file's row order
# reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NJ/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly file first, then concatenate once; concatenating inside the
# loop re-copies all previously accumulated rows on each iteration.
yearly_frames = [pd.read_csv("data/day/NJ/jul/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (the previous result) when the folder holds no CSV files.
NJ_jul = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

NJ_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004f71a0c52a570bac8cebc13392ba54fc98da79,jul,2017,cool,auto,Millstone,794.312500,796.395833,694.729167,0.0,False,False,False
1,004f71a0c52a570bac8cebc13392ba54fc98da79,jul,2017,cool,hold,Millstone,764.654731,756.971867,756.516624,0.0,False,False,False
2,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,jul,2017,auto,auto,Hillsborough,703.583333,701.363636,651.363636,5.0,False,False,False
3,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,jul,2017,auto,hold,Hillsborough,710.639241,709.338608,656.015823,5.0,False,False,False
4,0129f1aa4f23beac88b2e4bd022213101f2cac8f,jul,2017,cool,auto,Haddon Township,640.850365,645.395985,649.640511,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1182,fe3fa3e93234275a47cc79efbfeb6989acf638c7,jul,2021,auto,hold,Delran,719.906977,682.325581,632.325581,0.0,False,False,False
1183,fe66c5051b7e13329c0c52a696a10794fa5efd7d,jul,2021,cool,hold,Swedesboro,734.887473,728.479830,727.447983,10.0,True,False,False
1184,ff635b7556b71ecf409661242fa988bf1361c850,jul,2021,cool,hold,newark,797.430348,804.747512,788.419154,120.0,False,False,False
1185,ffb88d949cb4d05658d8445b5cc64eb853aea426,jul,2021,cool,hold,Livingston,775.714286,772.319048,771.676190,0.0,False,False,False


In [133]:
# Save the combined July rows for NJ; the row index is not written
NJ_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NJ/NJ_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August "day" CSV for NJ
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2017-aug-day-NJ.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
aug_2017.drop(
    index=aug_2017.loc[
        (aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b473f19c66c4eccefe0a6288ac031835e3104db9,2017-08-25 15:10:00 UTC,cool,hold,720,720,720,NJ,Metuchen,37,False,False,False,Gas
1,6c09865f6673a1f668d36eb5bfb2137545ef792f,2017-08-20 13:15:00 UTC,cool,auto,734,730,660,NJ,Mays Landing,30,False,False,False,Gas
2,87a2141d6eaebde08c70b77d8286208d2a4c1ca3,2017-08-19 15:35:00 UTC,cool,auto,716,710,680,NJ,Old Bridge,30,False,False,False,Gas
3,6f05ed40009c4d2311ccd097b6b6b8563a7de7c6,2017-08-23 11:35:00 UTC,cool,hold,732,734,734,NJ,Edgewater,20,False,False,True,Electric
4,e12f2bbd3ae2a7369dd8b03ffd96a5e665bb3791,2017-08-09 14:30:00 UTC,cool,auto,697,700,700,NJ,Hamilton Township,50,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
669908,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-08-31 13:50:00 UTC,cool,hold,742,800,790,NJ,Cape May Court House,0,False,False,False,Gas
669909,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-08-05 17:30:00 UTC,cool,auto,763,760,730,NJ,Cape May Court House,0,False,False,False,Gas
669910,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-08-31 15:25:00 UTC,cool,hold,749,800,790,NJ,Cape May Court House,0,False,False,False,Gas
669911,22f9a18d556c4f013209c6c834b04e5c0df714ad,2017-08-18 16:30:00 UTC,cool,hold,767,760,760,NJ,Cape May Court House,0,False,False,False,Gas


In [136]:
# Tag every reading with its year and month; both are used as grouping keys below
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Label the three temperature columns as averages ahead of the aggregation
aug_2017 = aug_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the August 2017 averages to CSV; the group keys live in the index, so it is written too
aug_2017_ave.to_csv(path_or_buf="data/day/NJ/aug/aug_2017_ave.csv", index=True, header=True)

### 2018 August Day

In [140]:
# Load the 2018 August "day" CSV for NJ
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2018-aug-day-NJ.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
aug_2018.drop(
    index=aug_2018.loc[
        (aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,49dabc74be9f8342d35798fe199126b5ef58b502,2018-08-17 16:45:00 UTC,cool,hold,716,705,705,NJ,Spring Lake,55,False,False,False,Gas
1,9a06657297ff3a5ae7889f50724c76bd6ab0ba1b,2018-08-05 19:40:00 UTC,cool,hold,702,697,697,NJ,Middletown,69,False,False,False,Gas
2,c978fda9a1bb73a1a2376a3c821aea5fa80bfa59,2018-08-05 19:05:00 UTC,cool,hold,734,788,788,NJ,Westfield,5,False,False,False,Gas
3,9b9097f5da7e71defa94d636bf9b92e8f19249b6,2018-08-18 15:15:00 UTC,cool,hold,753,761,761,NJ,Basking Ridge,30,False,False,False,Gas
4,1cf24fdded566ea27d4330a2c02fb6d8c9bf86b7,2018-08-05 17:30:00 UTC,cool,hold,779,702,702,NJ,Sewell,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1460720,adb79a920d5b3858907d6460b67c70b4c6842d71,2018-08-04 13:30:00 UTC,cool,hold,764,760,760,NJ,Egg Harbor Township,20,False,False,False,Gas
1460721,61b3a1e8213f8e4548373b700c0a0f6c30135ba7,2018-08-03 11:45:00 UTC,cool,hold,763,760,760,NJ,Ridgefield,0,False,False,False,Gas
1460722,59100307e80e06a6b9d0526d75cc8cc9b7dff228,2018-08-15 11:05:00 UTC,cool,auto,751,750,760,NJ,Hawthorne,25,False,False,False,Gas
1460723,7858e46ce26dce8e5a31962d1953622f0178155c,2018-08-19 15:10:00 UTC,cool,hold,756,760,760,NJ,Madison,5,False,False,False,Gas


In [142]:
# Tag every reading with its year and month; both are used as grouping keys below
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Label the three temperature columns as averages ahead of the aggregation
aug_2018 = aug_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the August 2018 averages to CSV; the group keys live in the index, so it is written too
aug_2018_ave.to_csv(path_or_buf="data/day/NJ/aug/aug_2018_ave.csv", index=True, header=True)

### 2019 August Day

In [146]:
# Load the 2019 August "day" CSV for NJ
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2019-aug-day-NJ.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
aug_2019.drop(
    index=aug_2019.loc[
        (aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9a06657297ff3a5ae7889f50724c76bd6ab0ba1b,2019-08-12 15:10:00 UTC,cool,hold,724,723,723,NJ,Middletown,69,False,False,False,Gas
1,6ff5c470fde7fa363790025da01a29340c47f7f8,2019-08-05 15:50:00 UTC,auto,auto,728,723,673,NJ,Secaucus,40,False,False,False,Gas
2,b5bfc715bbebaa8507644bee6644e4582cd25bd6,2019-08-03 17:40:00 UTC,cool,hold,772,780,742,NJ,Mahwah,10,False,False,False,Gas
3,6f05ed40009c4d2311ccd097b6b6b8563a7de7c6,2019-08-15 13:15:00 UTC,cool,auto,736,734,734,NJ,Edgewater,20,False,False,True,Electric
4,9416792730ff93925d9fdb7f1c13c618959d0381,2019-08-30 11:10:00 UTC,cool,hold,703,760,747,NJ,Cranbury Township,20,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025177,7f4e81372e5d5d60cec5fd02a0474985135dc8e2,2019-08-15 11:10:00 UTC,cool,auto,783,780,760,NJ,Salem,80,False,False,False,Gas
2025178,8b8f1e563136f9119a1ea5b10378f8d7ebc8af18,2019-08-31 13:40:00 UTC,cool,hold,757,760,760,NJ,Cherry Hill,9,False,False,False,Gas
2025179,d07c7da92a8a543a676bf4c3cf458f11ca8dba9e,2019-08-03 12:10:00 UTC,cool,hold,758,760,760,NJ,Fort Lee,99,False,False,False,Gas
2025180,287f02374ce923dfd3bb89e40c480c600a9520e7,2019-08-16 12:05:00 UTC,cool,hold,758,760,760,NJ,Franklin Park,118,True,False,False,Gas


In [148]:
# Tag every reading with its year and month; both are used as grouping keys below
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Label the three temperature columns as averages ahead of the aggregation
aug_2019 = aug_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the August 2019 averages to CSV; the group keys live in the index, so it is written too
aug_2019_ave.to_csv(path_or_buf="data/day/NJ/aug/aug_2019_ave.csv", index=True, header=True)

### 2020 August Day

In [152]:
# Load the 2020 August "day" CSV for NJ
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2020-aug-day-NJ.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
aug_2020.drop(
    index=aug_2020.loc[
        (aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,268b60939773a9951f49a06e762db62a62422056,2020-08-08 18:35:00 UTC,cool,hold,735,745,745,NJ,Collegeville,0,False,False,False,Gas
1,be2df2679f5521a721caed3d48bd4a02113648c7,2020-08-23 12:25:00 UTC,auto,auto,775,770,608,NJ,Basking Ridge,25,False,False,False,Gas
2,9c9287511ca977dfd2ae1b2811e90e9c00c66485,2020-08-17 14:50:00 UTC,cool,auto,741,758,758,NJ,Hazlet,67,False,False,False,Gas
3,3584d50be0c0ac7fa814da32446fca6c212a6aba,2020-08-10 19:50:00 UTC,auto,hold,709,695,645,NJ,Montclair,0,False,False,False,Gas
4,cbc8294ee740f98c64db35add9bee5032c645508,2020-08-09 14:10:00 UTC,cool,hold,750,750,749,NJ,Colonia,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1891229,e5f0583c0c069406881c13d9a6bc19a4110cbc78,2020-08-02 17:40:00 UTC,cool,auto,757,760,760,NJ,Cranford,99,True,False,False,Gas
1891230,bb203770e52355aa59ebf5879a6237adc572ccc7,2020-08-30 10:50:00 UTC,cool,hold,755,760,760,NJ,Matawan,60,True,False,True,Electric
1891231,1400685c74fb4010dd2a97b109c51150e92017a6,2020-08-29 15:25:00 UTC,cool,hold,764,760,760,NJ,Ewing,60,True,False,False,Gas
1891232,adb79a920d5b3858907d6460b67c70b4c6842d71,2020-08-18 18:55:00 UTC,cool,hold,759,760,760,NJ,Egg Harbor Township,20,False,False,False,Gas


In [154]:
# Tag every reading with its year and month; both are used as grouping keys below
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Label the three temperature columns as averages ahead of the aggregation
aug_2020 = aug_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the August 2020 averages to CSV; the group keys live in the index, so it is written too
aug_2020_ave.to_csv(path_or_buf="data/day/NJ/aug/aug_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly average files from the month's folder
2. Export as combined CSV

In [158]:
# Collect the CSV file names in the August folder.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NJ/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly August file and stack them with a single concat call. Growing a
# frame with pd.concat inside a loop re-copies all earlier rows on each pass, and
# seeding it with an empty DataFrame relies on concat behaviour that newer pandas
# versions deprecate.
# `or [pd.DataFrame()]` keeps the original result for an empty folder (an empty
# DataFrame) instead of letting pd.concat raise on an empty list.
NJ_aug = pd.concat(
    [pd.read_csv("data/day/NJ/aug/" + file) for file in files] or [pd.DataFrame()]
)

NJ_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004f71a0c52a570bac8cebc13392ba54fc98da79,aug,2017,cool,auto,Millstone,775.569975,774.788804,668.452926,0.0,False,False,False
1,004f71a0c52a570bac8cebc13392ba54fc98da79,aug,2017,cool,hold,Millstone,773.368932,767.621359,767.317152,0.0,False,False,False
2,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,aug,2017,auto,auto,Hillsborough,709.096579,707.939638,657.931590,5.0,False,False,False
3,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,aug,2017,auto,hold,Hillsborough,675.947368,675.410526,624.842105,5.0,False,False,False
4,0129f1aa4f23beac88b2e4bd022213101f2cac8f,aug,2017,cool,auto,Haddon Township,619.096234,638.418410,625.585774,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2279,ff635b7556b71ecf409661242fa988bf1361c850,aug,2020,cool,hold,newark,779.097668,770.459184,769.724490,120.0,False,False,False
2280,ff6e08f27d804c684e9ea9002f77db7e6f597cc3,aug,2020,auto,auto,Howell,731.355932,720.000000,670.000000,15.0,False,False,False
2281,ffacf5dda3b0257b3891ad670f2d1f42187e2277,aug,2020,heat,auto,Oak Ridge,721.000000,750.000000,750.000000,99.0,True,False,False
2282,ffb88d949cb4d05658d8445b5cc64eb853aea426,aug,2020,cool,hold,Livingston,783.851852,780.148148,779.666667,0.0,False,False,False


In [160]:
# Save the combined August rows for NJ; the row index is not written
NJ_aug.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NJ/NJ_aug.csv", index=False, header=True)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December "day" CSV for NJ
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2017-dec-day-NJ.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
dec_2017.drop(
    index=dec_2017.loc[
        (dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c978fda9a1bb73a1a2376a3c821aea5fa80bfa59,2017-12-01 16:05:00 UTC,heat,hold,683,653,653,NJ,Westfield,5,False,False,False,Gas
1,de606ee4b2c3dcb4d48c7be11f59038350cc1e7c,2017-12-04 12:10:00 UTC,heat,auto,678,810,680,NJ,South River,5,False,False,False,Gas
2,de606ee4b2c3dcb4d48c7be11f59038350cc1e7c,2017-12-24 12:05:00 UTC,heat,auto,675,810,640,NJ,South River,5,False,False,False,Gas
3,44a932e8bd5eb3becee2579895587f54a33b66fd,2017-12-03 11:40:00 UTC,heat,hold,668,684,684,NJ,Leonardo,5,False,False,False,Gas
4,2a01d81b8e560607d42f278ba5234d19e8d3740e,2017-12-04 15:40:00 UTC,auto,hold,704,755,705,NJ,Lawrence,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
959207,69933dd9a30b29ff8606fe3a3e78f1baef8eb714,2017-12-24 16:10:00 UTC,heat,hold,755,760,760,NJ,Clifton,5,False,False,False,Gas
959208,0a41e735b50199a402317d745aa10f29020ee3c3,2017-12-12 11:15:00 UTC,heat,auto,733,760,760,NJ,Elmer,25,False,False,False,Gas
959209,ba36eb1f67ff36cde03a2dc11fd8b75c64b3f722,2017-12-08 14:20:00 UTC,auto,hold,686,760,690,NJ,Weehawken,107,False,False,False,Gas
959210,7658acde76a881ac0f31b56dfe570f483d84c835,2017-12-24 13:45:00 UTC,auto,hold,701,760,700,NJ,Colts Neck,50,False,False,False,Gas


In [163]:
# Tag every reading with its year and month; both are used as grouping keys below
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Label the three temperature columns as averages ahead of the aggregation
dec_2017 = dec_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the December 2017 averages to CSV; the group keys live in the index, so it is written too
dec_2017_ave.to_csv(path_or_buf="data/day/NJ/dec/dec_2017_ave.csv", index=True, header=True)

### 2018 December Day

In [167]:
# Load the 2018 December "day" CSV for NJ
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2018-dec-day-NJ.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
dec_2018.drop(
    index=dec_2018.loc[
        (dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,298d090c07f3843a9e9ea44a3dac1e7fa53206c6,2018-12-09 18:20:00 UTC,heat,hold,760,743,743,NJ,West New York,0,True,False,True,Electric
1,be78777594a54b99d136c1b41213a62ed009c45c,2018-12-02 17:25:00 UTC,heat,hold,701,705,705,NJ,Morris,20,False,False,False,Gas
2,70ad65a35913e0bb2ea598e92dbd5f3996c88ec5,2018-12-29 19:50:00 UTC,heat,hold,681,685,685,NJ,Montgomery,40,False,False,False,Gas
3,d48bc977fa2820c8ebdb5379d3b296ee47cf7e3b,2018-12-19 17:45:00 UTC,heat,auto,681,703,680,NJ,East Brunswick,0,True,False,False,Gas
4,d48bc977fa2820c8ebdb5379d3b296ee47cf7e3b,2018-12-27 13:55:00 UTC,heat,hold,676,703,680,NJ,East Brunswick,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1552290,ba750e1e8ea0125697626086bdd9a980b401746a,2018-12-02 15:15:00 UTC,heat,hold,767,760,760,NJ,Berlin,17,False,False,False,Gas
1552291,644c49d19f4bda542983482244b1121fc3be036c,2018-12-17 18:05:00 UTC,heat,auto,756,760,750,NJ,Linden,0,False,False,False,Gas
1552292,d07c7da92a8a543a676bf4c3cf458f11ca8dba9e,2018-12-14 14:20:00 UTC,heat,hold,758,760,760,NJ,Fort Lee,99,False,False,False,Gas
1552293,2bb3929a2e7a73f199948464d2a4f85442ddb0ca,2018-12-25 18:30:00 UTC,heat,auto,681,760,660,NJ,Trenton,10,False,False,False,Gas


In [169]:
# Tag every reading with its year and month; both are used as grouping keys below
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Label the three temperature columns as averages ahead of the aggregation
dec_2018 = dec_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the December 2018 averages to CSV; the group keys live in the index, so it is written too
dec_2018_ave.to_csv(path_or_buf="data/day/NJ/dec/dec_2018_ave.csv", index=True, header=True)

### 2019 December Day

In [173]:
# Load the 2019 December "day" CSV for NJ
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2019-dec-day-NJ.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
dec_2019.drop(
    index=dec_2019.loc[
        (dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,453ba6643107e17bd5d4d10d6a8d36c6e5e06ccb,2019-12-13 16:55:00 UTC,heat,hold,703,725,675,NJ,Hoboken,0,True,False,False,Gas
1,c6b6bc6efa103ff2ad9b7cecd1ded574f8bf4bd3,2019-12-10 15:55:00 UTC,heat,hold,707,689,689,NJ,Wall,0,False,False,False,Gas
2,71cd3bad8e9c69534d8339e0f2efe6eb2bda909e,2019-12-18 12:40:00 UTC,heat,hold,645,652,652,NJ,Lafayette,0,False,False,False,Gas
3,71cd3bad8e9c69534d8339e0f2efe6eb2bda909e,2019-12-22 12:20:00 UTC,heat,hold,661,662,662,NJ,Lafayette,0,False,False,False,Gas
4,9c3324feb0c5b373e621d724b95f19af2010abaa,2019-12-15 15:00:00 UTC,heat,hold,726,729,729,NJ,Mahwah,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1951286,644c49d19f4bda542983482244b1121fc3be036c,2019-12-07 12:40:00 UTC,heat,hold,694,760,700,NJ,Linden,0,False,False,False,Gas
1951287,599ee305b52c541b948c1d82c2aeef4946d817b5,2019-12-04 14:50:00 UTC,auto,hold,619,760,620,NJ,Voorhees,30,True,False,False,Gas
1951288,7e2f3c9de5fb143793f233c07a049710196bc56a,2019-12-24 13:50:00 UTC,auto,auto,706,760,710,NJ,Cherry Hill,100,False,False,False,Gas
1951289,653b5617a4575acb4aafa5889cb3463c1298b9bf,2019-12-20 17:50:00 UTC,heat,hold,755,760,760,NJ,North Plainfield,40,False,False,False,Gas


In [175]:
# Tag every reading with its year and month; both are used as grouping keys below
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Label the three temperature columns as averages ahead of the aggregation
dec_2019 = dec_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the December 2019 averages to CSV; the group keys live in the index, so it is written too
dec_2019_ave.to_csv(path_or_buf="data/day/NJ/dec/dec_2019_ave.csv", index=True, header=True)

### 2020 December Day

In [179]:
# Load the 2020 December "day" CSV for NJ
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/NJ-day/2020-dec-day-NJ.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600.
dec_2020.drop(
    index=dec_2020.loc[
        (dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] >= 850).any(axis=1)
        | (dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']] <= 600).any(axis=1)
    ].index,
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,193be8b5b1d1c343b24f78aa1adc3f719c7d6484,2020-12-18 17:00:00 UTC,auto,hold,660,830,650,NJ,Scotch Plains,70,False,False,False,Gas
1,eea162e43c8dd1f3950760333c8b73c940980e97,2020-12-29 11:50:00 UTC,auto,hold,711,815,716,NJ,West New York,0,True,False,True,Electric
2,d37109679aedec1fce5a81b6cc4ed9a7278e7c05,2020-12-03 17:30:00 UTC,heat,hold,697,699,699,NJ,Jackson,30,False,False,False,Gas
3,7720ec22884af3da9000a9b7a6a6f4ac67a7b10e,2020-12-12 14:30:00 UTC,heat,hold,710,716,716,NJ,Mount Laurel,20,False,False,False,Gas
4,d7d4bbf4c40cafc1d3f46c2e67429754c388951c,2020-12-20 14:50:00 UTC,heat,hold,702,705,705,NJ,Ventnor City,25,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1567019,7ea729c651850cfbc825e2ddbf8a15cf91b6d042,2020-12-20 14:20:00 UTC,heat,hold,753,760,760,NJ,East Rutherford,0,False,False,False,Gas
1567020,acf49d5329268c9aee85ab01e38e1bb05277d876,2020-12-18 18:05:00 UTC,auto,auto,698,760,700,NJ,North Brunswick,30,True,False,False,Gas
1567021,1a64c8a947516a7687bc6a237802da436b21c7c4,2020-12-14 19:15:00 UTC,auto,hold,665,760,660,NJ,Howell,0,True,False,False,Gas
1567022,2406f1c08f3758a93961d01674e31daf625fab21,2020-12-03 16:55:00 UTC,auto,hold,666,760,670,NJ,Madison,90,False,False,False,Gas


In [181]:
# Tag every reading with its year and month; both are used as grouping keys below
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Label the three temperature columns as averages ahead of the aggregation
dec_2020 = dec_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city. numeric_only=True keeps the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) out of the mean; pandas >= 2.0 raises
# a TypeError on them instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the December 2020 averages to CSV; the group keys live in the index, so it is written too
dec_2020_ave.to_csv(path_or_buf="data/day/NJ/dec/dec_2020_ave.csv", index=True, header=True)

---

### Combine month CSV Files 
1. Read in the yearly average files from the month's folder
2. Export as combined CSV

In [185]:
# Collect the CSV file names in the December folder.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NJ/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every yearly December file and stack them with a single concat call. Growing a
# frame with pd.concat inside a loop re-copies all earlier rows on each pass, and
# seeding it with an empty DataFrame relies on concat behaviour that newer pandas
# versions deprecate.
# `or [pd.DataFrame()]` keeps the original result for an empty folder (an empty
# DataFrame) instead of letting pd.concat raise on an empty list.
NJ_dec = pd.concat(
    [pd.read_csv("data/day/NJ/dec/" + file) for file in files] or [pd.DataFrame()]
)

NJ_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004f71a0c52a570bac8cebc13392ba54fc98da79,dec,2017,heat,auto,Millstone,674.580038,674.732580,673.749529,0.0,False,False,False
1,004f71a0c52a570bac8cebc13392ba54fc98da79,dec,2017,heat,hold,Millstone,695.525301,700.939759,700.679518,0.0,False,False,False
2,0077dbe1c8326d5fbe2f7379172ae07159a24393,dec,2017,heat,auto,Ridgewood,630.330435,650.268478,630.450000,0.0,False,False,False
3,0077dbe1c8326d5fbe2f7379172ae07159a24393,dec,2017,heat,hold,Ridgewood,653.267806,661.336182,638.943020,0.0,False,False,False
4,00cfe51c25b825270bc4509dfadca8d4f50a4e99,dec,2017,heat,hold,Medford,620.880000,651.400000,620.280000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2300,ff635b7556b71ecf409661242fa988bf1361c850,dec,2020,heat,auto,newark,680.130178,692.143984,688.765286,120.0,False,False,False
2301,ff635b7556b71ecf409661242fa988bf1361c850,dec,2020,heat,hold,newark,684.165545,689.464334,685.562584,120.0,False,False,False
2302,ffacf5dda3b0257b3891ad670f2d1f42187e2277,dec,2020,heat,auto,Oak Ridge,732.026316,727.631579,724.500000,99.0,True,False,False
2303,ffb88d949cb4d05658d8445b5cc64eb853aea426,dec,2020,heat,hold,Livingston,713.411765,720.000000,720.000000,0.0,False,False,False


In [187]:
# Save the combined December rows for NJ; the row index is not written
NJ_dec.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NJ/NJ_dec.csv", index=False, header=True)

----

----

---

### Combine state CSV Files 
1. Read in the combined month files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the combined month CSV file names for NJ.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the combined file reproducible across machines and runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/NJ/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every combined month file for NJ and stack them with a single concat call.
# Growing a frame with pd.concat inside a loop re-copies all earlier rows on each
# pass, and seeding it with an empty DataFrame relies on concat behaviour that
# newer pandas versions deprecate.
# `or [pd.DataFrame()]` keeps the original result for an empty folder (an empty
# DataFrame) instead of letting pd.concat raise on an empty list.
NJ_all = pd.concat(
    [pd.read_csv("Scraper_Output/State_Month_Day/NJ/" + file) for file in files]
    or [pd.DataFrame()]
)

NJ_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004f71a0c52a570bac8cebc13392ba54fc98da79,aug,2017,cool,auto,Millstone,775.569975,774.788804,668.452926,0.0,False,False,False
1,004f71a0c52a570bac8cebc13392ba54fc98da79,aug,2017,cool,hold,Millstone,773.368932,767.621359,767.317152,0.0,False,False,False
2,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,aug,2017,auto,auto,Hillsborough,709.096579,707.939638,657.931590,5.0,False,False,False
3,00fffc77256e65d8b7e1657b30e446dd9fa32eb5,aug,2017,auto,hold,Hillsborough,675.947368,675.410526,624.842105,5.0,False,False,False
4,0129f1aa4f23beac88b2e4bd022213101f2cac8f,aug,2017,cool,auto,Haddon Township,619.096234,638.418410,625.585774,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
9668,fe66c5051b7e13329c0c52a696a10794fa5efd7d,jun,2021,cool,hold,Swedesboro,730.272727,721.806061,720.733333,10.0,True,False,False
9669,feada91765d02a17397b41f77689a4129b6b57d1,jun,2021,cool,hold,Egg Harbor Township,711.272727,685.303030,685.363636,19.0,False,False,True
9670,ff635b7556b71ecf409661242fa988bf1361c850,jun,2021,cool,hold,newark,751.298401,760.071048,759.863233,120.0,False,False,False
9671,ffb88d949cb4d05658d8445b5cc64eb853aea426,jun,2021,cool,hold,Livingston,774.692308,768.307692,768.307692,0.0,False,False,False


In [190]:
# Save every NJ month in one file; the row index is not written
NJ_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NJ_all_day.csv", index=False, header=True)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
frames_to_check = {
    "jan_2017": jan_2017,
    "jan_2018": jan_2018,
    "jan_2019": jan_2019,
    "jan_2020": jan_2020,
    "jan_2021": jan_2021,
    "feb_2017": feb_2017,
    "feb_2018": feb_2018,
    "feb_2019": feb_2019,
    "feb_2020": feb_2020,
    "feb_2021": feb_2021,
    "jun_2017": jun_2017,
    "jun_2018": jun_2018,
    "jun_2019": jun_2019,
    "jun_2020": jun_2020,
    "jun_2021": jun_2021,
    "jul_2017": jul_2017,
    "jul_2018": jul_2018,
    "jul_2019": jul_2019,
    "jul_2020": jul_2020,
    "jul_2021": jul_2021,
    "aug_2017": aug_2017,
    "aug_2018": aug_2018,
    "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017,
    "dec_2018": dec_2018,
    "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# One line per frame, in the same order and with the same text as before
for label, frame in frames_to_check.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['NJ']
Unique jan_2018: ['NJ']
Unique jan_2019: ['NJ']
Unique jan_2020: ['NJ']
Unique jan_2021: ['NJ']
Unique feb_2017: ['NJ']
Unique feb_2018: ['NJ']
Unique feb_2019: ['NJ']
Unique feb_2020: ['NJ']
Unique feb_2021: ['NJ']
Unique jun_2017: ['NJ']
Unique jun_2018: ['NJ']
Unique jun_2019: ['NJ']
Unique jun_2020: ['NJ']
Unique jun_2021: ['NJ']
Unique jul_2017: ['NJ']
Unique jul_2018: ['NJ']
Unique jul_2019: ['NJ']
Unique jul_2020: ['NJ']
Unique jul_2021: ['NJ']
Unique aug_2017: ['NJ']
Unique aug_2018: ['NJ']
Unique aug_2019: ['NJ']
Unique aug_2020: ['NJ']
Unique dec_2017: ['NJ']
Unique dec_2018: ['NJ']
Unique dec_2019: ['NJ']
Unique dec_2020: ['NJ']
