# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" CSV for NM into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2017-jan-day-NM.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2017.drop(
    index=jan_2017.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a114233b386abe252a20033b06e047e310933b18,2017-01-31 19:45:00 UTC,auto,hold,652,830,650,NM,Roswell,60,True,False,True,Electric
1,a114233b386abe252a20033b06e047e310933b18,2017-01-29 18:15:00 UTC,auto,hold,662,830,660,NM,Roswell,60,True,False,True,Electric
3,a114233b386abe252a20033b06e047e310933b18,2017-01-26 17:55:00 UTC,auto,hold,664,830,660,NM,Roswell,60,True,False,True,Electric
4,cc0f82ae264d16860a0821913230c6c65fa41ec2,2017-01-14 14:20:00 UTC,heat,auto,707,716,698,NM,Roswell,40,True,True,True,Electric
6,a114233b386abe252a20033b06e047e310933b18,2017-01-26 17:15:00 UTC,auto,hold,659,830,660,NM,Roswell,60,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
75093,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-01-07 08:20:00 UTC,heat,hold,732,760,760,NM,Roswell,65,False,False,False,Gas
75094,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-01-24 14:40:00 UTC,heat,hold,743,760,760,NM,Roswell,65,False,False,False,Gas
75095,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-01-17 08:55:00 UTC,heat,hold,760,760,760,NM,Roswell,65,False,False,False,Gas
75096,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-01-16 14:35:00 UTC,heat,hold,760,760,760,NM,Roswell,65,False,False,False,Gas


In [4]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
jan_2017 = jan_2017.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [6]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
jan_2017_ave = jan_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,Jan,2017,heat,auto,Clovis,612.954545,700.636364,640.045455,20.0,False,False,False
016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,Jan,2017,heat,hold,Clovis,643.425000,700.000000,650.000000,20.0,False,False,False
034f98fe8f4117d011824b1b5c1cfe823fbc29fb,Jan,2017,auto,auto,CLOVIS,733.000000,790.000000,730.000000,15.0,False,False,True
034f98fe8f4117d011824b1b5c1cfe823fbc29fb,Jan,2017,auxHeatOnly,auto,CLOVIS,739.036145,742.506024,742.144578,15.0,False,False,True
034f98fe8f4117d011824b1b5c1cfe823fbc29fb,Jan,2017,auxHeatOnly,hold,CLOVIS,735.296053,738.486842,738.065789,15.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
f99c76c2e2e6d485a427ec1b3fd29b4ed907238c,Jan,2017,auto,auto,Albuquerque,666.687500,760.000000,670.125000,50.0,False,False,False
f99c76c2e2e6d485a427ec1b3fd29b4ed907238c,Jan,2017,auto,hold,Albuquerque,691.867816,760.000000,690.000000,50.0,False,False,False
f99c76c2e2e6d485a427ec1b3fd29b4ed907238c,Jan,2017,heat,auto,Albuquerque,682.804878,760.000000,685.365854,50.0,False,False,False
f99c76c2e2e6d485a427ec1b3fd29b4ed907238c,Jan,2017,heat,hold,Albuquerque,700.494759,760.000000,700.000000,50.0,False,False,False


In [7]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/jan", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
jan_2017_ave.to_csv("data/day/NM/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the 2018 January "day" CSV for NM into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2018-jan-day-NM.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2018.drop(
    index=jan_2018.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fd5688a621c17d37669b21ac97e057b2364b8ed2,2018-01-07 15:30:00 UTC,heat,hold,719,719,719,NM,Albuquerque,0,False,False,False,Gas
1,bb11f43e65f7316674ae308b87a38cf37446e96a,2018-01-10 13:10:00 UTC,heat,auto,687,703,678,NM,Santa Fe,25,False,False,False,Gas
2,1ce5e25af91558b159a5d6e1719abbd25dd78c0c,2018-01-08 14:40:00 UTC,heat,hold,661,665,665,NM,Albuquerque,30,True,False,True,Electric
4,fd5688a621c17d37669b21ac97e057b2364b8ed2,2018-01-27 18:55:00 UTC,heat,hold,725,718,718,NM,Albuquerque,0,False,False,False,Gas
5,3356490e4502335e6db722bc20c9fd3a51791959,2018-01-15 12:05:00 UTC,heat,hold,716,762,734,NM,Albuquerque,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123188,aeefec474242b71d93784f5678af310d2f9e9a0d,2018-01-12 09:55:00 UTC,heat,hold,751,760,760,NM,Roswell,65,False,False,False,Gas
123189,aeefec474242b71d93784f5678af310d2f9e9a0d,2018-01-25 19:55:00 UTC,heat,auto,762,760,760,NM,Roswell,65,False,False,False,Gas
123190,aeefec474242b71d93784f5678af310d2f9e9a0d,2018-01-06 08:55:00 UTC,heat,hold,755,760,760,NM,Roswell,65,False,False,False,Gas
123191,aeefec474242b71d93784f5678af310d2f9e9a0d,2018-01-13 18:10:00 UTC,heat,hold,751,760,760,NM,Roswell,65,False,False,False,Gas


In [10]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
jan_2018 = jan_2018.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [12]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
jan_2018_ave = jan_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/jan", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
jan_2018_ave.to_csv("data/day/NM/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the 2019 January "day" CSV for NM into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2019-jan-day-NM.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2019.drop(
    index=jan_2019.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ebb6f96b3a9baf198ce6a59141442efbde465103,2019-01-10 15:15:00 UTC,heat,hold,714,719,719,NM,Albuquerque,30,False,False,False,Gas
2,0f141439935d7193c2eeb31603d02a6cad179500,2019-01-13 16:40:00 UTC,auto,hold,725,813,721,NM,Albuquerque,30,False,False,False,Gas
4,abc96661d6d4e75dc6a7b38fb39069f3e5a70f6a,2019-01-30 14:55:00 UTC,heat,auto,680,742,679,NM,Alamogordo,35,False,False,False,Gas
6,ebb6f96b3a9baf198ce6a59141442efbde465103,2019-01-11 16:35:00 UTC,heat,hold,720,719,719,NM,Albuquerque,30,False,False,False,Gas
9,a43c3f319060e9669ead6c65f5cc4240f7cfa71f,2019-01-04 14:15:00 UTC,heat,hold,666,717,697,NM,Roswell,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183771,bb8d819be02a51a0cce4ccb6177401dce03c46a2,2019-01-11 13:40:00 UTC,heat,hold,751,760,760,NM,Albuquerque,60,False,False,False,Gas
183772,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2019-01-23 19:30:00 UTC,auxHeatOnly,auto,753,760,760,NM,Clovis,75,False,False,True,Electric
183773,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2019-01-23 19:05:00 UTC,auxHeatOnly,auto,747,760,760,NM,Clovis,75,False,False,True,Electric
183774,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2019-01-23 19:10:00 UTC,auxHeatOnly,auto,754,760,760,NM,Clovis,75,False,False,True,Electric


In [16]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
jan_2019 = jan_2019.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [18]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
jan_2019_ave = jan_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/jan", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
jan_2019_ave.to_csv("data/day/NM/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the 2020 January "day" CSV for NM into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2020-jan-day-NM.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2020.drop(
    index=jan_2020.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2020-01-01 08:35:00 UTC,auto,hold,757,815,765,NM,1,20,True,False,False,Gas
3,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2020-01-09 09:10:00 UTC,auto,hold,752,815,755,NM,1,20,True,False,False,Gas
4,fb02b674b40edbbc6475d037b5f73b1fe464e34d,2020-01-25 18:00:00 UTC,auto,hold,661,775,665,NM,Albuquerque,39,True,False,False,Gas
6,61e41a7afbedd2d39eb3514e7b84881ce6b225df,2020-01-13 16:25:00 UTC,auto,hold,712,767,717,NM,Albuquerque,10,False,False,False,Gas
7,61e41a7afbedd2d39eb3514e7b84881ce6b225df,2020-01-09 14:00:00 UTC,auto,hold,717,767,717,NM,Albuquerque,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
214009,50696b0d33ecfa22fd0a5b80d567048be4bc8200,2020-01-02 19:10:00 UTC,auto,hold,762,810,760,NM,Hobbs,0,False,False,False,Gas
214010,35e491939b0a8c3ff868d8de111d6eecf706ecf0,2020-01-02 13:20:00 UTC,heat,hold,754,760,760,NM,Albuquerque,25,True,False,False,Gas
214011,35e491939b0a8c3ff868d8de111d6eecf706ecf0,2020-01-02 12:55:00 UTC,heat,hold,734,760,760,NM,Albuquerque,25,True,False,False,Gas
214012,3dafa303709de7880515b9a2ef6b9b6d819d739f,2020-01-17 19:05:00 UTC,heat,hold,738,760,760,NM,Albuquerque,40,False,False,False,Gas


In [22]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
jan_2020 = jan_2020.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [24]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
jan_2020_ave = jan_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/jan", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
jan_2020_ave.to_csv("data/day/NM/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the 2021 January "day" CSV for NM into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2021-jan-day-NM.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2021.drop(
    index=jan_2021.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fd5688a621c17d37669b21ac97e057b2364b8ed2,2021-01-02 18:10:00 UTC,heat,hold,726,729,729,NM,Albuquerque,0,False,False,False,Gas
1,bff80cf9a50ac35964e74b92cd472f91a1c1b5ad,2021-01-05 16:25:00 UTC,auto,hold,631,715,635,NM,Albuquerque,60,False,False,False,Gas
2,db3607e0fd5d6736b4354ed446024bcb4f157400,2021-01-23 15:55:00 UTC,heat,hold,633,650,635,NM,Albuquerque,26,False,False,False,Gas
3,0f141439935d7193c2eeb31603d02a6cad179500,2021-01-10 15:05:00 UTC,heat,hold,675,704,682,NM,Albuquerque,30,False,False,False,Gas
4,e7498d85e36b6dd72f0f35153304e10e0cb84f77,2021-01-12 17:10:00 UTC,auto,hold,724,775,725,NM,Alamogordo,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
128721,af200fc82423dec602af20da6b40bef89120dc4a,2021-01-09 16:10:00 UTC,auto,hold,689,800,690,NM,Albuquerque,20,False,False,False,Gas
128722,af200fc82423dec602af20da6b40bef89120dc4a,2021-01-08 16:00:00 UTC,auto,hold,686,800,690,NM,Albuquerque,20,False,False,False,Gas
128723,af200fc82423dec602af20da6b40bef89120dc4a,2021-01-09 16:20:00 UTC,auto,hold,685,800,690,NM,Albuquerque,20,False,False,False,Gas
128724,af200fc82423dec602af20da6b40bef89120dc4a,2021-01-08 15:05:00 UTC,auto,hold,687,800,690,NM,Albuquerque,20,False,False,False,Gas


In [28]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
jan_2021 = jan_2021.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [30]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
jan_2021_ave = jan_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/jan", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
jan_2021_ave.to_csv("data/day/NM/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the January CSV files in the output folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# order in which the files are later combined reproducible.
files = sorted(f for f in os.listdir("data/day/NM/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every January CSV named in `files` and stack them into one frame.
# The files are visited in sorted name order so the row order of the result is
# reproducible, and the frames are concatenated once at the end instead of
# re-copying the growing frame on every iteration.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/NM/jan/" + file)
    frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame (the result
# the previous loop produced) when no CSV files were found.
NM_jan = pd.concat(frames) if frames else pd.DataFrame()

NM_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,Jan,2017,heat,auto,Clovis,612.954545,700.636364,640.045455,20.0,False,False,False
1,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,Jan,2017,heat,hold,Clovis,643.425000,700.000000,650.000000,20.0,False,False,False
2,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,Jan,2017,auto,auto,CLOVIS,733.000000,790.000000,730.000000,15.0,False,False,True
3,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,Jan,2017,auxHeatOnly,auto,CLOVIS,739.036145,742.506024,742.144578,15.0,False,False,True
4,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,Jan,2017,auxHeatOnly,hold,CLOVIS,735.296053,738.486842,738.065789,15.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
167,fbf4eec386d0bbb5c8254b475505ae844b0fc576,Jan,2021,heat,hold,Albuquerque,723.739583,726.704545,726.704545,29.0,True,False,False
168,fcef293019a9fff02bdef3c6d2c08c6233b63de7,Jan,2021,heat,hold,Corrales,668.624585,674.325581,673.016611,40.0,False,False,False
169,fd5688a621c17d37669b21ac97e057b2364b8ed2,Jan,2021,heat,hold,Albuquerque,722.362952,723.521084,723.521084,0.0,False,False,False
170,febcf404654edba17a02972a5c8ba9160c8ed7fa,Jan,2021,auto,hold,Albuquerque,732.156863,800.000000,740.000000,45.0,False,False,False


In [34]:
# Write the combined January frame. Create the output folder first so the
# export also works on a fresh checkout (to_csv does not create missing
# directories). index=False omits the per-file row numbers carried over from
# the concatenation.
os.makedirs("Scraper_Output/State_Month_Day/NM", exist_ok=True)
NM_jan.to_csv("Scraper_Output/State_Month_Day/NM/NM_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" CSV for NM into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2017-feb-day-NM.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2017.drop(
    index=feb_2017.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a114233b386abe252a20033b06e047e310933b18,2017-02-01 14:45:00 UTC,auto,auto,646,685,635,NM,Roswell,60,True,False,True,Electric
1,3ffad9dfad948840b995e2595470eaaf1ec6d7f8,2017-02-09 15:20:00 UTC,heat,hold,676,666,666,NM,Albuquerque,25,True,False,False,Gas
2,8013970f019c3a99ca5607fd2601a1ccb7f431e0,2017-02-25 18:20:00 UTC,heat,hold,727,797,662,NM,Gallup,35,False,False,False,Gas
3,a114233b386abe252a20033b06e047e310933b18,2017-02-01 14:05:00 UTC,auto,auto,652,685,635,NM,Roswell,60,True,False,True,Electric
5,db3607e0fd5d6736b4354ed446024bcb4f157400,2017-02-18 15:55:00 UTC,auto,auto,664,747,653,NM,Albuquerque,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63035,1475fecad1c559962afb1d690b01d87cba9156ea,2017-02-15 19:30:00 UTC,auto,hold,710,765,715,NM,Clovis,0,False,False,False,Gas
63036,1475fecad1c559962afb1d690b01d87cba9156ea,2017-02-14 13:30:00 UTC,auto,hold,707,765,715,NM,Clovis,0,False,False,False,Gas
63037,1475fecad1c559962afb1d690b01d87cba9156ea,2017-02-14 14:40:00 UTC,auto,hold,710,765,715,NM,Clovis,0,False,False,False,Gas
63038,1475fecad1c559962afb1d690b01d87cba9156ea,2017-02-15 15:20:00 UTC,auto,hold,708,765,715,NM,Clovis,0,False,False,False,Gas


In [37]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
# NOTE(review): the label is lowercase "feb" while the January cells use
# capitalized "Jan" - confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
feb_2017 = feb_2017.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [39]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
feb_2017_ave = feb_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/feb", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
feb_2017_ave.to_csv("data/day/NM/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the 2018 February "day" CSV for NM into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2018-feb-day-NM.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2018.drop(
    index=feb_2018.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a56d9a9ca4e57468d79fd6db450d9a3ab47a89e7,2018-02-11 17:35:00 UTC,auto,hold,742,775,725,NM,Las Cruces,45,False,False,False,Gas
1,3586d1916b4708ee4ee48c4c962b16575c90a782,2018-02-22 14:00:00 UTC,heat,auto,679,679,679,NM,Albuquerque,30,False,False,False,Gas
2,ac94b3f02ddf206132d5ac320e3b284f5adae67c,2018-02-27 13:45:00 UTC,auto,hold,627,675,625,NM,Las Cruces,0,False,False,False,Gas
3,a56d9a9ca4e57468d79fd6db450d9a3ab47a89e7,2018-02-11 13:40:00 UTC,auto,hold,728,775,725,NM,Las Cruces,45,False,False,False,Gas
4,a56d9a9ca4e57468d79fd6db450d9a3ab47a89e7,2018-02-10 15:05:00 UTC,auto,hold,722,775,725,NM,Las Cruces,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112166,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2018-02-13 15:40:00 UTC,auxHeatOnly,hold,763,760,760,NM,Clovis,75,False,False,True,Electric
112167,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2018-02-13 14:10:00 UTC,auxHeatOnly,hold,758,760,760,NM,Clovis,75,False,False,True,Electric
112168,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2018-02-25 17:25:00 UTC,auxHeatOnly,auto,758,760,760,NM,Clovis,75,False,False,True,Electric
112169,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2018-02-25 13:55:00 UTC,auxHeatOnly,auto,708,760,760,NM,Clovis,75,False,False,True,Electric


In [43]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
# NOTE(review): the label is lowercase "feb" while the January cells use
# capitalized "Jan" - confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
feb_2018 = feb_2018.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [45]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
feb_2018_ave = feb_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/feb", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
feb_2018_ave.to_csv("data/day/NM/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the 2019 February "day" CSV for NM into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2019-feb-day-NM.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2019.drop(
    index=feb_2019.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,db3607e0fd5d6736b4354ed446024bcb4f157400,2019-02-02 13:50:00 UTC,heat,auto,641,682,682,NM,Albuquerque,26,False,False,False,Gas
3,ebb6f96b3a9baf198ce6a59141442efbde465103,2019-02-26 15:40:00 UTC,heat,hold,695,699,699,NM,Albuquerque,30,False,False,False,Gas
5,ebb6f96b3a9baf198ce6a59141442efbde465103,2019-02-28 17:25:00 UTC,heat,hold,697,699,699,NM,Albuquerque,30,False,False,False,Gas
6,ebb6f96b3a9baf198ce6a59141442efbde465103,2019-02-26 19:05:00 UTC,heat,hold,700,699,699,NM,Albuquerque,30,False,False,False,Gas
8,ebb6f96b3a9baf198ce6a59141442efbde465103,2019-02-27 14:20:00 UTC,heat,hold,699,699,699,NM,Albuquerque,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124946,aeefec474242b71d93784f5678af310d2f9e9a0d,2019-02-28 07:15:00 UTC,heat,hold,761,760,760,NM,Roswell,65,False,False,False,Gas
124947,bb8d819be02a51a0cce4ccb6177401dce03c46a2,2019-02-27 19:25:00 UTC,heat,hold,760,760,760,NM,Albuquerque,60,False,False,False,Gas
124948,bb8d819be02a51a0cce4ccb6177401dce03c46a2,2019-02-27 19:35:00 UTC,heat,hold,767,760,760,NM,Albuquerque,60,False,False,False,Gas
124949,bb8d819be02a51a0cce4ccb6177401dce03c46a2,2019-02-15 19:25:00 UTC,heat,hold,787,760,760,NM,Albuquerque,60,False,False,False,Gas


In [49]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
# NOTE(review): the label is lowercase "feb" while the January cells use
# capitalized "Jan" - confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
feb_2019 = feb_2019.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [51]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
feb_2019_ave = feb_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/feb", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
feb_2019_ave.to_csv("data/day/NM/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the 2020 February "day" CSV for NM into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2020-feb-day-NM.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2020.drop(
    index=feb_2020.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,61e41a7afbedd2d39eb3514e7b84881ce6b225df,2020-02-01 19:00:00 UTC,auto,hold,713,767,717,NM,Albuquerque,10,False,False,False,Gas
1,a1f365fa4405aeecd828fb559bd6cae35193afa2,2020-02-18 16:45:00 UTC,heat,hold,685,684,684,NM,Gallup,70,False,False,False,Gas
2,8ff26813afd811a56ebd3d3024730ac8f44f5da8,2020-02-23 16:35:00 UTC,auto,hold,707,830,690,NM,Albuquerque,10,True,False,False,Gas
3,0f141439935d7193c2eeb31603d02a6cad179500,2020-02-14 16:25:00 UTC,heat,hold,680,815,680,NM,Albuquerque,30,False,False,False,Gas
4,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2020-02-28 11:10:00 UTC,auto,hold,750,815,755,NM,1,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184377,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-02-16 16:40:00 UTC,heat,auto,760,710,760,NM,Carlsbad,30,False,False,False,Gas
184378,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-02-16 14:50:00 UTC,heat,auto,748,710,760,NM,Carlsbad,30,False,False,False,Gas
184379,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-02-16 17:00:00 UTC,heat,auto,752,710,760,NM,Carlsbad,30,False,False,False,Gas
184380,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-02-16 18:00:00 UTC,heat,auto,753,710,760,NM,Carlsbad,30,False,False,False,Gas


In [55]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
# NOTE(review): the label is lowercase "feb" while the January cells use
# capitalized "Jan" - confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
feb_2020 = feb_2020.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [57]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
feb_2020_ave = feb_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/feb", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
feb_2020_ave.to_csv("data/day/NM/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the 2021 February "day" CSV for NM into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2021-feb-day-NM.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating: drop (in place) every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2021.drop(
    index=feb_2021.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fd5688a621c17d37669b21ac97e057b2364b8ed2,2021-02-20 18:35:00 UTC,heat,hold,706,709,709,NM,Albuquerque,0,False,False,False,Gas
1,6696b70d15ff680c948557249ca0c5c47c66a53c,2021-02-15 19:05:00 UTC,heat,hold,724,725,725,NM,Clovis,30,False,False,False,Gas
2,b82e56d502d388ca052daa5a15b4518e48878af5,2021-02-20 19:30:00 UTC,heat,hold,695,699,699,NM,Portales,40,False,False,True,Electric
3,fd5688a621c17d37669b21ac97e057b2364b8ed2,2021-02-28 17:20:00 UTC,heat,hold,720,722,722,NM,Albuquerque,0,False,False,False,Gas
5,893b5bfc35cfad44f1bf19fb36bec385ee5dc022,2021-02-15 19:50:00 UTC,auto,hold,716,795,735,NM,Tularosa,15,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
116685,37e507636b62c644594504a3da41a1ca4e82287f,2021-02-25 15:20:00 UTC,heat,hold,763,760,760,NM,Las Cruces,50,False,False,False,Gas
116686,37e507636b62c644594504a3da41a1ca4e82287f,2021-02-25 12:05:00 UTC,heat,hold,754,760,760,NM,Las Cruces,50,False,False,False,Gas
116687,37e507636b62c644594504a3da41a1ca4e82287f,2021-02-25 13:35:00 UTC,heat,hold,756,760,760,NM,Las Cruces,50,False,False,False,Gas
116688,37e507636b62c644594504a3da41a1ca4e82287f,2021-02-25 11:30:00 UTC,heat,hold,754,760,760,NM,Las Cruces,50,False,False,False,Gas


In [61]:
# Tag every row with its year and month label (both stored as strings);
# these two columns are used as grouping keys when the averages are computed.
# NOTE(review): the label is lowercase "feb" while the January cells use
# capitalized "Jan" - confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# labels them as averages.
feb_2021 = feb_2021.rename(
    columns={
        name: f"{name}_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [63]:
# Average the numeric columns per (Identifier, Month, Year, HvacMode,
# CalendarEvent, City) group.
# numeric_only=True is explicit because pandas >= 2.0 no longer silently drops
# the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) and
# raises a TypeError when asked to average them.
feb_2021_ave = feb_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout
# (to_csv does not create missing directories).
os.makedirs("data/day/NM/feb", exist_ok=True)

# index=True keeps the groupby keys (held in the index) in the written file.
feb_2021_ave.to_csv("data/day/NM/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the February CSV files in the output folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# order in which the files are later combined reproducible.
files = sorted(f for f in os.listdir("data/day/NM/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every February CSV named in `files` and stack them into one frame.
# The files are visited in sorted name order so the row order of the result is
# reproducible, and the frames are concatenated once at the end instead of
# re-copying the growing frame on every iteration.
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/NM/feb/" + file)
    frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame (the result
# the previous loop produced) when no CSV files were found.
NM_feb = pd.concat(frames) if frames else pd.DataFrame()

NM_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,feb,2017,auto,hold,Clovis,561.400000,800.000000,633.000000,20.0,False,False,False
1,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,feb,2017,heat,auto,Clovis,600.519737,700.000000,637.105263,20.0,False,False,False
2,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,feb,2017,heat,hold,Clovis,586.428571,700.000000,640.000000,20.0,False,False,False
3,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,feb,2017,auxHeatOnly,hold,CLOVIS,732.432432,730.000000,730.000000,15.0,False,False,True
4,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,feb,2017,heat,auto,Las Cruces,708.772727,727.348485,711.969697,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,fbdba8c27abf08db10ea6a2a1c1ba3544a5fcd37,feb,2021,heat,hold,Albuquerque,685.875000,690.141026,689.891026,10.0,True,False,False
175,fbf4eec386d0bbb5c8254b475505ae844b0fc576,feb,2021,heat,hold,Albuquerque,709.311183,711.312804,711.312804,29.0,True,False,False
176,fcef293019a9fff02bdef3c6d2c08c6233b63de7,feb,2021,heat,hold,Corrales,661.292000,668.144000,667.544000,40.0,False,False,False
177,fd5688a621c17d37669b21ac97e057b2364b8ed2,feb,2021,heat,hold,Albuquerque,716.710963,716.747508,716.747508,0.0,False,False,False


In [67]:
# Save the combined February table; the row index is not written.
NM_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NM/NM_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 readings for NM.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2017-jun-day-NM.csv")

In [69]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jun_2017 remains the same DataFrame object.
jun_2017.drop(
    index=jun_2017.index[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
17,fe54b5a39fa876d401d5b44c800c0321a5445797,2017-06-27 17:05:00 UTC,cool,hold,739,745,745,NM,Albuquerque,15,False,False,False,Gas
29,51c93dd45f847dcd2d8b853ea685fc6435c5d45e,2017-06-10 17:35:00 UTC,cool,hold,763,810,810,NM,Las Cruces,15,False,False,False,Gas
40,51c93dd45f847dcd2d8b853ea685fc6435c5d45e,2017-06-24 19:35:00 UTC,cool,hold,756,810,810,NM,Las Cruces,15,False,False,False,Gas
45,51c93dd45f847dcd2d8b853ea685fc6435c5d45e,2017-06-10 18:45:00 UTC,cool,hold,753,777,777,NM,Las Cruces,15,False,False,False,Gas
51,fe54b5a39fa876d401d5b44c800c0321a5445797,2017-06-27 15:55:00 UTC,cool,hold,749,745,745,NM,Albuquerque,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
87861,e97f4fd13b53880ccaec0f49b27ff8c75f49d7d5,2017-06-23 16:10:00 UTC,cool,hold,755,760,760,NM,Albuquerque,0,False,False,False,Gas
87862,e97f4fd13b53880ccaec0f49b27ff8c75f49d7d5,2017-06-24 11:55:00 UTC,cool,hold,761,760,760,NM,Albuquerque,0,False,False,False,Gas
87863,e97f4fd13b53880ccaec0f49b27ff8c75f49d7d5,2017-06-23 11:45:00 UTC,cool,hold,763,760,760,NM,Albuquerque,0,False,False,False,Gas
87864,e97f4fd13b53880ccaec0f49b27ff8c75f49d7d5,2017-06-24 11:45:00 UTC,cool,hold,760,760,760,NM,Albuquerque,0,False,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jun_2017 = jun_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jun_2017_ave.to_csv(path_or_buf="data/day/NM/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the raw June 2018 readings for NM.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2018-jun-day-NM.csv")

In [75]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jun_2018 remains the same DataFrame object.
jun_2018.drop(
    index=jun_2018.index[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,18393b92831b0a51ebcac5bd4ba6b6433199a92e,2018-06-10 17:05:00 UTC,auto,hold,758,759,669,NM,Artesia,0,False,False,False,Gas
1,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2018-06-14 19:30:00 UTC,cool,hold,740,741,741,NM,Albuquerque,10,False,False,False,Gas
2,fd5688a621c17d37669b21ac97e057b2364b8ed2,2018-06-02 17:20:00 UTC,cool,hold,737,734,734,NM,Albuquerque,0,False,False,False,Gas
4,e33fbc4411338f419c91ea34fd023069034393b9,2018-06-24 15:45:00 UTC,cool,auto,722,715,665,NM,Las Cruces,40,False,False,False,Gas
5,9d74c92277c5cee76f6ef84538d23fcac07ab307,2018-06-01 11:45:00 UTC,auto,hold,720,722,672,NM,Rio Rancho,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
153292,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-06-06 15:15:00 UTC,cool,hold,772,770,760,NM,Carlsbad,5,False,False,True,Electric
153293,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-06-29 18:40:00 UTC,cool,auto,758,760,760,NM,Carlsbad,5,False,False,True,Electric
153294,fbf4eec386d0bbb5c8254b475505ae844b0fc576,2018-06-30 14:55:00 UTC,cool,hold,759,760,760,NM,Albuquerque,29,True,False,False,Gas
153295,fbf4eec386d0bbb5c8254b475505ae844b0fc576,2018-06-30 14:30:00 UTC,cool,hold,757,760,760,NM,Albuquerque,29,True,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jun_2018 = jun_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jun_2018_ave.to_csv(path_or_buf="data/day/NM/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the raw June 2019 readings for NM.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2019-jun-day-NM.csv")

In [81]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jun_2019 remains the same DataFrame object.
jun_2019.drop(
    index=jun_2019.index[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2019-06-25 16:00:00 UTC,cool,hold,714,725,725,NM,Albuquerque,10,False,False,False,Gas
1,e7498d85e36b6dd72f0f35153304e10e0cb84f77,2019-06-23 12:40:00 UTC,auto,hold,699,695,645,NM,Alamogordo,40,False,False,False,Gas
2,54ce6433b149d0b03bd7e5b22f6ac3b005d7f9dc,2019-06-20 19:10:00 UTC,cool,hold,753,751,751,NM,Albuquerque,47,False,False,False,Gas
3,15d162b914f457f91f466e002bd0118511e7dd7e,2019-06-26 13:25:00 UTC,cool,auto,698,700,735,NM,Albuquerque,0,True,False,False,Gas
4,1d3dba41a74ba5c9c7b9c0c4e4e61ba272f2b3d6,2019-06-19 19:25:00 UTC,auto,hold,737,715,665,NM,Santa Fe,59,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
210867,0ec51b6ac76933e8ca30d8963d6d71d43a266f61,2019-06-04 15:00:00 UTC,cool,hold,705,720,720,NM,Albuquerque,35,False,False,False,Gas
210868,0ec51b6ac76933e8ca30d8963d6d71d43a266f61,2019-06-06 11:00:00 UTC,cool,hold,700,720,720,NM,Albuquerque,35,False,False,False,Gas
210869,0ec51b6ac76933e8ca30d8963d6d71d43a266f61,2019-06-05 17:50:00 UTC,cool,hold,700,720,720,NM,Albuquerque,35,False,False,False,Gas
210870,0ec51b6ac76933e8ca30d8963d6d71d43a266f61,2019-06-06 17:15:00 UTC,cool,hold,702,720,720,NM,Albuquerque,35,False,False,False,Gas


In [82]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jun_2019 = jun_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jun_2019_ave.to_csv(path_or_buf="data/day/NM/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the raw June 2020 readings for NM.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2020-jun-day-NM.csv")

In [87]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jun_2020 remains the same DataFrame object.
jun_2020.drop(
    index=jun_2020.index[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2020-06-19 13:20:00 UTC,cool,auto,690,720,696,NM,Albuquerque,10,False,False,False,Gas
2,6af92558b51924ff942095568a4da824e1dc43a4,2020-06-14 16:40:00 UTC,cool,hold,705,755,755,NM,Clovis,30,False,False,False,Gas
3,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2020-06-11 19:40:00 UTC,cool,auto,728,720,696,NM,Albuquerque,10,False,False,False,Gas
4,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2020-06-25 12:50:00 UTC,cool,auto,704,720,696,NM,Albuquerque,10,False,False,False,Gas
5,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2020-06-24 18:45:00 UTC,cool,auto,727,720,696,NM,Albuquerque,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
222157,d736093ed96ba0f43324559238e922065f5ec121,2020-06-20 19:50:00 UTC,cool,hold,765,760,760,NM,las cruces,0,False,False,False,Gas
222158,d736093ed96ba0f43324559238e922065f5ec121,2020-06-10 16:10:00 UTC,cool,hold,735,760,760,NM,las cruces,0,False,False,False,Gas
222159,d736093ed96ba0f43324559238e922065f5ec121,2020-06-20 16:20:00 UTC,cool,hold,752,760,760,NM,las cruces,0,False,False,False,Gas
222160,d736093ed96ba0f43324559238e922065f5ec121,2020-06-29 13:05:00 UTC,cool,hold,756,760,760,NM,las cruces,0,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jun_2020 = jun_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jun_2020_ave.to_csv(path_or_buf="data/day/NM/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the raw June 2021 readings for NM.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2021-jun-day-NM.csv")

In [93]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jun_2021 remains the same DataFrame object.
jun_2021.drop(
    index=jun_2021.index[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,d35ec4e561fa03fdc86b5ab885a74ad044d3f4a5,2021-06-25 16:25:00 UTC,cool,hold,696,697,697,NM,Hobbs,15,False,False,False,Gas
2,2818adf5822b1f2a3276bf379a90a57f32777861,2021-06-14 19:15:00 UTC,auto,hold,739,729,709,NM,Alamogordo,25,False,False,False,Gas
4,1e20a0c7db767f86be4c91e31c81c9751d2994a0,2021-06-03 19:10:00 UTC,cool,hold,683,678,678,NM,Albuquerque,5,False,False,False,Gas
5,1e20a0c7db767f86be4c91e31c81c9751d2994a0,2021-06-17 17:15:00 UTC,auto,hold,683,683,633,NM,Albuquerque,5,False,False,False,Gas
6,1e20a0c7db767f86be4c91e31c81c9751d2994a0,2021-06-15 16:05:00 UTC,auto,hold,683,683,633,NM,Albuquerque,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131596,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2021-06-22 18:45:00 UTC,cool,hold,762,760,760,NM,Roswell,0,False,False,True,Electric
131597,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2021-06-22 17:20:00 UTC,cool,hold,763,760,760,NM,Roswell,0,False,False,True,Electric
131598,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2021-06-29 19:10:00 UTC,cool,hold,752,760,760,NM,Roswell,0,False,False,True,Electric
131599,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2021-06-26 14:50:00 UTC,cool,hold,756,760,760,NM,Roswell,0,False,False,True,Electric


In [94]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jun_2021 = jun_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jun_2021_ave.to_csv(path_or_buf="data/day/NM/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV files
1. Read in the per-year aggregate CSV files from this state's month folder
2. Export them as one combined CSV

In [98]:
# List every .csv file in the June folder (the per-year aggregates).
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/NM/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file into a list and concatenate once at the end.
# Calling pd.concat inside the loop re-copies all previously loaded rows on
# every iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/NM/jun/" + file)
    frames.append(df)

# With no input files, fall back to an empty DataFrame (pd.concat([]) raises).
# Each file's own row index is kept, exactly as before.
NM_jun = pd.concat(frames) if frames else pd.DataFrame()

NM_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,jun,2017,auto,auto,CLOVIS,759.803279,769.868852,660.524590,15.0,False,False,True
1,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,jun,2017,auto,hold,CLOVIS,752.472303,747.381924,695.160350,15.0,False,False,True
2,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,jun,2017,cool,auto,Las Cruces,735.100000,753.750000,692.500000,25.0,False,False,False
3,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,jun,2017,cool,hold,Las Cruces,741.968354,747.050633,704.424051,25.0,False,False,False
4,0c683e9af00af3beb63a8a80e406315b523683d0,jun,2017,cool,auto,Ruidoso,731.256579,738.605263,735.289474,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,fa4c846354d5d50b89f2bbb32307575fa44e32ef,jun,2021,auto,hold,Albuquerque,754.333333,740.000000,690.000000,19.0,False,False,False
169,fa4c846354d5d50b89f2bbb32307575fa44e32ef,jun,2021,cool,hold,Albuquerque,762.222222,750.444444,748.888889,19.0,False,False,False
170,fbdba8c27abf08db10ea6a2a1c1ba3544a5fcd37,jun,2021,cool,hold,Albuquerque,707.022779,709.102506,709.064541,10.0,True,False,False
171,fcd9e3e40b18915de13a4d63fb6e5ffc4e81fdd1,jun,2021,auto,hold,Albuquerque,766.122549,782.500000,620.000000,17.0,False,False,False


In [100]:
# Save the combined June table; the row index is not written.
NM_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/NM/NM_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 readings for NM.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2017-jul-day-NM.csv")

In [102]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jul_2017 remains the same DataFrame object.
jul_2017.drop(
    index=jul_2017.index[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,a70abd5d944363c751b42e9a15a417b8de3a90b9,2017-07-06 16:45:00 UTC,auto,auto,788,785,735,NM,Albuquerque,15,False,False,False,Gas
4,fd5688a621c17d37669b21ac97e057b2364b8ed2,2017-07-29 19:55:00 UTC,cool,hold,746,741,741,NM,Albuquerque,0,False,False,False,Gas
5,fe54b5a39fa876d401d5b44c800c0321a5445797,2017-07-04 17:25:00 UTC,auto,hold,716,715,665,NM,Albuquerque,15,False,False,False,Gas
7,7b1b223d7ae944db32d5800acaa3bf19382ceaea,2017-07-31 18:20:00 UTC,cool,hold,755,800,800,NM,Silver City,40,False,False,False,Gas
8,7b1b223d7ae944db32d5800acaa3bf19382ceaea,2017-07-29 16:30:00 UTC,cool,hold,750,810,800,NM,Silver City,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
114741,fbf4eec386d0bbb5c8254b475505ae844b0fc576,2017-07-18 13:30:00 UTC,cool,hold,755,760,760,NM,Albuquerque,29,True,False,False,Gas
114742,fbf4eec386d0bbb5c8254b475505ae844b0fc576,2017-07-19 15:45:00 UTC,cool,hold,758,760,760,NM,Albuquerque,29,True,False,False,Gas
114743,fbf4eec386d0bbb5c8254b475505ae844b0fc576,2017-07-18 13:05:00 UTC,cool,hold,752,760,760,NM,Albuquerque,29,True,False,False,Gas
114744,fbf4eec386d0bbb5c8254b475505ae844b0fc576,2017-07-25 14:15:00 UTC,cool,hold,762,760,760,NM,Albuquerque,29,True,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jul_2017 = jul_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jul_2017_ave.to_csv(path_or_buf="data/day/NM/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the raw July 2018 readings for NM.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2018-jul-day-NM.csv")

In [108]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jul_2018 remains the same DataFrame object.
jul_2018.drop(
    index=jul_2018.index[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,18393b92831b0a51ebcac5bd4ba6b6433199a92e,2018-07-06 19:15:00 UTC,cool,hold,715,717,717,NM,Artesia,0,False,False,False,Gas
1,0a624d227aed8533e1ea0de2c5b4f624591d72de,2018-07-02 17:30:00 UTC,auto,hold,716,685,635,NM,Santa Fe,20,False,False,False,Gas
2,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2018-07-08 13:45:00 UTC,cool,hold,737,742,742,NM,Albuquerque,10,False,False,False,Gas
3,0a624d227aed8533e1ea0de2c5b4f624591d72de,2018-07-21 17:25:00 UTC,auto,hold,713,690,635,NM,Santa Fe,20,False,False,False,Gas
4,0a624d227aed8533e1ea0de2c5b4f624591d72de,2018-07-17 17:55:00 UTC,auto,hold,722,730,635,NM,Santa Fe,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165119,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-07-02 19:30:00 UTC,cool,auto,752,760,760,NM,Carlsbad,5,False,False,True,Electric
165120,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-07-17 19:35:00 UTC,cool,auto,760,760,760,NM,Carlsbad,5,False,False,True,Electric
165121,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-07-29 13:15:00 UTC,cool,hold,791,800,760,NM,Carlsbad,5,False,False,True,Electric
165122,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-07-11 17:15:00 UTC,cool,hold,767,770,760,NM,Carlsbad,5,False,False,True,Electric


In [109]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jul_2018 = jul_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jul_2018_ave.to_csv(path_or_buf="data/day/NM/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the raw July 2019 readings for NM.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2019-jul-day-NM.csv")

In [114]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jul_2019 remains the same DataFrame object.
jul_2019.drop(
    index=jul_2019.index[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,fd5688a621c17d37669b21ac97e057b2364b8ed2,2019-07-20 14:30:00 UTC,cool,hold,744,738,738,NM,Albuquerque,0,False,False,False,Gas
1,7a3484fbb7463274302a39b4abec0a91053c75c2,2019-07-30 19:20:00 UTC,auto,hold,731,725,635,NM,Albuquerque,30,False,False,False,Gas
2,15d162b914f457f91f466e002bd0118511e7dd7e,2019-07-08 13:20:00 UTC,cool,auto,741,800,755,NM,Albuquerque,0,True,False,False,Gas
3,0d1846228bf4d3de1ea29bd4c4ae51606878f776,2019-07-27 16:55:00 UTC,auto,hold,723,723,673,NM,CLOVIS,15,False,False,True,Electric
4,0f141439935d7193c2eeb31603d02a6cad179500,2019-07-18 14:20:00 UTC,auto,auto,770,770,678,NM,Albuquerque,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
234251,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-07-31 19:05:00 UTC,cool,hold,767,770,760,NM,Carlsbad,5,False,False,True,Electric
234252,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-07-20 19:25:00 UTC,cool,hold,742,780,760,NM,Carlsbad,5,False,False,True,Electric
234253,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-07-26 19:35:00 UTC,cool,hold,755,770,760,NM,Carlsbad,5,False,False,True,Electric
234254,e97f4fd13b53880ccaec0f49b27ff8c75f49d7d5,2019-07-27 13:20:00 UTC,cool,hold,753,760,760,NM,Albuquerque,0,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jul_2019 = jul_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jul_2019_ave.to_csv(path_or_buf="data/day/NM/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the raw July 2020 readings for NM.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2020-jul-day-NM.csv")

In [120]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jul_2020 remains the same DataFrame object.
jul_2020.drop(
    index=jul_2020.index[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,482f74a48aae0d657fcd576f99ac50113cba86a4,2020-07-26 10:05:00 UTC,auto,auto,709,710,667,NM,Roswell,45,False,False,False,Gas
1,a1f365fa4405aeecd828fb559bd6cae35193afa2,2020-07-01 17:20:00 UTC,heat,auto,723,780,644,NM,Gallup,70,False,False,False,Gas
3,e7498d85e36b6dd72f0f35153304e10e0cb84f77,2020-07-09 17:00:00 UTC,auto,hold,727,707,627,NM,Alamogordo,40,False,False,False,Gas
4,92adc475baa3e0dbe5a0bdd8a6f40f3faa291e4b,2020-07-06 17:15:00 UTC,auto,hold,707,685,645,NM,Albuquerque,20,True,False,False,Gas
6,0c1cc54cbd5e7030b0946152616aabe3cb31ff5e,2020-07-08 17:40:00 UTC,auto,hold,762,750,649,NM,Alamogordo,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239633,d5eece6549d594c92f8e505116d3ec8d8aae7c68,2020-07-30 17:10:00 UTC,cool,hold,720,720,720,NM,Rio Rancho,7,False,False,False,Gas
239634,d5eece6549d594c92f8e505116d3ec8d8aae7c68,2020-07-28 14:25:00 UTC,cool,hold,725,720,720,NM,Rio Rancho,7,False,False,False,Gas
239635,d5eece6549d594c92f8e505116d3ec8d8aae7c68,2020-07-31 16:00:00 UTC,cool,hold,728,720,720,NM,Rio Rancho,7,False,False,False,Gas
239636,d5eece6549d594c92f8e505116d3ec8d8aae7c68,2020-07-29 15:00:00 UTC,cool,hold,708,720,720,NM,Rio Rancho,7,False,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jul_2020 = jul_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jul_2020_ave.to_csv(path_or_buf="data/day/NM/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the raw July 2021 readings for NM.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2021-jul-day-NM.csv")

In [126]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The drop is done in place, so jul_2021 remains the same DataFrame object.
jul_2021.drop(
    index=jul_2021.index[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ],
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1e20a0c7db767f86be4c91e31c81c9751d2994a0,2021-07-26 11:45:00 UTC,auto,hold,682,683,633,NM,Albuquerque,5,False,False,False,Gas
1,c23ea2557d8988af45696eeb2cc768022b966597,2021-07-14 15:45:00 UTC,cool,hold,742,737,737,NM,Albuquerque,30,True,False,False,Gas
2,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2021-07-13 13:15:00 UTC,cool,hold,723,720,696,NM,Albuquerque,10,False,False,False,Gas
3,d07f1d6e9b382df0127bbb5284e61ce476b9e2d3,2021-07-10 17:00:00 UTC,cool,hold,743,745,745,NM,Deming,50,False,False,False,Gas
4,cbc15f57d6d098f0607b3cbc67dd6e6043ca9c3e,2021-07-12 18:20:00 UTC,cool,hold,722,720,696,NM,Albuquerque,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124221,d736093ed96ba0f43324559238e922065f5ec121,2021-07-19 14:00:00 UTC,cool,hold,747,760,760,NM,las cruces,0,False,False,False,Gas
124222,d736093ed96ba0f43324559238e922065f5ec121,2021-07-19 14:10:00 UTC,cool,hold,747,760,760,NM,las cruces,0,False,False,False,Gas
124223,d736093ed96ba0f43324559238e922065f5ec121,2021-07-19 13:40:00 UTC,cool,hold,765,760,760,NM,las cruces,0,False,False,False,Gas
124224,d736093ed96ba0f43324559238e922065f5ec121,2021-07-19 13:45:00 UTC,cool,hold,761,760,760,NM,las cruces,0,False,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings); the
# aggregation step uses these two columns as grouping keys.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave": they hold per-group means
# once the frame is aggregated in the next step.
jul_2021 = jul_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True explicitly skips the text columns that are not grouping
# keys (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas
# dropped them silently; pandas >= 2.0 raises TypeError without this flag.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Write the aggregate to CSV. The grouping keys live in the index, so the index
# is written as well and the keys come back as ordinary columns on re-read.
jul_2021_ave.to_csv(path_or_buf="data/day/NM/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV files
1. Read in the per-year aggregate CSV files from this state's month folder
2. Export them as one combined CSV

In [131]:
# List every .csv file in the July folder (the per-year aggregates).
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/NM/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file into a list and concatenate once at the end.
# Calling pd.concat inside the loop re-copies all previously loaded rows on
# every iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/NM/jul/" + file)
    frames.append(df)

# With no input files, fall back to an empty DataFrame (pd.concat([]) raises).
# Each file's own row index is kept, exactly as before.
NM_jul = pd.concat(frames) if frames else pd.DataFrame()

NM_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,jul,2017,auto,auto,CLOVIS,767.276836,767.689266,700.000000,15.0,False,False,True
1,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,jul,2017,auto,hold,CLOVIS,753.503817,749.206107,695.419847,15.0,False,False,True
2,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,jul,2017,cool,auto,Las Cruces,733.342857,734.257143,700.000000,25.0,False,False,False
3,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,jul,2017,cool,hold,Las Cruces,733.775000,750.125000,699.750000,25.0,False,False,False
4,0c683e9af00af3beb63a8a80e406315b523683d0,jul,2017,cool,auto,Ruidoso,735.434783,736.434783,750.717391,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
159,f99c76c2e2e6d485a427ec1b3fd29b4ed907238c,jul,2021,cool,hold,Albuquerque,739.306122,733.000000,733.000000,50.0,False,False,False
160,fa4c846354d5d50b89f2bbb32307575fa44e32ef,jul,2021,auto,hold,Albuquerque,759.219512,695.560976,645.365854,19.0,False,False,False
161,fbdba8c27abf08db10ea6a2a1c1ba3544a5fcd37,jul,2021,cool,hold,Albuquerque,718.962687,720.671642,720.671642,10.0,True,False,False
162,fcd9e3e40b18915de13a4d63fb6e5ffc4e81fdd1,jul,2021,auto,hold,Albuquerque,767.737374,770.106061,620.000000,17.0,False,False,False


In [133]:
# Write the combined July frame without the (repeating) row index.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/NM", exist_ok=True)
NM_jul.to_csv("Scraper_Output/State_Month_Day/NM/NM_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the raw August 2017 daytime readings for NM.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2017-aug-day-NM.csv")

In [135]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
aug_2017.drop(
    index=aug_2017[
        (aug_2017['TemperatureExpectedCool'] >= 850)
        | (aug_2017['TemperatureExpectedHeat'] >= 850)
        | (aug_2017['TemperatureExpectedCool'] <= 600)
        | (aug_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,fd5688a621c17d37669b21ac97e057b2364b8ed2,2017-08-05 17:20:00 UTC,cool,hold,745,741,741,NM,Albuquerque,0,False,False,False,Gas
4,7db13b190ac0b42d853e92d177944d09167c0f79,2017-08-16 14:40:00 UTC,auto,auto,742,742,672,NM,Albuquerque,20,True,False,False,Gas
5,fd5688a621c17d37669b21ac97e057b2364b8ed2,2017-08-12 14:25:00 UTC,cool,hold,743,741,741,NM,Albuquerque,0,False,False,False,Gas
6,fe54b5a39fa876d401d5b44c800c0321a5445797,2017-08-10 17:45:00 UTC,cool,hold,733,725,725,NM,Albuquerque,15,False,False,False,Gas
8,fe54b5a39fa876d401d5b44c800c0321a5445797,2017-08-28 14:00:00 UTC,auto,auto,766,785,735,NM,Albuquerque,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112431,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2017-08-27 16:25:00 UTC,cool,hold,745,760,760,NM,Carlsbad,5,False,False,True,Electric
112432,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2017-08-27 15:00:00 UTC,cool,hold,733,760,760,NM,Carlsbad,5,False,False,True,Electric
112433,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2017-08-27 15:45:00 UTC,cool,hold,740,760,760,NM,Carlsbad,5,False,False,True,Electric
112434,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2017-08-18 17:10:00 UTC,cool,hold,761,760,760,NM,Carlsbad,5,False,False,True,Electric


In [136]:
# Tag every August 2017 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
aug_2017 = aug_2017.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [138]:
# Average the August 2017 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Export the August 2017 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/aug", exist_ok=True)
aug_2017_ave.to_csv("data/day/NM/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Load the raw August 2018 daytime readings for NM.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2018-aug-day-NM.csv")

In [141]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
aug_2018.drop(
    index=aug_2018[
        (aug_2018['TemperatureExpectedCool'] >= 850)
        | (aug_2018['TemperatureExpectedHeat'] >= 850)
        | (aug_2018['TemperatureExpectedCool'] <= 600)
        | (aug_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b84b557cc11958326b116aae202e69a31d8d7f62,2018-08-14 14:05:00 UTC,auto,hold,774,820,660,NM,Elephant Butte,5,False,False,False,Gas
1,b84b557cc11958326b116aae202e69a31d8d7f62,2018-08-17 13:05:00 UTC,auto,hold,778,820,650,NM,Elephant Butte,5,False,False,False,Gas
2,b84b557cc11958326b116aae202e69a31d8d7f62,2018-08-21 13:20:00 UTC,auto,hold,786,820,640,NM,Elephant Butte,5,False,False,False,Gas
3,b84b557cc11958326b116aae202e69a31d8d7f62,2018-08-12 17:35:00 UTC,auto,hold,754,820,660,NM,Elephant Butte,5,False,False,False,Gas
4,b84b557cc11958326b116aae202e69a31d8d7f62,2018-08-12 16:45:00 UTC,auto,hold,742,820,660,NM,Elephant Butte,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175816,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-08-10 15:25:00 UTC,cool,auto,724,760,760,NM,Carlsbad,5,False,False,True,Electric
175817,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-08-23 15:15:00 UTC,cool,auto,734,760,760,NM,Carlsbad,5,False,False,True,Electric
175818,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-08-10 16:05:00 UTC,cool,auto,726,760,760,NM,Carlsbad,5,False,False,True,Electric
175819,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2018-08-16 13:40:00 UTC,cool,auto,722,760,760,NM,Carlsbad,5,False,False,True,Electric


In [142]:
# Tag every August 2018 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
aug_2018 = aug_2018.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [144]:
# Average the August 2018 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Export the August 2018 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/aug", exist_ok=True)
aug_2018_ave.to_csv("data/day/NM/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Load the raw August 2019 daytime readings for NM.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2019-aug-day-NM.csv")

In [147]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
aug_2019.drop(
    index=aug_2019[
        (aug_2019['TemperatureExpectedCool'] >= 850)
        | (aug_2019['TemperatureExpectedHeat'] >= 850)
        | (aug_2019['TemperatureExpectedCool'] <= 600)
        | (aug_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be4102eaa05968900db9bc045bdd6e710c63d875,2019-08-29 13:35:00 UTC,cool,hold,730,733,733,NM,Albuquerque,50,False,False,False,Gas
1,07205074e862181d57fb995714da5559142dedcf,2019-08-23 18:40:00 UTC,auto,hold,721,722,672,NM,Albuquerque,39,False,False,False,Gas
2,0a966d2478b1d4f1a6eb0859587beb3723abf220,2019-08-25 19:50:00 UTC,auto,hold,708,701,651,NM,Rio Rancho,5,False,False,False,Gas
3,0a966d2478b1d4f1a6eb0859587beb3723abf220,2019-08-30 19:25:00 UTC,auto,hold,726,721,651,NM,Rio Rancho,5,False,False,False,Gas
4,871d6106e45f880bc8a8fc3aadbddc0e2c94824c,2019-08-25 18:10:00 UTC,auto,hold,734,732,672,NM,Rio Rancho,19,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
232957,28aea5ca4686334e0b1cb3ad68a171310bbf2a32,2019-08-31 14:45:00 UTC,cool,auto,699,710,710,NM,Rio Rancho,5,False,False,False,Gas
232958,28aea5ca4686334e0b1cb3ad68a171310bbf2a32,2019-08-30 14:35:00 UTC,cool,hold,700,710,710,NM,Rio Rancho,5,False,False,False,Gas
232959,28aea5ca4686334e0b1cb3ad68a171310bbf2a32,2019-08-30 14:15:00 UTC,cool,hold,695,710,710,NM,Rio Rancho,5,False,False,False,Gas
232960,28aea5ca4686334e0b1cb3ad68a171310bbf2a32,2019-08-30 16:20:00 UTC,cool,hold,715,710,710,NM,Rio Rancho,5,False,False,False,Gas


In [148]:
# Tag every August 2019 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
aug_2019 = aug_2019.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [150]:
# Average the August 2019 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Export the August 2019 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/aug", exist_ok=True)
aug_2019_ave.to_csv("data/day/NM/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Load the raw August 2020 daytime readings for NM.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2020-aug-day-NM.csv")

In [153]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
aug_2020.drop(
    index=aug_2020[
        (aug_2020['TemperatureExpectedCool'] >= 850)
        | (aug_2020['TemperatureExpectedHeat'] >= 850)
        | (aug_2020['TemperatureExpectedCool'] <= 600)
        | (aug_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9736bb54108128592cd425f28e51db2f67e7f9f5,2020-08-12 17:25:00 UTC,cool,auto,702,700,700,NM,Clovis,0,False,False,False,Gas
1,9736bb54108128592cd425f28e51db2f67e7f9f5,2020-08-10 14:40:00 UTC,cool,auto,695,700,700,NM,Clovis,0,False,False,False,Gas
2,9736bb54108128592cd425f28e51db2f67e7f9f5,2020-08-13 14:20:00 UTC,cool,auto,706,700,700,NM,Clovis,0,False,False,False,Gas
3,9736bb54108128592cd425f28e51db2f67e7f9f5,2020-08-11 14:25:00 UTC,cool,auto,700,700,700,NM,Clovis,0,False,False,False,Gas
4,9736bb54108128592cd425f28e51db2f67e7f9f5,2020-08-12 17:00:00 UTC,cool,auto,703,700,700,NM,Clovis,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237421,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-08-31 15:55:00 UTC,cool,hold,684,690,690,NM,Carlsbad,30,False,False,False,Gas
237422,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-08-24 17:15:00 UTC,cool,hold,705,690,690,NM,Carlsbad,30,False,False,False,Gas
237423,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-08-19 15:10:00 UTC,cool,hold,691,690,690,NM,Carlsbad,30,False,False,False,Gas
237424,b2ce2650f520958eb8ced6e24f23c8ea752cc6ba,2020-08-28 12:00:00 UTC,cool,hold,699,690,690,NM,Carlsbad,30,False,False,False,Gas


In [154]:
# Tag every August 2020 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
aug_2020 = aug_2020.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [156]:
# Average the August 2020 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Export the August 2020 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/aug", exist_ok=True)
aug_2020_ave.to_csv("data/day/NM/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [158]:
# List the CSV files in the August folder. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/NM/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once, collect the frames, and concatenate them in a
# single call. Calling pd.concat inside the loop re-copies the accumulated
# frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/NM/aug/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the previous
# behaviour of producing an empty DataFrame when the folder has no CSVs.
NM_aug = pd.concat(frames) if frames else pd.DataFrame()

NM_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,aug,2017,auto,auto,CLOVIS,768.785714,780.000000,700.000000,15.0,False,False,True
1,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,aug,2017,auto,hold,CLOVIS,771.153846,760.000000,680.000000,15.0,False,False,True
2,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,aug,2017,cool,auto,Las Cruces,726.280000,723.560000,697.200000,25.0,False,False,False
3,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,aug,2017,cool,hold,Las Cruces,733.260274,742.109589,711.821918,25.0,False,False,False
4,0c683e9af00af3beb63a8a80e406315b523683d0,aug,2017,cool,hold,Ruidoso,711.875103,788.207610,788.207610,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
285,fbf4eec386d0bbb5c8254b475505ae844b0fc576,aug,2020,cool,auto,Albuquerque,723.518610,718.248139,718.208437,29.0,True,False,False
286,fbf4eec386d0bbb5c8254b475505ae844b0fc576,aug,2020,cool,hold,Albuquerque,730.634945,724.809672,724.809672,29.0,True,False,False
287,fcd9e3e40b18915de13a4d63fb6e5ffc4e81fdd1,aug,2020,auto,hold,Albuquerque,747.212121,743.840909,650.000000,17.0,False,False,False
288,fd5688a621c17d37669b21ac97e057b2364b8ed2,aug,2020,cool,hold,Albuquerque,738.366337,739.405941,739.405941,0.0,False,False,False


In [160]:
# Write the combined August frame without the (repeating) row index.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/NM", exist_ok=True)
NM_aug.to_csv("Scraper_Output/State_Month_Day/NM/NM_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw December 2017 daytime readings for NM.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2017-dec-day-NM.csv")

In [162]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
dec_2017.drop(
    index=dec_2017[
        (dec_2017['TemperatureExpectedCool'] >= 850)
        | (dec_2017['TemperatureExpectedHeat'] >= 850)
        | (dec_2017['TemperatureExpectedCool'] <= 600)
        | (dec_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ff533c5efd35daf49ced3a64c6fbc379fd7ac5e6,2017-12-21 16:10:00 UTC,heat,hold,691,698,698,NM,Albuquerque,15,False,False,False,Gas
1,a56d9a9ca4e57468d79fd6db450d9a3ab47a89e7,2017-12-20 13:30:00 UTC,auto,hold,741,795,745,NM,Las Cruces,45,False,False,False,Gas
2,1cab1273eb5f050b9f7240c2e8ebb6bb5a148839,2017-12-11 14:45:00 UTC,heat,hold,721,725,725,NM,Rio Rancho,20,False,False,False,Gas
3,bb11f43e65f7316674ae308b87a38cf37446e96a,2017-12-12 13:35:00 UTC,auto,hold,719,775,725,NM,Santa Fe,25,False,False,False,Gas
5,fd5688a621c17d37669b21ac97e057b2364b8ed2,2017-12-31 14:55:00 UTC,heat,hold,714,719,719,NM,Albuquerque,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
117576,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-12-12 15:05:00 UTC,heat,hold,756,760,760,NM,Roswell,65,False,False,False,Gas
117577,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-12-31 17:40:00 UTC,heat,hold,763,760,760,NM,Roswell,65,False,False,False,Gas
117578,aeefec474242b71d93784f5678af310d2f9e9a0d,2017-12-12 12:20:00 UTC,heat,hold,759,760,760,NM,Roswell,65,False,False,False,Gas
117579,bbdc89a7cd6ab9abc711b538fdac33e9544aabd3,2017-12-23 17:35:00 UTC,auxHeatOnly,auto,733,760,760,NM,Clovis,75,False,False,True,Electric


In [163]:
# Tag every December 2017 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
dec_2017 = dec_2017.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [165]:
# Average the December 2017 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Export the December 2017 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/dec", exist_ok=True)
dec_2017_ave.to_csv("data/day/NM/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Load the raw December 2018 daytime readings for NM.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2018-dec-day-NM.csv")

In [168]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
dec_2018.drop(
    index=dec_2018[
        (dec_2018['TemperatureExpectedCool'] >= 850)
        | (dec_2018['TemperatureExpectedHeat'] >= 850)
        | (dec_2018['TemperatureExpectedCool'] <= 600)
        | (dec_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,c23ea2557d8988af45696eeb2cc768022b966597,2018-12-28 17:00:00 UTC,heat,auto,721,701,701,NM,Albuquerque,30,True,False,False,Gas
2,09d2e59869d3c2ae3cdfc450f6e63d9f2f5685cc,2018-12-06 15:20:00 UTC,auto,hold,690,757,707,NM,Roswell,38,False,False,True,Electric
4,a56d9a9ca4e57468d79fd6db450d9a3ab47a89e7,2018-12-16 16:55:00 UTC,auto,hold,726,777,727,NM,Las Cruces,45,False,False,False,Gas
5,118d947a8b30956c681cdef1dcb2f25831009997,2018-12-28 13:55:00 UTC,heat,hold,704,704,704,NM,Santa Fe,5,False,False,False,Gas
7,18393b92831b0a51ebcac5bd4ba6b6433199a92e,2018-12-19 18:35:00 UTC,auto,hold,704,731,681,NM,Artesia,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
189080,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2018-12-01 14:35:00 UTC,heat,auto,761,760,760,NM,Roswell,0,False,False,True,Electric
189081,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2018-12-01 17:25:00 UTC,heat,auto,758,760,760,NM,Roswell,0,False,False,True,Electric
189082,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2018-12-01 13:55:00 UTC,heat,auto,754,760,760,NM,Roswell,0,False,False,True,Electric
189083,c39ff740f7a46be6bad2d5a4744b79fcdf0e09c0,2018-12-01 14:50:00 UTC,heat,auto,759,760,760,NM,Roswell,0,False,False,True,Electric


In [169]:
# Tag every December 2018 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
dec_2018 = dec_2018.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [171]:
# Average the December 2018 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Export the December 2018 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/dec", exist_ok=True)
dec_2018_ave.to_csv("data/day/NM/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Load the raw December 2019 daytime readings for NM.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2019-dec-day-NM.csv")

In [174]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
dec_2019.drop(
    index=dec_2019[
        (dec_2019['TemperatureExpectedCool'] >= 850)
        | (dec_2019['TemperatureExpectedHeat'] >= 850)
        | (dec_2019['TemperatureExpectedCool'] <= 600)
        | (dec_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2019-12-08 09:50:00 UTC,auto,hold,728,775,725,NM,1,20,True,False,False,Gas
3,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2019-12-21 13:30:00 UTC,auto,hold,743,795,745,NM,1,20,True,False,False,Gas
6,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2019-12-31 10:55:00 UTC,auto,hold,766,815,765,NM,1,20,True,False,False,Gas
7,fb02b674b40edbbc6475d037b5f73b1fe464e34d,2019-12-03 14:30:00 UTC,auto,hold,669,765,665,NM,Albuquerque,39,True,False,False,Gas
11,54b5cb33754b7116caa7e6f8f8aa11d5c25a3162,2019-12-08 13:00:00 UTC,auto,hold,721,775,725,NM,1,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
215371,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-12-27 16:25:00 UTC,cool,hold,678,770,760,NM,Carlsbad,5,False,False,True,Electric
215372,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-12-29 14:00:00 UTC,cool,hold,632,770,760,NM,Carlsbad,5,False,False,True,Electric
215373,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-12-29 17:00:00 UTC,cool,hold,650,770,760,NM,Carlsbad,5,False,False,True,Electric
215374,e313fefc38bc4af3a0d2e016d1f073d0c8bfdc6c,2019-12-27 17:10:00 UTC,cool,hold,677,770,760,NM,Carlsbad,5,False,False,True,Electric


In [175]:
# Tag every December 2019 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
dec_2019 = dec_2019.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [177]:
# Average the December 2019 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Export the December 2019 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/dec", exist_ok=True)
dec_2019_ave.to_csv("data/day/NM/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Load the raw December 2020 daytime readings for NM.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/NM-day/2020-dec-day-NM.csv")

In [180]:
# Remove predetermined outliers before aggregating: a reading is dropped when
# either expected-temperature column is >= 850 or <= 600.
dec_2020.drop(
    index=dec_2020[
        (dec_2020['TemperatureExpectedCool'] >= 850)
        | (dec_2020['TemperatureExpectedHeat'] >= 850)
        | (dec_2020['TemperatureExpectedCool'] <= 600)
        | (dec_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,0f141439935d7193c2eeb31603d02a6cad179500,2020-12-03 13:10:00 UTC,heat,hold,675,704,682,NM,Albuquerque,30,False,False,False,Gas
2,bff80cf9a50ac35964e74b92cd472f91a1c1b5ad,2020-12-15 15:20:00 UTC,auto,hold,646,715,645,NM,Albuquerque,60,False,False,False,Gas
3,0f141439935d7193c2eeb31603d02a6cad179500,2020-12-07 07:00:00 UTC,heat,hold,682,704,682,NM,Albuquerque,30,False,False,False,Gas
4,16b061612183d102dc014f933af930d1e71a16fc,2020-12-20 17:55:00 UTC,auto,hold,709,775,707,NM,Albuquerque,0,False,False,False,Gas
5,0f141439935d7193c2eeb31603d02a6cad179500,2020-12-03 16:35:00 UTC,heat,hold,679,704,682,NM,Albuquerque,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183432,f19e74fa7dec76716488d15d83d4571beec0901a,2020-12-02 18:25:00 UTC,auto,hold,698,760,700,NM,Albuquerque,30,False,False,False,Gas
183433,f19e74fa7dec76716488d15d83d4571beec0901a,2020-12-11 16:15:00 UTC,auto,auto,699,760,680,NM,Albuquerque,30,False,False,False,Gas
183434,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,2020-12-16 17:30:00 UTC,auto,hold,730,765,722,NM,CLOVIS,15,False,False,True,Electric
183435,e0b2078e787a493fa3168a31387840c98bf95f80,2020-12-17 19:50:00 UTC,auto,hold,715,765,715,NM,Albuquerque,5,False,False,False,Gas


In [181]:
# Tag every December 2020 reading with its year and month (both stored as
# strings) so they can serve as grouping keys.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so the exported
# aggregates are labelled as averages.
dec_2020 = dec_2020.rename(
    columns={
        raw_name: f"{raw_name}_ave"
        for raw_name in (
            "Temperature_ctrl",
            "TemperatureExpectedCool",
            "TemperatureExpectedHeat",
        )
    }
)

In [183]:
# Average the December 2020 readings per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) that cannot be averaged;
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Export the December 2020 averages. The group keys live in the index, so
# index=True writes them out as the leading columns.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NM/dec", exist_ok=True)
dec_2020_ave.to_csv("data/day/NM/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from the month's folder
2. Export as combined CSV

In [185]:
# List the CSV files in the December folder. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/NM/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once, collect the frames, and concatenate them in a
# single call. Calling pd.concat inside the loop re-copies the accumulated
# frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/NM/dec/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the previous
# behaviour of producing an empty DataFrame when the folder has no CSVs.
NM_dec = pd.concat(frames) if frames else pd.DataFrame()

NM_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,dec,2017,heat,auto,Clovis,631.040426,700.076596,663.451064,20.0,False,False,False
1,016512bd92404310d5ad9f1aaf7c4c38f6cd18a7,dec,2017,heat,hold,Clovis,595.000000,700.000000,652.000000,20.0,False,False,False
2,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,dec,2017,auto,hold,CLOVIS,717.725191,780.000000,714.503817,15.0,False,False,True
3,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,dec,2017,heat,auto,Las Cruces,697.403670,742.660550,700.000000,25.0,False,False,False
4,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,dec,2017,heat,hold,Las Cruces,691.593220,714.305085,684.135593,25.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
293,fcef293019a9fff02bdef3c6d2c08c6233b63de7,dec,2020,heat,auto,Corrales,660.636953,671.025932,671.025932,40.0,False,False,False
294,fcef293019a9fff02bdef3c6d2c08c6233b63de7,dec,2020,heat,hold,Corrales,651.000000,665.653333,658.751111,40.0,False,False,False
295,fd5688a621c17d37669b21ac97e057b2364b8ed2,dec,2020,heat,hold,Albuquerque,723.662028,725.133201,725.133201,0.0,False,False,False
296,ff533c5efd35daf49ced3a64c6fbc379fd7ac5e6,dec,2020,heat,auto,Albuquerque,702.873786,752.368932,711.640777,15.0,False,False,False


In [187]:
# Write the combined December frame without the (repeating) row index.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/NM", exist_ok=True)
NM_dec.to_csv("Scraper_Output/State_Month_Day/NM/NM_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined month files from the state's folder
2. Export as combined CSV

In [188]:
# List the combined month CSV files for the state. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/NM/") if f.endswith(".csv"))

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each month file once, collect the frames, and concatenate them in a
# single call. Calling pd.concat inside the loop re-copies the accumulated
# frame on every pass.
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/NM/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so keep the previous
# behaviour of producing an empty DataFrame when the folder has no CSVs.
NM_all = pd.concat(frames) if frames else pd.DataFrame()

NM_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,aug,2017,auto,auto,CLOVIS,768.785714,780.000000,700.000000,15.0,False,False,True
1,034f98fe8f4117d011824b1b5c1cfe823fbc29fb,aug,2017,auto,hold,CLOVIS,771.153846,760.000000,680.000000,15.0,False,False,True
2,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,aug,2017,cool,auto,Las Cruces,726.280000,723.560000,697.200000,25.0,False,False,False
3,09128eee40d1b9752e3aec43ebf9db92c8f7a61d,aug,2017,cool,hold,Las Cruces,733.260274,742.109589,711.821918,25.0,False,False,False
4,0c683e9af00af3beb63a8a80e406315b523683d0,aug,2017,cool,hold,Ruidoso,711.875103,788.207610,788.207610,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1155,fa4c846354d5d50b89f2bbb32307575fa44e32ef,jun,2021,auto,hold,Albuquerque,754.333333,740.000000,690.000000,19.0,False,False,False
1156,fa4c846354d5d50b89f2bbb32307575fa44e32ef,jun,2021,cool,hold,Albuquerque,762.222222,750.444444,748.888889,19.0,False,False,False
1157,fbdba8c27abf08db10ea6a2a1c1ba3544a5fcd37,jun,2021,cool,hold,Albuquerque,707.022779,709.102506,709.064541,10.0,True,False,False
1158,fcd9e3e40b18915de13a4d63fb6e5ffc4e81fdd1,jun,2021,auto,hold,Albuquerque,766.122549,782.500000,620.000000,17.0,False,False,False


In [190]:
# Write the all-months frame for the state without the (repeating) row index.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day", exist_ok=True)
NM_all.to_csv("Scraper_Output/State_Month_Day/NM_all_day.csv", header=True, index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in every monthly frame.
frames_by_label = {
    "jan_2017": jan_2017,
    "jan_2018": jan_2018,
    "jan_2019": jan_2019,
    "jan_2020": jan_2020,
    "jan_2021": jan_2021,
    "feb_2017": feb_2017,
    "feb_2018": feb_2018,
    "feb_2019": feb_2019,
    "feb_2020": feb_2020,
    "feb_2021": feb_2021,
    "jun_2017": jun_2017,
    "jun_2018": jun_2018,
    "jun_2019": jun_2019,
    "jun_2020": jun_2020,
    "jun_2021": jun_2021,
    "jul_2017": jul_2017,
    "jul_2018": jul_2018,
    "jul_2019": jul_2019,
    "jul_2020": jul_2020,
    "jul_2021": jul_2021,
    "aug_2017": aug_2017,
    "aug_2018": aug_2018,
    "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017,
    "dec_2018": dec_2018,
    "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['NM']
Unique jan_2018: ['NM']
Unique jan_2019: ['NM']
Unique jan_2020: ['NM']
Unique jan_2021: ['NM']
Unique feb_2017: ['NM']
Unique feb_2018: ['NM']
Unique feb_2019: ['NM']
Unique feb_2020: ['NM']
Unique feb_2021: ['NM']
Unique jun_2017: ['NM']
Unique jun_2018: ['NM']
Unique jun_2019: ['NM']
Unique jun_2020: ['NM']
Unique jun_2021: ['NM']
Unique jul_2017: ['NM']
Unique jul_2018: ['NM']
Unique jul_2019: ['NM']
Unique jul_2020: ['NM']
Unique jul_2021: ['NM']
Unique aug_2017: ['NM']
Unique aug_2018: ['NM']
Unique aug_2019: ['NM']
Unique aug_2020: ['NM']
Unique dec_2017: ['NM']
Unique dec_2018: ['NM']
Unique dec_2019: ['NM']
Unique dec_2020: ['NM']
