# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 source CSV for NV.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2017-jan-day-NV.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
jan_2017.drop(
    index=jan_2017.loc[
        (jan_2017["TemperatureExpectedCool"] >= 850)
        | (jan_2017["TemperatureExpectedHeat"] >= 850)
        | (jan_2017["TemperatureExpectedCool"] <= 600)
        | (jan_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2017-01-13 19:15:00 UTC,heat,hold,708,711,711,NV,Las Vegas,45,False,False,False,Gas
1,8a6195a317729667e3999b7c30f96729cf43b4f6,2017-01-02 17:15:00 UTC,auto,hold,717,725,675,NV,Henderson,16,False,False,False,Gas
2,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2017-01-06 07:10:00 UTC,auto,hold,724,779,729,NV,Las Vegas,45,False,False,False,Gas
3,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2017-01-13 17:45:00 UTC,heat,hold,732,711,711,NV,Las Vegas,45,False,False,False,Gas
4,89c51e96f72f8fe381645dd53c1483d48102f846,2017-01-14 18:00:00 UTC,heat,auto,777,756,756,NV,Las Vegas,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78718,8773741618b99a66eeb08ae7ce54e71ada5ec8b8,2017-01-30 17:30:00 UTC,auto,auto,698,750,690,NV,Reno,40,False,False,False,Gas
78719,8773741618b99a66eeb08ae7ce54e71ada5ec8b8,2017-01-07 15:15:00 UTC,auto,hold,678,750,680,NV,Reno,40,False,False,False,Gas
78720,8773741618b99a66eeb08ae7ce54e71ada5ec8b8,2017-01-15 14:35:00 UTC,auto,hold,683,750,680,NV,Reno,40,False,False,False,Gas
78721,8773741618b99a66eeb08ae7ce54e71ada5ec8b8,2017-01-02 14:10:00 UTC,auto,auto,657,750,660,NV,Reno,40,False,False,False,Gas


In [4]:
# Tag every row with its year and month labels (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jan_2017_ave = jan_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00344f7902f07e49b84a3135e34648206bad5720,Jan,2017,heat,auto,Henderson,670.739130,730.043478,719.913043,5.0,False,False,False
00344f7902f07e49b84a3135e34648206bad5720,Jan,2017,heat,hold,Henderson,701.152866,728.535032,720.757962,5.0,False,False,False
03554eff92f6007aaa1908c40bb7d336d628465b,Jan,2017,auto,hold,Henderson,676.941667,780.000000,680.000000,0.0,True,False,False
03a2e818806b6c07f5d591f53829291834491613,Jan,2017,heat,auto,Genoa,681.123457,719.580247,682.987654,5.0,False,False,False
03a2e818806b6c07f5d591f53829291834491613,Jan,2017,heat,hold,Genoa,667.958333,668.611111,666.111111,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
f39b641668e1b4f57ad6fabedd81c735f7eef7ee,Jan,2017,heat,auto,Las Vegas,718.031250,740.000000,720.000000,10.0,True,False,False
f39b641668e1b4f57ad6fabedd81c735f7eef7ee,Jan,2017,heat,hold,Las Vegas,719.030769,740.292308,720.815385,10.0,True,False,False
f87046545226224b440e731cc97d0b026a32a0ea,Jan,2017,heat,auto,Winnemucca,727.551724,730.000000,730.000000,5.0,False,False,False
f87046545226224b440e731cc97d0b026a32a0ea,Jan,2017,heat,hold,Winnemucca,702.633880,697.163934,696.950820,5.0,False,False,False


In [7]:
# Write the January 2017 averages; the grouping keys live in the index, so it is saved too.
jan_2017_ave.to_csv(
    path_or_buf="data/day/NV/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the January 2018 source CSV for NV.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2018-jan-day-NV.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
jan_2018.drop(
    index=jan_2018.loc[
        (jan_2018["TemperatureExpectedCool"] >= 850)
        | (jan_2018["TemperatureExpectedHeat"] >= 850)
        | (jan_2018["TemperatureExpectedCool"] <= 600)
        | (jan_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2018-01-08 14:25:00 UTC,auto,hold,701,784,714,NV,Las Vegas,45,True,False,False,Gas
1,b1add2277bac6cee0c624250fb67eb22d906fe3c,2018-01-09 13:45:00 UTC,auto,hold,663,725,665,NV,Sparks,28,False,False,False,Gas
2,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2018-01-04 19:00:00 UTC,auto,hold,730,784,724,NV,Las Vegas,45,True,False,False,Gas
3,4b85d5423996d999ea08ad387b21dfae4e4032bf,2018-01-12 15:25:00 UTC,auto,hold,712,785,715,NV,Las Vegas,28,False,False,False,Gas
4,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2018-01-04 13:55:00 UTC,auto,hold,722,784,724,NV,Las Vegas,45,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
324539,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-01-16 19:25:00 UTC,heat,auto,717,730,730,NV,Reno,60,False,False,False,Gas
324540,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-01-29 18:40:00 UTC,heat,hold,720,730,730,NV,Reno,60,False,False,False,Gas
324541,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-01-28 18:15:00 UTC,heat,hold,726,730,730,NV,Reno,60,False,False,False,Gas
324542,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-01-28 17:40:00 UTC,heat,hold,713,730,730,NV,Reno,60,False,False,False,Gas


In [10]:
# Tag every row with its year and month labels (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jan_2018_ave = jan_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Write the January 2018 averages; the grouping keys live in the index, so it is saved too.
jan_2018_ave.to_csv(
    path_or_buf="data/day/NV/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the January 2019 source CSV for NV.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2019-jan-day-NV.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
jan_2019.drop(
    index=jan_2019.loc[
        (jan_2019["TemperatureExpectedCool"] >= 850)
        | (jan_2019["TemperatureExpectedHeat"] >= 850)
        | (jan_2019["TemperatureExpectedCool"] <= 600)
        | (jan_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-01-04 15:25:00 UTC,auto,hold,707,822,742,NV,Las Vegas,47,False,False,False,Gas
1,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-01-15 17:15:00 UTC,auto,hold,752,822,752,NV,Las Vegas,47,False,False,False,Gas
2,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-01-06 16:05:00 UTC,auto,hold,740,822,742,NV,Las Vegas,47,False,False,False,Gas
3,f8fe62629841926fcb3f846e96d6ec2e55a970d3,2019-01-12 07:25:00 UTC,heat,hold,732,732,651,NV,Reno,38,False,False,False,Gas
4,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-01-05 17:55:00 UTC,auto,hold,728,822,742,NV,Las Vegas,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466607,f32d64a7236d5df766150712efa5fe9589a4bd84,2019-01-14 13:30:00 UTC,auto,hold,700,770,710,NV,Henderson,60,False,False,False,Gas
466608,f32d64a7236d5df766150712efa5fe9589a4bd84,2019-01-13 13:00:00 UTC,auto,hold,706,770,710,NV,Henderson,60,False,False,False,Gas
466609,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-01-01 19:50:00 UTC,heat,hold,650,720,720,NV,Reno,60,False,False,False,Gas
466610,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-01-03 17:05:00 UTC,heat,hold,705,720,720,NV,Reno,60,False,False,False,Gas


In [16]:
# Tag every row with its year and month labels (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jan_2019_ave = jan_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Write the January 2019 averages; the grouping keys live in the index, so it is saved too.
jan_2019_ave.to_csv(
    path_or_buf="data/day/NV/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the January 2020 source CSV for NV.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2020-jan-day-NV.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
jan_2020.drop(
    index=jan_2020.loc[
        (jan_2020["TemperatureExpectedCool"] >= 850)
        | (jan_2020["TemperatureExpectedHeat"] >= 850)
        | (jan_2020["TemperatureExpectedCool"] <= 600)
        | (jan_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,163ea0e17ae20147beee42bc340a7c6f17f386ed,2020-01-24 18:35:00 UTC,auto,auto,661,759,709,NV,Las Vegas,49,True,False,True,Electric
1,163ea0e17ae20147beee42bc340a7c6f17f386ed,2020-01-16 17:40:00 UTC,heat,auto,663,658,638,NV,Las Vegas,49,True,False,True,Electric
2,163ea0e17ae20147beee42bc340a7c6f17f386ed,2020-01-17 15:30:00 UTC,heat,auto,627,716,716,NV,Las Vegas,49,True,False,True,Electric
3,5d871d8822964542aa8f76ec6a8ac4f44e3f2140,2020-01-17 16:45:00 UTC,heat,hold,652,655,655,NV,Reno,45,False,False,False,Gas
4,163ea0e17ae20147beee42bc340a7c6f17f386ed,2020-01-10 18:20:00 UTC,heat,auto,658,628,617,NV,Las Vegas,49,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
696661,f32d64a7236d5df766150712efa5fe9589a4bd84,2020-01-04 18:30:00 UTC,auto,hold,713,760,710,NV,Henderson,60,False,False,False,Gas
696662,f32d64a7236d5df766150712efa5fe9589a4bd84,2020-01-05 16:55:00 UTC,auto,hold,704,760,710,NV,Henderson,60,False,False,False,Gas
696663,f32d64a7236d5df766150712efa5fe9589a4bd84,2020-01-05 16:30:00 UTC,auto,hold,706,760,710,NV,Henderson,60,False,False,False,Gas
696664,f32d64a7236d5df766150712efa5fe9589a4bd84,2020-01-05 13:15:00 UTC,auto,hold,706,760,710,NV,Henderson,60,False,False,False,Gas


In [22]:
# Tag every row with its year and month labels (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jan_2020_ave = jan_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Write the January 2020 averages; the grouping keys live in the index, so it is saved too.
jan_2020_ave.to_csv(
    path_or_buf="data/day/NV/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the January 2021 source CSV for NV.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2021-jan-day-NV.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
jan_2021.drop(
    index=jan_2021.loc[
        (jan_2021["TemperatureExpectedCool"] >= 850)
        | (jan_2021["TemperatureExpectedHeat"] >= 850)
        | (jan_2021["TemperatureExpectedCool"] <= 600)
        | (jan_2021["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-01-09 07:00:00 UTC,heat,hold,690,716,690,NV,Las Vegas,49,False,False,False,Gas
1,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-01-09 19:20:00 UTC,heat,hold,692,716,690,NV,Las Vegas,49,False,False,False,Gas
2,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-01-22 19:25:00 UTC,heat,hold,703,716,700,NV,Las Vegas,49,False,False,False,Gas
3,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2021-01-30 14:50:00 UTC,heat,hold,682,699,699,NV,Las Vegas,45,False,False,False,Gas
4,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-01-08 18:05:00 UTC,heat,hold,683,716,690,NV,Las Vegas,49,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
427238,d180c8e7ed3743bbd614aef512c18bf0cc4beceb,2021-01-09 19:20:00 UTC,heat,hold,695,700,700,NV,Las Vegas,60,False,False,True,Electric
427239,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-01-05 18:35:00 UTC,heat,hold,682,710,710,NV,Reno,60,False,False,False,Gas
427240,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-01-05 18:55:00 UTC,heat,hold,686,710,710,NV,Reno,60,False,False,False,Gas
427241,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-01-03 19:05:00 UTC,heat,hold,697,720,720,NV,Reno,60,False,False,False,Gas


In [28]:
# Tag every row with its year and month labels (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jan_2021_ave = jan_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Write the January 2021 averages; the grouping keys live in the index, so it is saved too.
jan_2021_ave.to_csv(
    path_or_buf="data/day/NV/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV file names in the January output folder.
# os.listdir() returns entries in arbitrary order, so sort the names to make
# the row order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NV/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate a single time. Calling pd.concat inside
# the loop re-copies all accumulated rows on every pass, and concatenating onto
# an empty DataFrame can emit a FutureWarning on recent pandas versions.
frames = []
for file in files:
    df = pd.read_csv("data/day/NV/jan/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the previous result
# (an empty DataFrame) when no files were found.
NV_jan = pd.concat(frames) if frames else pd.DataFrame()

NV_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00344f7902f07e49b84a3135e34648206bad5720,Jan,2017,heat,auto,Henderson,670.739130,730.043478,719.913043,5.0,False,False,False
1,00344f7902f07e49b84a3135e34648206bad5720,Jan,2017,heat,hold,Henderson,701.152866,728.535032,720.757962,5.0,False,False,False
2,03554eff92f6007aaa1908c40bb7d336d628465b,Jan,2017,auto,hold,Henderson,676.941667,780.000000,680.000000,0.0,True,False,False
3,03a2e818806b6c07f5d591f53829291834491613,Jan,2017,heat,auto,Genoa,681.123457,719.580247,682.987654,5.0,False,False,False
4,03a2e818806b6c07f5d591f53829291834491613,Jan,2017,heat,hold,Genoa,667.958333,668.611111,666.111111,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
552,fc28e413c727d775bc743b62ac150cf8eb0eba97,Jan,2021,heat,hold,Las Vegas,745.371429,715.971429,715.742857,19.0,False,False,False
553,fd2a49148eeca159ee79049f97dd0a5120992458,Jan,2021,heat,hold,Las Vegas,693.392723,696.584693,696.584693,0.0,False,False,False
554,fe40e8eac49b06ecdef95f655eba91cdf702b055,Jan,2021,auto,hold,Las Vegas,709.466821,730.607253,650.266975,25.0,True,False,True
555,fe7deb13e47d7caf497fcf4025ee9174843a355b,Jan,2021,heat,hold,Las Vegas,691.500000,680.000000,680.000000,0.0,False,False,False


In [34]:
# Save the combined January frame without its row index.
NV_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NV/NV_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 source CSV for NV.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2017-feb-day-NV.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
feb_2017.drop(
    index=feb_2017.loc[
        (feb_2017["TemperatureExpectedCool"] >= 850)
        | (feb_2017["TemperatureExpectedHeat"] >= 850)
        | (feb_2017["TemperatureExpectedCool"] <= 600)
        | (feb_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8a6195a317729667e3999b7c30f96729cf43b4f6,2017-02-09 07:20:00 UTC,auto,hold,721,725,675,NV,Henderson,16,False,False,False,Gas
1,8a6195a317729667e3999b7c30f96729cf43b4f6,2017-02-03 16:05:00 UTC,auto,auto,718,735,685,NV,Henderson,16,False,False,False,Gas
2,8a6195a317729667e3999b7c30f96729cf43b4f6,2017-02-03 17:00:00 UTC,auto,auto,717,735,685,NV,Henderson,16,False,False,False,Gas
3,8a6195a317729667e3999b7c30f96729cf43b4f6,2017-02-04 15:25:00 UTC,auto,auto,709,735,685,NV,Henderson,16,False,False,False,Gas
4,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-02-27 18:40:00 UTC,cool,hold,659,665,665,NV,Las Vegas,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73846,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-02-26 16:45:00 UTC,heat,hold,701,710,710,NV,Reno,60,False,False,False,Gas
73847,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-02-26 15:45:00 UTC,heat,hold,694,710,710,NV,Reno,60,False,False,False,Gas
73848,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-02-27 19:20:00 UTC,heat,hold,700,710,710,NV,Reno,60,False,False,False,Gas
73849,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-02-27 19:15:00 UTC,heat,hold,700,710,710,NV,Reno,60,False,False,False,Gas


In [37]:
# Tag every row with its year and month labels (both stored as strings).
# NOTE(review): the January cells use a capitalized label ("Jan") while this
# one is lowercase — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
feb_2017_ave = feb_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Write the February 2017 averages; the grouping keys live in the index, so it is saved too.
feb_2017_ave.to_csv(
    path_or_buf="data/day/NV/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the February 2018 source CSV for NV.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2018-feb-day-NV.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
feb_2018.drop(
    index=feb_2018.loc[
        (feb_2018["TemperatureExpectedCool"] >= 850)
        | (feb_2018["TemperatureExpectedHeat"] >= 850)
        | (feb_2018["TemperatureExpectedCool"] <= 600)
        | (feb_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-02-21 17:15:00 UTC,heat,hold,780,778,778,NV,Las Vegas,67,False,False,False,Gas
1,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-02-17 16:20:00 UTC,heat,auto,763,768,768,NV,Las Vegas,67,False,False,False,Gas
2,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-02-18 07:15:00 UTC,heat,hold,762,768,768,NV,Las Vegas,67,False,False,False,Gas
3,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-02-17 18:25:00 UTC,heat,auto,767,768,768,NV,Las Vegas,67,False,False,False,Gas
4,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-02-18 19:45:00 UTC,heat,hold,766,758,758,NV,Las Vegas,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
296573,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-02-24 17:20:00 UTC,heat,hold,704,720,720,NV,Reno,60,False,False,False,Gas
296574,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-02-03 15:50:00 UTC,heat,hold,720,720,720,NV,Reno,60,False,False,False,Gas
296575,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-02-24 19:40:00 UTC,heat,hold,713,720,720,NV,Reno,60,False,False,False,Gas
296576,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-02-23 15:20:00 UTC,heat,hold,706,720,720,NV,Reno,60,False,False,False,Gas


In [43]:
# Tag every row with its year and month labels (both stored as strings).
# NOTE(review): the January cells use a capitalized label ("Jan") while this
# one is lowercase — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
feb_2018_ave = feb_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Write the February 2018 averages; the grouping keys live in the index, so it is saved too.
feb_2018_ave.to_csv(
    path_or_buf="data/day/NV/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the February 2019 source CSV for NV.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2019-feb-day-NV.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
feb_2019.drop(
    index=feb_2019.loc[
        (feb_2019["TemperatureExpectedCool"] >= 850)
        | (feb_2019["TemperatureExpectedHeat"] >= 850)
        | (feb_2019["TemperatureExpectedCool"] <= 600)
        | (feb_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-02-05 14:00:00 UTC,auto,hold,749,822,752,NV,Las Vegas,47,False,False,False,Gas
1,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-02-10 17:15:00 UTC,auto,hold,750,822,752,NV,Las Vegas,47,False,False,False,Gas
2,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-02-21 15:25:00 UTC,auto,hold,722,822,722,NV,Las Vegas,47,False,False,False,Gas
3,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-02-15 16:30:00 UTC,auto,hold,746,822,752,NV,Las Vegas,47,False,False,False,Gas
4,a40bab9e0955baf03e1e29224422c1c20e65d1be,2019-02-13 17:20:00 UTC,auto,hold,745,822,752,NV,Las Vegas,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309801,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-02-26 18:20:00 UTC,heat,hold,703,730,730,NV,Reno,60,False,False,False,Gas
309802,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-02-26 17:35:00 UTC,heat,hold,705,730,730,NV,Reno,60,False,False,False,Gas
309803,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-02-26 19:05:00 UTC,heat,hold,718,730,730,NV,Reno,60,False,False,False,Gas
309804,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-02-22 17:30:00 UTC,heat,hold,682,730,730,NV,Reno,60,False,False,False,Gas


In [49]:
# Tag every row with its year and month labels (both stored as strings).
# NOTE(review): the January cells use a capitalized label ("Jan") while this
# one is lowercase — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
feb_2019_ave = feb_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Write the February 2019 averages; the grouping keys live in the index, so it is saved too.
feb_2019_ave.to_csv(
    path_or_buf="data/day/NV/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the February 2020 source CSV for NV.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2020-feb-day-NV.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
feb_2020.drop(
    index=feb_2020.loc[
        (feb_2020["TemperatureExpectedCool"] >= 850)
        | (feb_2020["TemperatureExpectedHeat"] >= 850)
        | (feb_2020["TemperatureExpectedCool"] <= 600)
        | (feb_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,89ad9d084d5fe281085869d26772df77607ea39a,2020-02-16 19:45:00 UTC,heat,auto,729,795,740,NV,Las Vegas,45,False,False,True,Electric
1,2e00d4da19a8183818f2620e614294c961401411,2020-02-15 17:40:00 UTC,auto,auto,682,695,645,NV,Henderson,18,False,False,False,Gas
2,2e00d4da19a8183818f2620e614294c961401411,2020-02-15 18:50:00 UTC,auto,auto,699,695,645,NV,Henderson,18,False,False,False,Gas
3,89ad9d084d5fe281085869d26772df77607ea39a,2020-02-22 19:40:00 UTC,heat,auto,742,795,750,NV,Las Vegas,45,False,False,True,Electric
4,163ea0e17ae20147beee42bc340a7c6f17f386ed,2020-02-02 16:20:00 UTC,heat,auto,659,657,635,NV,Las Vegas,49,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
632046,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-02-11 16:15:00 UTC,heat,hold,702,730,730,NV,Reno,60,False,False,False,Gas
632047,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-02-10 17:05:00 UTC,heat,hold,662,730,730,NV,Reno,60,False,False,False,Gas
632048,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-02-19 16:10:00 UTC,heat,auto,711,730,730,NV,Reno,60,False,False,False,Gas
632049,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-02-11 17:35:00 UTC,heat,hold,667,730,730,NV,Reno,60,False,False,False,Gas


In [55]:
# Tag every row with its year and month labels (both stored as strings).
# NOTE(review): the January cells use a capitalized label ("Jan") while this
# one is lowercase — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
feb_2020_ave = feb_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Write the February 2020 averages; the grouping keys live in the index, so it is saved too.
feb_2020_ave.to_csv(
    path_or_buf="data/day/NV/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the February 2021 source CSV for NV.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2021-feb-day-NV.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600 (rows with NaN are kept).
# NOTE(review): thresholds are presumably tenths of a degree — confirm units.
feb_2021.drop(
    index=feb_2021.loc[
        (feb_2021["TemperatureExpectedCool"] >= 850)
        | (feb_2021["TemperatureExpectedHeat"] >= 850)
        | (feb_2021["TemperatureExpectedCool"] <= 600)
        | (feb_2021["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bea2354e27e71b4fabd89b6014c78703ebd62f77,2021-02-02 08:50:00 UTC,heat,hold,747,746,746,NV,Las Vegas,69,False,False,True,Electric
1,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-02-01 17:00:00 UTC,heat,hold,695,716,700,NV,Las Vegas,49,False,False,False,Gas
2,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-02-02 19:35:00 UTC,heat,hold,700,716,700,NV,Las Vegas,49,False,False,False,Gas
3,7176be6dd71332e126a4c612d2e5f7ca02251900,2021-02-27 15:35:00 UTC,heat,hold,683,716,690,NV,Las Vegas,49,False,False,False,Gas
4,bea2354e27e71b4fabd89b6014c78703ebd62f77,2021-02-17 10:40:00 UTC,heat,hold,744,746,746,NV,Las Vegas,69,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
350726,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-02-07 18:00:00 UTC,heat,hold,708,730,730,NV,Reno,60,False,False,False,Gas
350727,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-02-07 17:20:00 UTC,heat,hold,713,730,730,NV,Reno,60,False,False,False,Gas
350728,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-02-08 16:25:00 UTC,heat,hold,710,730,730,NV,Reno,60,False,False,False,Gas
350729,499ba0af270b84a720ae870a00765bb93d2b5dcc,2021-02-07 19:00:00 UTC,heat,hold,719,730,730,NV,Reno,60,False,False,False,Gas


In [61]:
# Tag every row with its year and month labels (both stored as strings).
# NOTE(review): the January cells use a capitalized label ("Jan") while this
# one is lowercase — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" ahead of the group-wise mean.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination; the grouping keys become the index.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises
# TypeError on the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
feb_2021_ave = feb_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Write the February 2021 averages; the grouping keys live in the index, so it is saved too.
feb_2021_ave.to_csv(
    path_or_buf="data/day/NV/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV file names in the February output folder.
# os.listdir() returns entries in arbitrary order, so sort the names to make
# the row order of the combined frame reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/NV/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate a single time. Calling pd.concat inside
# the loop re-copies all accumulated rows on every pass, and concatenating onto
# an empty DataFrame can emit a FutureWarning on recent pandas versions.
frames = []
for file in files:
    df = pd.read_csv("data/day/NV/feb/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the previous result
# (an empty DataFrame) when no files were found.
NV_feb = pd.concat(frames) if frames else pd.DataFrame()

NV_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00344f7902f07e49b84a3135e34648206bad5720,feb,2017,heat,auto,Henderson,719.000000,730.263158,729.578947,5.0,False,False,False
1,00344f7902f07e49b84a3135e34648206bad5720,feb,2017,heat,hold,Henderson,712.288889,734.222222,721.866667,5.0,False,False,False
2,010fc940dbb77c83300b1029847d933c3ed6f4c7,feb,2017,heat,hold,Dayton,692.585366,694.943089,694.943089,35.0,False,False,False
3,0507b1bb9804f35c5ba9c5d7da063498290db314,feb,2017,auto,auto,Las Vegas,700.588235,770.000000,681.764706,20.0,True,False,False
4,0507b1bb9804f35c5ba9c5d7da063498290db314,feb,2017,auto,hold,Las Vegas,697.166667,770.000000,681.944444,20.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
503,f8fe62629841926fcb3f846e96d6ec2e55a970d3,feb,2021,auto,hold,Reno,658.666667,760.000000,660.000000,38.0,False,False,False
504,fb9250893bb29a1f67b47d6684498059ac9a7a59,feb,2021,auto,hold,Carson City,653.500000,800.000000,660.000000,40.0,True,False,False
505,fd2a49148eeca159ee79049f97dd0a5120992458,feb,2021,auto,hold,Las Vegas,674.192308,718.846154,649.615385,0.0,False,False,False
506,fd2a49148eeca159ee79049f97dd0a5120992458,feb,2021,heat,hold,Las Vegas,694.610132,695.410793,695.275330,0.0,False,False,False


In [67]:
# Write the combined February file; create the output folder if it is missing.
os.makedirs("Scraper_Output/State_Month_Day/NV", exist_ok=True)
NV_feb.to_csv("Scraper_Output/State_Month_Day/NV/NV_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 "day" extract for NV.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2017-jun-day-NV.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017.loc[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-06-16 18:30:00 UTC,auto,hold,711,705,655,NV,Las Vegas,47,False,False,False,Gas
1,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-06-06 19:45:00 UTC,auto,hold,727,705,655,NV,Las Vegas,47,False,False,False,Gas
2,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-06-19 18:10:00 UTC,auto,hold,736,705,655,NV,Las Vegas,47,False,False,False,Gas
3,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-06-17 15:55:00 UTC,auto,hold,720,715,665,NV,Las Vegas,47,False,False,False,Gas
4,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-06-17 18:10:00 UTC,auto,hold,722,715,665,NV,Las Vegas,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
148860,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-06-05 17:35:00 UTC,cool,hold,663,760,760,NV,Reno,60,False,False,False,Gas
148861,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-06-03 16:15:00 UTC,cool,hold,698,760,760,NV,Reno,60,False,False,False,Gas
148862,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-06-06 18:50:00 UTC,cool,hold,711,760,760,NV,Reno,60,False,False,False,Gas
148863,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-06-05 17:50:00 UTC,cool,hold,664,760,760,NV,Reno,60,False,False,False,Gas


In [70]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jun_2017 = jun_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jun", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jun_2017_ave.to_csv("data/day/NV/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the raw June 2018 "day" extract for NV.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2018-jun-day-NV.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018.loc[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-06-05 15:15:00 UTC,cool,auto,783,789,789,NV,Las Vegas,67,False,False,False,Gas
3,47de93f02454aaf9248254f42a78baff48d0aae5,2018-06-15 14:00:00 UTC,auto,hold,746,740,675,NV,Las Vegas,46,False,False,False,Gas
4,89ad9d084d5fe281085869d26772df77607ea39a,2018-06-15 19:55:00 UTC,cool,hold,773,772,772,NV,Las Vegas,45,False,False,True,Electric
6,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-06-08 15:25:00 UTC,cool,hold,783,787,787,NV,Las Vegas,67,False,False,False,Gas
7,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-06-05 14:35:00 UTC,cool,auto,783,789,789,NV,Las Vegas,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410336,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-06-17 13:35:00 UTC,cool,hold,705,750,750,NV,Reno,60,False,False,False,Gas
410337,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-06-17 15:30:00 UTC,cool,hold,713,750,750,NV,Reno,60,False,False,False,Gas
410338,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-06-08 17:40:00 UTC,cool,hold,696,750,750,NV,Reno,60,False,False,False,Gas
410339,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-06-08 19:45:00 UTC,cool,hold,703,750,750,NV,Reno,60,False,False,False,Gas


In [76]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jun_2018 = jun_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jun", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jun_2018_ave.to_csv("data/day/NV/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the raw June 2019 "day" extract for NV.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2019-jun-day-NV.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019.loc[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-06-17 14:45:00 UTC,cool,hold,807,803,790,NV,Las Vegas,49,True,False,True,Electric
1,f137d9f81cbf66b911eb724011123cb2d9dc9683,2019-06-07 18:55:00 UTC,auto,hold,780,797,622,NV,Las Vegas,67,False,False,False,Gas
2,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2019-06-11 15:55:00 UTC,cool,hold,702,697,697,NV,Las Vegas,45,False,False,False,Gas
3,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-06-17 18:15:00 UTC,cool,hold,788,793,790,NV,Las Vegas,49,True,False,True,Electric
4,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-06-09 16:20:00 UTC,cool,hold,873,693,690,NV,Las Vegas,49,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
600441,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-06-18 17:20:00 UTC,cool,hold,722,730,730,NV,Reno,60,False,False,False,Gas
600442,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-06-18 15:15:00 UTC,cool,hold,724,740,740,NV,Reno,60,False,False,False,Gas
600443,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-06-18 14:40:00 UTC,cool,hold,721,740,740,NV,Reno,60,False,False,False,Gas
600444,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-06-19 15:40:00 UTC,cool,hold,721,740,740,NV,Reno,60,False,False,False,Gas


In [82]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jun_2019 = jun_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jun", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jun_2019_ave.to_csv("data/day/NV/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the raw June 2020 "day" extract for NV.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2020-jun-day-NV.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020.loc[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,548a0832da22b6d7e8bbce958a4b6592e3a56088,2020-06-06 12:25:00 UTC,auto,hold,734,725,675,NV,Las Vegas,26,False,False,False,Gas
1,548a0832da22b6d7e8bbce958a4b6592e3a56088,2020-06-06 13:30:00 UTC,auto,hold,726,725,675,NV,Las Vegas,26,False,False,False,Gas
2,9026957167952ed1c38ca490ae6e85b417fb8a16,2020-06-23 16:30:00 UTC,auto,hold,732,753,714,NV,Gardnerville,48,False,False,False,Gas
3,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2020-06-23 17:55:00 UTC,auto,hold,775,764,694,NV,Las Vegas,45,False,False,False,Gas
4,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2020-06-23 17:10:00 UTC,auto,hold,774,764,694,NV,Las Vegas,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731342,d180c8e7ed3743bbd614aef512c18bf0cc4beceb,2020-06-20 18:15:00 UTC,cool,auto,745,760,760,NV,Las Vegas,60,False,False,True,Electric
731343,d180c8e7ed3743bbd614aef512c18bf0cc4beceb,2020-06-23 19:55:00 UTC,cool,hold,763,760,760,NV,Las Vegas,60,False,False,True,Electric
731344,d180c8e7ed3743bbd614aef512c18bf0cc4beceb,2020-06-23 19:20:00 UTC,cool,hold,768,760,760,NV,Las Vegas,60,False,False,True,Electric
731345,d180c8e7ed3743bbd614aef512c18bf0cc4beceb,2020-06-12 16:45:00 UTC,cool,hold,737,760,760,NV,Las Vegas,60,False,False,True,Electric


In [88]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jun_2020 = jun_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jun", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jun_2020_ave.to_csv("data/day/NV/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the raw June 2021 "day" extract for NV.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2021-jun-day-NV.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021.loc[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4709caad8e74c1c3d88cc740a7b675e71bf8d42e,2021-06-22 15:55:00 UTC,cool,hold,687,685,685,NV,Fernley,18,False,False,False,Gas
1,163ea0e17ae20147beee42bc340a7c6f17f386ed,2021-06-22 07:15:00 UTC,cool,hold,760,758,758,NV,Las Vegas,49,True,False,True,Electric
2,163ea0e17ae20147beee42bc340a7c6f17f386ed,2021-06-28 10:00:00 UTC,cool,hold,773,778,778,NV,Las Vegas,49,True,False,True,Electric
4,163ea0e17ae20147beee42bc340a7c6f17f386ed,2021-06-30 16:05:00 UTC,cool,hold,765,768,768,NV,Las Vegas,49,True,False,True,Electric
5,163ea0e17ae20147beee42bc340a7c6f17f386ed,2021-06-29 11:15:00 UTC,cool,hold,772,778,778,NV,Las Vegas,49,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433863,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-06-21 17:20:00 UTC,cool,hold,764,760,760,NV,Reno,60,False,False,False,Gas
433864,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-06-21 15:45:00 UTC,cool,hold,760,760,760,NV,Reno,60,False,False,False,Gas
433865,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-06-21 19:50:00 UTC,cool,hold,764,760,760,NV,Reno,60,False,False,False,Gas
433866,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-06-29 18:30:00 UTC,cool,hold,764,760,760,NV,Reno,60,False,False,False,Gas


In [94]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jun_2021 = jun_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jun", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jun_2021_ave.to_csv("data/day/NV/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# List the CSV files in this month's folder.
# os.listdir returns entries in arbitrary order, so sort the names to keep the
# row order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/NV/jun/") if f.endswith(".csv"))

# files

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies every previously loaded row on each iteration.
frames = [pd.read_csv("data/day/NV/jun/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the folder holds no CSV files.
NV_jun = pd.concat(frames) if frames else pd.DataFrame()

NV_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00344f7902f07e49b84a3135e34648206bad5720,jun,2017,cool,hold,Henderson,749.330000,780.330000,774.000000,5.0,False,False,False
1,010fc940dbb77c83300b1029847d933c3ed6f4c7,jun,2017,auto,hold,Dayton,711.520000,719.046667,668.853333,35.0,False,False,False
2,010fc940dbb77c83300b1029847d933c3ed6f4c7,jun,2017,cool,hold,Dayton,713.234401,708.981450,708.888702,35.0,False,False,False
3,010fc940dbb77c83300b1029847d933c3ed6f4c7,jun,2017,heat,hold,Dayton,722.887574,722.573964,722.573964,35.0,False,False,False
4,03554eff92f6007aaa1908c40bb7d336d628465b,jun,2017,cool,auto,Henderson,768.780842,769.550073,663.280116,0.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,fe18d199de10fa3d57854831fac95e6922d7175a,jun,2021,auto,hold,Las Vegas,772.225806,754.612903,704.612903,15.0,False,False,False
551,fe40e8eac49b06ecdef95f655eba91cdf702b055,jun,2021,cool,hold,Las Vegas,839.629787,750.218440,742.558865,25.0,True,False,True
552,fe7deb13e47d7caf497fcf4025ee9174843a355b,jun,2021,cool,hold,Las Vegas,792.148148,790.305556,784.046296,0.0,False,False,False
553,fedd357414610f817cb3c7e0e5940a79db58c11a,jun,2021,cool,hold,Las Vegas,760.417910,756.492537,756.298507,20.0,False,False,False


In [100]:
# Write the combined June file; create the output folder if it is missing.
os.makedirs("Scraper_Output/State_Month_Day/NV", exist_ok=True)
NV_jun.to_csv("Scraper_Output/State_Month_Day/NV/NV_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 "day" extract for NV.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2017-jul-day-NV.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017.loc[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-07-24 19:25:00 UTC,auto,hold,727,715,665,NV,Las Vegas,47,False,False,False,Gas
1,47de93f02454aaf9248254f42a78baff48d0aae5,2017-07-30 15:05:00 UTC,auto,hold,727,725,675,NV,Las Vegas,46,False,False,False,Gas
2,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-07-06 17:10:00 UTC,auto,auto,728,705,655,NV,Las Vegas,47,False,False,False,Gas
3,47de93f02454aaf9248254f42a78baff48d0aae5,2017-07-30 19:05:00 UTC,auto,hold,752,795,705,NV,Las Vegas,46,False,False,False,Gas
4,ed96e3527d06df1feb125a9e26e80b480e37d6e7,2017-07-13 16:50:00 UTC,auto,hold,726,715,665,NV,Las Vegas,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192112,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-07-19 15:15:00 UTC,cool,hold,712,750,750,NV,Reno,60,False,False,False,Gas
192113,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-07-19 15:30:00 UTC,cool,hold,713,750,750,NV,Reno,60,False,False,False,Gas
192114,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-07-19 16:05:00 UTC,cool,hold,711,750,750,NV,Reno,60,False,False,False,Gas
192115,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-07-03 15:45:00 UTC,cool,hold,750,750,750,NV,Reno,60,False,False,False,Gas


In [103]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jul_2017 = jul_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jul", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jul_2017_ave.to_csv("data/day/NV/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the raw July 2018 "day" extract for NV.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2018-jul-day-NV.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018.loc[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-07-06 14:00:00 UTC,cool,hold,788,801,801,NV,Las Vegas,67,False,False,False,Gas
1,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-07-06 15:45:00 UTC,cool,hold,802,801,801,NV,Las Vegas,67,False,False,False,Gas
2,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-07-06 14:40:00 UTC,cool,hold,793,801,801,NV,Las Vegas,67,False,False,False,Gas
4,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-07-30 14:10:00 UTC,cool,auto,771,768,768,NV,Las Vegas,67,False,False,False,Gas
5,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-07-18 16:15:00 UTC,cool,hold,791,791,791,NV,Las Vegas,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
477664,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-07-17 17:50:00 UTC,cool,hold,752,750,750,NV,Reno,60,False,False,False,Gas
477665,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-07-18 14:40:00 UTC,cool,hold,742,750,750,NV,Reno,60,False,False,False,Gas
477666,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-07-17 18:40:00 UTC,cool,hold,753,750,750,NV,Reno,60,False,False,False,Gas
477667,6b6e09dfd524a941ef0f371152dba38c9699884e,2018-07-15 13:30:00 UTC,cool,hold,759,760,760,NV,Reno,60,False,False,False,Gas


In [109]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jul_2018 = jul_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jul", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jul_2018_ave.to_csv("data/day/NV/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the raw July 2019 "day" extract for NV.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2019-jul-day-NV.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019.loc[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,89ad9d084d5fe281085869d26772df77607ea39a,2019-07-30 19:50:00 UTC,auto,hold,809,805,680,NV,Las Vegas,45,False,False,True,Electric
1,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-07-17 16:20:00 UTC,cool,auto,795,814,790,NV,Las Vegas,49,True,False,True,Electric
2,4b85d5423996d999ea08ad387b21dfae4e4032bf,2019-07-31 17:15:00 UTC,auto,auto,782,784,692,NV,Las Vegas,28,False,False,False,Gas
3,bea2354e27e71b4fabd89b6014c78703ebd62f77,2019-07-09 14:30:00 UTC,cool,hold,706,704,704,NV,Las Vegas,69,False,False,True,Electric
4,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-07-26 16:35:00 UTC,cool,hold,824,825,775,NV,Las Vegas,49,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
703261,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-07-28 14:25:00 UTC,cool,auto,714,760,760,NV,Reno,60,False,False,False,Gas
703262,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-07-15 13:50:00 UTC,cool,auto,695,760,760,NV,Reno,60,False,False,False,Gas
703263,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-07-09 17:00:00 UTC,cool,hold,676,760,760,NV,Reno,60,False,False,False,Gas
703264,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-07-08 14:55:00 UTC,cool,hold,679,760,760,NV,Reno,60,False,False,False,Gas


In [115]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jul_2019 = jul_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jul", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jul_2019_ave.to_csv("data/day/NV/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the raw July 2020 "day" extract for NV.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2020-jul-day-NV.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020.loc[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2020-07-24 19:25:00 UTC,auto,hold,740,714,664,NV,Las Vegas,45,False,False,False,Gas
1,5d871d8822964542aa8f76ec6a8ac4f44e3f2140,2020-07-31 15:15:00 UTC,cool,hold,727,701,701,NV,Reno,45,False,False,False,Gas
2,86103d43b268da1118e9a321e6673ef4ae8bb2eb,2020-07-24 15:35:00 UTC,auto,hold,727,725,675,NV,Sparks,55,True,False,False,Gas
3,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2020-07-05 07:00:00 UTC,auto,hold,744,744,674,NV,Las Vegas,45,False,False,False,Gas
4,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2020-07-05 07:25:00 UTC,auto,hold,748,744,681,NV,Las Vegas,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
783828,6b6e09dfd524a941ef0f371152dba38c9699884e,2020-07-21 14:40:00 UTC,cool,hold,750,760,760,NV,Reno,60,False,False,False,Gas
783829,6b6e09dfd524a941ef0f371152dba38c9699884e,2020-07-21 15:30:00 UTC,cool,hold,752,760,760,NV,Reno,60,False,False,False,Gas
783830,6b6e09dfd524a941ef0f371152dba38c9699884e,2020-07-21 18:40:00 UTC,cool,hold,764,760,760,NV,Reno,60,False,False,False,Gas
783831,6b6e09dfd524a941ef0f371152dba38c9699884e,2020-07-03 19:20:00 UTC,cool,hold,737,760,760,NV,Reno,60,False,False,False,Gas


In [121]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jul_2020 = jul_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jul", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jul_2020_ave.to_csv("data/day/NV/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the raw July 2021 "day" extract for NV.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2021-jul-day-NV.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected set point (cool or heat) is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021.loc[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2021-07-14 12:35:00 UTC,auto,hold,780,775,725,NV,Las Vegas,45,False,False,False,Gas
1,a40bab9e0955baf03e1e29224422c1c20e65d1be,2021-07-19 16:35:00 UTC,auto,hold,785,800,733,NV,Las Vegas,47,False,False,False,Gas
5,163ea0e17ae20147beee42bc340a7c6f17f386ed,2021-07-05 16:50:00 UTC,cool,hold,758,758,758,NV,Las Vegas,49,True,False,True,Electric
7,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2021-07-14 12:20:00 UTC,auto,hold,773,775,725,NV,Las Vegas,45,False,False,False,Gas
11,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2021-07-14 10:05:00 UTC,auto,hold,773,775,725,NV,Las Vegas,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
425047,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-07-06 13:50:00 UTC,cool,hold,749,750,750,NV,Reno,60,False,False,False,Gas
425048,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-07-28 17:35:00 UTC,cool,hold,739,750,750,NV,Reno,60,False,False,False,Gas
425049,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-07-27 15:50:00 UTC,cool,hold,747,750,750,NV,Reno,60,False,False,False,Gas
425050,6b6e09dfd524a941ef0f371152dba38c9699884e,2021-07-28 14:50:00 UTC,cool,hold,726,750,750,NV,Reno,60,False,False,False,Gas


In [127]:
# Label every row with its year and month (both stored as strings);
# the two columns are used as grouping keys when averaging.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the frame is aggregated.
jul_2021 = jul_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per thermostat / month / year / HVAC mode /
# calendar event / city.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops the
# string columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises TypeError when asked to average them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/NV/jul", exist_ok=True)

# index=True writes the group keys (held in the index) as columns of the CSV.
jul_2021_ave.to_csv("data/day/NV/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# List the CSV files in this month's folder.
# os.listdir returns entries in arbitrary order, so sort the names to keep the
# row order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/NV/jul/") if f.endswith(".csv"))

# files

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each file once and concatenate in a single call; calling pd.concat
# inside the loop re-copies every previously loaded row on each iteration.
frames = [pd.read_csv("data/day/NV/jul/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# frame when the folder holds no CSV files.
NV_jul = pd.concat(frames) if frames else pd.DataFrame()

NV_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00344f7902f07e49b84a3135e34648206bad5720,jul,2017,cool,hold,Henderson,784.250000,810.000000,790.000000,5.0,False,False,False
1,010fc940dbb77c83300b1029847d933c3ed6f4c7,jul,2017,cool,auto,Dayton,729.666667,730.000000,716.153846,35.0,False,False,False
2,010fc940dbb77c83300b1029847d933c3ed6f4c7,jul,2017,cool,hold,Dayton,721.658013,716.035810,716.014324,35.0,False,False,False
3,010fc940dbb77c83300b1029847d933c3ed6f4c7,jul,2017,heat,auto,Dayton,728.000000,741.000000,723.000000,35.0,False,False,False
4,010fc940dbb77c83300b1029847d933c3ed6f4c7,jul,2017,heat,hold,Dayton,729.142857,728.000000,728.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
518,fda925814bdc08b71b5f0dd0ba1f1a6031f92052,jul,2021,cool,hold,North Las Vegas,775.892308,770.123077,769.076923,0.0,False,False,False
519,fe18d199de10fa3d57854831fac95e6922d7175a,jul,2021,auto,hold,Las Vegas,759.942857,754.371429,704.371429,15.0,False,False,False
520,fe40e8eac49b06ecdef95f655eba91cdf702b055,jul,2021,cool,hold,Las Vegas,780.105932,779.604116,779.604116,25.0,True,False,True
521,fe7deb13e47d7caf497fcf4025ee9174843a355b,jul,2021,cool,hold,Las Vegas,792.020833,790.062500,783.104167,0.0,False,False,False


In [133]:
# Save the combined July frame; index=False leaves the row index out of the CSV
NV_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NV/NV_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August "day" extract for NV
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2017-aug-day-NV.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2017.drop(
    index=aug_2017.index[
        aug_2017['TemperatureExpectedCool'].ge(850)
        | aug_2017['TemperatureExpectedHeat'].ge(850)
        | aug_2017['TemperatureExpectedCool'].le(600)
        | aug_2017['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b1add2277bac6cee0c624250fb67eb22d906fe3c,2017-08-12 14:50:00 UTC,auto,auto,731,735,685,NV,Sparks,28,False,False,False,Gas
1,b1add2277bac6cee0c624250fb67eb22d906fe3c,2017-08-12 19:05:00 UTC,auto,auto,739,735,685,NV,Sparks,28,False,False,False,Gas
2,f137d9f81cbf66b911eb724011123cb2d9dc9683,2017-08-26 18:00:00 UTC,cool,hold,778,788,788,NV,Las Vegas,67,False,False,False,Gas
3,b1add2277bac6cee0c624250fb67eb22d906fe3c,2017-08-12 18:35:00 UTC,auto,auto,739,735,685,NV,Sparks,28,False,False,False,Gas
4,b1add2277bac6cee0c624250fb67eb22d906fe3c,2017-08-12 14:05:00 UTC,auto,auto,737,735,685,NV,Sparks,28,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251479,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-08-08 17:00:00 UTC,cool,hold,710,750,750,NV,Reno,60,False,False,False,Gas
251480,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-08-05 14:40:00 UTC,cool,hold,738,760,760,NV,Reno,60,False,False,False,Gas
251481,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-08-05 14:30:00 UTC,cool,hold,737,760,760,NV,Reno,60,False,False,False,Gas
251482,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-08-16 14:45:00 UTC,cool,hold,715,760,760,NV,Reno,60,False,False,False,Gas


In [136]:
# Tag every row with its year and month labels (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the August 2017 averages to disk; index=True stores the group keys in the CSV
aug_2017_ave.to_csv(
    path_or_buf="data/day/NV/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the 2018 August "day" extract for NV
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2018-aug-day-NV.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2018.drop(
    index=aug_2018.index[
        aug_2018['TemperatureExpectedCool'].ge(850)
        | aug_2018['TemperatureExpectedHeat'].ge(850)
        | aug_2018['TemperatureExpectedCool'].le(600)
        | aug_2018['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-08-24 16:20:00 UTC,cool,auto,798,777,777,NV,Las Vegas,67,False,False,False,Gas
1,71409e03d4ead6fa8c5f47cb3f30a3bc9843c3de,2018-08-04 14:20:00 UTC,cool,hold,705,714,700,NV,Henderson,8,False,False,False,Gas
2,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-08-25 15:25:00 UTC,cool,hold,790,791,791,NV,Las Vegas,67,False,False,False,Gas
3,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-08-07 13:00:00 UTC,cool,hold,785,789,789,NV,Las Vegas,67,False,False,False,Gas
4,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-08-29 19:20:00 UTC,cool,auto,810,776,776,NV,Las Vegas,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
465511,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-08-29 16:00:00 UTC,cool,hold,730,760,760,NV,Reno,60,False,False,False,Gas
465512,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-08-28 14:10:00 UTC,cool,hold,698,760,760,NV,Reno,60,False,False,False,Gas
465513,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-08-17 13:55:00 UTC,cool,hold,728,760,760,NV,Reno,60,False,False,False,Gas
465514,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-08-17 13:45:00 UTC,cool,hold,727,760,760,NV,Reno,60,False,False,False,Gas


In [142]:
# Tag every row with its year and month labels (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the August 2018 averages to disk; index=True stores the group keys in the CSV
aug_2018_ave.to_csv(
    path_or_buf="data/day/NV/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the 2019 August "day" extract for NV
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2019-aug-day-NV.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2019.drop(
    index=aug_2019.index[
        aug_2019['TemperatureExpectedCool'].ge(850)
        | aug_2019['TemperatureExpectedHeat'].ge(850)
        | aug_2019['TemperatureExpectedCool'].le(600)
        | aug_2019['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7176be6dd71332e126a4c612d2e5f7ca02251900,2019-08-01 16:05:00 UTC,cool,hold,802,845,790,NV,Las Vegas,49,False,False,False,Gas
1,bea2354e27e71b4fabd89b6014c78703ebd62f77,2019-08-08 19:55:00 UTC,cool,hold,749,734,734,NV,Las Vegas,69,False,False,True,Electric
2,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-08-12 14:00:00 UTC,cool,auto,776,781,780,NV,Las Vegas,49,True,False,True,Electric
3,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-08-05 16:00:00 UTC,cool,auto,785,781,780,NV,Las Vegas,49,True,False,True,Electric
4,bea2354e27e71b4fabd89b6014c78703ebd62f77,2019-08-04 13:45:00 UTC,cool,hold,732,724,724,NV,Las Vegas,69,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
810161,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-08-21 16:50:00 UTC,cool,auto,727,760,760,NV,Reno,60,False,False,False,Gas
810162,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-08-18 16:05:00 UTC,cool,auto,715,760,760,NV,Reno,60,False,False,False,Gas
810163,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-08-19 17:00:00 UTC,cool,auto,712,760,760,NV,Reno,60,False,False,False,Gas
810164,6b6e09dfd524a941ef0f371152dba38c9699884e,2019-08-24 16:15:00 UTC,cool,hold,728,760,760,NV,Reno,60,False,False,False,Gas


In [148]:
# Tag every row with its year and month labels (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the August 2019 averages to disk; index=True stores the group keys in the CSV
aug_2019_ave.to_csv(
    path_or_buf="data/day/NV/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the 2020 August "day" extract for NV
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2020-aug-day-NV.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
aug_2020.drop(
    index=aug_2020.index[
        aug_2020['TemperatureExpectedCool'].ge(850)
        | aug_2020['TemperatureExpectedHeat'].ge(850)
        | aug_2020['TemperatureExpectedCool'].le(600)
        | aug_2020['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,68791c9826b075be55a0f323694b4f2faa5cf406,2020-08-17 19:30:00 UTC,auto,auto,778,774,650,NV,Las Vegas,47,False,False,False,Gas
1,bea2354e27e71b4fabd89b6014c78703ebd62f77,2020-08-16 11:50:00 UTC,cool,hold,720,687,687,NV,Las Vegas,69,False,False,True,Electric
2,47de93f02454aaf9248254f42a78baff48d0aae5,2020-08-10 14:10:00 UTC,auto,hold,780,779,660,NV,Las Vegas,46,False,False,False,Gas
3,bea2354e27e71b4fabd89b6014c78703ebd62f77,2020-08-28 10:20:00 UTC,cool,hold,727,687,687,NV,Las Vegas,69,False,False,True,Electric
4,68791c9826b075be55a0f323694b4f2faa5cf406,2020-08-26 17:50:00 UTC,cool,auto,782,762,650,NV,Las Vegas,47,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
775943,4135e411e91a458a8850fa8f41b385cb499b562a,2020-08-02 19:55:00 UTC,cool,hold,802,800,790,NV,Mesquite,10,False,False,True,Electric
775944,4135e411e91a458a8850fa8f41b385cb499b562a,2020-08-19 16:25:00 UTC,cool,hold,800,800,790,NV,Mesquite,10,False,False,True,Electric
775945,4135e411e91a458a8850fa8f41b385cb499b562a,2020-08-05 14:15:00 UTC,cool,hold,800,800,790,NV,Mesquite,10,False,False,True,Electric
775946,34db5e0d9d757d90e5d4fd98fc11d04fff81d71d,2020-08-04 12:45:00 UTC,cool,hold,757,800,790,NV,North Las Vegas,10,False,False,True,Electric


In [154]:
# Tag every row with its year and month labels (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the August 2020 averages to disk; index=True stores the group keys in the CSV
aug_2020_ave.to_csv(
    path_or_buf="data/day/NV/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export as combined CSV

In [158]:
# Names of the CSV files in the August folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/NV/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies all previously loaded rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/NV/aug/" + file)
    frames.append(df)

# pd.concat([]) raises ValueError, so an empty folder still yields an empty DataFrame
NV_aug = pd.concat(frames) if frames else pd.DataFrame()

NV_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,010fc940dbb77c83300b1029847d933c3ed6f4c7,aug,2017,cool,hold,Dayton,722.963795,712.096130,712.096130,35.0,False,False,False
1,03554eff92f6007aaa1908c40bb7d336d628465b,aug,2017,cool,hold,Henderson,781.343750,780.000000,780.000000,0.0,True,False,False
2,03a2e818806b6c07f5d591f53829291834491613,aug,2017,cool,hold,Genoa,729.851852,727.370370,727.370370,5.0,False,False,False
3,04944a3bcc6d35e801d6835cb3ade04d03ef8c95,aug,2017,cool,auto,Reno,676.399441,674.675978,673.706704,0.0,False,False,False
4,04944a3bcc6d35e801d6835cb3ade04d03ef8c95,aug,2017,cool,hold,Reno,714.921739,740.000000,740.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
991,fe18d199de10fa3d57854831fac95e6922d7175a,aug,2020,auto,auto,Las Vegas,707.800000,680.000000,630.000000,15.0,False,False,False
992,fe18d199de10fa3d57854831fac95e6922d7175a,aug,2020,auto,hold,Las Vegas,739.426667,736.800000,672.400000,15.0,False,False,False
993,fe40e8eac49b06ecdef95f655eba91cdf702b055,aug,2020,cool,hold,Las Vegas,739.898677,754.799835,754.799835,25.0,True,False,True
994,fe7deb13e47d7caf497fcf4025ee9174843a355b,aug,2020,cool,auto,Las Vegas,829.502316,827.612839,780.518862,0.0,False,False,False


In [160]:
# Save the combined August frame; index=False leaves the row index out of the CSV
NV_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NV/NV_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December "day" extract for NV
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2017-dec-day-NV.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2017.drop(
    index=dec_2017.index[
        dec_2017['TemperatureExpectedCool'].ge(850)
        | dec_2017['TemperatureExpectedHeat'].ge(850)
        | dec_2017['TemperatureExpectedCool'].le(600)
        | dec_2017['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2017-12-03 13:25:00 UTC,auto,hold,720,764,684,NV,Las Vegas,45,True,False,False,Gas
1,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2017-12-10 15:30:00 UTC,auto,hold,667,764,684,NV,Las Vegas,45,True,False,False,Gas
2,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2017-12-14 11:35:00 UTC,auto,hold,704,764,714,NV,Las Vegas,45,True,False,False,Gas
3,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2017-12-24 11:50:00 UTC,auto,hold,707,774,704,NV,Las Vegas,45,True,False,False,Gas
4,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2017-12-03 18:50:00 UTC,auto,hold,733,764,684,NV,Las Vegas,45,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317597,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-12-21 15:45:00 UTC,heat,hold,696,730,730,NV,Reno,60,False,False,False,Gas
317598,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-12-08 18:40:00 UTC,heat,hold,724,730,730,NV,Reno,60,False,False,False,Gas
317599,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-12-02 16:40:00 UTC,heat,hold,717,730,730,NV,Reno,60,False,False,False,Gas
317600,499ba0af270b84a720ae870a00765bb93d2b5dcc,2017-12-01 16:40:00 UTC,heat,hold,696,730,730,NV,Reno,60,False,False,False,Gas


In [163]:
# Tag every row with its year and month labels (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the December 2017 averages to disk; index=True stores the group keys in the CSV
dec_2017_ave.to_csv(
    path_or_buf="data/day/NV/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the 2018 December "day" extract for NV
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2018-dec-day-NV.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2018.drop(
    index=dec_2018.index[
        dec_2018['TemperatureExpectedCool'].ge(850)
        | dec_2018['TemperatureExpectedHeat'].ge(850)
        | dec_2018['TemperatureExpectedCool'].le(600)
        | dec_2018['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2018-12-30 16:55:00 UTC,auto,hold,714,755,705,NV,Las Vegas,45,True,False,False,Gas
1,782feaa90ffd5c8dd2cd55fa882be12fcec2eb6a,2018-12-02 17:45:00 UTC,auto,hold,752,802,752,NV,Las Vegas,47,False,False,False,Gas
2,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-12-03 16:45:00 UTC,heat,hold,800,800,800,NV,Las Vegas,67,False,False,False,Gas
3,f8fe62629841926fcb3f846e96d6ec2e55a970d3,2018-12-03 07:25:00 UTC,heat,auto,673,723,654,NV,Reno,38,False,False,False,Gas
4,f137d9f81cbf66b911eb724011123cb2d9dc9683,2018-12-05 19:10:00 UTC,heat,hold,802,800,800,NV,Las Vegas,67,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454012,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-12-03 17:25:00 UTC,heat,hold,658,720,720,NV,Reno,60,False,False,False,Gas
454013,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-12-07 19:50:00 UTC,heat,hold,709,720,720,NV,Reno,60,False,False,False,Gas
454014,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-12-14 19:00:00 UTC,heat,hold,714,720,720,NV,Reno,60,False,False,False,Gas
454015,499ba0af270b84a720ae870a00765bb93d2b5dcc,2018-12-16 17:00:00 UTC,heat,hold,690,720,720,NV,Reno,60,False,False,False,Gas


In [169]:
# Tag every row with its year and month labels (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the December 2018 averages to disk; index=True stores the group keys in the CSV
dec_2018_ave.to_csv(
    path_or_buf="data/day/NV/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the 2019 December "day" extract for NV
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2019-dec-day-NV.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2019.drop(
    index=dec_2019.index[
        dec_2019['TemperatureExpectedCool'].ge(850)
        | dec_2019['TemperatureExpectedHeat'].ge(850)
        | dec_2019['TemperatureExpectedCool'].le(600)
        | dec_2019['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-12-18 19:50:00 UTC,heat,hold,676,781,680,NV,Las Vegas,49,True,False,True,Electric
1,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-12-15 19:05:00 UTC,heat,hold,700,781,698,NV,Las Vegas,49,True,False,True,Electric
2,b773c70ea9b8815c1baff1bf4f0696d6e7e3abd5,2019-12-17 16:10:00 UTC,auto,hold,739,791,741,NV,Las Vegas,45,True,False,False,Gas
3,bbf8ecacbd388e91d3e0b3007480605b2c664e95,2019-12-01 12:25:00 UTC,heat,hold,720,721,721,NV,Las Vegas,45,False,False,False,Gas
4,163ea0e17ae20147beee42bc340a7c6f17f386ed,2019-12-16 19:10:00 UTC,heat,auto,716,781,716,NV,Las Vegas,49,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
719752,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-12-12 17:05:00 UTC,heat,hold,713,730,730,NV,Reno,60,False,False,False,Gas
719753,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-12-27 19:30:00 UTC,heat,hold,708,730,730,NV,Reno,60,False,False,False,Gas
719754,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-12-21 16:40:00 UTC,heat,hold,682,730,730,NV,Reno,60,False,False,False,Gas
719755,499ba0af270b84a720ae870a00765bb93d2b5dcc,2019-12-22 17:35:00 UTC,heat,hold,704,730,730,NV,Reno,60,False,False,False,Gas


In [175]:
# Tag every row with its year and month labels (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the December 2019 averages to disk; index=True stores the group keys in the CSV
dec_2019_ave.to_csv(
    path_or_buf="data/day/NV/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the 2020 December "day" extract for NV
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/NV-day/2020-dec-day-NV.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is dropped in place when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
dec_2020.drop(
    index=dec_2020.index[
        dec_2020['TemperatureExpectedCool'].ge(850)
        | dec_2020['TemperatureExpectedHeat'].ge(850)
        | dec_2020['TemperatureExpectedCool'].le(600)
        | dec_2020['TemperatureExpectedHeat'].le(600)
    ],
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,86103d43b268da1118e9a321e6673ef4ae8bb2eb,2020-12-18 17:35:00 UTC,auto,auto,673,725,675,NV,Sparks,55,True,False,False,Gas
1,7176be6dd71332e126a4c612d2e5f7ca02251900,2020-12-24 14:50:00 UTC,heat,hold,684,716,690,NV,Las Vegas,49,False,False,False,Gas
2,7176be6dd71332e126a4c612d2e5f7ca02251900,2020-12-06 19:25:00 UTC,heat,hold,686,716,690,NV,Las Vegas,49,False,False,False,Gas
3,7176be6dd71332e126a4c612d2e5f7ca02251900,2020-12-21 18:45:00 UTC,heat,hold,689,716,690,NV,Las Vegas,49,False,False,False,Gas
4,83538826fc8da9a8638e7fff3efe03a148872999,2020-12-14 15:25:00 UTC,heat,auto,745,620,620,NV,Sun Valley,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
598654,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-12-10 19:25:00 UTC,heat,hold,690,720,720,NV,Reno,60,False,False,False,Gas
598655,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-12-12 16:10:00 UTC,heat,auto,691,720,720,NV,Reno,60,False,False,False,Gas
598656,499ba0af270b84a720ae870a00765bb93d2b5dcc,2020-12-28 19:00:00 UTC,heat,hold,687,720,720,NV,Reno,60,False,False,False,Gas
598657,6b6e09dfd524a941ef0f371152dba38c9699884e,2020-12-02 16:40:00 UTC,heat,hold,666,740,740,NV,Reno,60,False,False,False,Gas


In [181]:
# Tag every row with its year and month labels (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Mean of the numeric and boolean columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is passed explicitly: pandas >= 2.0 raises a TypeError on the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) instead of
# silently leaving them out of the result.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the December 2020 averages to disk; index=True stores the group keys in the CSV
dec_2020_ave.to_csv(
    path_or_buf="data/day/NV/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export as combined CSV

In [185]:
# Names of the CSV files in the December folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# combined row order reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/NV/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies all previously loaded rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/NV/dec/" + file)
    frames.append(df)

# pd.concat([]) raises ValueError, so an empty folder still yields an empty DataFrame
NV_dec = pd.concat(frames) if frames else pd.DataFrame()

NV_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00007f75565dc03f82d4bb9f18a61421abe7475e,dec,2017,auto,auto,Henderson,696.937500,780.000000,698.368056,30.0,True,False,True
1,00007f75565dc03f82d4bb9f18a61421abe7475e,dec,2017,auto,hold,Henderson,691.836538,780.307692,711.846154,30.0,True,False,True
2,00007f75565dc03f82d4bb9f18a61421abe7475e,dec,2017,heat,hold,Henderson,664.551402,688.757009,688.271028,30.0,True,False,True
3,00344f7902f07e49b84a3135e34648206bad5720,dec,2017,heat,auto,Henderson,700.323529,719.715686,715.882353,5.0,False,False,False
4,00344f7902f07e49b84a3135e34648206bad5720,dec,2017,heat,hold,Henderson,691.009709,713.310680,699.388350,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,fd2a49148eeca159ee79049f97dd0a5120992458,dec,2020,auto,hold,Las Vegas,657.366667,720.000000,659.200000,0.0,False,False,False
1002,fd2a49148eeca159ee79049f97dd0a5120992458,dec,2020,heat,hold,Las Vegas,702.881313,705.818182,705.818182,0.0,False,False,False
1003,fe18d199de10fa3d57854831fac95e6922d7175a,dec,2020,auto,auto,Las Vegas,744.556566,813.272727,751.272727,15.0,False,False,False
1004,fe40e8eac49b06ecdef95f655eba91cdf702b055,dec,2020,auto,hold,Las Vegas,723.590686,778.574346,636.753268,25.0,True,False,True


In [187]:
# Save the combined December frame; index=False leaves the row index out of the CSV
NV_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NV/NV_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Names of the combined month CSV files in the NV output folder.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# combined row order reproducible between runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/NV/") if f.endswith(".csv")
)

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once: calling pd.concat inside the loop
# re-copies all previously loaded rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/NV/" + file)
    frames.append(df)

# pd.concat([]) raises ValueError, so an empty folder still yields an empty DataFrame
NV_all = pd.concat(frames) if frames else pd.DataFrame()

NV_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,010fc940dbb77c83300b1029847d933c3ed6f4c7,aug,2017,cool,hold,Dayton,722.963795,712.096130,712.096130,35.0,False,False,False
1,03554eff92f6007aaa1908c40bb7d336d628465b,aug,2017,cool,hold,Henderson,781.343750,780.000000,780.000000,0.0,True,False,False
2,03a2e818806b6c07f5d591f53829291834491613,aug,2017,cool,hold,Genoa,729.851852,727.370370,727.370370,5.0,False,False,False
3,04944a3bcc6d35e801d6835cb3ade04d03ef8c95,aug,2017,cool,auto,Reno,676.399441,674.675978,673.706704,0.0,False,False,False
4,04944a3bcc6d35e801d6835cb3ade04d03ef8c95,aug,2017,cool,hold,Reno,714.921739,740.000000,740.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3498,fe18d199de10fa3d57854831fac95e6922d7175a,jun,2021,auto,hold,Las Vegas,772.225806,754.612903,704.612903,15.0,False,False,False
3499,fe40e8eac49b06ecdef95f655eba91cdf702b055,jun,2021,cool,hold,Las Vegas,839.629787,750.218440,742.558865,25.0,True,False,True
3500,fe7deb13e47d7caf497fcf4025ee9174843a355b,jun,2021,cool,hold,Las Vegas,792.148148,790.305556,784.046296,0.0,False,False,False
3501,fedd357414610f817cb3c7e0e5940a79db58c11a,jun,2021,cool,hold,Las Vegas,760.417910,756.492537,756.298507,20.0,False,False,False


In [190]:
# Save the all-months NV frame one level above the per-month folder;
# index=False leaves the row index out of the CSV
NV_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/NV_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
monthly_frames = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

for name, frame in monthly_frames.items():
    print(f"Unique {name}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['NV']
Unique jan_2018: ['NV']
Unique jan_2019: ['NV']
Unique jan_2020: ['NV']
Unique jan_2021: ['NV']
Unique feb_2017: ['NV']
Unique feb_2018: ['NV']
Unique feb_2019: ['NV']
Unique feb_2020: ['NV']
Unique feb_2021: ['NV']
Unique jun_2017: ['NV']
Unique jun_2018: ['NV']
Unique jun_2019: ['NV']
Unique jun_2020: ['NV']
Unique jun_2021: ['NV']
Unique jul_2017: ['NV']
Unique jul_2018: ['NV']
Unique jul_2019: ['NV']
Unique jul_2020: ['NV']
Unique jul_2021: ['NV']
Unique aug_2017: ['NV']
Unique aug_2018: ['NV']
Unique aug_2019: ['NV']
Unique aug_2020: ['NV']
Unique dec_2017: ['NV']
Unique dec_2018: ['NV']
Unique dec_2019: ['NV']
Unique dec_2020: ['NV']
