# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 Arizona "day" extract from the large-data folder.
jan_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2017-jan-day-AZ.csv"
)

In [3]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
jan_2017.drop(
    index=jan_2017.loc[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,c83f8f51b0f8fe74fa93197aa4131490767745d6,2017-01-07 17:20:00 UTC,heat,auto,746,740,740,AZ,Glendale,5,False,False,False,Gas
1,3a5912e1270f82a855f99429304913f732e2a4d3,2017-01-01 17:05:00 UTC,auto,auto,728,765,705,AZ,Tucson,15,True,False,False,Gas
2,9e01bad8e6eb04428a88b5b0980df117fd385100,2017-01-14 19:40:00 UTC,heat,auto,706,720,670,AZ,Gilbert,16,False,False,False,Gas
3,823fb61f1e5d46890e35b630beb3389bcbe1f0fa,2017-01-10 17:20:00 UTC,heat,auto,725,740,740,AZ,Bisbee,10,False,False,False,Gas
4,19aa51d82a1e843b5aff3b341dda54e40d11ccdd,2017-01-25 13:50:00 UTC,heat,hold,699,700,700,AZ,Litchfield Park,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
316207,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-01-09 14:35:00 UTC,cool,hold,668,770,770,AZ,Phoenix,65,False,False,False,Gas
316208,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-01-17 13:30:00 UTC,cool,hold,652,770,770,AZ,Phoenix,65,False,False,False,Gas
316209,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-01-09 17:40:00 UTC,cool,hold,664,770,770,AZ,Phoenix,65,False,False,False,Gas
316210,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-01-17 15:45:00 UTC,cool,hold,644,770,770,AZ,Phoenix,65,False,False,False,Gas


In [4]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
jan_2017_ave = (
    jan_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
00abe73a91e4a0012069878eba34b166551cce4d,Jan,2017,auto,auto,Tucson,698.434109,770.000000,700.000000,0.0,False,False,True
00abe73a91e4a0012069878eba34b166551cce4d,Jan,2017,auto,hold,Tucson,698.580952,770.000000,700.000000,0.0,False,False,True
015b901f68620d06275e3b7aa91aaf7712578c24,Jan,2017,auto,auto,Mesa,699.178571,754.714286,704.571429,10.0,False,False,False
015b901f68620d06275e3b7aa91aaf7712578c24,Jan,2017,auto,hold,Mesa,693.359813,748.913551,690.128505,10.0,False,False,False
015b901f68620d06275e3b7aa91aaf7712578c24,Jan,2017,heat,hold,Mesa,688.200000,714.400000,707.200000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fe6c9ebd5f40b39cd1ca42027621edd93ac11ec1,Jan,2017,heat,hold,Gilbert,701.000000,727.222222,723.888889,6.0,False,False,False
ff3e086b5016e9175c0025a60b8f6db90bb1ef6e,Jan,2017,auto,auto,Fort Mohave,713.241546,783.603865,714.681159,25.0,False,False,False
ff3e086b5016e9175c0025a60b8f6db90bb1ef6e,Jan,2017,auto,hold,Fort Mohave,714.615385,786.779487,716.928205,25.0,False,False,False
ffa43b7f70a57be13cf84bc8869b8484ca00b020,Jan,2017,auto,auto,Chandler,676.794118,770.000000,679.705882,20.0,False,False,False


In [7]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/AZ/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the January 2018 Arizona "day" extract from the large-data folder.
jan_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2018-jan-day-AZ.csv"
)

In [9]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
jan_2018.drop(
    index=jan_2018.loc[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,405c5f6f964f93249a4601c63320ff0a3d45e6df,2018-01-10 19:50:00 UTC,heat,hold,717,678,678,AZ,Phoenix,0,False,False,False,Gas
1,b5fe7a5af6568e6a574f3868c8da0399e656e27d,2018-01-18 17:05:00 UTC,auto,hold,739,845,735,AZ,Vail,15,False,False,False,Gas
2,ea2ab7cd11af7014ecc7a481972a29a180107dc7,2018-01-12 17:15:00 UTC,heat,hold,670,665,665,AZ,Tucson,0,False,False,False,Gas
3,9f3d8ac40541cfc2dc20cce41f7cd5c13619a5d6,2018-01-20 08:40:00 UTC,auto,hold,715,767,717,AZ,Tucson,10,True,False,True,Electric
4,176057f2ea00c27b53afeb2b2c8284f13ce865b9,2018-01-28 19:25:00 UTC,auto,hold,745,805,745,AZ,Yuma,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
755086,c90b6b66871e2d1835134af14e3d01688d94fd45,2018-01-03 16:45:00 UTC,auto,auto,711,765,715,AZ,Sahuarita,10,False,False,False,Gas
755087,3c99bb18e329187e7ed68d3f34d80fc2e5200e0d,2018-01-03 17:30:00 UTC,auto,hold,723,765,715,AZ,Surprise,6,False,False,False,Gas
755088,9f3d8ac40541cfc2dc20cce41f7cd5c13619a5d6,2018-01-07 17:55:00 UTC,auto,hold,740,765,715,AZ,Tucson,10,True,False,True,Electric
755089,f6808434082fd5ed72424eaecc77f26daf2b6d2b,2018-01-05 15:35:00 UTC,auto,hold,719,765,715,AZ,Phoenix,17,False,False,False,Gas


In [10]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
jan_2018_ave = (
    jan_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [13]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/AZ/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the January 2019 Arizona "day" extract from the large-data folder.
jan_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2019-jan-day-AZ.csv"
)

In [15]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
jan_2019.drop(
    index=jan_2019.loc[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d89ff6104d444090519e43333b6399bcd935e39b,2019-01-11 14:40:00 UTC,auto,hold,717,765,715,AZ,Tucson,15,False,False,True,Electric
1,5be6f27587f802730ce59c504b6667f7f043b223,2019-01-22 17:25:00 UTC,heat,hold,685,685,685,AZ,Surprise,20,True,False,False,Gas
2,92ddb2b34ea659d91151d9f1a781b7132949706a,2019-01-30 19:50:00 UTC,heat,hold,707,702,702,AZ,Mesa,0,False,False,False,Gas
3,4ba3c60ec596a37b6feed8e6a7b0300baea5ef93,2019-01-09 15:15:00 UTC,heat,hold,690,655,655,AZ,Queen Creek,16,False,False,True,Electric
4,f0dcccda6e9fe0b963e8d2b9997663ed45ddb034,2019-01-14 14:25:00 UTC,auto,auto,747,796,746,AZ,Litchfield Park,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1148230,7192d7b69031b9c712d7b1da9dd7dc648ef4bd17,2019-01-30 07:35:00 UTC,auto,hold,732,760,710,AZ,Phoenix,20,False,False,True,Electric
1148231,7192d7b69031b9c712d7b1da9dd7dc648ef4bd17,2019-01-16 09:20:00 UTC,auto,hold,723,760,710,AZ,Phoenix,20,False,False,True,Electric
1148232,1b941760cdf1a4e0d4b887a71a6fa78c86796db0,2019-01-04 15:55:00 UTC,heat,hold,757,760,760,AZ,Oro Valley,10,True,False,True,Electric
1148234,7ecc29385f3a555f16f7497deb3971a4364f4cdd,2019-01-10 17:25:00 UTC,heat,auto,696,760,700,AZ,Surprise,10,False,False,False,Gas


In [16]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
jan_2019_ave = (
    jan_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [19]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/AZ/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the January 2020 Arizona "day" extract from the large-data folder.
jan_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2020-jan-day-AZ.csv"
)

In [21]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
jan_2020.drop(
    index=jan_2020.loc[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ac3cd242daf926d1590edd256c1d9a02e4d8f3e7,2020-01-11 12:40:00 UTC,heat,hold,680,705,705,AZ,Chandler,0,True,False,False,Gas
1,5a4ad8ff8b6a0bd64323da29f6de3ab5bba31f91,2020-01-17 14:50:00 UTC,auto,hold,725,776,726,AZ,Thatcher,5,True,False,True,Electric
2,5ef65b3d4847473b122c9e91f5ae4aa4a840879e,2020-01-15 19:30:00 UTC,heat,hold,673,665,665,AZ,Mesa,5,False,False,False,Gas
4,0951b1fd22270363781e81814be00d51bc77124d,2020-01-19 15:20:00 UTC,auto,auto,724,765,715,AZ,Tucson,29,False,False,False,Gas
5,0f2432d3a759c78ce4932d9da08a3eb6201e7d35,2020-01-19 18:15:00 UTC,heat,hold,717,685,685,AZ,Phoenix,25,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1413795,98d6abafcccd37d627e0eb12a6d952b6f0eb4310,2020-01-28 15:45:00 UTC,heat,hold,706,760,710,AZ,Tucson,7,False,False,False,Gas
1413796,715de17e9d8f355dfc6677579276748e2c3e02f4,2020-01-01 19:25:00 UTC,auto,hold,712,760,710,AZ,Sun City,47,False,False,True,Electric
1413797,be5ec3fb4f5d19984380a06a56054eb963dde34f,2020-01-12 13:40:00 UTC,heat,hold,760,760,760,AZ,Litchfield Park,0,False,False,False,Gas
1413798,2af6100a8a96b071a6f947555ece78e6295cd626,2020-01-21 19:00:00 UTC,auto,hold,676,760,660,AZ,Phoenix,27,False,False,True,Electric


In [22]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
jan_2020_ave = (
    jan_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [25]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/AZ/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the January 2021 Arizona "day" extract from the large-data folder.
jan_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2021-jan-day-AZ.csv"
)

In [27]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
jan_2021.drop(
    index=jan_2021.loc[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8c617741c974ea3d95df96ae55f9f86aaab35a26,2021-01-27 15:05:00 UTC,heat,hold,671,685,685,AZ,Tucson,25,False,False,False,Gas
1,d7c45ac5a0360fe7053d9b1883c9fd31862522f6,2021-01-23 16:05:00 UTC,auto,hold,727,787,727,AZ,Tempe,0,False,False,True,Electric
2,71baa5afa559fe1dbce98c262bef377272fdabe2,2021-01-12 13:55:00 UTC,heat,hold,657,675,675,AZ,Flagstaff,30,False,False,False,Gas
3,f1f20b2ba0614b999eb9d8dd4c979dfea299bdc9,2021-01-03 15:20:00 UTC,auto,hold,684,767,697,AZ,Phoenix,10,False,False,False,Gas
4,7319ce6572048ac74ef266de139f4576e474249f,2021-01-04 08:15:00 UTC,heat,hold,696,699,699,AZ,Tucson,60,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874904,52e92d3957712db8fd805073e024c90a95817381,2021-01-13 14:35:00 UTC,auto,hold,694,765,705,AZ,Phoenix,50,False,False,True,Electric
874905,0ac042f6fc949edceac5fb1ecda5a16bc4e45b77,2021-01-13 08:30:00 UTC,heat,hold,757,765,765,AZ,stanfield,10,True,False,True,Electric
874906,f84367a74e71563922b6830678b76cce513f8ac1,2021-01-02 19:35:00 UTC,auto,hold,711,765,715,AZ,Avondale,20,False,False,False,Gas
874907,0ac042f6fc949edceac5fb1ecda5a16bc4e45b77,2021-01-12 17:45:00 UTC,heat,hold,765,765,765,AZ,stanfield,10,True,False,True,Electric


In [28]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
jan_2021_ave = (
    jan_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [31]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/AZ/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year aggregate CSVs written for January.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined row order reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/AZ/jan/") if f.endswith(".csv"))

In [33]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file named in `files` and stack the frames with one concat call.
# Growing a DataFrame inside the loop re-copies all earlier rows on each pass,
# and concatenating onto an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/AZ/jan/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# CSV files were found (the result the loop-based version produced).
AZ_jan = pd.concat(frames) if frames else pd.DataFrame()

AZ_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00abe73a91e4a0012069878eba34b166551cce4d,Jan,2017,auto,auto,Tucson,698.434109,770.000000,700.000000,0.0,False,False,True
1,00abe73a91e4a0012069878eba34b166551cce4d,Jan,2017,auto,hold,Tucson,698.580952,770.000000,700.000000,0.0,False,False,True
2,015b901f68620d06275e3b7aa91aaf7712578c24,Jan,2017,auto,auto,Mesa,699.178571,754.714286,704.571429,10.0,False,False,False
3,015b901f68620d06275e3b7aa91aaf7712578c24,Jan,2017,auto,hold,Mesa,693.359813,748.913551,690.128505,10.0,False,False,False
4,015b901f68620d06275e3b7aa91aaf7712578c24,Jan,2017,heat,hold,Mesa,688.200000,714.400000,707.200000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1311,ff3e086b5016e9175c0025a60b8f6db90bb1ef6e,Jan,2021,auto,hold,Fort Mohave,690.630137,780.000000,676.438356,25.0,False,False,False
1312,ffc69aa005f871624e7187d94b86f1d795252fd7,Jan,2021,heat,hold,Phoenix,690.871212,695.078283,695.078283,50.0,True,False,True
1313,ffec90658ad5a300da53b43a28fe991808fb4006,Jan,2021,auto,hold,Sierra Vista,693.427263,799.582909,692.868769,0.0,False,False,False
1314,fffe6c24cd5a508c58628472cd5bd31846ea70e4,Jan,2021,cool,hold,Phoenix,701.000000,650.000000,650.000000,10.0,True,False,False


In [34]:
# Write the combined January frame. to_csv does not create missing folders,
# so make sure the output directory exists first. The row index is omitted.
os.makedirs("Scraper_Output/State_Month_Day/AZ", exist_ok=True)
AZ_jan.to_csv("Scraper_Output/State_Month_Day/AZ/AZ_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 Arizona "day" extract from the large-data folder.
feb_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2017-feb-day-AZ.csv"
)

In [36]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
feb_2017.drop(
    index=feb_2017.loc[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,664aaf4176ce2239ff5bc658ab25fe45f2ddd19e,2017-02-26 18:45:00 UTC,cool,auto,679,680,690,AZ,Chandler,0,False,False,True,Electric
1,0cced87df237140f4072fcd7d1576422d2dbbb0a,2017-02-02 16:55:00 UTC,auto,auto,668,820,640,AZ,Tucson,10,False,False,False,Gas
2,c83f8f51b0f8fe74fa93197aa4131490767745d6,2017-02-03 15:15:00 UTC,heat,hold,743,750,750,AZ,Glendale,5,False,False,False,Gas
3,747c1d901235ece585427c3e4b4b6aed55ee3913,2017-02-05 17:25:00 UTC,heat,auto,673,720,660,AZ,Oro Valley,5,False,False,True,Electric
4,9b22d3a2ff9a7c03d7ddec0f6ebad9928466b325,2017-02-25 16:25:00 UTC,heat,auto,771,770,770,AZ,Arizona City,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
272425,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-02-02 19:45:00 UTC,heat,auto,679,650,650,AZ,Phoenix,65,False,False,False,Gas
272426,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-02-22 14:25:00 UTC,heat,auto,710,650,650,AZ,Phoenix,65,False,False,False,Gas
272427,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-02-05 18:15:00 UTC,heat,auto,652,650,650,AZ,Phoenix,65,False,False,False,Gas
272428,f4204b8ee90aa8930df52207631d03ec09b1f1a4,2017-02-12 14:35:00 UTC,heat,auto,723,650,650,AZ,Phoenix,65,False,False,False,Gas


In [37]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
# NOTE(review): the month label is lowercase "feb" while the January cells use
# "Jan" - confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
feb_2017_ave = (
    feb_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [40]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/AZ/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the February 2018 Arizona "day" extract from the large-data folder.
feb_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2018-feb-day-AZ.csv"
)

In [42]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
feb_2018.drop(
    index=feb_2018.loc[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af019b5d786eb9ad785963329fa216232f30d30e,2018-02-14 15:15:00 UTC,heat,hold,690,788,662,AZ,Phoenix,26,False,False,False,Gas
2,1d4dfb7ff62cc2996605aea93c20797b1dcd717c,2018-02-26 16:00:00 UTC,heat,hold,679,677,677,AZ,Phoenix,57,False,False,True,Electric
3,f5b7e1ec79db6e4430197f9988e2b0b9d6f81347,2018-02-18 15:10:00 UTC,cool,auto,685,718,730,AZ,Cave Creek,20,False,False,False,Gas
4,4f0fefd7929a9b1a58368fd50b62e01c0ca7fc8a,2018-02-11 18:45:00 UTC,heat,auto,716,830,640,AZ,Sun City,47,False,False,True,Electric
5,e1505b480623eb6cca8b3491435fc4e931085c97,2018-02-08 19:10:00 UTC,heat,auto,696,725,675,AZ,Phoenix,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
683460,5cb96df182e0fb88438744dadaf141e4748ab3b4,2018-02-04 18:30:00 UTC,auto,hold,717,765,715,AZ,Thatcher,10,False,False,False,Gas
683461,2623afd8550dd9ae290a7d6d2f63179d36ecdc9a,2018-02-09 07:35:00 UTC,auto,auto,739,765,715,AZ,Tucson,0,False,False,True,Electric
683462,2623afd8550dd9ae290a7d6d2f63179d36ecdc9a,2018-02-12 08:30:00 UTC,auto,auto,743,765,715,AZ,Tucson,0,False,False,True,Electric
683463,5cb96df182e0fb88438744dadaf141e4748ab3b4,2018-02-03 15:00:00 UTC,auto,hold,709,765,715,AZ,Thatcher,10,False,False,False,Gas


In [43]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
# NOTE(review): the month label is lowercase "feb" while the January cells use
# "Jan" - confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
feb_2018_ave = (
    feb_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [46]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/AZ/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the February 2019 Arizona "day" extract from the large-data folder.
feb_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2019-feb-day-AZ.csv"
)

In [48]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
feb_2019.drop(
    index=feb_2019.loc[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a95bce92e10a8a6857ff5f060c831d60e42c85a1,2019-02-25 16:55:00 UTC,auto,hold,729,785,735,AZ,Chandler,0,True,False,False,Gas
1,c8c04c2688597b5133a0d02e6007382b9cdd6b95,2019-02-12 07:20:00 UTC,auto,hold,689,785,695,AZ,Gold Canyon,20,False,False,True,Electric
2,1d4dfb7ff62cc2996605aea93c20797b1dcd717c,2019-02-22 13:30:00 UTC,heat,auto,720,686,706,AZ,Phoenix,57,False,False,True,Electric
3,126f9f43fc99603d36dfedbdc0cbe8632e455d45,2019-02-19 09:10:00 UTC,auto,hold,731,772,702,AZ,Yuma,0,False,False,False,Gas
4,b536e23aa86390a609b6d268dbcb2c5e24b38d90,2019-02-24 15:25:00 UTC,cool,hold,674,712,662,AZ,Tempe,9,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
793068,6d49a6eba7c643bfb165271ffded5dcb473e14b1,2019-02-28 10:00:00 UTC,cool,auto,727,760,760,AZ,Yuma,15,False,False,False,Gas
793069,d1444dc33e577ebac3f1ef2ebee9f45dc20f5cf0,2019-02-16 13:35:00 UTC,auto,hold,677,760,680,AZ,Tucson,68,False,False,False,Gas
793070,eaec5f8513536d6aa30949f96572ad6b9b9a1c63,2019-02-21 13:35:00 UTC,heat,auto,759,760,760,AZ,Peoria,45,True,False,True,Electric
793071,7192d7b69031b9c712d7b1da9dd7dc648ef4bd17,2019-02-08 07:05:00 UTC,auto,hold,711,760,710,AZ,Phoenix,20,False,False,True,Electric


In [49]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
# NOTE(review): the month label is lowercase "feb" while the January cells use
# "Jan" - confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
feb_2019_ave = (
    feb_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [52]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/AZ/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the February 2020 Arizona "day" extract from the large-data folder.
feb_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2020-feb-day-AZ.csv"
)

In [54]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
feb_2020.drop(
    index=feb_2020.loc[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a51a1969ab2e263df9c211cc7a8b37e4c0924ee2,2020-02-10 15:45:00 UTC,heat,hold,724,725,725,AZ,Scottsdale,30,False,False,False,Gas
1,f77103e2b7e215941f1f250a3286a2802e4013c6,2020-02-03 14:30:00 UTC,heat,hold,711,715,715,AZ,Phoenix,69,False,False,False,Gas
2,1dfc4ad22860756eb43f5e0f505f5d07af63da87,2020-02-04 16:40:00 UTC,auto,hold,704,797,701,AZ,Mesa,5,False,False,False,Gas
3,ccdbf4852773530bf6e1374bc14d2c1a21155148,2020-02-14 08:40:00 UTC,heat,auto,692,772,700,AZ,Buckeye,0,True,False,True,Electric
4,ccdbf4852773530bf6e1374bc14d2c1a21155148,2020-02-22 18:45:00 UTC,heat,auto,719,772,700,AZ,Buckeye,0,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1167653,3117da1ff75835d0ba7de43aebb36cf2f3b2258f,2020-02-11 16:30:00 UTC,heat,hold,772,760,760,AZ,Peoria,30,False,False,False,Gas
1167654,1e3f1c09e0ea8c8d5a658c8ffaa7fcf39c00ef26,2020-02-23 17:00:00 UTC,heat,hold,759,760,760,AZ,Goodyear,7,False,False,False,Gas
1167655,8864ec0a418dd37a6e729ade9a6c09281b3baa65,2020-02-27 15:30:00 UTC,auto,hold,696,760,700,AZ,Phoenix,0,False,False,True,Electric
1167656,b02ebaf63ef4c0dc2d44ea0fa2f1950b05246fc6,2020-02-18 19:10:00 UTC,auto,hold,705,760,690,AZ,Chandler,25,False,False,True,Electric


In [55]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
# NOTE(review): the month label is lowercase "feb" while the January cells use
# "Jan" - confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
feb_2020_ave = (
    feb_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [58]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/AZ/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the February 2021 Arizona "day" extract from the large-data folder.
feb_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/AZ-day/2021-feb-day-AZ.csv"
)

In [60]:
# Drop the predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# The four checks are combined into one mask so the frame is edited in a single pass.
feb_2021.drop(
    index=feb_2021.loc[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,442a6c9846193a8f54db23adaea3830054c3c75d,2021-02-27 14:50:00 UTC,auto,hold,756,840,760,AZ,Peoria,9,True,False,False,Gas
1,f5fd44aaddb7f2d604ef7ddc97e23187531d6be1,2021-02-19 08:55:00 UTC,heat,hold,758,757,757,AZ,Yuma,5,False,False,True,Electric
2,04d4e09912e078ee4c318f02c6473aa329d24952,2021-02-08 14:05:00 UTC,heat,hold,703,762,636,AZ,Maricopa,10,False,False,False,Gas
3,92303fda727ee4a870cf7b12957de34912ac0b0a,2021-02-09 14:45:00 UTC,auto,hold,741,793,743,AZ,Scottsdale,10,False,False,True,Electric
4,f5fd44aaddb7f2d604ef7ddc97e23187531d6be1,2021-02-15 10:45:00 UTC,heat,hold,757,757,757,AZ,Yuma,5,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
650909,45da57e128203e5f592a15106401255eb70d565d,2021-02-04 18:30:00 UTC,heat,hold,758,760,760,AZ,Buckeye,7,False,False,False,Gas
650910,2e03eb06fafd5334563db1275fa5f9fe2391453c,2021-02-13 18:10:00 UTC,auto,hold,708,760,710,AZ,Chandler,20,False,False,True,Electric
650911,ca83f0f00cccfacd19af3be18e0e0a3a7c925fd6,2021-02-19 14:00:00 UTC,auto,hold,670,760,690,AZ,Peoria,30,False,False,True,Electric
650912,80467f30cce2c321e9e05405960ce92a377f38bc,2021-02-10 13:35:00 UTC,heat,hold,757,760,760,AZ,San Tan Valley,10,False,False,False,Gas


In [61]:
# Tag every row with its year and month; both columns are used as group keys
# when the readings are averaged.
# NOTE(review): the month label is lowercase "feb" while the January cells use
# "Jan" - confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated output
# is labelled as averages.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for each Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination. numeric_only=True excludes the text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) explicitly; pandas >= 2.0
# raises a TypeError on them instead of dropping them silently.
feb_2021_ave = (
    feb_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [64]:
# Export CSV file.
# to_csv does not create missing folders, so make sure the target directory
# exists first. The index is written because it holds the group keys.
os.makedirs("data/day/AZ/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/AZ/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year aggregate CSVs written for February.
# os.listdir returns names in arbitrary order, so sort them to keep the
# combined row order reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/AZ/feb/") if f.endswith(".csv"))

In [66]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file named in `files` and stack the frames with one concat call.
# Growing a DataFrame inside the loop re-copies all earlier rows on each pass,
# and concatenating onto an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/AZ/feb/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame when no
# CSV files were found (the result the loop-based version produced).
AZ_feb = pd.concat(frames) if frames else pd.DataFrame()

AZ_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00abe73a91e4a0012069878eba34b166551cce4d,feb,2017,auto,hold,Tucson,688.600000,770.000000,680.000000,0.0,False,False,True
1,015b901f68620d06275e3b7aa91aaf7712578c24,feb,2017,auto,hold,Mesa,698.472727,721.672727,658.400000,10.0,False,False,False
2,01e3ae470e88aac33b2483a97f19afafb926128c,feb,2017,auto,auto,Chandler,735.669477,778.387015,727.600337,15.0,False,False,False
3,01e3ae470e88aac33b2483a97f19afafb926128c,feb,2017,auto,hold,Chandler,735.939170,778.408579,727.329114,15.0,False,False,False
4,0222630f87433cb6a60e0fe83c9bc2233a842fce,feb,2017,cool,auto,Phoenix,738.236246,757.640777,753.097087,0.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1127,fe22e9c2412fbf136c42924645795c639968ca3a,feb,2021,heat,hold,Chandler,753.611285,754.023511,754.023511,19.0,True,False,False
1128,fe751bcd59ea39c02069bbc276492daa502ef01d,feb,2021,heat,hold,Gilbert,709.134043,698.534043,698.534043,9.0,False,False,False
1129,ff3e086b5016e9175c0025a60b8f6db90bb1ef6e,feb,2021,auto,hold,Fort Mohave,692.604396,780.000000,693.626374,25.0,False,False,False
1130,ffec90658ad5a300da53b43a28fe991808fb4006,feb,2021,auto,hold,Sierra Vista,689.658872,815.000000,691.040578,0.0,False,False,False


In [67]:
# Write the combined February table for AZ; index=False leaves the row index
# out of the file.
AZ_feb.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/AZ/AZ_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw AZ "day" extract for June 2017.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2017-jun-day-AZ.csv")

In [69]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,cb3d252b40e33c9b8240639fcbd3b27d9a23cc53,2017-06-18 16:40:00 UTC,cool,auto,762,759,729,AZ,Tucson,10,False,False,False,Gas
3,1186bfe43eb8c0463d3033d0ed62173252c60f1c,2017-06-02 11:55:00 UTC,auto,hold,782,785,715,AZ,Phoenix,0,False,False,False,Gas
4,e147eeb021d885f460139447aa11e27eabe23273,2017-06-14 14:20:00 UTC,cool,hold,768,772,772,AZ,Mesa,40,False,False,True,Electric
5,621250a303aedc23ae761ed6d885dc2656ca780d,2017-06-26 14:50:00 UTC,auto,auto,714,715,655,AZ,Show Low,10,False,False,False,Gas
6,0d70caae6cc9824cf120c4e92cf5e78fc7f933b4,2017-06-10 17:55:00 UTC,cool,auto,770,765,715,AZ,Queen Creek,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
559468,6d9610e892ce92158aaf7403a5ec4ee92bb3e53a,2017-06-30 17:05:00 UTC,cool,hold,760,760,760,AZ,Phoenix,30,False,False,True,Electric
559469,01e3ae470e88aac33b2483a97f19afafb926128c,2017-06-24 10:40:00 UTC,cool,auto,770,770,760,AZ,Chandler,15,False,False,False,Gas
559470,a953a8b49c2eb9194208c2263b15f5b14b526393,2017-06-11 17:10:00 UTC,cool,hold,761,760,760,AZ,Peoria,15,False,False,False,Gas
559471,3f39761de407cc12effcc3c99a2bd59d820fd619,2017-06-29 19:00:00 UTC,cool,hold,761,760,760,AZ,Glendale,45,False,False,True,Electric


In [70]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jun_2017 = jun_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [72]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Save the June 2017 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jun_2017_ave.to_csv(
    path_or_buf="data/day/AZ/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw AZ "day" extract for June 2018.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2018-jun-day-AZ.csv")

In [75]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,14936a3e0bf350ef87ccaddad0014c7a4b51da2a,2018-06-07 14:15:00 UTC,cool,auto,679,675,655,AZ,Cave Creek,27,False,False,False,Gas
1,14936a3e0bf350ef87ccaddad0014c7a4b51da2a,2018-06-24 15:00:00 UTC,cool,auto,677,675,655,AZ,Cave Creek,27,False,False,False,Gas
3,b67f4a3b9ed33a01822aa688dcd6816fbf59747c,2018-06-13 17:45:00 UTC,auto,hold,778,775,705,AZ,Gilbert,30,False,False,False,Gas
4,015b901f68620d06275e3b7aa91aaf7712578c24,2018-06-14 17:35:00 UTC,auto,hold,735,735,665,AZ,Mesa,10,False,False,False,Gas
5,40309acd974cf0c593acdcc14f464c0721ca59a7,2018-06-12 14:15:00 UTC,auto,hold,769,765,655,AZ,Chandler,6,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
994831,1186bfe43eb8c0463d3033d0ed62173252c60f1c,2018-06-16 16:55:00 UTC,cool,hold,785,780,760,AZ,Phoenix,0,False,False,False,Gas
994832,63be77a20fbfa6fbe35c3ce0abbc5bde444632c8,2018-06-11 19:40:00 UTC,cool,hold,762,760,760,AZ,Tempe,40,False,False,False,Gas
994833,5df58e41f64ee38403723e3c782e58a24e1644f3,2018-06-19 17:45:00 UTC,cool,hold,764,760,760,AZ,Marana,10,False,False,False,Gas
994834,85aa2e341897348088b381f0bd533354fe1262e8,2018-06-10 15:00:00 UTC,cool,hold,766,760,760,AZ,Peoria,35,True,False,True,Electric


In [76]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jun_2018 = jun_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [78]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Save the June 2018 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jun_2018_ave.to_csv(
    path_or_buf="data/day/AZ/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw AZ "day" extract for June 2019.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2019-jun-day-AZ.csv")

In [81]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a468a2af17424e6cf7a9e56d0221ffcf4c822c39,2019-06-24 19:10:00 UTC,cool,auto,786,820,820,AZ,Chandler,0,False,False,False,Gas
1,cc95eb27a8c93eac6357ca193c380315625a3850,2019-06-07 18:35:00 UTC,cool,hold,793,790,788,AZ,Scottsdale,30,True,False,True,Electric
2,6c24901ddb6616cdbca225d4593a43e8eedc9ab7,2019-06-12 13:55:00 UTC,cool,auto,794,846,842,AZ,Chandler,10,False,False,False,Gas
3,5b35014886264cf14e65d695dbe5b5d483031d73,2019-06-23 16:30:00 UTC,cool,hold,837,846,749,AZ,Tempe,0,False,False,False,Gas
5,80467f30cce2c321e9e05405960ce92a377f38bc,2019-06-14 15:20:00 UTC,auto,hold,782,795,685,AZ,San Tan Valley,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1419202,5298e410369c7cf5a7a5e335d80b650977fa2289,2019-06-30 19:40:00 UTC,cool,hold,766,760,760,AZ,Chandler,9,True,False,True,Electric
1419203,015dc7c3497b4b091cd292a54c3fe1e48b73b2aa,2019-06-01 19:15:00 UTC,cool,hold,761,760,760,AZ,Phoenix,30,False,False,True,Electric
1419204,f93883af48d15a1a6eb7d6d6afff070958c62d7a,2019-06-20 18:00:00 UTC,cool,hold,793,760,760,AZ,Mesa,20,False,False,False,Gas
1419205,1b941760cdf1a4e0d4b887a71a6fa78c86796db0,2019-06-15 12:20:00 UTC,cool,hold,744,760,760,AZ,Oro Valley,10,True,False,True,Electric


In [82]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jun_2019 = jun_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [84]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Save the June 2019 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jun_2019_ave.to_csv(
    path_or_buf="data/day/AZ/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw AZ "day" extract for June 2020.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2020-jun-day-AZ.csv")

In [87]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,425ac3626cf6e1ad590f194d5e1f78fba02b16cd,2020-06-23 15:45:00 UTC,cool,auto,788,785,705,AZ,Chandler,10,True,False,False,Gas
1,1f40eb23680da761bcf80f94387541f9241a9f72,2020-06-12 07:35:00 UTC,cool,hold,762,765,765,AZ,Scottsdale,0,False,False,True,Electric
2,b00f9da3121841492eef01ad8c9fbdece5e6b03e,2020-06-19 13:30:00 UTC,cool,hold,776,779,779,AZ,Tempe,0,False,False,True,Electric
4,0ac042f6fc949edceac5fb1ecda5a16bc4e45b77,2020-06-26 18:25:00 UTC,cool,hold,758,755,755,AZ,stanfield,10,True,False,True,Electric
5,53afc009cce56be1c6ee08f3038c4f3c3dd7ca73,2020-06-04 15:15:00 UTC,auto,hold,786,785,705,AZ,Litchfield Park,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1442780,9ebe523781d0d4ee256551ff0e0a60cb8eb15a4c,2020-06-16 14:55:00 UTC,cool,auto,763,760,760,AZ,Phoenix,0,True,False,True,Electric
1442781,bce12688151da76e5950316b49073323f3d98d94,2020-06-24 16:35:00 UTC,cool,auto,755,760,760,AZ,Tucson,45,False,False,False,Gas
1442782,1186bfe43eb8c0463d3033d0ed62173252c60f1c,2020-06-01 14:45:00 UTC,cool,hold,775,770,760,AZ,Phoenix,0,False,False,False,Gas
1442783,f7c2b6873ba38921239c9aacc222817f0a9df1c2,2020-06-16 17:25:00 UTC,cool,hold,749,760,760,AZ,Phoenix,20,False,False,False,Gas


In [88]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jun_2020 = jun_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [90]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Save the June 2020 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jun_2020_ave.to_csv(
    path_or_buf="data/day/AZ/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw AZ "day" extract for June 2021.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2021-jun-day-AZ.csv")

In [93]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f5fd44aaddb7f2d604ef7ddc97e23187531d6be1,2021-06-10 19:50:00 UTC,cool,hold,761,757,757,AZ,Yuma,5,False,False,True,Electric
1,4fcefbf541c1e66392577df6912269611736c5b0,2021-06-14 12:25:00 UTC,cool,hold,826,830,800,AZ,Gilbert,15,False,False,True,Electric
2,0951b1fd22270363781e81814be00d51bc77124d,2021-06-25 16:50:00 UTC,auto,hold,737,735,665,AZ,Tucson,29,False,False,False,Gas
3,1a39154f7267bea2a58b125d47afb78b6db01d6a,2021-06-16 13:40:00 UTC,cool,hold,760,790,734,AZ,Kingman,0,False,False,False,Gas
4,1a672b2d2d457ef5a28cf32df6f993b9e8f9095b,2021-06-14 14:45:00 UTC,cool,hold,757,753,753,AZ,Bisbee,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
920526,80467f30cce2c321e9e05405960ce92a377f38bc,2021-06-20 18:10:00 UTC,cool,hold,768,765,765,AZ,San Tan Valley,10,False,False,False,Gas
920527,80467f30cce2c321e9e05405960ce92a377f38bc,2021-06-26 16:20:00 UTC,cool,hold,761,765,765,AZ,San Tan Valley,10,False,False,False,Gas
920528,e9726b9c4d69508d33a77d75c05e84f0e2bba5d8,2021-06-01 18:55:00 UTC,cool,hold,765,765,765,AZ,Phoenix,0,False,False,True,Electric
920529,e9726b9c4d69508d33a77d75c05e84f0e2bba5d8,2021-06-15 15:05:00 UTC,cool,hold,761,765,765,AZ,Phoenix,0,False,False,True,Electric


In [94]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jun_2021 = jun_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [96]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Save the June 2021 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jun_2021_ave.to_csv(
    path_or_buf="data/day/AZ/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in every per-year CSV file from this month's folder for the state
2. Export the result as a single combined CSV

In [98]:
# Names of the per-year CSV files for this month. os.listdir returns entries
# in arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/AZ/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and stack them with a single concat call.
# One concat over the whole list avoids re-copying earlier rows on every pass
# and avoids concatenating onto an empty frame, which recent pandas versions
# warn about.
frames = [pd.read_csv("data/day/AZ/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
AZ_jun = pd.concat(frames) if frames else pd.DataFrame()

AZ_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00ab105be10ba80cca71a5e8d83088116fe17003,jun,2017,cool,auto,Tucson,780.433383,784.720808,757.682635,10.0,False,False,False
1,00ab105be10ba80cca71a5e8d83088116fe17003,jun,2017,cool,hold,Tucson,781.662791,782.790698,780.930233,10.0,False,False,False
2,00abe73a91e4a0012069878eba34b166551cce4d,jun,2017,auto,hold,Tucson,765.594444,764.182222,687.833333,0.0,False,False,True
3,01016c682a7be3fa75027c0ebd9edf413cf2459e,jun,2017,cool,auto,Peoria,761.009804,760.294118,689.754902,0.0,False,False,False
4,01016c682a7be3fa75027c0ebd9edf413cf2459e,jun,2017,cool,hold,Peoria,766.282132,767.222571,765.783699,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1253,fe7d60cb7fe777a80cd29f1df2350728a8ac3381,jun,2021,cool,hold,Surprise,747.600000,750.000000,750.000000,0.0,False,False,False
1254,ff2a7e846baca4fde6914b30a69e4f6327d76db5,jun,2021,cool,hold,Phoenix,784.859122,788.730947,783.519630,0.0,False,False,False
1255,ffc69aa005f871624e7187d94b86f1d795252fd7,jun,2021,cool,hold,Phoenix,794.774709,793.793605,786.513081,50.0,True,False,True
1256,ffec90658ad5a300da53b43a28fe991808fb4006,jun,2021,auto,hold,Sierra Vista,756.847215,757.487925,685.766387,0.0,False,False,False


In [100]:
# Write the combined June table for AZ; index=False leaves the row index
# out of the file.
AZ_jun.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/AZ/AZ_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw AZ "day" extract for July 2017.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2017-jul-day-AZ.csv")

In [102]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,f519906973abc35eea9e76bc0fa37def65df8778,2017-07-24 16:15:00 UTC,auto,hold,699,700,640,AZ,Chino Valley,15,False,False,False,Gas
2,31743671a90ceecea076c02a12b72afd28e03fef,2017-07-24 18:15:00 UTC,cool,hold,741,760,760,AZ,Sun City West,15,False,False,False,Gas
4,1e050c07022faad3babc87a30a28538466629590,2017-07-20 19:45:00 UTC,cool,hold,775,770,770,AZ,Litchfield park,15,False,False,True,Electric
5,5d27ef4d268c5bcee4469a6b2ececa65a6f5adf3,2017-07-03 19:05:00 UTC,cool,auto,749,750,680,AZ,Desert Hills,25,False,False,True,Electric
6,4f0fefd7929a9b1a58368fd50b62e01c0ca7fc8a,2017-07-20 17:25:00 UTC,cool,auto,783,830,640,AZ,Sun City,47,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
637629,6dcb7638afa9e750b3f74113a3ebcc2b58772f1f,2017-07-02 19:10:00 UTC,cool,hold,719,720,720,AZ,San Tan Valley,5,False,False,False,Gas
637630,947147f97498df3ec675a6375b3efc0d7472b9f5,2017-07-25 14:10:00 UTC,cool,auto,753,790,640,AZ,San Tan Valley,0,False,False,False,Gas
637631,7a1f42fafd9967610184b75ce8c477548c3dd980,2017-07-13 11:40:00 UTC,cool,hold,784,780,780,AZ,San Tan Valley,10,True,False,False,Gas
637632,7a1f42fafd9967610184b75ce8c477548c3dd980,2017-07-27 11:40:00 UTC,cool,hold,784,780,780,AZ,San Tan Valley,10,True,False,False,Gas


In [103]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jul_2017 = jul_2017.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [105]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Save the July 2017 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jul_2017_ave.to_csv(
    path_or_buf="data/day/AZ/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw AZ "day" extract for July 2018.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2018-jul-day-AZ.csv")

In [108]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2bafaa9268961800b3769c287ee288710b0dd7b7,2018-07-24 15:45:00 UTC,cool,hold,800,685,685,AZ,Gilbert,30,False,False,False,Gas
1,c98c6aef64866d625db4925f4e464eaa2ad62721,2018-07-14 17:45:00 UTC,cool,hold,752,768,738,AZ,Phoenix,20,True,False,True,Electric
2,f140a632c95e3358d828843b8b5559275d622f2c,2018-07-03 14:25:00 UTC,cool,auto,820,820,820,AZ,Chandler,0,True,False,False,Gas
3,54c8534059657d531f8491829a02bccabc7e002f,2018-07-07 16:50:00 UTC,auto,hold,790,795,705,AZ,Gilbert,6,False,False,False,Gas
4,aa969b7f663b62c6a66c7ad2d00fe27a1f2e92ba,2018-07-05 15:35:00 UTC,cool,hold,702,695,695,AZ,Scottsdale,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1125521,a8691bd122326b599780901123d812b26287c1f1,2018-07-10 18:00:00 UTC,cool,auto,764,760,760,AZ,Lake Havasu City,10,False,False,True,Electric
1125522,f0b99206fd4d1382147b8d70a1ef35de1972c60d,2018-07-19 15:25:00 UTC,cool,hold,757,760,760,AZ,Sahuarita,25,False,False,False,Gas
1125523,9db8fee199126f595835859da82784f93742e8d1,2018-07-21 16:45:00 UTC,cool,hold,760,760,760,AZ,Tempe,0,False,False,False,Gas
1125524,f439ecc40f055d4bcf4fcfd020f9ac76d7c99506,2018-07-21 14:55:00 UTC,cool,hold,730,760,760,AZ,Gilbert,20,False,False,False,Gas


In [109]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jul_2018 = jul_2018.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [111]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Save the July 2018 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jul_2018_ave.to_csv(
    path_or_buf="data/day/AZ/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw AZ "day" extract for July 2019.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2019-jul-day-AZ.csv")

In [114]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e8ab3f9281e4b47e63004908f512772635f53361,2019-07-16 14:20:00 UTC,auto,auto,760,800,618,AZ,Flagstaff,5,False,False,False,Gas
1,bb1191aa00ff520c09a63cecc92aba1bf44072d1,2019-07-17 15:30:00 UTC,cool,hold,774,770,766,AZ,Gilbert,5,False,False,False,Gas
2,d6f207d822f8b93da8a7257b5a3c6d0a6b652da4,2019-07-14 16:05:00 UTC,auto,hold,761,760,605,AZ,Prescott Valley,0,False,False,False,Gas
3,fd567104c456f96468cde6628db6d0dca4c940c9,2019-07-04 16:35:00 UTC,auto,hold,763,750,610,AZ,Sahuarita,15,False,False,False,Gas
4,c5f9575896ba4d402c5ad4a30d79fce757499378,2019-07-13 17:45:00 UTC,cool,auto,789,800,768,AZ,Phoenix,30,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1508565,1a39154f7267bea2a58b125d47afb78b6db01d6a,2019-07-16 17:15:00 UTC,cool,auto,822,830,760,AZ,Kingman,0,False,False,False,Gas
1508566,1b941760cdf1a4e0d4b887a71a6fa78c86796db0,2019-07-18 16:50:00 UTC,cool,auto,764,760,760,AZ,Oro Valley,10,True,False,True,Electric
1508567,efebef567ff3dd92fbc5816fa35ec010c08de832,2019-07-24 16:25:00 UTC,cool,hold,755,760,760,AZ,Phoenix,10,False,False,False,Gas
1508568,341ded0d08168fcb6d28b91d7640f936dfa91d38,2019-07-11 14:35:00 UTC,cool,hold,764,760,760,AZ,glendale,20,False,False,False,Gas


In [115]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jul_2019 = jul_2019.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [117]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Save the July 2019 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jul_2019_ave.to_csv(
    path_or_buf="data/day/AZ/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw AZ "day" extract for July 2020.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2020-jul-day-AZ.csv")

In [120]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1697e33768e33a3eba2cdd4464849a40f6f3ebcc,2020-07-10 19:30:00 UTC,cool,hold,781,781,781,AZ,Avondale,0,True,False,True,Electric
1,0ea7bcc48ce9a7dfc94e145d807f7d4cbc7e687e,2020-07-18 18:10:00 UTC,auto,hold,763,765,685,AZ,Sedona,29,True,False,False,Gas
2,4488951eac06434a289e3b0133ecbb01c17113f8,2020-07-01 14:40:00 UTC,auto,hold,746,735,685,AZ,Buckeye,0,True,False,True,Electric
3,63c57939a5b432f80fadb8501dcbfaab9c1ed4e2,2020-07-17 13:45:00 UTC,auto,hold,688,700,614,AZ,Sahuarita,15,True,False,False,Gas
4,4b56fe9b3c45ba417c432211405e5cfccbbeb452,2020-07-12 17:25:00 UTC,cool,auto,758,762,672,AZ,Surprise,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1504954,cbff3b7a1a08ff17cfc7b97d6b3eaa0461148f51,2020-07-31 15:35:00 UTC,cool,hold,761,760,760,AZ,Scottsdale,0,False,False,False,Gas
1504955,c6b5a22e0e26e02336b5770e78a2b82597c0cee2,2020-07-10 17:55:00 UTC,cool,auto,772,760,760,AZ,Chandler,10,False,False,True,Electric
1504956,028dd9fd947eb85d85eafe758967d10e3f3f39e8,2020-07-25 19:10:00 UTC,cool,hold,775,760,760,AZ,Tempe,30,False,False,True,Electric
1504957,09f3dca8198990b9d2a91a2a71148338b3fa6a83,2020-07-16 16:50:00 UTC,cool,hold,762,760,760,AZ,Queen Creek,10,False,False,False,Gas


In [121]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jul_2020 = jul_2020.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [123]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Save the July 2020 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jul_2020_ave.to_csv(
    path_or_buf="data/day/AZ/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw AZ "day" extract for July 2021.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2021-jul-day-AZ.csv")

In [126]:
# Drop setpoint outliers ahead of the monthly averaging: a row is removed
# (in place) when either TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021.query(
        "TemperatureExpectedCool >= 850 or TemperatureExpectedHeat >= 850"
        " or TemperatureExpectedCool <= 600 or TemperatureExpectedHeat <= 600"
    ).index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0004d19902138a2152dbbaeb5f71d652551f23b7,2021-07-20 14:20:00 UTC,auto,hold,742,745,645,AZ,Tucson,20,False,False,False,Gas
1,80467f30cce2c321e9e05405960ce92a377f38bc,2021-07-01 18:55:00 UTC,cool,hold,766,765,765,AZ,San Tan Valley,10,False,False,False,Gas
2,85677b42fe74484c21b9fe7c7d8188aad018e304,2021-07-23 14:10:00 UTC,auto,hold,786,780,649,AZ,Mesa,39,True,False,False,Gas
3,4ad398cb1b6109ab5b5ffa77ef59959a25d4308f,2021-07-02 19:25:00 UTC,cool,hold,799,800,712,AZ,Surprise,15,False,False,False,Gas
4,3f36cfc86c1f4d9e3aeec7d993e47bd935eee9e0,2021-07-21 16:35:00 UTC,auto,hold,737,730,610,AZ,Phoenix,39,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
881628,98a2ae963be11439b5c112055d0eb7d5ece804ba,2021-07-12 18:20:00 UTC,cool,hold,752,760,760,AZ,Buckeye,17,False,False,False,Gas
881629,f1c616a72d276a71c989687091e2524140078322,2021-07-27 14:55:00 UTC,cool,hold,752,760,760,AZ,Mesa,0,True,False,True,Electric
881630,e8a0700ccb39c5956b4ffeea3e440bd34a9d2dc7,2021-07-20 13:50:00 UTC,cool,hold,763,760,760,AZ,Buckeye,5,False,False,True,Electric
881631,8563313be1404e512485f7a4419744190cd7fc83,2021-07-20 09:00:00 UTC,cool,hold,754,760,760,AZ,Chandler,45,True,False,True,Electric


In [127]:
# Tag every row with its period (both stored as strings) so the rows stay
# identifiable after the monthly files are combined.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" so the values produced by
# the upcoming group-wise mean are labelled as averages.
jul_2021 = jul_2021.rename(
    columns=dict(
        Temperature_ctrl="Temperature_ctrl_ave",
        TemperatureExpectedCool="TemperatureExpectedCool_ave",
        TemperatureExpectedHeat="TemperatureExpectedHeat_ave",
    )
)

In [129]:
# Average the numeric columns per thermostat / month / year / mode / event / city.
# numeric_only=True is explicit because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises
# TypeError when asked to average those instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Save the July 2021 averages. The group keys live in the index, so the
# index is written out together with the data columns.
jul_2021_ave.to_csv(
    path_or_buf="data/day/AZ/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in every per-year CSV file from this month's folder for the state
2. Export the result as a single combined CSV

In [131]:
# Names of the per-year CSV files for this month. os.listdir returns entries
# in arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/AZ/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and stack them with a single concat call.
# One concat over the whole list avoids re-copying earlier rows on every pass
# and avoids concatenating onto an empty frame, which recent pandas versions
# warn about.
frames = [pd.read_csv("data/day/AZ/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when the
# folder holds no CSV files.
AZ_jul = pd.concat(frames) if frames else pd.DataFrame()

AZ_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00ab105be10ba80cca71a5e8d83088116fe17003,jul,2017,cool,auto,Tucson,789.124172,795.200331,780.149007,10.0,False,False,False
1,00ab105be10ba80cca71a5e8d83088116fe17003,jul,2017,cool,hold,Tucson,777.244898,777.530612,777.346939,10.0,False,False,False
2,00abe73a91e4a0012069878eba34b166551cce4d,jul,2017,auto,auto,Tucson,762.216667,756.666667,683.333333,0.0,False,False,True
3,00abe73a91e4a0012069878eba34b166551cce4d,jul,2017,auto,hold,Tucson,765.257048,762.363184,676.230514,0.0,False,False,True
4,01016c682a7be3fa75027c0ebd9edf413cf2459e,jul,2017,cool,hold,Peoria,767.103746,767.233429,766.662824,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1221,fe751bcd59ea39c02069bbc276492daa502ef01d,jul,2021,cool,hold,Gilbert,772.022325,772.220725,709.993682,9.0,False,False,False
1222,ff2a7e846baca4fde6914b30a69e4f6327d76db5,jul,2021,cool,hold,Phoenix,794.257252,798.941221,786.237405,0.0,False,False,False
1223,ffc69aa005f871624e7187d94b86f1d795252fd7,jul,2021,cool,hold,Phoenix,797.165756,794.816029,783.870674,50.0,True,False,True
1224,ffec90658ad5a300da53b43a28fe991808fb4006,jul,2021,auto,hold,Sierra Vista,752.119628,752.822300,680.000000,0.0,False,False,False


In [133]:
# Write the combined month file without the row index; create the output
# folder first so the cell also runs on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/AZ", exist_ok=True)
AZ_jul.to_csv("Scraper_Output/State_Month_Day/AZ/AZ_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 day-level export for AZ.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2017-aug-day-AZ.csv")

# aug_2017.head()  # uncomment to preview

In [135]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
aug_2017.drop(
    index=aug_2017[
        (aug_2017["TemperatureExpectedCool"] >= 850)
        | (aug_2017["TemperatureExpectedHeat"] >= 850)
        | (aug_2017["TemperatureExpectedCool"] <= 600)
        | (aug_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b4a23bcbdeda88fe06692dbac3148deaa9871e95,2017-08-20 19:35:00 UTC,auto,hold,790,785,735,AZ,Phoenix,0,False,False,False,Gas
1,6818c05b83307ffaabd0fa8e3f81df9d7afd6d40,2017-08-09 16:10:00 UTC,cool,hold,683,705,705,AZ,Phoenix,70,False,False,True,Electric
2,f432001783f1362a57e97ef3a959e6e3d42b0a70,2017-08-12 16:40:00 UTC,cool,hold,777,765,765,AZ,Phoenix,15,False,False,False,Gas
3,80cd08f9d15226657414f467a0a84c8d6fe9994e,2017-08-13 14:05:00 UTC,cool,hold,789,785,785,AZ,Phoenix,15,False,False,False,Gas
4,b8a96b5d5de04f18d98fd153368b3469c03ba654,2017-08-19 16:05:00 UTC,cool,auto,762,797,667,AZ,Scottsdale,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
653621,eea1a8dd2b5f5a31877d9be93f05d3069dd79f2b,2017-08-16 15:55:00 UTC,cool,auto,756,760,760,AZ,Buckeye,10,False,False,True,Electric
653622,3355007b4db5b4b7812ef09d71c3df920c4d99a7,2017-08-11 13:40:00 UTC,cool,hold,758,760,760,AZ,Scottsdale,5,False,False,True,Electric
653623,8569729795b5c3d9a5165c301651884d5a3b1311,2017-08-07 17:05:00 UTC,cool,auto,778,780,760,AZ,Peoria,0,False,False,False,Gas
653624,b69d1addc33fab4713d505e909d7871235d66939,2017-08-14 15:25:00 UTC,cool,hold,751,760,760,AZ,Cave Creek,0,True,False,False,Gas


In [136]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
aug_2017 = aug_2017.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
aug_2017_ave = aug_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/aug", exist_ok=True)
aug_2017_ave.to_csv("data/day/AZ/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Load the August 2018 day-level export for AZ.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2018-aug-day-AZ.csv")

# aug_2018.head()  # uncomment to preview

In [141]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
aug_2018.drop(
    index=aug_2018[
        (aug_2018["TemperatureExpectedCool"] >= 850)
        | (aug_2018["TemperatureExpectedHeat"] >= 850)
        | (aug_2018["TemperatureExpectedCool"] <= 600)
        | (aug_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1f539e5f414bc936f626b439b92663afd5c1c0dc,2018-08-11 15:40:00 UTC,cool,auto,776,780,715,AZ,Chandler,15,False,False,False,Gas
1,40fe0ad2c29d4fa1f16443a2d1ce30642276805a,2018-08-08 19:15:00 UTC,cool,hold,777,789,789,AZ,Phoenix,0,True,False,False,Gas
3,dcbc9be1ef503fcca9989bf30e24f0bce0688d33,2018-08-04 07:55:00 UTC,cool,hold,742,665,665,AZ,Scottsdale,0,True,False,True,Electric
5,2f1441ae5ba99f821d7ec9eeac1255d41e14827d,2018-08-09 14:10:00 UTC,cool,hold,721,729,729,AZ,Phoenix,39,True,False,True,Electric
6,1d6bdc097e50dcf69a5d14a9fb29fd9cf37e9daf,2018-08-30 18:50:00 UTC,auto,hold,798,804,696,AZ,Gilbert,6,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085032,ca5cca779687803d7140569bb2b539e113d235a1,2018-08-25 19:15:00 UTC,cool,hold,756,760,760,AZ,Prescott Valley,10,False,False,False,Gas
1085033,8fcf121e649ebf697caa49b3795602008c137558,2018-08-26 16:15:00 UTC,cool,hold,759,760,760,AZ,Tucson,10,False,False,False,Gas
1085034,65e9024cbb0e78abb70a28b31c1ef32c92635b0b,2018-08-20 16:25:00 UTC,cool,hold,755,760,760,AZ,Tucson,15,True,False,False,Gas
1085035,5f37a90ab75318f95f477eceeea7997fbecfe523,2018-08-10 17:35:00 UTC,cool,hold,756,760,760,AZ,Tempe,25,False,False,True,Electric


In [142]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
aug_2018 = aug_2018.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
aug_2018_ave = aug_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/aug", exist_ok=True)
aug_2018_ave.to_csv("data/day/AZ/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Load the August 2019 day-level export for AZ.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2019-aug-day-AZ.csv")

# aug_2019.head()  # uncomment to preview

In [147]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
aug_2019.drop(
    index=aug_2019[
        (aug_2019["TemperatureExpectedCool"] >= 850)
        | (aug_2019["TemperatureExpectedHeat"] >= 850)
        | (aug_2019["TemperatureExpectedCool"] <= 600)
        | (aug_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,615966cb5d0c6b63c9687da35e01a7df502713ca,2019-08-20 15:10:00 UTC,auto,auto,782,785,735,AZ,Tucson,69,False,False,False,Gas
2,b0c61cfce30760006ad4e6375155c98fd14255b1,2019-08-13 13:30:00 UTC,cool,hold,725,725,725,AZ,Tucson,0,False,False,False,Gas
3,ca5cca779687803d7140569bb2b539e113d235a1,2019-08-18 14:50:00 UTC,cool,hold,768,764,764,AZ,Prescott Valley,10,False,False,False,Gas
5,82cec3808a8aec4c1d3a83ac95de458d4a1806e0,2019-08-17 19:40:00 UTC,auto,hold,762,762,712,AZ,Queen Creek,0,False,False,False,Gas
6,12662f413188b006f2769990f117b74c518eca82,2019-08-13 17:05:00 UTC,cool,hold,753,745,745,AZ,Mesa,35,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1488340,a55a645c8eab67fc47ab4926ca485c8f3210d951,2019-08-11 12:05:00 UTC,cool,auto,758,760,760,AZ,Goodyear,9,False,False,False,Gas
1488341,a020371cea6e8a05029f83acce0591a487ed3be5,2019-08-21 18:35:00 UTC,cool,hold,756,760,760,AZ,Mesa,0,False,False,True,Electric
1488342,efebef567ff3dd92fbc5816fa35ec010c08de832,2019-08-09 14:00:00 UTC,cool,hold,759,760,760,AZ,Phoenix,10,False,False,False,Gas
1488343,f1df213f9d1456838cd95bd588fcb5591ebc66ae,2019-08-04 13:55:00 UTC,cool,hold,763,760,760,AZ,Rio Verde,0,False,False,True,Electric


In [148]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
aug_2019 = aug_2019.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
aug_2019_ave = aug_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/aug", exist_ok=True)
aug_2019_ave.to_csv("data/day/AZ/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Load the August 2020 day-level export for AZ.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2020-aug-day-AZ.csv")

# aug_2020.head()  # uncomment to preview

In [153]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
aug_2020.drop(
    index=aug_2020[
        (aug_2020["TemperatureExpectedCool"] >= 850)
        | (aug_2020["TemperatureExpectedHeat"] >= 850)
        | (aug_2020["TemperatureExpectedCool"] <= 600)
        | (aug_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,52e92d3957712db8fd805073e024c90a95817381,2020-08-24 15:00:00 UTC,auto,hold,755,745,695,AZ,Phoenix,50,False,False,True,Electric
1,425ac3626cf6e1ad590f194d5e1f78fba02b16cd,2020-08-26 18:45:00 UTC,cool,auto,792,790,748,AZ,Chandler,10,True,False,False,Gas
2,cbff3b7a1a08ff17cfc7b97d6b3eaa0461148f51,2020-08-24 17:00:00 UTC,cool,auto,777,780,749,AZ,Scottsdale,0,False,False,False,Gas
3,a41499894359eb09406fab6c64e2d9dd7f9cd252,2020-08-26 16:15:00 UTC,cool,hold,766,765,765,AZ,Mesa,0,False,False,True,Electric
4,1f40eb23680da761bcf80f94387541f9241a9f72,2020-08-18 15:05:00 UTC,cool,hold,705,698,698,AZ,Scottsdale,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1534300,341ded0d08168fcb6d28b91d7640f936dfa91d38,2020-08-25 17:55:00 UTC,cool,hold,760,760,760,AZ,glendale,20,False,False,False,Gas
1534301,0e04bc911366aef7ce21b65505d1f6015b181d5a,2020-08-25 16:15:00 UTC,cool,hold,761,760,760,AZ,Phoenix,35,True,False,True,Electric
1534302,c6962c2b095020186e2df5390f5f0e779c0f6e36,2020-08-04 19:05:00 UTC,cool,auto,790,760,760,AZ,Chandler,0,False,False,False,Gas
1534303,bb1191aa00ff520c09a63cecc92aba1bf44072d1,2020-08-06 15:05:00 UTC,cool,hold,759,760,760,AZ,Gilbert,5,False,False,False,Gas


In [154]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
aug_2020 = aug_2020.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
aug_2020_ave = aug_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/aug", exist_ok=True)
aug_2020_ave.to_csv("data/day/AZ/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every yearly average CSV in this month's folder
2. Export as one combined CSV for the month

In [158]:
# List the yearly CSV exports for this month. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/AZ/aug/") if f.endswith(".csv"))

# files

In [159]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file and concatenate once; growing a DataFrame with
# pd.concat inside a loop re-copies all earlier rows on every pass.
# Each file keeps its own row index, as before.
AZ_aug = (
    pd.concat([pd.read_csv("data/day/AZ/aug/" + name) for name in files])
    if files
    else pd.DataFrame()  # nothing to combine: same empty frame as before
)

AZ_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00ab105be10ba80cca71a5e8d83088116fe17003,aug,2017,cool,auto,Tucson,785.218147,796.067568,778.753861,10.0,False,False,False
1,00ab105be10ba80cca71a5e8d83088116fe17003,aug,2017,cool,hold,Tucson,781.499365,787.621347,781.158831,10.0,False,False,False
2,00abe73a91e4a0012069878eba34b166551cce4d,aug,2017,auto,auto,Tucson,752.400000,750.000000,680.000000,0.0,False,False,True
3,00abe73a91e4a0012069878eba34b166551cce4d,aug,2017,auto,hold,Tucson,752.829225,749.693662,690.000000,0.0,False,False,True
4,01016c682a7be3fa75027c0ebd9edf413cf2459e,aug,2017,cool,hold,Peoria,777.000000,770.000000,770.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2289,ff3e086b5016e9175c0025a60b8f6db90bb1ef6e,aug,2020,auto,hold,Fort Mohave,760.490566,753.600000,685.196226,25.0,False,False,False
2290,ffc69aa005f871624e7187d94b86f1d795252fd7,aug,2020,cool,hold,Phoenix,815.188772,811.806909,790.000000,50.0,True,False,True
2291,ffec90658ad5a300da53b43a28fe991808fb4006,aug,2020,auto,auto,Sierra Vista,766.198135,767.090909,690.000000,0.0,False,False,False
2292,ffec90658ad5a300da53b43a28fe991808fb4006,aug,2020,auto,hold,Sierra Vista,759.710340,760.000000,690.000000,0.0,False,False,False


In [160]:
# Write the combined month file without the row index; create the output
# folder first so the cell also runs on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/AZ", exist_ok=True)
AZ_aug.to_csv("Scraper_Output/State_Month_Day/AZ/AZ_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 day-level export for AZ.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2017-dec-day-AZ.csv")

# dec_2017.head()  # uncomment to preview

In [162]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
dec_2017.drop(
    index=dec_2017[
        (dec_2017["TemperatureExpectedCool"] >= 850)
        | (dec_2017["TemperatureExpectedHeat"] >= 850)
        | (dec_2017["TemperatureExpectedCool"] <= 600)
        | (dec_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7fdbd7a865e15e8e491e6750ee9c4117ce9847d7,2017-12-22 15:15:00 UTC,heat,auto,666,754,686,AZ,Phoenix,25,False,False,True,Electric
1,b5fe7a5af6568e6a574f3868c8da0399e656e27d,2017-12-12 17:10:00 UTC,heat,hold,760,755,755,AZ,Vail,15,False,False,False,Gas
3,14936a3e0bf350ef87ccaddad0014c7a4b51da2a,2017-12-30 19:35:00 UTC,cool,auto,683,695,645,AZ,Cave Creek,27,False,False,False,Gas
4,c90b6b66871e2d1835134af14e3d01688d94fd45,2017-12-14 12:35:00 UTC,auto,hold,724,755,705,AZ,Sahuarita,10,False,False,False,Gas
6,6820a043eade62efd830673676f29040e1310722,2017-12-10 18:50:00 UTC,heat,hold,709,741,741,AZ,Cave Creek,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711486,86e7e6e7c2079a537bba14707aba2d945ebbefaf,2017-12-15 19:35:00 UTC,auto,hold,693,765,665,AZ,Phoenix,40,True,False,True,Electric
711487,2623afd8550dd9ae290a7d6d2f63179d36ecdc9a,2017-12-09 19:05:00 UTC,auto,auto,717,765,715,AZ,Tucson,0,False,False,True,Electric
711488,66a0c5209e2c4c317b9e858613671086fed7f17f,2017-12-29 17:25:00 UTC,auto,auto,713,765,715,AZ,Queen Creek,10,False,False,False,Gas
711489,86e7e6e7c2079a537bba14707aba2d945ebbefaf,2017-12-31 14:55:00 UTC,auto,hold,653,765,665,AZ,Phoenix,40,True,False,True,Electric


In [163]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
dec_2017 = dec_2017.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
dec_2017_ave = dec_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/dec", exist_ok=True)
dec_2017_ave.to_csv("data/day/AZ/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Load the December 2018 day-level export for AZ.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2018-dec-day-AZ.csv")

# dec_2018.head()  # uncomment to preview

In [168]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
dec_2018.drop(
    index=dec_2018[
        (dec_2018["TemperatureExpectedCool"] >= 850)
        | (dec_2018["TemperatureExpectedHeat"] >= 850)
        | (dec_2018["TemperatureExpectedCool"] <= 600)
        | (dec_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,17e47567ce0de68562ebe6ba0aca97d53c37c32f,2018-12-31 18:05:00 UTC,heat,hold,701,705,705,AZ,Flagstaff,10,False,False,False,Gas
1,42625b8bceff1a433465301087b67df701c4c895,2018-12-19 14:00:00 UTC,heat,hold,712,713,713,AZ,Tolleson,15,True,False,True,Electric
2,2d9bff633934542b154b6470e733831a6ab9e239,2018-12-15 18:50:00 UTC,heat,hold,766,756,756,AZ,Mesa,5,False,False,False,Gas
3,92ddb2b34ea659d91151d9f1a781b7132949706a,2018-12-17 14:15:00 UTC,heat,hold,720,719,719,AZ,Mesa,0,False,False,False,Gas
4,8b814b0f15283f86cace65975e000334c433eb2c,2018-12-29 16:05:00 UTC,heat,hold,695,689,689,AZ,Sahuarita,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1034291,9dcd6d16fdc31ab22ec8e482574f92319b29aabb,2018-12-06 13:40:00 UTC,heat,auto,767,760,760,AZ,Phoenix,0,False,False,True,Electric
1034292,31f15af8291f3fb2d23d27f28cae917c4adbb415,2018-12-08 13:50:00 UTC,auto,hold,691,760,690,AZ,Tucson,45,False,False,False,Gas
1034293,2dd19cb5f1aab8aa67ca2411d9710c287b675bc3,2018-12-25 19:55:00 UTC,auto,auto,692,760,680,AZ,Maricopa,10,False,False,False,Gas
1034294,3eaad16e681de383b78d5cbbc7bbb4f5747a710b,2018-12-03 17:20:00 UTC,auto,hold,689,760,690,AZ,Surprise,15,False,False,False,Gas


In [169]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
dec_2018 = dec_2018.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
dec_2018_ave = dec_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/dec", exist_ok=True)
dec_2018_ave.to_csv("data/day/AZ/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Load the December 2019 day-level export for AZ.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2019-dec-day-AZ.csv")

# dec_2019.head()  # uncomment to preview

In [174]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
dec_2019.drop(
    index=dec_2019[
        (dec_2019["TemperatureExpectedCool"] >= 850)
        | (dec_2019["TemperatureExpectedHeat"] >= 850)
        | (dec_2019["TemperatureExpectedCool"] <= 600)
        | (dec_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0222630f87433cb6a60e0fe83c9bc2233a842fce,2019-12-05 14:15:00 UTC,heat,hold,749,715,715,AZ,Phoenix,0,True,False,False,Gas
2,4e8f298070a01421704c96522699fcb8831e9626,2019-12-31 14:35:00 UTC,heat,hold,709,761,671,AZ,Phoenix,55,False,False,True,Electric
3,52efa58814c9787b1ead3595ff9082a24aa021cd,2019-12-04 14:15:00 UTC,heat,hold,695,718,700,AZ,Phoenix,70,False,False,False,Gas
4,2cdee45cabae6d78ac89cea0d28a49dd931f1950,2019-12-18 17:55:00 UTC,auto,auto,630,825,630,AZ,Peoria,19,False,False,False,Gas
5,3bb59c839fe174965abfaf5948e014193baa054a,2019-12-15 14:40:00 UTC,auto,hold,700,715,665,AZ,Phoenix,20,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1336147,8b8abd0573b58905f7af953cc6e9c8d8d05f40d4,2019-12-20 18:45:00 UTC,auto,hold,727,765,715,AZ,Phoenix,60,True,False,False,Gas
1336148,8b8abd0573b58905f7af953cc6e9c8d8d05f40d4,2019-12-20 19:05:00 UTC,auto,hold,725,765,715,AZ,Phoenix,60,True,False,False,Gas
1336149,a1fb3a3144cb137c49b18bbadd05fa086c0755e8,2019-12-20 13:00:00 UTC,auto,auto,723,765,715,AZ,Chandler,0,True,False,False,Gas
1336150,a41499894359eb09406fab6c64e2d9dd7f9cd252,2019-12-28 19:45:00 UTC,auto,hold,718,765,715,AZ,Mesa,0,False,False,True,Electric


In [175]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
dec_2019 = dec_2019.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
dec_2019_ave = dec_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/dec", exist_ok=True)
dec_2019_ave.to_csv("data/day/AZ/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Load the December 2020 day-level export for AZ.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/AZ-day/2020-dec-day-AZ.csv")

# dec_2020.head()  # uncomment to preview

In [180]:
# Drop predetermined outliers before aggregating: a row is removed when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or
# <= 600. All four conditions are applied in one in-place pass.
dec_2020.drop(
    index=dec_2020[
        (dec_2020["TemperatureExpectedCool"] >= 850)
        | (dec_2020["TemperatureExpectedHeat"] >= 850)
        | (dec_2020["TemperatureExpectedCool"] <= 600)
        | (dec_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8b4090a877444b902965cecbc69396cc72ad4df7,2020-12-31 18:30:00 UTC,auto,hold,660,785,625,AZ,Peoria,0,True,False,True,Electric
1,ab044de6713db537806cad24400f9ede54faa142,2020-12-26 14:15:00 UTC,heat,hold,633,640,640,AZ,Sun City,70,True,False,True,Electric
4,36c558f4706dbc66d799758e6e0f082e3395021d,2020-12-03 15:30:00 UTC,cool,auto,619,840,780,AZ,Chandler,25,False,False,False,Gas
5,f66a249909690b7643de98295e65498b72b85ecf,2020-12-26 09:35:00 UTC,heat,hold,753,759,759,AZ,Phoenix,60,False,False,False,Gas
7,ddade16a8259d317804b25a867a487bd7a7ced5a,2020-12-20 07:10:00 UTC,auto,hold,725,833,723,AZ,SCOTTSDALE,35,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1149364,0ac042f6fc949edceac5fb1ecda5a16bc4e45b77,2020-12-16 12:05:00 UTC,heat,hold,761,765,765,AZ,stanfield,10,True,False,True,Electric
1149365,ef00799907125434509b12c9ade2d3f646ae101f,2020-12-22 18:45:00 UTC,heat,hold,718,765,675,AZ,Surprise,19,True,False,False,Gas
1149366,ef00799907125434509b12c9ade2d3f646ae101f,2020-12-14 14:40:00 UTC,auto,hold,697,765,675,AZ,Surprise,19,True,False,False,Gas
1149367,615966cb5d0c6b63c9687da35e01a7df502713ca,2020-12-18 17:00:00 UTC,auto,auto,726,765,715,AZ,Tucson,69,False,False,False,Gas


In [181]:
# Tag every row with the period it came from; Year and Month are used as
# grouping keys when the readings are averaged.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Suffix the three temperature columns with "_ave" so their names describe
# the averaged values they hold after aggregation.
dec_2020 = dec_2020.rename(
    columns={
        col: f"{col}_ave"
        for col in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True skips the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError
# when asked to average them.
dec_2020_ave = dec_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export the monthly averages; index=True writes the group keys as columns.
# Create the output folder first so the cell also runs on a fresh checkout.
os.makedirs("data/day/AZ/dec", exist_ok=True)
dec_2020_ave.to_csv("data/day/AZ/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in every yearly average CSV in this month's folder
2. Export as one combined CSV for the month

In [185]:
# List the yearly CSV exports for this month. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/AZ/dec/") if f.endswith(".csv"))

# files

In [186]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file and concatenate once; growing a DataFrame with
# pd.concat inside a loop re-copies all earlier rows on every pass.
# Each file keeps its own row index, as before.
AZ_dec = (
    pd.concat([pd.read_csv("data/day/AZ/dec/" + name) for name in files])
    if files
    else pd.DataFrame()  # nothing to combine: same empty frame as before
)

AZ_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,002e06a167b6c8ac5b691f3555a28c83623973b1,dec,2017,auto,auto,Phoenix,725.176471,780.000000,720.000000,20.0,False,False,True
1,00ab105be10ba80cca71a5e8d83088116fe17003,dec,2017,auto,hold,Tucson,698.125000,840.000000,700.000000,10.0,False,False,False
2,00ab105be10ba80cca71a5e8d83088116fe17003,dec,2017,cool,auto,Tucson,732.200000,823.200000,790.000000,10.0,False,False,False
3,00ab105be10ba80cca71a5e8d83088116fe17003,dec,2017,cool,hold,Tucson,698.615385,820.000000,790.000000,10.0,False,False,False
4,00ab105be10ba80cca71a5e8d83088116fe17003,dec,2017,heat,auto,Tucson,715.833333,720.000000,720.000000,10.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2316,ffec90658ad5a300da53b43a28fe991808fb4006,dec,2020,auto,hold,Sierra Vista,687.376758,770.000000,690.000000,0.0,False,False,False
2317,fffd2d78fc884b609293e46bfc90641926869ebb,dec,2020,auto,auto,Scottsdale,740.996269,792.729478,742.680970,30.0,False,False,False
2318,fffd2d78fc884b609293e46bfc90641926869ebb,dec,2020,auto,hold,Scottsdale,743.658120,796.217949,746.112536,30.0,False,False,False
2319,fffd2d78fc884b609293e46bfc90641926869ebb,dec,2020,heat,hold,Scottsdale,737.888889,737.530864,737.530864,30.0,False,False,False


In [187]:
# Write the combined month file without the row index; create the output
# folder first so the cell also runs on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/AZ", exist_ok=True)
AZ_dec.to_csv("Scraper_Output/State_Month_Day/AZ/AZ_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in every combined month CSV in the state's folder
2. Export as one combined CSV for the state

In [188]:
# List the combined month CSVs for the state. os.listdir returns entries in
# arbitrary order, so sort them to keep the combined file reproducible.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/AZ/") if f.endswith(".csv")
)

# files

In [189]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed month file and concatenate once; growing a DataFrame
# with pd.concat inside a loop re-copies all earlier rows on every pass.
# Each file keeps its own row index, as before.
AZ_all = (
    pd.concat(
        [pd.read_csv("Scraper_Output/State_Month_Day/AZ/" + name) for name in files]
    )
    if files
    else pd.DataFrame()  # nothing to combine: same empty frame as before
)

AZ_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00ab105be10ba80cca71a5e8d83088116fe17003,aug,2017,cool,auto,Tucson,785.218147,796.067568,778.753861,10.0,False,False,False
1,00ab105be10ba80cca71a5e8d83088116fe17003,aug,2017,cool,hold,Tucson,781.499365,787.621347,781.158831,10.0,False,False,False
2,00abe73a91e4a0012069878eba34b166551cce4d,aug,2017,auto,auto,Tucson,752.400000,750.000000,680.000000,0.0,False,False,True
3,00abe73a91e4a0012069878eba34b166551cce4d,aug,2017,auto,hold,Tucson,752.829225,749.693662,690.000000,0.0,False,False,True
4,01016c682a7be3fa75027c0ebd9edf413cf2459e,aug,2017,cool,hold,Peoria,777.000000,770.000000,770.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8793,fe7d60cb7fe777a80cd29f1df2350728a8ac3381,jun,2021,cool,hold,Surprise,747.600000,750.000000,750.000000,0.0,False,False,False
8794,ff2a7e846baca4fde6914b30a69e4f6327d76db5,jun,2021,cool,hold,Phoenix,784.859122,788.730947,783.519630,0.0,False,False,False
8795,ffc69aa005f871624e7187d94b86f1d795252fd7,jun,2021,cool,hold,Phoenix,794.774709,793.793605,786.513081,50.0,True,False,True
8796,ffec90658ad5a300da53b43a28fe991808fb4006,jun,2021,auto,hold,Sierra Vista,756.847215,757.487925,685.766387,0.0,False,False,False


In [190]:
# Save the combined state-level table; the header row is written (pandas
# default) and the row index is left out.
AZ_all.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AZ_all_day.csv", index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL
# queries: print the distinct ProvinceState values of every monthly frame.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019, "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019, "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019, "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019, "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019, "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019, "dec_2020": dec_2020,
}

for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['AZ']
Unique jan_2018: ['AZ']
Unique jan_2019: ['AZ']
Unique jan_2020: ['AZ']
Unique jan_2021: ['AZ']
Unique feb_2017: ['AZ']
Unique feb_2018: ['AZ']
Unique feb_2019: ['AZ']
Unique feb_2020: ['AZ']
Unique feb_2021: ['AZ']
Unique jun_2017: ['AZ']
Unique jun_2018: ['AZ']
Unique jun_2019: ['AZ']
Unique jun_2020: ['AZ']
Unique jun_2021: ['AZ']
Unique jul_2017: ['AZ']
Unique jul_2018: ['AZ']
Unique jul_2019: ['AZ']
Unique jul_2020: ['AZ']
Unique jul_2021: ['AZ']
Unique aug_2017: ['AZ']
Unique aug_2018: ['AZ']
Unique aug_2019: ['AZ']
Unique aug_2020: ['AZ']
Unique dec_2017: ['AZ']
Unique dec_2018: ['AZ']
Unique dec_2019: ['AZ']
Unique dec_2020: ['AZ']
