# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 "day" extract for CO (CSV generated from BigQuery).
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2017-jan-day-CO.csv")

In [3]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
jan_2017.drop(
    index=jan_2017.index[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ae2cd071a6f34fe3d21fceb3640acbdce6829fc6,2017-01-10 14:00:00 UTC,auto,auto,729,780,730,CO,Denver,0,False,False,False,Gas
1,ba5529e75fc6a8d4f97a45563e92ee2bd93e5394,2017-01-11 16:15:00 UTC,heat,hold,673,700,650,CO,Westminster,20,False,False,False,Gas
2,31cd4c451873c9b810cebe94a75575c3a7ea17c1,2017-01-01 19:40:00 UTC,auto,hold,669,760,670,CO,Castle Rock,15,False,False,False,Gas
3,08e7db35844baf02265dfe552f501ef60d0c893f,2017-01-14 17:50:00 UTC,heat,auto,682,690,690,CO,Arvada,50,False,False,False,Gas
4,c795d77dc9ab9523c4836d29be82ef15f1326b23,2017-01-06 07:05:00 UTC,heat,auto,751,750,750,CO,Aurora,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259094,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-01-19 13:30:00 UTC,auto,auto,729,780,640,CO,Denver,15,False,False,False,Gas
259095,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-01-04 13:35:00 UTC,auto,hold,711,770,690,CO,Denver,15,False,False,False,Gas
259096,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-01-30 16:00:00 UTC,auto,auto,741,780,640,CO,Denver,15,False,False,False,Gas
259097,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-01-26 19:55:00 UTC,auto,auto,721,780,640,CO,Denver,15,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
jan_2017.loc[:, "Year"] = "2017"
jan_2017.loc[:, "Month"] = "Jan"

In [5]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,auto,auto,Parker,715.564767,775.584197,713.256477,5.0,True,False,False
016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,auto,hold,Parker,724.218750,761.390625,705.140625,5.0,True,False,False
016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,heat,auto,Parker,716.766304,720.135870,719.336957,5.0,True,False,False
016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,heat,hold,Parker,718.000000,745.000000,732.500000,5.0,True,False,False
034ffbf47612caeb78f4f755f2fd86b65cf66b57,Jan,2017,heat,auto,Arvada,614.733333,750.000000,625.533333,50.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fc77ed1ee3eb1255ded0f60582b1c3c0c5c0541b,Jan,2017,heat,hold,Centennial,671.556667,657.633333,652.806667,35.0,False,False,False
fc9666d9a0f2be01389656f8c4341b89400717f2,Jan,2017,heat,hold,Montrose,686.115207,694.092166,694.092166,20.0,False,False,False
fe6c870eb425abd86270a645f41ec05d765acb54,Jan,2017,auto,hold,Colorado Springs,666.571429,770.000000,660.000000,20.0,False,False,False
fe88bc642d13f72e892c1a3b8169eda7d3510ae1,Jan,2017,heat,hold,Lone Tree,677.166667,722.250000,670.000000,25.0,False,False,False


In [7]:
# Export the January 2017 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
jan_2017_ave.to_csv(
    path_or_buf="data/day/CO/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the January 2018 "day" extract for CO (CSV generated from BigQuery).
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2018-jan-day-CO.csv")

In [9]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
jan_2018.drop(
    index=jan_2018.index[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6aa07bab01e47e1a77a5a7b6b75e1736ded169c8,2018-01-09 16:20:00 UTC,auto,hold,670,725,675,CO,Denver,10,False,False,False,Gas
2,3ff5c70404a01461fd6156acbf663d237cbbebff,2018-01-23 15:05:00 UTC,heat,auto,744,805,755,CO,Denver,85,True,False,False,Gas
3,57ccdc18cbd039088f441efd6ba31f079a2cfc14,2018-01-10 13:55:00 UTC,heat,hold,707,693,693,CO,Colorado Springs,5,False,False,False,Gas
4,375fb18ae7f3fbab62b7501926dedbbec71aa5d0,2018-01-13 19:00:00 UTC,auto,hold,714,769,719,CO,Highlands Ranch,0,False,False,False,Gas
5,c8cccee8dac70a363845bf747fdea2119ed8692e,2018-01-21 14:20:00 UTC,auto,auto,662,715,665,CO,Windsor,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
766957,5232564682d9f48fff02e17ef09fa3a677b0a236,2018-01-11 19:15:00 UTC,auto,auto,735,765,715,CO,Colorado Springs,17,False,False,False,Gas
766958,157e37b055787c86ce4bcc2666bd96342075fc9d,2018-01-16 16:45:00 UTC,auto,auto,712,765,715,CO,Commerce City,0,False,False,False,Gas
766959,293fc3f8f0de547a0fc6541bc4ea8ce54456f8cb,2018-01-14 16:15:00 UTC,auto,auto,714,765,715,CO,Denver,0,False,False,False,Gas
766960,20cafdaef4af276169f30963002b567c432bc4a5,2018-01-15 16:35:00 UTC,heat,auto,707,765,715,CO,COLORADO Springs,15,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
jan_2018.loc[:, "Year"] = "2018"
jan_2018.loc[:, "Month"] = "Jan"


In [11]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the January 2018 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
jan_2018_ave.to_csv(
    path_or_buf="data/day/CO/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the January 2019 "day" extract for CO (CSV generated from BigQuery).
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2019-jan-day-CO.csv")

In [15]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
jan_2019.drop(
    index=jan_2019.index[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e2b7e9dc66158712a0c58c6b0d9ea376dce54f40,2019-01-05 18:20:00 UTC,auto,hold,713,755,705,CO,Denver,0,False,False,False,Gas
1,c521ad2a06104de76ae9654403f026541f3066db,2019-01-12 16:45:00 UTC,heat,hold,721,725,725,CO,Denver,40,False,False,False,Gas
2,3ae80c28eedde1217680e8a513b235e2de1b02e2,2019-01-14 14:40:00 UTC,auto,hold,681,745,695,CO,Monument,17,False,False,False,Gas
3,ad941e63c290affc20d5a5186c6ffe2904c3c784,2019-01-01 16:15:00 UTC,heat,hold,754,712,712,CO,Parker,10,True,False,False,Gas
4,e35073652bedf4b4a084b33e6cf4647248236174,2019-01-12 16:50:00 UTC,heat,hold,674,665,665,CO,Highlands Ranch,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1135802,f89d08241e022443aa3883681338bd594705e0f6,2019-01-25 14:35:00 UTC,auto,hold,705,765,705,CO,Erie,0,False,False,False,Gas
1135803,8982da59497b19978a1ae9740ae3891c52c1f3d3,2019-01-19 19:45:00 UTC,auto,hold,715,765,715,CO,Denver,97,False,False,False,Gas
1135804,eb3bde41535c67f111401939bdfde60b2c8c0fcc,2019-01-13 19:30:00 UTC,auto,hold,717,765,715,CO,Denver,100,False,False,False,Gas
1135805,f89d08241e022443aa3883681338bd594705e0f6,2019-01-14 19:05:00 UTC,auto,hold,711,765,705,CO,Erie,0,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
jan_2019.loc[:, "Year"] = "2019"
jan_2019.loc[:, "Month"] = "Jan"


In [17]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the January 2019 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
jan_2019_ave.to_csv(
    path_or_buf="data/day/CO/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the January 2020 "day" extract for CO (CSV generated from BigQuery).
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2020-jan-day-CO.csv")

In [21]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
jan_2020.drop(
    index=jan_2020.index[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7653e2712209b2b87e509000ff9c11b56cf31835,2020-01-08 16:45:00 UTC,auto,hold,709,755,695,CO,Englewood,0,False,False,False,Gas
1,57ccdc18cbd039088f441efd6ba31f079a2cfc14,2020-01-04 17:25:00 UTC,heat,hold,677,689,689,CO,Colorado Springs,5,False,False,False,Gas
2,7653e2712209b2b87e509000ff9c11b56cf31835,2020-01-03 13:55:00 UTC,auto,hold,692,755,695,CO,Englewood,0,False,False,False,Gas
3,157e37b055787c86ce4bcc2666bd96342075fc9d,2020-01-11 18:05:00 UTC,auto,hold,700,751,701,CO,Commerce City,0,False,False,False,Gas
4,5158d2b639512fa7b8db5c7415b225987f3ea2c3,2020-01-01 16:10:00 UTC,heat,hold,688,752,690,CO,Longmont,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1247772,3fb82519ca32f14babdb28ca2d3fd5129765b38e,2020-01-18 18:50:00 UTC,auto,hold,705,765,705,CO,Littleton,0,False,False,False,Gas
1247773,8fc9a86379f53cc4938d3242e4a773fe71141cb4,2020-01-26 18:05:00 UTC,auto,hold,712,765,715,CO,Fort Collins,55,False,False,False,Gas
1247774,0781deaacb8e7bbc4784a1694378c8e8db943034,2020-01-17 19:20:00 UTC,auto,hold,705,765,705,CO,Northglenn,50,False,False,False,Gas
1247775,9c8876bf7c0dcbc308041d43ea8eb38f823d7c65,2020-01-25 15:40:00 UTC,auto,hold,691,765,695,CO,Castle Rock,17,False,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
jan_2020.loc[:, "Year"] = "2020"
jan_2020.loc[:, "Month"] = "Jan"


In [23]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the January 2020 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
jan_2020_ave.to_csv(
    path_or_buf="data/day/CO/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the January 2021 "day" extract for CO (CSV generated from BigQuery).
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2021-jan-day-CO.csv")

In [27]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
jan_2021.drop(
    index=jan_2021.index[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9945c8106ee07666e1c353969ee8c609410ded67,2021-01-09 17:50:00 UTC,auto,hold,655,656,606,CO,Boulder,0,True,False,True,Electric
1,e1449b21f683b8f65e19436dc7e32f5e26a4dc49,2021-01-11 08:55:00 UTC,heat,hold,728,729,729,CO,Thornton,20,False,False,False,Gas
2,0be3e27868b511314735c56c999edfcd045268e7,2021-01-03 14:15:00 UTC,heat,hold,645,655,655,CO,Colorado Springs,15,True,False,False,Gas
3,d5b6c942200ddd3d73145229f9c76dd24f2cbfb3,2021-01-03 18:10:00 UTC,auto,hold,736,803,753,CO,Louisville,60,False,False,False,Gas
4,b990c12d7bc66fd96991ff07060fc01053efdde7,2021-01-01 17:10:00 UTC,heat,hold,668,678,678,CO,denver,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874960,5f5b79c794562eff36c0ce7fd555c9a9416bdba8,2021-01-29 19:25:00 UTC,auto,hold,680,760,680,CO,Fort Collins,0,False,False,False,Gas
874961,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2021-01-23 14:00:00 UTC,auto,hold,707,760,710,CO,Denver,15,False,False,False,Gas
874962,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2021-01-22 18:15:00 UTC,auto,hold,725,760,710,CO,Denver,15,False,False,False,Gas
874963,66271da88037bd0cea6f8ed6c504f13ccce77bf7,2021-01-01 16:55:00 UTC,auto,hold,680,760,680,CO,Arvada,40,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
jan_2021.loc[:, "Year"] = "2021"
jan_2021.loc[:, "Month"] = "Jan"


In [29]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the January 2021 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
jan_2021_ave.to_csv(
    path_or_buf="data/day/CO/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List every CSV in the January output folder (any other .csv placed there is
# picked up as well).
# os.listdir returns names in arbitrary order, so they are sorted to make the
# row order of the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/CO/jan/") if f.endswith(".csv"))

In [33]:
# Combine the per-year January averages into one dataframe.
# Approach adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Every file is read first and concatenated once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/CO/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no CSV files were found. Each file keeps its own row index.
CO_jan = pd.concat(frames) if frames else pd.DataFrame()

CO_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,auto,auto,Parker,715.564767,775.584197,713.256477,5.0,True,False,False
1,016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,auto,hold,Parker,724.218750,761.390625,705.140625,5.0,True,False,False
2,016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,heat,auto,Parker,716.766304,720.135870,719.336957,5.0,True,False,False
3,016982393b0ec18a6b02e713f5605fbf7fcf6f81,Jan,2017,heat,hold,Parker,718.000000,745.000000,732.500000,5.0,True,False,False
4,034ffbf47612caeb78f4f755f2fd86b65cf66b57,Jan,2017,heat,auto,Arvada,614.733333,750.000000,625.533333,50.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1193,ff8263254ab986ac6a02c06ba1ebba7435d71dc6,Jan,2021,heat,hold,Lone Tree,688.500000,690.472222,690.472222,9.0,False,False,False
1194,ff8c6d458a900bf33496d221452e1983c646e110,Jan,2021,heat,hold,Lafayette,697.541463,700.243902,699.902439,7.0,False,False,False
1195,ffb55cc3e9d7346710fa8ede22ee3c7cde217e3d,Jan,2021,heat,hold,Centennial,693.920635,700.000000,700.000000,0.0,False,False,False
1196,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,Jan,2021,heat,hold,Commerce City,725.094977,730.653425,730.653425,30.0,False,False,False


In [34]:
# Write the combined January file; the row index is omitted (index=False).
CO_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/CO/CO_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 "day" extract for CO (CSV generated from BigQuery).
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2017-feb-day-CO.csv")

In [36]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
feb_2017.drop(
    index=feb_2017.index[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af31414aa7695640b0d8f55bf759ae584a146566,2017-02-18 14:30:00 UTC,auto,auto,650,750,630,CO,Littleton,30,False,False,False,Gas
1,2e6fc6485a17cee206131dca8aac3c1d7e1a8223,2017-02-20 19:20:00 UTC,heat,hold,724,720,720,CO,Highlands Ranch,20,True,False,False,Gas
2,846e4eb2a5ebda3e002f05ce348b07fa270856f8,2017-02-04 15:10:00 UTC,heat,auto,639,650,620,CO,Denver,5,False,False,False,Gas
3,91029bfb1aa7c2838144d11212fa1601adebee04,2017-02-16 18:00:00 UTC,auto,hold,654,740,630,CO,Fort Collins,5,False,False,False,Gas
4,296b434d8fdf37d9294d42c9754317a4ecff808a,2017-02-11 16:15:00 UTC,heat,hold,673,660,660,CO,Superior,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
250070,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-02-08 15:45:00 UTC,auto,auto,744,770,640,CO,Denver,15,False,False,False,Gas
250071,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-02-10 16:00:00 UTC,auto,auto,748,770,640,CO,Denver,15,False,False,False,Gas
250072,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-02-13 19:30:00 UTC,auto,hold,735,710,640,CO,Denver,15,False,False,False,Gas
250073,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-02-09 17:25:00 UTC,auto,auto,721,770,640,CO,Denver,15,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which form downstream consumers expect.
feb_2017.loc[:, "Year"] = "2017"
feb_2017.loc[:, "Month"] = "feb"

In [38]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the February 2017 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
feb_2017_ave.to_csv(
    path_or_buf="data/day/CO/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the February 2018 "day" extract for CO (CSV generated from BigQuery).
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2018-feb-day-CO.csv")

In [42]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
feb_2018.drop(
    index=feb_2018.index[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,3b1ebc06b71971e032202af59688eca0f52888b4,2018-02-15 16:00:00 UTC,heat,hold,648,654,654,CO,Copper Mountain,0,False,False,False,Gas
2,3b1ebc06b71971e032202af59688eca0f52888b4,2018-02-12 17:30:00 UTC,heat,hold,666,654,654,CO,Copper Mountain,0,False,False,False,Gas
3,7f68180362dd886a7f1e14499725f16ef50e1c5f,2018-02-22 16:25:00 UTC,auto,hold,685,705,655,CO,Denver,10,False,False,True,Electric
4,7c6690e4cedf1e81f8d2f9768eb3bd8a1414daf9,2018-02-01 14:20:00 UTC,heat,hold,683,686,686,CO,Centennial,36,False,False,False,Gas
6,768f465486a22a2bfd1c6daf71b3c6cbc4ea3131,2018-02-02 14:55:00 UTC,heat,hold,712,709,709,CO,Grand Junction,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
742999,b59d7105ba6697b28687a0e083823119c655a993,2018-02-06 18:05:00 UTC,auto,hold,711,765,715,CO,Castle Rock,5,False,False,False,Gas
743000,b59d7105ba6697b28687a0e083823119c655a993,2018-02-07 17:30:00 UTC,auto,hold,712,765,715,CO,Castle Rock,5,False,False,False,Gas
743001,20cafdaef4af276169f30963002b567c432bc4a5,2018-02-23 15:40:00 UTC,heat,auto,707,765,715,CO,COLORADO Springs,15,False,False,False,Gas
743002,52c8ef28537fb6d1df295a52854e74543879f67f,2018-02-10 16:15:00 UTC,auto,hold,699,765,705,CO,Boulder,25,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which form downstream consumers expect.
feb_2018.loc[:, "Year"] = "2018"
feb_2018.loc[:, "Month"] = "feb"


In [44]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the February 2018 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
feb_2018_ave.to_csv(
    path_or_buf="data/day/CO/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the February 2019 "day" extract for CO (CSV generated from BigQuery).
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2019-feb-day-CO.csv")

In [48]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
feb_2019.drop(
    index=feb_2019.index[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3c5070a9f9922202e9ec38b7c5d3ab192fbe54b5,2019-02-22 18:00:00 UTC,auto,auto,731,830,730,CO,Broomfield,0,False,False,False,Gas
2,75d4005fb369491c5c0f61b7977b8badaeb10d76,2019-02-21 14:55:00 UTC,auto,hold,707,835,715,CO,Colorado Springs,0,False,False,False,Gas
3,80fc7817bffe4f469288e73953891e4e89cf5c06,2019-02-28 15:10:00 UTC,heat,hold,682,685,685,CO,Wellington,0,False,False,False,Gas
4,8b1060368fef23cb7b9b77af023f00c0367f60c3,2019-02-26 14:30:00 UTC,heat,hold,742,745,745,CO,Denver,37,False,False,False,Gas
5,12990e0a0ebe1a62ef2aff5a425bb516df47d289,2019-02-25 15:15:00 UTC,heat,hold,746,745,745,CO,Denvee,37,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
767123,f89d08241e022443aa3883681338bd594705e0f6,2019-02-03 16:15:00 UTC,auto,hold,696,765,705,CO,Erie,0,False,False,False,Gas
767124,ac016e4d0796fe55478b81e36dea0cc9e158ed51,2019-02-08 15:25:00 UTC,auto,hold,717,765,715,CO,Colorado Springs,40,False,False,False,Gas
767125,8982da59497b19978a1ae9740ae3891c52c1f3d3,2019-02-03 17:10:00 UTC,auto,hold,717,765,715,CO,Denver,97,False,False,False,Gas
767126,2e30b88c039763b1eecb0d81f75fc327c121976b,2019-02-10 16:05:00 UTC,heat,hold,693,765,695,CO,Broomfield,20,False,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which form downstream consumers expect.
feb_2019.loc[:, "Year"] = "2019"
feb_2019.loc[:, "Month"] = "feb"


In [50]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the February 2019 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
feb_2019_ave.to_csv(
    path_or_buf="data/day/CO/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the February 2020 "day" extract for CO (CSV generated from BigQuery).
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2020-feb-day-CO.csv")

In [54]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
feb_2020.drop(
    index=feb_2020.index[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,157e37b055787c86ce4bcc2666bd96342075fc9d,2020-02-09 19:50:00 UTC,auto,hold,705,751,701,CO,Commerce City,0,False,False,False,Gas
2,d36b14f84a0a1e08fff68b8eabbbe129994861bc,2020-02-06 15:05:00 UTC,auxHeatOnly,hold,713,716,716,CO,Severance,0,True,False,False,Gas
3,8fc9a86379f53cc4938d3242e4a773fe71141cb4,2020-02-18 16:50:00 UTC,auto,hold,712,765,715,CO,Fort Collins,55,False,False,False,Gas
4,8fc9a86379f53cc4938d3242e4a773fe71141cb4,2020-02-20 19:15:00 UTC,auto,hold,709,765,715,CO,Fort Collins,55,False,False,False,Gas
5,0813f368bb3126279084e545c546f4ff045a82df,2020-02-26 16:05:00 UTC,heat,auto,714,795,719,CO,Highlands Ranch,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1135310,234cd146881e1b2d95e3c5ec15a24e50e236f41f,2020-02-05 15:05:00 UTC,auto,hold,708,760,710,CO,Parker,20,True,False,False,Gas
1135311,861e32264478b2f23b7e87f999ef98355f88c611,2020-02-14 17:30:00 UTC,auto,hold,680,760,690,CO,Aurora,38,False,False,False,Gas
1135312,a56577e5028cf2e9114ea7da1a79758c5c3d5702,2020-02-11 17:45:00 UTC,auto,auto,697,760,690,CO,Denver,10,False,False,False,Gas
1135313,3fb82519ca32f14babdb28ca2d3fd5129765b38e,2020-02-07 14:15:00 UTC,auto,auto,709,760,710,CO,Littleton,0,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which form downstream consumers expect.
feb_2020.loc[:, "Year"] = "2020"
feb_2020.loc[:, "Month"] = "feb"


In [56]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the February 2020 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
feb_2020_ave.to_csv(
    path_or_buf="data/day/CO/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the February 2021 "day" extract for CO (CSV generated from BigQuery).
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/CO-day/2021-feb-day-CO.csv")

In [60]:
# Remove predetermined outliers before aggregating.
# A row is discarded when either TemperatureExpectedCool or
# TemperatureExpectedHeat is >= 850 or <= 600. The four conditions are OR-ed
# into one mask and the matching index labels are dropped in place.
feb_2021.drop(
    index=feb_2021.index[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9945c8106ee07666e1c353969ee8c609410ded67,2021-02-16 15:45:00 UTC,auto,hold,672,726,646,CO,Boulder,0,True,False,True,Electric
2,7972c8f9754f635bccb50fcde95f0042c093c3ee,2021-02-13 10:55:00 UTC,auto,hold,688,742,692,CO,Denver,15,False,False,False,Gas
3,a77e896073bd09dc735ee035c45e89607e394493,2021-02-08 17:15:00 UTC,auto,hold,779,835,780,CO,Lakewood,40,True,False,False,Gas
4,76abae418228be16b46223453e9b6ee7ffc91ea3,2021-02-03 17:10:00 UTC,heat,hold,640,640,640,CO,Centennial,50,False,False,False,Gas
5,1f147db125333c258d255d0fa5b514490a1f99d6,2021-02-14 19:25:00 UTC,auto,hold,720,795,715,CO,Severance,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
775852,dc02403e74ea3666a668b9f4d2e9d2fe42787cfb,2021-02-08 17:45:00 UTC,auto,hold,723,760,720,CO,Thornton,10,False,False,False,Gas
775853,e86501a553fb27b7a0f09e6e09d855cd04d99589,2021-02-23 12:10:00 UTC,auto,hold,703,760,710,CO,Arvada,60,False,False,False,Gas
775854,f1f438a7259c87f9e85b155b9d687d9c7f77f295,2021-02-03 12:00:00 UTC,auto,hold,689,760,690,CO,Fort Collins,0,False,False,False,Gas
775855,6b49ba1dbc86b923907ccf03ecf0deca6bf8b10a,2021-02-14 14:20:00 UTC,heat,hold,750,760,760,CO,Old North Boulder,9,True,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings); these two
# columns are used as group keys in the aggregation below.
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" — confirm which form downstream consumers expect.
feb_2021.loc[:, "Year"] = "2021"
feb_2021.loc[:, "Month"] = "feb"


In [62]:
# Give the three temperature columns an "_ave" suffix ahead of the group-by
# mean, so the aggregated output is self-describing.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for each combination of Identifier, Month, Year,
# HvacMode, CalendarEvent and City.
# numeric_only=True is passed explicitly: pandas >= 2.0 no longer drops the
# text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently
# and raises TypeError instead. On older pandas this equals the old default.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the February 2021 averages. The group keys live in the index, so the
# index is written out (index=True) to keep them as leading CSV columns.
feb_2021_ave.to_csv(
    path_or_buf="data/day/CO/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List every CSV in the February output folder (any other .csv placed there is
# picked up as well).
# os.listdir returns names in arbitrary order, so they are sorted to make the
# row order of the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/CO/feb/") if f.endswith(".csv"))

In [66]:
# Combine the per-year February averages into one dataframe.
# Approach adapted from
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Every file is read first and concatenated once: calling pd.concat inside the
# loop re-copies all previously accumulated rows on each iteration.
frames = []
for file in files:
    df = pd.read_csv("data/day/CO/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when no CSV files were found. Each file keeps its own row index.
CO_feb = pd.concat(frames) if frames else pd.DataFrame()

CO_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,011a84668067e31b514a0238be88622798e2d625,feb,2017,auto,auto,Colorado Springs,667.473684,759.473684,680.000000,40.0,False,False,False
1,011a84668067e31b514a0238be88622798e2d625,feb,2017,auto,hold,Colorado Springs,693.441176,765.411765,706.882353,40.0,False,False,False
2,016982393b0ec18a6b02e713f5605fbf7fcf6f81,feb,2017,auto,auto,Parker,711.706827,758.975904,704.293173,5.0,True,False,False
3,016982393b0ec18a6b02e713f5605fbf7fcf6f81,feb,2017,auto,hold,Parker,723.504348,768.717391,713.269565,5.0,True,False,False
4,02ace11b429b2dd91e15d00e95e74e8de4390f44,feb,2017,auto,auto,Aurora,773.571429,767.571429,710.000000,10.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,ff82364006968dacbbc4275cf59704c29696d664,feb,2021,heat,hold,Thornton,666.664706,670.000000,670.029412,0.0,False,False,False
1224,ff8263254ab986ac6a02c06ba1ebba7435d71dc6,feb,2021,heat,hold,Lone Tree,672.071429,650.000000,630.000000,9.0,False,False,False
1225,ff8c6d458a900bf33496d221452e1983c646e110,feb,2021,heat,hold,Lafayette,697.639731,700.040404,699.983165,7.0,False,False,False
1226,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,feb,2021,heat,hold,Commerce City,727.397856,733.423251,733.423251,30.0,False,False,False


In [67]:
# Save the combined February frame; index=False leaves the per-file row numbers out of the CSV
CO_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/CO/CO_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw 2017 June day CSV for CO
jun_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2017-jun-day-CO.csv",
)

In [69]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jun_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2017 = jun_2017.drop(index=jun_2017.index[is_outlier])

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,89a264a7320e2b3cbcd18a058928ae3f3d8fdf7d,2017-06-12 19:15:00 UTC,auto,hold,739,780,620,CO,Denver,60,False,False,False,Gas
1,5ce4cce8fc3c690ebe1b3e539928a54bcc0df5bb,2017-06-18 18:15:00 UTC,cool,hold,730,730,730,CO,Denver,120,False,False,False,Gas
2,ae8a7e5154849580ddeca5748b35b291bc114ec0,2017-06-04 16:05:00 UTC,auto,hold,737,730,700,CO,Loveland,15,False,False,False,Gas
3,8da3f9fe55ec2a2995adb98eca688817a07c08d3,2017-06-27 17:25:00 UTC,cool,hold,733,740,740,CO,Akron,0,False,False,False,Gas
4,c274982f373f3646faec5a3c007b8db9e5b27248,2017-06-23 11:45:00 UTC,cool,auto,713,730,680,CO,,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
433028,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-06-15 18:10:00 UTC,cool,hold,738,740,740,CO,Denver,15,False,False,False,Gas
433029,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-06-05 19:45:00 UTC,cool,hold,715,760,760,CO,Denver,15,False,False,False,Gas
433030,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-06-07 16:35:00 UTC,cool,hold,728,740,740,CO,Denver,15,False,False,False,Gas
433031,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-06-12 14:55:00 UTC,cool,hold,717,710,710,CO,Denver,15,False,False,False,Gas


In [70]:
# Tag every row with its year and month (both stored as strings)
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jun_2017_ave = (
    jun_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [73]:
# Write the June 2017 group means; index=True keeps the group keys (the index) in the file
jun_2017_ave.to_csv(path_or_buf="data/day/CO/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the raw 2018 June day CSV for CO
jun_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2018-jun-day-CO.csv",
)

In [75]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jun_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2018 = jun_2018.drop(index=jun_2018.index[is_outlier])

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
5,b9e811b53f220e4a1c2f07768ddae4cfb4709083,2018-06-21 15:15:00 UTC,auto,hold,708,735,625,CO,Westminster,40,False,False,False,Gas
6,e202025138582880a6d91a44a34c791884a7f58f,2018-06-29 19:45:00 UTC,cool,hold,723,715,715,CO,Colorado Springs,10,True,False,False,Gas
7,d2345a5875d165e2172f946919ee209226c856da,2018-06-30 17:15:00 UTC,cool,auto,754,750,705,CO,Lakewood,67,False,False,False,Gas
8,20cafdaef4af276169f30963002b567c432bc4a5,2018-06-11 15:30:00 UTC,auto,auto,737,745,695,CO,COLORADO Springs,15,False,False,False,Gas
9,75c88d2331184ca8af3e66c1a4db61b7d1dfdcdb,2018-06-11 17:40:00 UTC,cool,hold,722,780,744,CO,Fort Collins,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
896226,29c18b48b5629b2f1454ee7fc07d3abd8455be94,2018-06-23 16:15:00 UTC,cool,hold,760,760,760,CO,Erie,0,False,False,False,Gas
896227,8b09f0dedbd06518b0d087283128ee983f12b528,2018-06-23 13:55:00 UTC,cool,hold,720,760,760,CO,Arvada,0,False,False,False,Gas
896228,ae2cd071a6f34fe3d21fceb3640acbdce6829fc6,2018-06-04 17:40:00 UTC,cool,hold,771,760,760,CO,Denver,0,False,False,False,Gas
896229,6285646c5e68ebb133fd5ea384abe7882a749a90,2018-06-27 13:35:00 UTC,cool,hold,708,760,760,CO,Littleton,25,True,False,False,Gas


In [76]:
# Tag every row with its year and month (both stored as strings)
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jun_2018_ave = (
    jun_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [79]:
# Write the June 2018 group means; index=True keeps the group keys (the index) in the file
jun_2018_ave.to_csv(path_or_buf="data/day/CO/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the raw 2019 June day CSV for CO
jun_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2019-jun-day-CO.csv",
)

In [81]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jun_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2019 = jun_2019.drop(index=jun_2019.index[is_outlier])

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d06849c7fd270e16e5187347d13f5df0e5c8140f,2019-06-24 15:55:00 UTC,heat,hold,706,705,705,CO,Colorado Springs,0,True,False,False,Gas
1,a30cb3490bdb954fe59e729493a9d59a49468d51,2019-06-24 13:25:00 UTC,heat,auto,679,732,657,CO,Arvada,0,True,False,False,Gas
2,713c6b8892746f216272c16af61b2dcd7b789b09,2019-06-30 17:25:00 UTC,cool,hold,750,735,735,CO,Northglenn,69,True,False,False,Gas
3,c521ad2a06104de76ae9654403f026541f3066db,2019-06-22 14:55:00 UTC,cool,hold,715,743,743,CO,Denver,40,False,False,False,Gas
4,5f596ec61ae4fa2466ad1a79c3f981b326ff038a,2019-06-03 12:45:00 UTC,cool,hold,704,735,735,CO,Greeley,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1149817,ae2cd071a6f34fe3d21fceb3640acbdce6829fc6,2019-06-26 16:10:00 UTC,cool,hold,739,760,760,CO,Denver,0,False,False,False,Gas
1149818,dd06ebcbac51dc4f9a80a0edbdfd4ce1254c00e7,2019-06-15 13:00:00 UTC,cool,hold,746,760,760,CO,Colorado Springs,10,False,False,False,Gas
1149819,f9beaa27198f80fd7c090f20dab3b682e487a4d8,2019-06-14 16:00:00 UTC,cool,auto,768,760,760,CO,Lakewood,0,False,False,False,Gas
1149820,dd06ebcbac51dc4f9a80a0edbdfd4ce1254c00e7,2019-06-18 19:20:00 UTC,cool,hold,715,760,760,CO,Colorado Springs,10,False,False,False,Gas


In [82]:
# Tag every row with its year and month (both stored as strings)
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jun_2019_ave = (
    jun_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [85]:
# Write the June 2019 group means; index=True keeps the group keys (the index) in the file
jun_2019_ave.to_csv(path_or_buf="data/day/CO/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the raw 2020 June day CSV for CO
jun_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2020-jun-day-CO.csv",
)

In [87]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jun_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2020 = jun_2020.drop(index=jun_2020.index[is_outlier])

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,12895ef724576e9a3bf5b494f1661b7f7a3b75c7,2020-06-28 14:40:00 UTC,cool,hold,681,626,626,CO,Centennial,10,False,False,False,Gas
1,932940394add1dc3c6db6735c97631389f3f8889,2020-06-16 18:35:00 UTC,cool,hold,769,775,775,CO,Denver,7,False,False,False,Gas
2,0acb6bd44fe47dc9066fb8c4907b43a54fca9b03,2020-06-15 16:15:00 UTC,cool,hold,703,728,728,CO,Louisville,9,False,False,False,Gas
3,5f596ec61ae4fa2466ad1a79c3f981b326ff038a,2020-06-02 13:30:00 UTC,cool,hold,719,717,717,CO,Greeley,10,False,False,False,Gas
4,b0315e8b40dda659ca78e03f6e5a6efdb05ec32e,2020-06-29 17:25:00 UTC,cool,hold,748,775,775,CO,Denver,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1130524,41d3262dee2937026ff8c195aeea7d48de6e13b9,2020-06-29 18:40:00 UTC,cool,auto,758,760,760,CO,Englewood,10,False,False,False,Gas
1130525,69fbcca1ab34f833df73a423cf4f4b2e9d65fe7d,2020-06-23 13:30:00 UTC,cool,auto,725,740,760,CO,Aurora,0,True,False,False,Gas
1130526,219f8df11d3dd301b596b9788197f86823452acc,2020-06-01 18:00:00 UTC,cool,hold,746,760,760,CO,Lafayette,30,False,False,False,Gas
1130527,102b02f61483ca4cca672ba7e3388c5456d52477,2020-06-01 18:05:00 UTC,cool,hold,752,760,760,CO,Aurora CO,10,False,False,False,Gas


In [88]:
# Tag every row with its year and month (both stored as strings)
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jun_2020_ave = (
    jun_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [91]:
# Write the June 2020 group means; index=True keeps the group keys (the index) in the file
jun_2020_ave.to_csv(path_or_buf="data/day/CO/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the raw 2021 June day CSV for CO
jun_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2021-jun-day-CO.csv",
)

In [93]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jun_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jun_2021 = jun_2021.drop(index=jun_2021.index[is_outlier])

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7ff94e6a3804570556ea49b225b6d14ef2377a79,2021-06-23 16:50:00 UTC,cool,hold,751,752,752,CO,Laporte,0,False,False,True,Electric
1,0dcc98ae0ec0c13231c8799f8b4991bed842c5d0,2021-06-21 17:30:00 UTC,auto,hold,773,780,618,CO,Aurora,9,True,False,False,Gas
2,974fc3fdc6b4ab58393dfc464f6a4fa181e240fd,2021-06-11 19:25:00 UTC,cool,hold,767,722,722,CO,Denver,90,False,False,False,Gas
3,1e75670ba428a948824ea3b58b6d56e9700711fa,2021-06-25 16:15:00 UTC,cool,hold,685,740,719,CO,Aurora,40,True,False,False,Gas
6,29bfbd4b81d97b2ade9cfa5f4c40fe67e8f3bf02,2021-06-08 14:20:00 UTC,auto,hold,678,677,627,CO,Denver,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
775178,8fc9a86379f53cc4938d3242e4a773fe71141cb4,2021-06-11 16:50:00 UTC,cool,hold,746,760,760,CO,Fort Collins,55,False,False,False,Gas
775179,a66197012f4860cb46b1cbe0dd69a12229f09dfa,2021-06-09 11:50:00 UTC,cool,hold,701,760,760,CO,Fort Collins,20,True,False,False,Gas
775180,932f76dbfa9b55ba08a2b2cbd4e84d350e0a9aa7,2021-06-16 15:15:00 UTC,cool,hold,758,760,760,CO,Ft Collins,120,False,False,False,Gas
775181,846e4eb2a5ebda3e002f05ce348b07fa270856f8,2021-06-12 13:05:00 UTC,cool,hold,764,799,760,CO,Denver,5,False,False,False,Gas


In [94]:
# Tag every row with its year and month (both stored as strings)
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jun_2021_ave = (
    jun_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [97]:
# Write the June 2021 group means; index=True keeps the group keys (the index) in the file
jun_2021_ave.to_csv(path_or_buf="data/day/CO/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files
1. Read in the per-year average CSV files from this month's folder for the state
2. Export them as a single combined CSV

In [98]:
# List the per-year CSV files in the June folder.
# os.listdir returns names in arbitrary order, so sort them to make the
# row order of the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/CO/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once; growing a DataFrame with concat
# inside the loop re-copies all previously accumulated rows on each pass.
frames = [pd.read_csv("data/day/CO/jun/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
CO_jun = pd.concat(frames) if frames else pd.DataFrame()

CO_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004666c8518763d5ff5a1b6f9be919899724167e,jun,2017,auto,hold,Denver,766.115385,826.867521,712.631054,10.0,False,False,False
1,004666c8518763d5ff5a1b6f9be919899724167e,jun,2017,cool,hold,Denver,742.849398,737.012048,736.656627,10.0,False,False,False
2,00b9d14bcf27ff6d6e0c30c6f60651c6c8c9ae8b,jun,2017,auto,hold,Aurora,715.142857,691.285714,641.285714,17.0,False,False,False
3,011a84668067e31b514a0238be88622798e2d625,jun,2017,auto,auto,Colorado Springs,703.000000,812.000000,635.000000,40.0,False,False,False
4,016982393b0ec18a6b02e713f5605fbf7fcf6f81,jun,2017,auto,auto,Parker,714.558824,742.161765,691.661765,5.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1366,ffb55cc3e9d7346710fa8ede22ee3c7cde217e3d,jun,2021,cool,hold,Centennial,714.593857,740.000000,743.071672,0.0,False,False,False
1367,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,jun,2021,auto,hold,Commerce City,717.909710,717.913118,648.219761,30.0,False,False,False
1368,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,jun,2021,cool,hold,Commerce City,720.166667,730.000000,730.000000,30.0,False,False,False
1369,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,jun,2021,heat,hold,Commerce City,729.673469,735.775510,733.020408,30.0,False,False,False


In [100]:
# Save the combined June frame; index=False leaves the per-file row numbers out of the CSV
CO_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/CO/CO_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the raw 2017 July day CSV for CO
jul_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2017-jul-day-CO.csv",
)

In [102]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jul_2017[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2017 = jul_2017.drop(index=jul_2017.index[is_outlier])

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e1449b21f683b8f65e19436dc7e32f5e26a4dc49,2017-07-26 13:05:00 UTC,heat,hold,711,700,700,CO,Thornton,20,False,False,False,Gas
1,f99d5effcb9641c6ca74357fa3a9709315d74a06,2017-07-12 17:00:00 UTC,auto,hold,708,710,660,CO,Denver,80,False,False,False,Gas
2,f0cc4d540f8cd6165f545e7d1529273f3fa4279c,2017-07-07 12:40:00 UTC,auto,hold,723,720,660,CO,Berthoud,20,True,False,False,Gas
3,c274982f373f3646faec5a3c007b8db9e5b27248,2017-07-30 17:15:00 UTC,cool,auto,721,720,620,CO,,30,True,False,False,Gas
4,f4eeab77ed5795d6356fe13f04c7ebd269c068d1,2017-07-23 15:25:00 UTC,cool,auto,778,790,670,CO,Boulder,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
516074,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-07-30 16:25:00 UTC,cool,hold,721,720,720,CO,Denver,15,False,False,False,Gas
516075,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-07-15 15:10:00 UTC,cool,auto,736,740,720,CO,Denver,15,False,False,False,Gas
516076,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-07-21 13:00:00 UTC,cool,auto,753,750,740,CO,Denver,15,False,False,False,Gas
516077,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-07-13 19:40:00 UTC,cool,auto,739,740,720,CO,Denver,15,False,False,False,Gas


In [103]:
# Tag every row with its year and month (both stored as strings)
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jul_2017_ave = (
    jul_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [106]:
# Write the July 2017 group means; index=True keeps the group keys (the index) in the file
jul_2017_ave.to_csv(path_or_buf="data/day/CO/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the raw 2018 July day CSV for CO
jul_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2018-jul-day-CO.csv",
)

In [108]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jul_2018[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2018 = jul_2018.drop(index=jul_2018.index[is_outlier])

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,95da450b1ac1694e06eafce0e299275bf80f0542,2018-07-27 16:40:00 UTC,cool,hold,681,689,644,CO,Aurora,27,False,False,False,Gas
1,7506e48ea50d667054af93baa5e4726649e5af61,2018-07-27 16:40:00 UTC,cool,hold,698,695,695,CO,Littleton,18,False,False,False,Gas
2,f3c73dfbbf31be193d384e361b53664180fd6e74,2018-07-27 09:50:00 UTC,cool,hold,735,735,735,CO,Littleton,30,False,False,False,Gas
3,eb3bde41535c67f111401939bdfde60b2c8c0fcc,2018-07-14 16:30:00 UTC,auto,auto,719,716,666,CO,Denver,100,False,False,False,Gas
5,619484bcc6ee0fe7a4c230842c1a72fb8c8fcc15,2018-07-15 18:30:00 UTC,cool,hold,705,687,687,CO,Peyton,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1028115,23462a9967937962bb321cc2e1e1e86191d35eca,2018-07-02 14:15:00 UTC,cool,hold,711,760,760,CO,Lyons,40,False,False,False,Gas
1028116,f4d0cf604042a1639abbceb8b6487efeb7ae1e55,2018-07-22 11:15:00 UTC,cool,auto,698,700,760,CO,Westminster,7,False,False,False,Gas
1028117,c867af67bdab18f5db45dd082f4b1cd8d0192f94,2018-07-27 11:40:00 UTC,cool,auto,654,760,760,CO,Byers,10,False,False,False,Gas
1028118,38352831f01f61780948bb690a2cddd6058acb33,2018-07-19 14:50:00 UTC,cool,hold,741,760,760,CO,Colorado Springs,18,False,False,False,Gas


In [109]:
# Tag every row with its year and month (both stored as strings)
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jul_2018_ave = (
    jul_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [112]:
# Write the July 2018 group means; index=True keeps the group keys (the index) in the file
jul_2018_ave.to_csv(path_or_buf="data/day/CO/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the raw 2019 July day CSV for CO
jul_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2019-jul-day-CO.csv",
)

In [114]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jul_2019[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2019 = jul_2019.drop(index=jul_2019.index[is_outlier])

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ba86efbc5d0e4415dffa12d3b8fe2d3ae2391ed9,2019-07-31 16:55:00 UTC,auto,auto,733,730,661,CO,Lafayette,35,True,False,False,Gas
1,3b3509c7f93f94ffce9d6df8baf94febf02aa52b,2019-07-01 18:50:00 UTC,auto,hold,751,749,669,CO,Colorado Springs,40,False,False,False,Gas
2,77d480928324ebfa0f68ecf645ca71573171f7c8,2019-07-11 18:05:00 UTC,cool,hold,808,765,765,CO,DENVER,0,True,False,False,Gas
3,9c62be219186aca5218ab2485c702eb01e25f0ee,2019-07-08 18:05:00 UTC,auto,hold,703,705,655,CO,Brighton,25,False,False,False,Gas
4,32b31051afd7f0c6d2204311e7a187166093d1e6,2019-07-14 19:30:00 UTC,cool,hold,764,735,735,CO,Fort Collins,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1332827,e7666a4a4543089b43613f01488377b6297fd550,2019-07-15 15:10:00 UTC,cool,hold,747,760,760,CO,Aurora,5,False,False,False,Gas
1332828,b1c2b78865e4eab8fc6b944f71211376ecb84751,2019-07-26 13:10:00 UTC,cool,hold,751,760,760,CO,Parker,0,False,False,False,Gas
1332829,cec14530de6b11174f723a9675541ed9d8695b70,2019-07-11 10:40:00 UTC,cool,auto,728,730,760,CO,strasburg,5,False,False,False,Gas
1332830,1e70387aadb141e34fd58c2a953ceab3fba1f6c9,2019-07-01 16:35:00 UTC,cool,hold,734,760,760,CO,Denver,0,False,False,False,Gas


In [115]:
# Tag every row with its year and month (both stored as strings)
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jul_2019_ave = (
    jul_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [118]:
# Write the July 2019 group means; index=True keeps the group keys (the index) in the file
jul_2019_ave.to_csv(path_or_buf="data/day/CO/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the raw 2020 July day CSV for CO
jul_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2020-jul-day-CO.csv",
)

In [120]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jul_2020[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2020 = jul_2020.drop(index=jul_2020.index[is_outlier])

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,af5bc365b128dd28aa1550aa1a6ce48d68c4c462,2020-07-20 13:15:00 UTC,auto,hold,735,734,608,CO,Colorado Springs,38,False,False,False,Gas
1,b2a556e5d07057de55c0a18b5850d95ebd919b58,2020-07-27 15:15:00 UTC,cool,hold,763,765,765,CO,Lakewood,0,True,False,False,Gas
2,2941825965ef2d8014db2c1c3f5e54bdcecb1080,2020-07-07 12:25:00 UTC,cool,hold,770,770,743,CO,Thornton,59,False,False,False,Gas
3,b2a556e5d07057de55c0a18b5850d95ebd919b58,2020-07-22 18:25:00 UTC,cool,hold,753,755,755,CO,Lakewood,0,True,False,False,Gas
4,4f311556926edede8fe1bba64ee4653eda0217fc,2020-07-28 19:30:00 UTC,cool,auto,754,760,744,CO,Lochbuie,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1221180,a66197012f4860cb46b1cbe0dd69a12229f09dfa,2020-07-31 19:20:00 UTC,cool,auto,743,760,760,CO,Fort Collins,20,True,False,False,Gas
1221181,ea9c3f4deaa210b5c30837d152a65329860d1d88,2020-07-28 13:45:00 UTC,cool,hold,739,760,760,CO,Denver,15,False,False,False,Gas
1221182,040251e913e4e6bd909b8dcff677b57c40c90f01,2020-07-20 18:15:00 UTC,cool,hold,760,760,760,CO,Thornton,35,False,False,False,Gas
1221183,0be3e27868b511314735c56c999edfcd045268e7,2020-07-06 16:30:00 UTC,cool,auto,728,760,760,CO,Colorado Springs,15,True,False,False,Gas


In [121]:
# Tag every row with its year and month (both stored as strings)
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jul_2020_ave = (
    jul_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [124]:
# Write the July 2020 group means; index=True keeps the group keys (the index) in the file
jul_2020_ave.to_csv(path_or_buf="data/day/CO/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the raw 2021 July day CSV for CO
jul_2021 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2021-jul-day-CO.csv",
)

In [126]:
# Remove predetermined outliers before aggregating: drop a row when either
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons against NaN are False, so rows with a missing value are kept.
expected = jul_2021[["TemperatureExpectedCool", "TemperatureExpectedHeat"]]
is_outlier = ((expected >= 850) | (expected <= 600)).any(axis=1)
jul_2021 = jul_2021.drop(index=jul_2021.index[is_outlier])

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a77e896073bd09dc735ee035c45e89607e394493,2021-07-04 13:55:00 UTC,cool,hold,780,765,765,CO,Lakewood,40,True,False,False,Gas
1,19630f684326a6ba140dc33d5bfe6963eb6e6373,2021-07-16 14:20:00 UTC,heat,hold,772,650,608,CO,Denver,0,True,False,False,Gas
2,157e37b055787c86ce4bcc2666bd96342075fc9d,2021-07-22 14:20:00 UTC,cool,hold,699,699,699,CO,Commerce City,0,False,False,False,Gas
4,1b766ddf64f4e68b13d10de91543f48d9d5cb09d,2021-07-29 11:55:00 UTC,auto,hold,704,752,702,CO,Denver,5,False,False,False,Gas
5,3cc2c4516ee0e05e958a9ca4bbd511911c0669df,2021-07-02 18:35:00 UTC,auto,hold,661,655,605,CO,Longmont,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
772022,3864696454dc66dc5ffe11e69320caad65228830,2021-07-23 18:55:00 UTC,cool,hold,778,760,760,CO,Denver,90,False,False,False,Gas
772023,69fbcca1ab34f833df73a423cf4f4b2e9d65fe7d,2021-07-12 15:20:00 UTC,cool,hold,754,760,760,CO,Aurora,0,True,False,False,Gas
772024,077dc972deffea9bbda546c96917ad1a5a4d231a,2021-07-29 13:30:00 UTC,cool,hold,747,760,760,CO,Arvada,50,True,False,False,Gas
772025,69fbcca1ab34f833df73a423cf4f4b2e9d65fe7d,2021-07-12 17:35:00 UTC,cool,hold,761,760,760,CO,Aurora,0,True,False,False,Gas


In [127]:
# Tag every row with its year and month (both stored as strings)
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Give the three temperature columns an "_ave" suffix so the aggregated output is labelled
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Mean of the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True is required on pandas >= 2.0, where averaging the remaining text
# columns (e.g. date_time, ProvinceState) raises a TypeError instead of dropping them.
# NOTE(review): groupby skips rows whose City is missing (dropna defaults to True) - confirm this is intended.
jul_2021_ave = (
    jul_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

In [130]:
# Write the July 2021 group means; index=True keeps the group keys (the index) in the file
jul_2021_ave.to_csv(path_or_buf="data/day/CO/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine month CSV Files
1. Read in the per-year average CSV files from this month's folder for the state
2. Export them as a single combined CSV

In [131]:
# List the per-year CSV files in the July folder.
# os.listdir returns names in arbitrary order, so sort them to make the
# row order of the combined file reproducible.
files = sorted(f for f in os.listdir("data/day/CO/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first and concatenate once; growing a DataFrame with concat
# inside the loop re-copies all previously accumulated rows on each pass.
frames = [pd.read_csv("data/day/CO/jul/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
CO_jul = pd.concat(frames) if frames else pd.DataFrame()

CO_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004666c8518763d5ff5a1b6f9be919899724167e,jul,2017,auto,auto,Denver,741.444444,740.000000,690.000000,10.0,False,False,False
1,004666c8518763d5ff5a1b6f9be919899724167e,jul,2017,auto,hold,Denver,761.559585,762.829016,690.000000,10.0,False,False,False
2,00b9d14bcf27ff6d6e0c30c6f60651c6c8c9ae8b,jul,2017,auto,auto,Aurora,701.000000,694.000000,644.000000,17.0,False,False,False
3,011a84668067e31b514a0238be88622798e2d625,jul,2017,auto,auto,Colorado Springs,719.000000,759.000000,678.000000,40.0,False,False,False
4,016982393b0ec18a6b02e713f5605fbf7fcf6f81,jul,2017,auto,auto,Parker,737.669197,746.504338,671.434924,5.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1133,fe9fcaf3856033484966afc17ccbd95961236999,jul,2021,cool,hold,Wheat Ridge,747.053191,743.382979,742.659574,70.0,False,False,False
1134,feaa24f9ac1e6aad67a9b38fa1e2bd33dfd972ef,jul,2021,cool,hold,Aurora,765.206511,772.080366,761.268566,30.0,False,False,False
1135,ff82364006968dacbbc4275cf59704c29696d664,jul,2021,cool,hold,Thornton,750.605096,746.315287,746.315287,0.0,False,False,False
1136,ff8c6d458a900bf33496d221452e1983c646e110,jul,2021,auto,hold,Lafayette,742.570423,739.542254,678.589789,7.0,False,False,False


In [133]:
# Write the combined July frame; index=False leaves the row index out of the file.
out_path = Path("Scraper_Output/State_Month_Day/CO/CO_jul.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
CO_jul.to_csv(out_path, header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Read in month csv for state: the 2017 August "day" extract for CO
aug_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2017-aug-day-CO.csv"
)

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2017.drop(index=aug_2017.index[is_outlier.values], inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,dc988c2fb26d817cb34bc485a8e8d586e60c7e82,2017-08-04 14:50:00 UTC,cool,hold,731,760,760,CO,Aurora,20,False,False,False,Gas
1,e0a35d649a3a0ebc183d44089c4098fe4c5ab4c1,2017-08-02 17:15:00 UTC,cool,hold,774,770,770,CO,Greeley,45,False,False,False,Gas
2,fa1b024e4e341f97ab5bac95d4d8e7dba58a5fc5,2017-08-06 14:30:00 UTC,cool,auto,721,730,760,CO,Arvada,7,True,False,False,Gas
3,90be3476fd7781a7019e7c7613be3ad3687a5b3d,2017-08-09 15:00:00 UTC,cool,hold,691,690,690,CO,Aspen,20,False,False,False,Gas
4,d35fb7d89bd15e31ec7f225f196e3999d5007dbe,2017-08-27 12:00:00 UTC,cool,hold,721,760,760,CO,Aurora,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
515880,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-08-26 19:30:00 UTC,cool,auto,738,780,750,CO,Denver,15,False,False,False,Gas
515881,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-08-27 14:40:00 UTC,cool,hold,689,680,680,CO,Denver,15,False,False,False,Gas
515882,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-08-05 13:45:00 UTC,cool,hold,722,720,720,CO,Denver,15,False,False,False,Gas
515883,fc3d85bdd6583e3530e9e4d8b1e0536ddf13970a,2017-08-01 15:30:00 UTC,cool,hold,740,750,750,CO,Denver,15,False,False,False,Gas


In [136]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2017"), ("Month", "aug")):
    aug_2017[column] = value

In [137]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file
out_path = Path("data/day/CO/aug/aug_2017_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
aug_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 August Day

In [140]:
# Read in month csv for state: the 2018 August "day" extract for CO
aug_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2018-aug-day-CO.csv"
)

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2018.drop(index=aug_2018.index[is_outlier.values], inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e93101ca50143a05bb397e2215ed9283f436ab56,2018-08-30 14:30:00 UTC,auto,hold,709,765,645,CO,Lakewood,50,False,False,False,Gas
5,7f68180362dd886a7f1e14499725f16ef50e1c5f,2018-08-26 17:55:00 UTC,auto,hold,727,730,635,CO,Denver,10,False,False,True,Electric
9,c521ad2a06104de76ae9654403f026541f3066db,2018-08-16 19:35:00 UTC,cool,hold,731,734,734,CO,Denver,40,False,False,False,Gas
10,157e37b055787c86ce4bcc2666bd96342075fc9d,2018-08-30 19:45:00 UTC,cool,hold,694,683,683,CO,Commerce City,0,False,False,False,Gas
11,5cd65b9d76f86f75df28b3ecf0159050e92a4859,2018-08-26 18:40:00 UTC,cool,hold,714,698,698,CO,Highlands Ranch,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
968262,4ac043efbfd9babbb100c17e1cc9f0da4b61844a,2018-08-06 18:15:00 UTC,cool,hold,767,760,760,CO,Colorado Springs,0,False,False,False,Gas
968263,912144461974e1af99d647d206bac2d2d9ff26c4,2018-08-07 16:00:00 UTC,cool,hold,764,760,760,CO,Boulder,0,True,False,True,Electric
968264,4ac043efbfd9babbb100c17e1cc9f0da4b61844a,2018-08-03 17:35:00 UTC,cool,hold,755,760,760,CO,Colorado Springs,0,False,False,False,Gas
968265,4168b0c49a311bdf836e0065d5225030f996516d,2018-08-12 19:45:00 UTC,cool,auto,762,760,760,CO,Castle Pines,10,False,False,False,Gas


In [142]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2018"), ("Month", "aug")):
    aug_2018[column] = value

In [143]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file
out_path = Path("data/day/CO/aug/aug_2018_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
aug_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 August Day

In [146]:
# Read in month csv for state: the 2019 August "day" extract for CO
aug_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2019-aug-day-CO.csv"
)

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2019.drop(index=aug_2019.index[is_outlier.values], inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0fdddd2047914361ddad6f62ef0a9e48bb9bd887,2019-08-14 12:00:00 UTC,auto,hold,713,710,655,CO,Greeley,7,False,False,False,Gas
1,1e70387aadb141e34fd58c2a953ceab3fba1f6c9,2019-08-16 18:20:00 UTC,cool,auto,740,750,732,CO,Denver,0,False,False,False,Gas
3,106dc2a3ff0ec42b0996a35b5bd64daafec1e190,2019-08-28 13:35:00 UTC,cool,hold,784,800,800,CO,Aurora,5,False,False,False,Gas
4,b2a556e5d07057de55c0a18b5850d95ebd919b58,2019-08-28 08:15:00 UTC,cool,hold,732,745,745,CO,Lakewood,0,True,False,False,Gas
5,210c431b4f89441e9928b56cfdaa5ad2cc823f55,2019-08-23 15:50:00 UTC,cool,hold,764,800,764,CO,Englewood,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1276488,76692ab067cf134e5645d6c8d2f4b872f630aa8a,2019-08-25 16:05:00 UTC,cool,hold,758,760,760,CO,Centennial,0,False,False,False,Gas
1276489,a76d773c8758dea845aa39009d20e888642072c8,2019-08-06 19:55:00 UTC,cool,hold,759,760,760,CO,Littleton,40,True,False,False,Gas
1276490,62344653da1fc3b9841e1f3f65ec728aaf9c2da2,2019-08-08 19:40:00 UTC,cool,hold,758,760,760,CO,Denver,69,True,False,False,Gas
1276491,feaa24f9ac1e6aad67a9b38fa1e2bd33dfd972ef,2019-08-08 13:10:00 UTC,cool,auto,750,760,760,CO,Aurora,30,False,False,False,Gas


In [148]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2019"), ("Month", "aug")):
    aug_2019[column] = value

In [149]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file
out_path = Path("data/day/CO/aug/aug_2019_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
aug_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 August Day

In [152]:
# Read in month csv for state: the 2020 August "day" extract for CO
aug_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2020-aug-day-CO.csv"
)

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
aug_2020.drop(index=aug_2020.index[is_outlier.values], inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,fc77ed1ee3eb1255ded0f60582b1c3c0c5c0541b,2020-08-26 17:55:00 UTC,cool,hold,761,752,752,CO,Centennial,35,False,False,False,Gas
2,bab09e86fced8b20e3ec992400d514839e1b3f0f,2020-08-27 18:40:00 UTC,cool,hold,742,740,686,CO,Highlands Ranch,20,False,False,False,Gas
3,3cc2c4516ee0e05e958a9ca4bbd511911c0669df,2020-08-31 18:15:00 UTC,auto,auto,737,735,685,CO,Longmont,5,False,False,False,Gas
4,fac8fc7ca99d1a75a77c72dd01b8ef3a6e048efc,2020-08-20 18:10:00 UTC,cool,hold,765,757,757,CO,Greeley,30,True,False,False,Gas
5,19630f684326a6ba140dc33d5bfe6963eb6e6373,2020-08-29 12:30:00 UTC,heat,hold,783,650,608,CO,Denver,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1229654,2f6f1b4134916ea3b5e9835e50d3a5c6933a795b,2020-08-26 15:05:00 UTC,cool,auto,738,760,760,CO,Highlands Ranch,29,True,False,False,Gas
1229655,cc078fa1d1e7ab364ab5395ddedc9428c8917a70,2020-08-09 16:50:00 UTC,cool,hold,753,760,760,CO,Castle Rock,0,False,False,False,Gas
1229656,878224751577d448f80f8fb4df5a849141562af0,2020-08-23 18:40:00 UTC,cool,hold,748,760,760,CO,Colorado Springs,5,False,False,False,Gas
1229657,4429b9f785aaf62e7847614e0c726b3427ee8f3c,2020-08-01 17:10:00 UTC,cool,hold,712,760,760,CO,Denver,69,False,False,False,Gas


In [154]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2020"), ("Month", "aug")):
    aug_2020[column] = value

In [155]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file
out_path = Path("data/day/CO/aug/aug_2020_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
aug_2020_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from the month's folder
2. Export as combined CSV

In [158]:
# Create variable for the CSV files in the month's directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined frame reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/CO/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then stack them with a single concat
# (growing a frame inside the loop re-copies all earlier rows on each pass).
frames = []
for file in files:
    df = pd.read_csv("data/day/CO/aug/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame as before.
CO_aug = pd.concat(frames) if frames else pd.DataFrame()

CO_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004666c8518763d5ff5a1b6f9be919899724167e,aug,2017,auto,auto,Denver,756.500000,766.666667,683.333333,10.0,False,False,False
1,004666c8518763d5ff5a1b6f9be919899724167e,aug,2017,auto,hold,Denver,772.555556,780.000000,690.000000,10.0,False,False,False
2,0101ed9a523b572e5b256aeeaf9755d4f076d1a0,aug,2017,cool,auto,Golden,707.120482,708.084337,680.120482,20.0,False,False,True
3,0101ed9a523b572e5b256aeeaf9755d4f076d1a0,aug,2017,cool,hold,Golden,731.750000,700.500000,700.000000,20.0,False,False,True
4,016982393b0ec18a6b02e713f5605fbf7fcf6f81,aug,2017,auto,auto,Parker,727.508520,748.542601,668.034978,5.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1912,ff82364006968dacbbc4275cf59704c29696d664,aug,2020,cool,hold,Thornton,761.081967,767.180328,766.344262,0.0,False,False,False
1913,ff8263254ab986ac6a02c06ba1ebba7435d71dc6,aug,2020,cool,hold,Lone Tree,707.984127,733.619048,733.619048,9.0,False,False,False
1914,ff8c6d458a900bf33496d221452e1983c646e110,aug,2020,auto,hold,Lafayette,743.173913,729.956522,670.260870,7.0,False,False,False
1915,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,aug,2020,auto,hold,Commerce City,734.636364,734.890699,641.954283,30.0,False,False,False


In [160]:
# Write the combined August frame; index=False leaves the row index out of the file.
out_path = Path("Scraper_Output/State_Month_Day/CO/CO_aug.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
CO_aug.to_csv(out_path, header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read in month csv for state: the 2017 December "day" extract for CO
dec_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2017-dec-day-CO.csv"
)

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2017.drop(index=dec_2017.index[is_outlier.values], inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4d4a96c3e46f6626b1176435c19a6e581357c88b,2017-12-03 17:00:00 UTC,heat,auto,706,755,705,CO,Centennial,25,False,False,False,Gas
1,80fc7817bffe4f469288e73953891e4e89cf5c06,2017-12-05 14:00:00 UTC,heat,hold,672,675,675,CO,Wellington,0,False,False,False,Gas
3,1fcad194254fc2e9b3bfd8c54857811abe064ea1,2017-12-08 18:50:00 UTC,auto,hold,689,840,670,CO,Highlands Ranch,0,False,False,False,Gas
4,65a1f4be9b61f78f8dab74dbc0d1c923b9ed85bb,2017-12-23 18:05:00 UTC,heat,hold,722,724,724,CO,Denver,60,False,False,False,Gas
5,6f8dcbe032c377d767503b8b8cd4d795045b1f34,2017-12-09 18:25:00 UTC,auto,auto,683,715,665,CO,Thornton,25,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715774,f0e965e5900041c61c189bbad98779b60ade37fc,2017-12-11 13:20:00 UTC,auto,auto,709,765,715,CO,Denver,0,True,False,False,Gas
715775,f0e965e5900041c61c189bbad98779b60ade37fc,2017-12-10 15:50:00 UTC,auto,auto,710,765,715,CO,Denver,0,True,False,False,Gas
715776,f0e965e5900041c61c189bbad98779b60ade37fc,2017-12-10 17:15:00 UTC,auto,auto,712,765,715,CO,Denver,0,True,False,False,Gas
715777,3a2f2695c8e3ec3e4f0b2773a993383f6169acbf,2017-12-25 14:35:00 UTC,auto,hold,715,765,715,CO,Littleton,17,False,False,False,Gas


In [163]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2017"), ("Month", "dec")):
    dec_2017[column] = value

In [164]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file
out_path = Path("data/day/CO/dec/dec_2017_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
dec_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 December Day

In [167]:
# Read in month csv for state: the 2018 December "day" extract for CO
dec_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2018-dec-day-CO.csv"
)

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2018.drop(index=dec_2018.index[is_outlier.values], inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a553699aea7e3635845241c5768ebb986881f3d4,2018-12-30 19:05:00 UTC,heat,auto,616,647,620,CO,Colorado Springs,0,True,False,False,Gas
1,cbc3edc040c6210533e3f65146a3ca091508fdb7,2018-12-08 15:55:00 UTC,auto,hold,765,815,765,CO,Greenwood Village,0,False,False,False,Gas
2,d800593c62513a73bb716b445d175b163dfcd931,2018-12-23 17:50:00 UTC,heat,hold,692,721,700,CO,Denver,40,False,False,False,Gas
3,723cc732feb52cc529366ac94419dd1d392e59fb,2018-12-15 17:35:00 UTC,heat,hold,629,715,637,CO,Highlands Ranch,20,False,False,False,Gas
4,5312105a5a48f2bec7262ce4917023b53cddb872,2018-12-23 17:35:00 UTC,heat,hold,739,738,738,CO,Broomfield,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1112216,1f147db125333c258d255d0fa5b514490a1f99d6,2018-12-15 17:50:00 UTC,auto,hold,705,765,705,CO,Severance,5,False,False,False,Gas
1112217,ac016e4d0796fe55478b81e36dea0cc9e158ed51,2018-12-24 18:05:00 UTC,auto,hold,705,765,705,CO,Colorado Springs,40,False,False,False,Gas
1112218,f1f438a7259c87f9e85b155b9d687d9c7f77f295,2018-12-24 19:05:00 UTC,auto,hold,691,765,685,CO,Fort Collins,0,False,False,False,Gas
1112219,ac016e4d0796fe55478b81e36dea0cc9e158ed51,2018-12-16 16:40:00 UTC,auto,hold,708,765,705,CO,Colorado Springs,40,False,False,False,Gas


In [169]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2018"), ("Month", "dec")):
    dec_2018[column] = value

In [170]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file
out_path = Path("data/day/CO/dec/dec_2018_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
dec_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 December Day

In [173]:
# Read in month csv for state: the 2019 December "day" extract for CO
dec_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2019-dec-day-CO.csv"
)

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2019.drop(index=dec_2019.index[is_outlier.values], inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,45683d4a709f9d7fbf1fc0d976e9a32e7631a19d,2019-12-17 16:50:00 UTC,heat,hold,703,702,702,CO,Arvada,5,False,False,False,Gas
2,db3d86d6f73e42dfaf3fc0a42fc2567c5f50f58b,2019-12-21 17:20:00 UTC,heat,hold,682,681,681,CO,Brighton,10,False,False,False,Gas
3,f99d5effcb9641c6ca74357fa3a9709315d74a06,2019-12-03 15:05:00 UTC,auto,hold,709,755,705,CO,Denver,80,False,False,False,Gas
4,7653e2712209b2b87e509000ff9c11b56cf31835,2019-12-14 18:35:00 UTC,auto,hold,711,765,715,CO,Englewood,0,False,False,False,Gas
5,3b3509c7f93f94ffce9d6df8baf94febf02aa52b,2019-12-02 16:00:00 UTC,auto,hold,737,789,739,CO,Colorado Springs,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1278717,82c292f5cf7ea4a41a11b430981a30d0f1dac635,2019-12-25 16:40:00 UTC,auto,hold,709,760,710,CO,Westminster,10,True,False,False,Gas
1278718,cbc3edc040c6210533e3f65146a3ca091508fdb7,2019-12-30 16:40:00 UTC,auto,hold,684,760,690,CO,Greenwood Village,0,False,False,False,Gas
1278719,097081920ce82fd5609dc038f945317b9eb4bc51,2019-12-18 14:45:00 UTC,auto,auto,701,760,710,CO,Denver,40,False,False,False,Gas
1278720,e1449b21f683b8f65e19436dc7e32f5e26a4dc49,2019-12-14 13:40:00 UTC,heat,hold,760,760,760,CO,Thornton,20,False,False,False,Gas


In [175]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2019"), ("Month", "dec")):
    dec_2019[column] = value

In [176]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file
out_path = Path("data/day/CO/dec/dec_2019_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
dec_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 December Day

In [179]:
# Read in month csv for state: the 2020 December "day" extract for CO
dec_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/CO-day/2020-dec-day-CO.csv"
)

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating.
# A row is dropped when either expected setpoint column
# (TemperatureExpectedCool or TemperatureExpectedHeat) is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)
dec_2020.drop(index=dec_2020.index[is_outlier.values], inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5f596ec61ae4fa2466ad1a79c3f981b326ff038a,2020-12-20 13:30:00 UTC,heat,hold,698,707,707,CO,Greeley,10,False,False,False,Gas
2,57ccdc18cbd039088f441efd6ba31f079a2cfc14,2020-12-30 15:00:00 UTC,auto,hold,658,755,685,CO,Colorado Springs,5,False,False,False,Gas
3,840c712bdcb12fd694fce374748d9d8d839c7e66,2020-12-09 15:55:00 UTC,heat,hold,673,668,668,CO,Parker,0,False,False,False,Gas
4,d0732437ec82ff1dcecbfc18020d9ab2461eabf7,2020-12-17 14:15:00 UTC,heat,auto,673,667,660,CO,Colorado Springs,9,False,False,False,Gas
5,e34536e2be34251085dc6560e5a7a20ae397cf07,2020-12-04 13:50:00 UTC,heat,hold,712,711,711,CO,Aurora,19,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1198067,fdfd59fcbc41a169348bdf9c12d3dea50bbc32ec,2020-12-18 17:20:00 UTC,auto,hold,705,760,710,CO,Arvada,29,True,False,False,Gas
1198068,853fa2567233c0d5623877473d609d60506de2bc,2020-12-16 18:25:00 UTC,auto,hold,709,760,690,CO,Littleton,30,True,False,False,Gas
1198069,f3f04b895d2282110c2b9af0a735c05c1a5873bc,2020-12-30 16:35:00 UTC,auto,hold,741,760,730,CO,Fountain,15,False,False,False,Gas
1198070,5b0d8b7b9151ae504d6ce79bc8e72a81ccb4d69a,2020-12-08 15:05:00 UTC,auto,hold,695,760,700,CO,Littleton,0,False,False,False,Gas


In [181]:
# Add year and month as constant label columns (both stored as strings)
for column, value in (("Year", "2020"), ("Month", "dec")):
    dec_2020[column] = value

In [182]:
# Rename columns to label the aggregates: the three temperature columns
# gain an "_ave" suffix ahead of the groupby/mean in the next cell.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the remaining columns for each combination of the six group keys.
# numeric_only=True is spelled out because the frame still carries text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises
# TypeError when asked to average them, whereas older releases dropped them silently.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file
out_path = Path("data/day/CO/dec/dec_2020_ave.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
# index=True writes the groupby keys (held in the index) as the leading columns.
dec_2020_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year CSV files from the month's folder
2. Export as combined CSV

In [185]:
# Create variable for the CSV files in the month's directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined frame reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/CO/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then stack them with a single concat
# (growing a frame inside the loop re-copies all earlier rows on each pass).
frames = []
for file in files:
    df = pd.read_csv("data/day/CO/dec/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame as before.
CO_dec = pd.concat(frames) if frames else pd.DataFrame()

CO_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004666c8518763d5ff5a1b6f9be919899724167e,dec,2017,heat,auto,Denver,726.600000,736.363452,729.194924,10.0,False,False,False
1,004666c8518763d5ff5a1b6f9be919899724167e,dec,2017,heat,hold,Denver,705.597738,723.570275,706.122779,10.0,False,False,False
2,00b9d14bcf27ff6d6e0c30c6f60651c6c8c9ae8b,dec,2017,auto,auto,Aurora,685.209402,733.333333,683.333333,17.0,False,False,False
3,00b9d14bcf27ff6d6e0c30c6f60651c6c8c9ae8b,dec,2017,auto,hold,Aurora,696.527778,700.000000,650.000000,17.0,False,False,False
4,0101ed9a523b572e5b256aeeaf9755d4f076d1a0,dec,2017,heat,auto,Golden,656.910959,673.123288,671.767123,20.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2130,ff8c6d458a900bf33496d221452e1983c646e110,dec,2020,heat,auto,Lafayette,686.208333,750.000000,689.708333,7.0,False,False,False
2131,ff8c6d458a900bf33496d221452e1983c646e110,dec,2020,heat,hold,Lafayette,688.428571,690.945055,690.417582,7.0,False,False,False
2132,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,dec,2020,heat,hold,Commerce City,722.820062,728.122027,728.122027,30.0,False,False,False
2133,fffbfb34cefe6da01eaf617298aa82ae80b96ed8,dec,2020,heat,auto,Berthoud,685.145833,650.000000,690.000000,25.0,False,False,False


In [187]:
# Write the combined December frame; index=False leaves the row index out of the file.
out_path = Path("Scraper_Output/State_Month_Day/CO/CO_dec.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
CO_dec.to_csv(out_path, header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the per-month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Create variable for the combined per-month CSV files in the state directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# row order of the combined frame reproducible between runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/CO/") if f.endswith(".csv")
)

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files`, then stack them with a single concat
# (growing a frame inside the loop re-copies all earlier rows on each pass).
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/CO/" + file)
    frames.append(df)

# pd.concat rejects an empty list, so fall back to an empty frame as before.
CO_all = pd.concat(frames) if frames else pd.DataFrame()

CO_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,004666c8518763d5ff5a1b6f9be919899724167e,aug,2017,auto,auto,Denver,756.500000,766.666667,683.333333,10.0,False,False,False
1,004666c8518763d5ff5a1b6f9be919899724167e,aug,2017,auto,hold,Denver,772.555556,780.000000,690.000000,10.0,False,False,False
2,0101ed9a523b572e5b256aeeaf9755d4f076d1a0,aug,2017,cool,auto,Golden,707.120482,708.084337,680.120482,20.0,False,False,True
3,0101ed9a523b572e5b256aeeaf9755d4f076d1a0,aug,2017,cool,hold,Golden,731.750000,700.500000,700.000000,20.0,False,False,True
4,016982393b0ec18a6b02e713f5605fbf7fcf6f81,aug,2017,auto,auto,Parker,727.508520,748.542601,668.034978,5.0,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
8873,ffb55cc3e9d7346710fa8ede22ee3c7cde217e3d,jun,2021,cool,hold,Centennial,714.593857,740.000000,743.071672,0.0,False,False,False
8874,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,jun,2021,auto,hold,Commerce City,717.909710,717.913118,648.219761,30.0,False,False,False
8875,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,jun,2021,cool,hold,Commerce City,720.166667,730.000000,730.000000,30.0,False,False,False
8876,ffd9498de3ffb2737be1fc9d662ae9ab116d5f1c,jun,2021,heat,hold,Commerce City,729.673469,735.775510,733.020408,30.0,False,False,False


In [190]:
# Write the all-months frame for the state; index=False leaves the row index out of the file.
out_path = Path("Scraper_Output/State_Month_Day/CO_all_day.csv")
# DataFrame.to_csv does not create missing folders, so make sure the target exists.
out_path.parent.mkdir(parents=True, exist_ok=True)
CO_all.to_csv(out_path, header=True, index=False)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.
# Years checked per month (range end is exclusive): jan/feb/jun/jul cover
# 2017-2021, aug/dec cover 2017-2020.
years_by_month = {
    "jan": range(2017, 2022),
    "feb": range(2017, 2022),
    "jun": range(2017, 2022),
    "jul": range(2017, 2022),
    "aug": range(2017, 2021),
    "dec": range(2017, 2021),
}

for month, years in years_by_month.items():
    for year in years:
        label = f"{month}_{year}"
        # The monthly frames are notebook-level variables named <month>_<year>.
        frame = globals()[label]
        print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['CO']
Unique jan_2018: ['CO']
Unique jan_2019: ['CO']
Unique jan_2020: ['CO']
Unique jan_2021: ['CO']
Unique feb_2017: ['CO']
Unique feb_2018: ['CO']
Unique feb_2019: ['CO']
Unique feb_2020: ['CO']
Unique feb_2021: ['CO']
Unique jun_2017: ['CO']
Unique jun_2018: ['CO']
Unique jun_2019: ['CO']
Unique jun_2020: ['CO']
Unique jun_2021: ['CO']
Unique jul_2017: ['CO']
Unique jul_2018: ['CO']
Unique jul_2019: ['CO']
Unique jul_2020: ['CO']
Unique jul_2021: ['CO']
Unique aug_2017: ['CO']
Unique aug_2018: ['CO']
Unique aug_2019: ['CO']
Unique aug_2020: ['CO']
Unique dec_2017: ['CO']
Unique dec_2018: ['CO']
Unique dec_2019: ['CO']
Unique dec_2020: ['CO']
