# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 Wisconsin day CSV into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2017-jan-day-WI.csv")

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2017.drop(
    index=jan_2017.loc[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,75910578db7bd09526fdb3c6b2faf03d04402f83,2017-01-20 13:40:00 UTC,auto,hold,718,765,715,WI,Columbus,120,False,False,False,Gas
1,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2017-01-15 19:55:00 UTC,heat,auto,628,651,603,WI,Greenville,116,False,False,False,Gas
2,75910578db7bd09526fdb3c6b2faf03d04402f83,2017-01-20 13:30:00 UTC,auto,hold,717,765,715,WI,Columbus,120,False,False,False,Gas
3,7994297220898167d7d4c67f97d259b5cac22ca0,2017-01-15 18:10:00 UTC,heat,hold,722,719,719,WI,Milwaukee,120,False,False,False,Gas
4,7994297220898167d7d4c67f97d259b5cac22ca0,2017-01-15 16:40:00 UTC,heat,hold,722,719,719,WI,Milwaukee,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158761,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-01-21 17:05:00 UTC,heat,hold,674,680,680,WI,Delavan,110,False,False,False,Gas
158762,8de00609008982909f5d83b10c7f2d2793249f05,2017-01-08 17:40:00 UTC,heat,hold,674,680,680,WI,Fredonia,110,False,False,False,Gas
158763,8de00609008982909f5d83b10c7f2d2793249f05,2017-01-08 18:55:00 UTC,heat,hold,679,680,680,WI,Fredonia,110,False,False,False,Gas
158764,8de00609008982909f5d83b10c7f2d2793249f05,2017-01-08 15:50:00 UTC,heat,hold,679,680,680,WI,Fredonia,110,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
jan_2017_ave = (
    jan_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,Jan,2017,heat,hold,Neenah,710.274359,712.307692,712.307692,39.0,True,False,False
025ac253e71fd687111cb40439615119a305d1a4,Jan,2017,auto,auto,Madison,737.660000,790.980000,749.580000,40.0,False,False,False
027d1ae6b66bece659dd61aadfbd8f7cd43c187a,Jan,2017,auto,auto,Madison,718.295918,781.204082,704.510204,55.0,False,False,False
03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,Jan,2017,heat,auto,Janesville,696.651639,700.036885,700.036885,70.0,False,False,False
03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,Jan,2017,heat,hold,Janesville,697.749271,705.798834,700.804665,70.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fdd84348021ffa61435022158e55ed85c7b0479d,Jan,2017,heat,auto,Milwaukee,673.300000,677.075862,674.155172,66.0,False,False,False
fdd84348021ffa61435022158e55ed85c7b0479d,Jan,2017,heat,hold,Milwaukee,678.209513,680.635334,680.635334,66.0,False,False,False
fde3014bf3526a010791bc4aa9451177ce757519,Jan,2017,heat,auto,West Bend,628.923077,820.000000,620.000000,50.0,False,False,False
fe4982376bb431e289339d4f9804c92dc9ea49e7,Jan,2017,heat,hold,Siren,704.127660,681.255319,699.879433,15.0,False,False,False


In [7]:
# Write the January 2017 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
jan_2017_ave.to_csv(
    path_or_buf="data/day/WI/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the January 2018 Wisconsin day CSV into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2018-jan-day-WI.csv")

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2018.drop(
    index=jan_2018.loc[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ac3f5312ef690e2eac486b3b348a8c41cbbad386,2018-01-07 17:20:00 UTC,auto,hold,700,765,715,WI,Milwaukee,118,False,False,False,Gas
1,ac3f5312ef690e2eac486b3b348a8c41cbbad386,2018-01-06 19:40:00 UTC,auto,hold,708,765,715,WI,Milwaukee,118,False,False,False,Gas
2,332002d2df385d19dd242f4372cf65d098cd124a,2018-01-02 17:30:00 UTC,heat,hold,726,728,728,WI,Oconomowoc,56,True,False,False,Gas
3,a33d3f9f86ad1159dea44bab93ca1c371fd917a8,2018-01-13 13:35:00 UTC,heat,auto,668,830,700,WI,Wausau,115,True,False,False,Gas
4,ac3f5312ef690e2eac486b3b348a8c41cbbad386,2018-01-03 12:10:00 UTC,auto,hold,718,765,715,WI,Milwaukee,118,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
500917,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-01-07 16:25:00 UTC,auto,hold,710,760,710,WI,Delavan,110,False,False,False,Gas
500918,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-01-07 15:00:00 UTC,auto,hold,700,760,710,WI,Delavan,110,False,False,False,Gas
500919,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-01-07 15:30:00 UTC,auto,hold,693,760,710,WI,Delavan,110,False,False,False,Gas
500920,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-01-14 16:30:00 UTC,auto,hold,695,760,710,WI,Delavan,110,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
jan_2018_ave = (
    jan_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jan_2018_ave

In [13]:
# Write the January 2018 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
jan_2018_ave.to_csv(
    path_or_buf="data/day/WI/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the January 2019 Wisconsin day CSV into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2019-jan-day-WI.csv")

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2019.drop(
    index=jan_2019.loc[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,8414c7039f40df33552b8e4f61cc457312381267,2019-01-04 13:50:00 UTC,heat,hold,677,683,680,WI,East Troy,39,True,False,False,Gas
2,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2019-01-11 15:10:00 UTC,heat,hold,720,717,717,WI,Greenville,116,False,False,False,Gas
3,8414c7039f40df33552b8e4f61cc457312381267,2019-01-18 13:20:00 UTC,heat,hold,675,683,680,WI,East Troy,39,True,False,False,Gas
4,8414c7039f40df33552b8e4f61cc457312381267,2019-01-12 15:00:00 UTC,heat,hold,674,683,680,WI,East Troy,39,True,False,False,Gas
5,8414c7039f40df33552b8e4f61cc457312381267,2019-01-04 12:55:00 UTC,heat,hold,684,683,680,WI,East Troy,39,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
888575,8e8440101775a9cf9b250b0ce7f54846d67082af,2019-01-26 17:00:00 UTC,auto,auto,586,790,700,WI,Pardeeville,110,False,False,False,Gas
888576,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2019-01-24 19:15:00 UTC,heat,hold,696,700,700,WI,Blanchardville,110,False,False,False,Gas
888577,5ad5fe9be5c421b804af9f8c50896990b610dba8,2019-01-31 13:30:00 UTC,heat,hold,697,720,720,WI,Mayville,110,False,False,False,Gas
888578,5ad5fe9be5c421b804af9f8c50896990b610dba8,2019-01-31 19:30:00 UTC,heat,hold,734,740,740,WI,Mayville,110,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
jan_2019_ave = (
    jan_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jan_2019_ave

In [19]:
# Write the January 2019 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
jan_2019_ave.to_csv(
    path_or_buf="data/day/WI/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the January 2020 Wisconsin day CSV into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2020-jan-day-WI.csv")

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2020.drop(
    index=jan_2020.loc[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,4949e2207e8f59c14d94cb0938fa6195ab0c455a,2020-01-04 13:10:00 UTC,heat,auto,625,675,650,WI,Oak Creek,120,False,False,False,Gas
4,80e20073add65b82889d19386bf33af6b8636b18,2020-01-12 18:30:00 UTC,heat,hold,656,661,661,WI,Waukesha,39,True,False,False,Gas
5,4f2f6ce44d66eb3721db595349d78bb95187b528,2020-01-28 14:05:00 UTC,heat,hold,733,729,729,WI,Port Washington,49,True,False,False,Gas
7,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2020-01-18 18:55:00 UTC,auto,auto,690,753,690,WI,Chippewa Falls,118,False,False,False,Gas
9,c98ea2c5d537be3f67fb50a91277ad07b7e0d1ac,2020-01-23 13:15:00 UTC,heat,hold,687,707,672,WI,Madison,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
930377,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-01-24 18:10:00 UTC,heat,auto,629,700,640,WI,Blanchardville,110,False,False,False,Gas
930378,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-01-31 15:50:00 UTC,heat,auto,659,700,680,WI,Blanchardville,110,False,False,False,Gas
930379,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-01-25 16:50:00 UTC,heat,auto,670,700,680,WI,Blanchardville,110,False,False,False,Gas
930380,538667be6c9f06d4e5f2a0664807ad60fe226533,2020-01-12 18:15:00 UTC,heat,auto,659,740,670,WI,Poynette,110,False,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
jan_2020_ave = (
    jan_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jan_2020_ave

In [25]:
# Write the January 2020 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
jan_2020_ave.to_csv(
    path_or_buf="data/day/WI/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the January 2021 Wisconsin day CSV into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2021-jan-day-WI.csv")

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jan_2021.drop(
    index=jan_2021.loc[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,95e52770f4078992d60cfe0338c6d58b4f59fceb,2021-01-27 15:50:00 UTC,heat,hold,708,755,705,WI,Milwaukee,49,True,False,False,Gas
1,95e52770f4078992d60cfe0338c6d58b4f59fceb,2021-01-26 16:05:00 UTC,heat,hold,709,755,705,WI,Milwaukee,49,True,False,False,Gas
2,8b996973bf900b4d6ce84a72eb075c9fa60d35cf,2021-01-18 18:55:00 UTC,heat,hold,710,712,712,WI,Sun Prairie,46,False,False,False,Gas
3,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2021-01-08 19:25:00 UTC,heat,hold,710,729,729,WI,Greenville,116,False,False,False,Gas
4,95e52770f4078992d60cfe0338c6d58b4f59fceb,2021-01-14 13:45:00 UTC,heat,hold,714,755,705,WI,Milwaukee,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
597757,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-01-01 17:25:00 UTC,heat,hold,653,700,660,WI,Pardeeville,110,False,False,False,Gas
597760,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-01-03 19:35:00 UTC,heat,hold,664,700,650,WI,Pardeeville,110,False,False,False,Gas
597765,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-01-01 14:35:00 UTC,heat,hold,643,700,660,WI,Pardeeville,110,False,False,False,Gas
597770,5ad5fe9be5c421b804af9f8c50896990b610dba8,2021-01-14 19:45:00 UTC,heat,hold,714,720,720,WI,Mayville,110,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
jan_2021_ave = (
    jan_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# jan_2021_ave

In [31]:
# Write the January 2021 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
jan_2021_ave.to_csv(
    path_or_buf="data/day/WI/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the per-year January aggregate CSVs. os.listdir returns entries in
# arbitrary order, so sort them to give the combined frame the same row
# order on every run.
files = sorted(f for f in os.listdir("data/day/WI/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year January CSV and stack them with a single concat call.
# Concatenating onto a growing frame inside a loop re-copies all earlier rows
# on each pass and starts from an empty frame, which newer pandas warns about.
if files:
    WI_jan = pd.concat(
        [pd.read_csv(os.path.join("data/day/WI/jan/", name)) for name in files]
    )
else:
    # No CSVs found: keep the original outcome of an empty frame.
    WI_jan = pd.DataFrame()

WI_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,Jan,2017,heat,hold,Neenah,710.274359,712.307692,712.307692,39.0,True,False,False
1,025ac253e71fd687111cb40439615119a305d1a4,Jan,2017,auto,auto,Madison,737.660000,790.980000,749.580000,40.0,False,False,False
2,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,Jan,2017,auto,auto,Madison,718.295918,781.204082,704.510204,55.0,False,False,False
3,03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,Jan,2017,heat,auto,Janesville,696.651639,700.036885,700.036885,70.0,False,False,False
4,03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,Jan,2017,heat,hold,Janesville,697.749271,705.798834,700.804665,70.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
830,ff888f11d47874cf41fce2311e94a188eb31fb0c,Jan,2021,heat,hold,Appleton,710.000000,720.000000,720.000000,40.0,False,False,False
831,ff912c4c4d0c33c0ef22bbb0f6fd4ef1312c4fbe,Jan,2021,heat,hold,Mount Pleasant,663.228571,668.771429,657.885714,29.0,False,False,False
832,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,Jan,2021,heat,hold,Kenosha,684.342282,675.848993,675.848993,100.0,False,False,False
833,ffb8a83f9b1c575b586c53b5d1512383d9cb63d6,Jan,2021,heat,hold,Milwaukee,668.435897,680.256410,679.923077,90.0,False,False,False


In [34]:
# Export the combined January frame; index=False leaves the row index
# out of the file.
WI_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WI/WI_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 Wisconsin day CSV into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2017-feb-day-WI.csv")

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2017.drop(
    index=feb_2017.loc[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,332002d2df385d19dd242f4372cf65d098cd124a,2017-02-10 18:45:00 UTC,auto,auto,729,775,725,WI,Oconomowoc,56,True,False,False,Gas
1,da452ce6bcf69215d9652ace124d2f80d22f5f04,2017-02-12 19:30:00 UTC,auto,auto,701,755,705,WI,Beaver Dam,16,False,False,False,Gas
2,332002d2df385d19dd242f4372cf65d098cd124a,2017-02-10 19:30:00 UTC,auto,auto,721,775,725,WI,Oconomowoc,56,True,False,False,Gas
3,da452ce6bcf69215d9652ace124d2f80d22f5f04,2017-02-14 14:45:00 UTC,auto,auto,703,755,705,WI,Beaver Dam,16,False,False,False,Gas
4,da452ce6bcf69215d9652ace124d2f80d22f5f04,2017-02-14 18:45:00 UTC,auto,auto,705,755,705,WI,Beaver Dam,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145808,45518103d49dc44b418ddc9a03a514f63398c629,2017-02-15 19:15:00 UTC,heat,auto,680,660,660,WI,Shorewood,110,False,False,False,Gas
145809,8de00609008982909f5d83b10c7f2d2793249f05,2017-02-12 15:40:00 UTC,heat,hold,676,680,680,WI,Fredonia,110,False,False,False,Gas
145810,887d9b204689b445962473bb540ed1735f2e033d,2017-02-05 18:35:00 UTC,heat,hold,665,680,680,WI,Madison,110,False,False,False,Gas
145811,8de00609008982909f5d83b10c7f2d2793249f05,2017-02-12 15:45:00 UTC,heat,hold,675,680,680,WI,Fredonia,110,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
# NOTE(review): the January cells label Month as "Jan" while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
feb_2017_ave = (
    feb_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# feb_2017_ave

In [40]:
# Write the February 2017 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
feb_2017_ave.to_csv(
    path_or_buf="data/day/WI/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the February 2018 Wisconsin day CSV into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2018-feb-day-WI.csv")

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2018.drop(
    index=feb_2018.loc[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d814475f2cc86e747abc03966f166487992ea78f,2018-02-04 19:35:00 UTC,heat,auto,732,744,696,WI,Elm Grove,68,False,False,False,Gas
1,332002d2df385d19dd242f4372cf65d098cd124a,2018-02-05 16:05:00 UTC,heat,hold,737,737,737,WI,Oconomowoc,56,True,False,False,Gas
2,1248b7c4ea80a038327d3bbcedbda6cd4a9240d5,2018-02-11 15:05:00 UTC,auto,auto,686,755,705,WI,Saint Francis,97,True,False,False,Gas
3,1248b7c4ea80a038327d3bbcedbda6cd4a9240d5,2018-02-11 15:15:00 UTC,auto,auto,702,755,705,WI,Saint Francis,97,True,False,False,Gas
4,d814475f2cc86e747abc03966f166487992ea78f,2018-02-26 18:10:00 UTC,heat,auto,686,846,605,WI,Elm Grove,68,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
489392,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-02-16 13:10:00 UTC,auto,hold,705,760,710,WI,Delavan,110,False,False,False,Gas
489393,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-02-16 16:50:00 UTC,auto,hold,704,760,710,WI,Delavan,110,False,False,False,Gas
489394,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-02-16 15:25:00 UTC,auto,hold,700,760,710,WI,Delavan,110,False,False,False,Gas
489395,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2018-02-16 13:30:00 UTC,auto,hold,701,760,710,WI,Delavan,110,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
# NOTE(review): the January cells label Month as "Jan" while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
feb_2018_ave = (
    feb_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# feb_2018_ave

In [46]:
# Write the February 2018 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
feb_2018_ave.to_csv(
    path_or_buf="data/day/WI/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the February 2019 Wisconsin day CSV into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2019-feb-day-WI.csv")

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2019.drop(
    index=feb_2019.loc[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d814475f2cc86e747abc03966f166487992ea78f,2019-02-17 18:35:00 UTC,heat,hold,721,791,709,WI,Elm Grove,68,False,False,False,Gas
1,f5b5e023c99de91a5f722e418aa7761c4f519021,2019-02-02 18:30:00 UTC,heat,auto,713,756,720,WI,Fort Atkinson,68,False,False,False,Gas
2,8414c7039f40df33552b8e4f61cc457312381267,2019-02-25 17:55:00 UTC,heat,hold,671,683,680,WI,East Troy,39,True,False,False,Gas
4,8414c7039f40df33552b8e4f61cc457312381267,2019-02-25 17:55:00 UTC,heat,hold,671,683,680,WI,East Troy,39,True,False,False,Gas
5,8414c7039f40df33552b8e4f61cc457312381267,2019-02-24 17:35:00 UTC,heat,hold,658,683,660,WI,East Troy,39,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594641,8de00609008982909f5d83b10c7f2d2793249f05,2019-02-23 18:50:00 UTC,heat,auto,688,730,690,WI,Fredonia,110,False,False,False,Gas
594642,538667be6c9f06d4e5f2a0664807ad60fe226533,2019-02-17 14:55:00 UTC,heat,auto,671,740,700,WI,Poynette,110,False,False,False,Gas
594643,538667be6c9f06d4e5f2a0664807ad60fe226533,2019-02-13 16:35:00 UTC,heat,auto,683,740,690,WI,Poynette,110,False,False,False,Gas
594644,538667be6c9f06d4e5f2a0664807ad60fe226533,2019-02-13 15:15:00 UTC,heat,auto,665,740,690,WI,Poynette,110,False,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
# NOTE(review): the January cells label Month as "Jan" while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
feb_2019_ave = (
    feb_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# feb_2019_ave

In [52]:
# Write the February 2019 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
feb_2019_ave.to_csv(
    path_or_buf="data/day/WI/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the February 2020 Wisconsin day CSV into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2020-feb-day-WI.csv")

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2020.drop(
    index=feb_2020.loc[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9f9ac43446fbc284ad9d2f98c7b74d6948ff86ed,2020-02-01 15:00:00 UTC,heat,auto,770,650,644,WI,Necedah,18,False,False,False,Gas
2,56c7915153d9790351bdae727176d94db50a815c,2020-02-14 12:35:00 UTC,heat,auto,657,678,678,WI,Eau Claire,75,False,False,False,Gas
5,2638dc58d3eaac3c9442d8d0ec38925e9cfabfb9,2020-02-22 19:55:00 UTC,heat,hold,698,689,689,WI,Rochester,39,True,False,False,Gas
8,659c8c78161eb4920df07c06b7981069c56b96fc,2020-02-07 12:15:00 UTC,heat,hold,676,682,682,WI,Appleton,39,True,False,False,Gas
9,4f623e2f8f740a4f1cbf71e9bf1c68f47b67f1e2,2020-02-25 13:20:00 UTC,auto,hold,695,769,699,WI,Whitefish Bay,69,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
834504,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-02-22 16:20:00 UTC,heat,auto,691,700,700,WI,Blanchardville,110,False,False,False,Gas
834505,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-02-26 15:45:00 UTC,heat,auto,689,700,700,WI,Blanchardville,110,False,False,False,Gas
834506,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-02-16 14:25:00 UTC,heat,auto,635,700,640,WI,Blanchardville,110,False,False,False,Gas
834507,538667be6c9f06d4e5f2a0664807ad60fe226533,2020-02-09 17:55:00 UTC,heat,auto,672,740,690,WI,Poynette,110,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
# NOTE(review): the January cells label Month as "Jan" while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
feb_2020_ave = (
    feb_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# feb_2020_ave

In [58]:
# Write the February 2020 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
feb_2020_ave.to_csv(
    path_or_buf="data/day/WI/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the February 2021 Wisconsin day CSV into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2021-feb-day-WI.csv")

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating: a row is dropped when
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
feb_2021.drop(
    index=feb_2021.loc[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,77c2642e75163231d4776af01438e594d6d2c51a,2021-02-25 13:25:00 UTC,heat,hold,657,665,665,WI,Fox Point,69,True,False,False,Gas
1,1248b7c4ea80a038327d3bbcedbda6cd4a9240d5,2021-02-19 13:55:00 UTC,auto,hold,711,776,648,WI,Saint Francis,97,True,False,False,Gas
2,8b996973bf900b4d6ce84a72eb075c9fa60d35cf,2021-02-13 18:05:00 UTC,heat,hold,708,712,712,WI,Sun Prairie,46,False,False,False,Gas
3,a75a83eb28f8d2fe041ca164406ad5cba709c83b,2021-02-28 12:55:00 UTC,heat,hold,645,786,649,WI,Waukesha,18,False,False,False,Gas
4,c98ea2c5d537be3f67fb50a91277ad07b7e0d1ac,2021-02-13 15:25:00 UTC,heat,hold,654,679,679,WI,Madison,75,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
547636,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-02-23 14:50:00 UTC,auto,hold,679,750,680,WI,Pardeeville,110,False,False,False,Gas
547638,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-02-21 13:00:00 UTC,auto,hold,684,750,700,WI,Pardeeville,110,False,False,False,Gas
547639,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-02-23 16:35:00 UTC,auto,hold,700,750,680,WI,Pardeeville,110,False,False,False,Gas
547643,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-02-23 15:10:00 UTC,auto,hold,674,750,680,WI,Pardeeville,110,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings);
# the later groupby uses them as grouping keys.
# NOTE(review): the January cells label Month as "Jan" while this uses
# lowercase "feb" — confirm which casing downstream consumers expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so the aggregated
# output is labelled as averages.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True keeps the remaining text
# columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) out of the
# mean; pandas >= 2.0 raises TypeError on them instead of dropping them.
feb_2021_ave = (
    feb_2021
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# feb_2021_ave

In [64]:
# Write the February 2021 averages to CSV; index=True keeps the group keys
# (which form the index after the groupby) as columns in the file.
feb_2021_ave.to_csv(
    path_or_buf="data/day/WI/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the per-year February aggregate CSVs. os.listdir returns entries in
# arbitrary order, so sort them to give the combined frame the same row
# order on every run.
files = sorted(f for f in os.listdir("data/day/WI/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year February CSV and stack them with a single concat call.
# Concatenating onto a growing frame inside a loop re-copies all earlier rows
# on each pass and starts from an empty frame, which newer pandas warns about.
if files:
    WI_feb = pd.concat(
        [pd.read_csv(os.path.join("data/day/WI/feb/", name)) for name in files]
    )
else:
    # No CSVs found: keep the original outcome of an empty frame.
    WI_feb = pd.DataFrame()

WI_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,feb,2017,heat,hold,Neenah,717.622222,720.000000,720.000000,39.0,True,False,False
1,025ac253e71fd687111cb40439615119a305d1a4,feb,2017,auto,auto,Madison,737.627451,779.235294,738.352941,40.0,False,False,False
2,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,feb,2017,auto,auto,Madison,723.486111,783.319444,722.902778,55.0,False,False,False
3,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,feb,2017,auto,hold,Madison,737.954545,780.000000,730.000000,55.0,False,False,False
4,03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,feb,2017,heat,auto,Janesville,703.877193,710.000000,710.000000,70.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
808,fdd84348021ffa61435022158e55ed85c7b0479d,feb,2021,heat,hold,Milwaukee,700.627119,703.928814,703.928814,66.0,False,False,False
809,fe4982376bb431e289339d4f9804c92dc9ea49e7,feb,2021,heat,hold,Siren,708.400000,699.925000,699.625000,15.0,False,False,False
810,fe68f76d1a80f5e224b490e319cbb3c9bf0fb02b,feb,2021,auto,hold,Randall,675.833333,744.666667,698.000000,40.0,False,False,False
811,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,feb,2021,heat,hold,Kenosha,691.910112,694.943820,694.943820,100.0,False,False,False


In [67]:
# Write the combined month frame without its (repeating) row index.
# Create the output folder first: to_csv does not create missing directories.
Path("Scraper_Output/State_Month_Day/WI").mkdir(parents=True, exist_ok=True)
WI_feb.to_csv("Scraper_Output/State_Month_Day/WI/WI_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the 2017 June day CSV for WI
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2017-jun-day-WI.csv")

In [69]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2017.drop(
    index=jun_2017.index[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
6,4949e2207e8f59c14d94cb0938fa6195ab0c455a,2017-06-24 13:15:00 UTC,cool,hold,716,734,686,WI,Oak Creek,120,False,False,False,Gas
16,332002d2df385d19dd242f4372cf65d098cd124a,2017-06-06 15:45:00 UTC,cool,hold,685,685,685,WI,Oconomowoc,56,True,False,False,Gas
24,07a306f920e1fb30ca93b20547ed050d8a868cf4,2017-06-27 17:55:00 UTC,auto,hold,703,830,660,WI,Plymouth,37,False,False,False,Gas
32,c0fbf519bebdd780ddbcd7973d940b5be673b712,2017-06-01 16:55:00 UTC,heat,hold,725,697,672,WI,West Bend,16,False,False,False,Gas
35,07a306f920e1fb30ca93b20547ed050d8a868cf4,2017-06-27 17:15:00 UTC,auto,hold,696,830,660,WI,Plymouth,37,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
248055,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-06-17 16:40:00 UTC,auto,hold,740,760,640,WI,Pardeeville,110,False,False,False,Gas
248056,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-06-18 12:55:00 UTC,auto,hold,736,760,640,WI,Pardeeville,110,False,False,False,Gas
248057,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-06-17 14:00:00 UTC,auto,hold,746,760,640,WI,Pardeeville,110,False,False,False,Gas
248058,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-06-18 17:20:00 UTC,auto,hold,746,760,640,WI,Pardeeville,110,False,False,False,Gas


In [70]:
# Tag every row with its source year and month (both stored as strings)
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jun_2017 = jun_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [72]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jun").mkdir(parents=True, exist_ok=True)
jun_2017_ave.to_csv("data/day/WI/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the 2018 June day CSV for WI
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2018-jun-day-WI.csv")

In [75]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2018.drop(
    index=jun_2018.index[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ba3a9456ad58cf063776497c2421ef6214dc7f66,2018-06-02 14:45:00 UTC,cool,hold,670,726,670,WI,Madison,78,False,False,False,Gas
4,afec56d7366589706c281a8f8ec52ce37d5954f9,2018-06-01 13:25:00 UTC,cool,auto,684,722,672,WI,Racine,117,False,False,False,Gas
7,c5b0bb396734a22819b890e4639b445736b68cc0,2018-06-10 19:40:00 UTC,auto,hold,723,765,715,WI,Glenbeulah,108,False,False,False,Gas
13,3be06e7613b017a2c06230028ab0b849ef370fd3,2018-06-13 19:20:00 UTC,cool,hold,757,749,749,WI,Beloit,18,False,False,False,Gas
14,9f9ac43446fbc284ad9d2f98c7b74d6948ff86ed,2018-06-28 18:20:00 UTC,cool,auto,718,716,689,WI,Necedah,18,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
603134,8e8440101775a9cf9b250b0ce7f54846d67082af,2018-06-11 12:30:00 UTC,auto,hold,694,750,610,WI,Pardeeville,110,False,False,False,Gas
603137,8e8440101775a9cf9b250b0ce7f54846d67082af,2018-06-10 15:05:00 UTC,auto,hold,685,750,610,WI,Pardeeville,110,False,False,False,Gas
603141,8e8440101775a9cf9b250b0ce7f54846d67082af,2018-06-10 17:00:00 UTC,auto,hold,686,750,610,WI,Pardeeville,110,False,False,False,Gas
603145,8e8440101775a9cf9b250b0ce7f54846d67082af,2018-06-11 14:15:00 UTC,auto,hold,700,750,610,WI,Pardeeville,110,False,False,False,Gas


In [76]:
# Tag every row with its source year and month (both stored as strings)
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jun_2018 = jun_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [78]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jun").mkdir(parents=True, exist_ok=True)
jun_2018_ave.to_csv("data/day/WI/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the 2019 June day CSV for WI
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2019-jun-day-WI.csv")

In [81]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2019.drop(
    index=jun_2019.index[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,f5b5e023c99de91a5f722e418aa7761c4f519021,2019-06-17 15:30:00 UTC,auto,auto,689,756,690,WI,Fort Atkinson,68,False,False,False,Gas
2,2225debfcbd4942ced607f0b837dfc567e36646e,2019-06-22 14:30:00 UTC,auto,hold,711,755,675,WI,Plymouth,118,False,False,False,Gas
4,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2019-06-09 17:00:00 UTC,auto,hold,722,723,660,WI,Chippewa Falls,118,False,False,False,Gas
5,214c2f6b83631686c5262f694ffd339422d7c7b9,2019-06-29 13:50:00 UTC,auto,hold,677,675,625,WI,Fox Point,69,True,False,False,Gas
6,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2019-06-26 13:50:00 UTC,cool,hold,703,707,707,WI,Greenville,116,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
802253,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2019-06-27 17:50:00 UTC,cool,auto,751,750,750,WI,Blanchardville,110,False,False,False,Gas
802254,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2019-06-30 11:40:00 UTC,cool,auto,752,750,750,WI,Blanchardville,110,False,False,False,Gas
802255,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2019-06-11 13:30:00 UTC,cool,auto,704,760,760,WI,Blanchardville,110,False,False,False,Gas
802256,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2019-06-11 13:50:00 UTC,cool,auto,707,760,760,WI,Blanchardville,110,False,False,False,Gas


In [82]:
# Tag every row with its source year and month (both stored as strings)
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jun_2019 = jun_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [84]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jun").mkdir(parents=True, exist_ok=True)
jun_2019_ave.to_csv("data/day/WI/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the 2020 June day CSV for WI
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2020-jun-day-WI.csv")

In [87]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2020.drop(
    index=jun_2020.index[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2020-06-09 13:15:00 UTC,auto,hold,704,703,653,WI,Chippewa Falls,118,False,False,False,Gas
1,7dcecc3761f448e50c0d941a5fafa8543abb3120,2020-06-17 19:00:00 UTC,cool,hold,727,775,775,WI,Elkhorn,85,False,False,False,Gas
5,7dcecc3761f448e50c0d941a5fafa8543abb3120,2020-06-17 14:20:00 UTC,cool,hold,712,775,775,WI,Elkhorn,85,False,False,False,Gas
7,7dcecc3761f448e50c0d941a5fafa8543abb3120,2020-06-03 17:45:00 UTC,cool,hold,757,745,745,WI,Elkhorn,85,False,False,False,Gas
8,95e52770f4078992d60cfe0338c6d58b4f59fceb,2020-06-10 19:30:00 UTC,auto,auto,688,702,652,WI,Milwaukee,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
855025,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-06-15 18:50:00 UTC,auto,hold,688,750,650,WI,Pardeeville,110,False,False,False,Gas
855026,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-06-27 12:15:00 UTC,auto,hold,723,750,680,WI,Pardeeville,110,False,False,False,Gas
855027,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-06-30 14:10:00 UTC,auto,hold,735,750,680,WI,Pardeeville,110,False,False,False,Gas
855028,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-06-24 17:15:00 UTC,auto,hold,711,750,680,WI,Pardeeville,110,False,False,False,Gas


In [88]:
# Tag every row with its source year and month (both stored as strings)
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jun_2020 = jun_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [90]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jun").mkdir(parents=True, exist_ok=True)
jun_2020_ave.to_csv("data/day/WI/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the 2021 June day CSV for WI
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2021-jun-day-WI.csv")

In [93]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jun_2021.drop(
    index=jun_2021.index[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b6517f307d639e0d73296108bba91c8d51008345,2021-06-10 15:25:00 UTC,auto,hold,707,708,658,WI,Edgar,18,False,False,False,Gas
1,5acc1ae4f351133a5c0bb3bb2bbba38b0094214d,2021-06-18 17:05:00 UTC,auto,hold,723,723,673,WI,Milwaukee,99,True,False,False,Gas
2,5acc1ae4f351133a5c0bb3bb2bbba38b0094214d,2021-06-30 15:40:00 UTC,auto,hold,724,723,673,WI,Milwaukee,99,True,False,False,Gas
3,b6517f307d639e0d73296108bba91c8d51008345,2021-06-04 15:30:00 UTC,auto,hold,711,707,657,WI,Edgar,18,False,False,False,Gas
4,5acc1ae4f351133a5c0bb3bb2bbba38b0094214d,2021-06-13 16:00:00 UTC,auto,hold,720,723,673,WI,Milwaukee,99,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647475,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-06-17 15:40:00 UTC,auto,hold,743,750,660,WI,Pardeeville,110,False,False,False,Gas
647476,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-06-17 13:30:00 UTC,auto,hold,735,750,660,WI,Pardeeville,110,False,False,False,Gas
647477,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-06-09 15:10:00 UTC,auto,hold,750,750,660,WI,Pardeeville,110,False,False,False,Gas
647478,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-06-04 14:25:00 UTC,auto,hold,743,750,660,WI,Pardeeville,110,False,False,False,Gas


In [94]:
# Tag every row with its source year and month (both stored as strings)
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jun_2021 = jun_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [96]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jun").mkdir(parents=True, exist_ok=True)
jun_2021_ave.to_csv("data/day/WI/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read the per-year average CSV files from this month's folder for the state and concatenate them
2. Export the result as one combined CSV

In [98]:
# List the CSV files in the month folder. Sorted because os.listdir returns
# entries in arbitrary order and the combined frame should be reproducible.
files = sorted(f for f in os.listdir("data/day/WI/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file, then concatenate once: this avoids re-copying the accumulated
# frame on each iteration and avoids concatenating onto an empty DataFrame
# (deprecated behaviour in recent pandas).
frames = [pd.read_csv("data/day/WI/jun/" + file) for file in files]

# An empty folder still yields an empty DataFrame, as before
WI_jun = pd.concat(frames) if frames else pd.DataFrame()

WI_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00c3de29f54d18652f1a52917ca911150381b87e,jun,2017,cool,auto,Elroy,695.447552,693.388112,663.825175,37.0,False,False,False
1,00c3de29f54d18652f1a52917ca911150381b87e,jun,2017,cool,hold,Elroy,703.285714,700.000000,700.000000,37.0,False,False,False
2,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,jun,2017,auto,hold,Neenah,736.880952,730.547619,675.000000,39.0,True,False,False
3,025ac253e71fd687111cb40439615119a305d1a4,jun,2017,cool,hold,Madison,796.951807,776.506024,768.590361,40.0,False,False,False
4,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,jun,2017,cool,auto,Madison,779.880000,776.093333,749.746667,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1047,fe68f76d1a80f5e224b490e319cbb3c9bf0fb02b,jun,2021,auto,hold,Randall,681.587923,680.000000,660.000000,40.0,False,False,False
1048,fea426f1bff9c145a33c7c482abad0ec83345310,jun,2021,cool,hold,Verona,742.583333,744.787037,744.787037,10.0,False,False,False
1049,ff888f11d47874cf41fce2311e94a188eb31fb0c,jun,2021,cool,hold,Appleton,743.282051,741.692308,741.692308,40.0,False,False,False
1050,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,jun,2021,cool,hold,Kenosha,750.623100,746.768997,746.747720,100.0,False,False,False


In [100]:
# Write the combined month frame without its (repeating) row index.
# Create the output folder first: to_csv does not create missing directories.
Path("Scraper_Output/State_Month_Day/WI").mkdir(parents=True, exist_ok=True)
WI_jun.to_csv("Scraper_Output/State_Month_Day/WI/WI_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the 2017 July day CSV for WI
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2017-jul-day-WI.csv")

In [102]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2017.drop(
    index=jul_2017.index[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,403a731c7c4ccd121c8c46541d5762c23afc1f59,2017-07-30 17:30:00 UTC,auto,hold,766,765,635,WI,Hudson,37,True,False,False,Gas
9,403a731c7c4ccd121c8c46541d5762c23afc1f59,2017-07-15 19:10:00 UTC,auto,hold,754,745,635,WI,Hudson,37,True,False,False,Gas
13,9206d6665a7283a60ed698c4b0ea7a9f4f3fe317,2017-07-15 19:00:00 UTC,auto,hold,730,725,655,WI,Mukwonago,26,False,False,False,Gas
14,403a731c7c4ccd121c8c46541d5762c23afc1f59,2017-07-23 16:25:00 UTC,auto,hold,747,745,635,WI,Hudson,37,True,False,False,Gas
17,9206d6665a7283a60ed698c4b0ea7a9f4f3fe317,2017-07-22 15:00:00 UTC,auto,hold,724,725,655,WI,Mukwonago,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
310759,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-07-21 14:20:00 UTC,auto,hold,749,760,640,WI,Pardeeville,110,False,False,False,Gas
310760,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-07-08 17:20:00 UTC,auto,auto,728,760,640,WI,Pardeeville,110,False,False,False,Gas
310761,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-07-14 17:40:00 UTC,auto,hold,719,760,640,WI,Pardeeville,110,False,False,False,Gas
310762,8e8440101775a9cf9b250b0ce7f54846d67082af,2017-07-09 16:55:00 UTC,auto,auto,736,760,640,WI,Pardeeville,110,False,False,False,Gas


In [103]:
# Tag every row with its source year and month (both stored as strings)
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jul_2017 = jul_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [105]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jul").mkdir(parents=True, exist_ok=True)
jul_2017_ave.to_csv("data/day/WI/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the 2018 July day CSV for WI
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2018-jul-day-WI.csv")

In [108]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2018.drop(
    index=jul_2018.index[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,56c7915153d9790351bdae727176d94db50a815c,2018-07-30 12:20:00 UTC,cool,hold,704,702,702,WI,Eau Claire,75,False,False,False,Gas
1,af553e83c7bc8b31680ea663adff2eb020228779,2018-07-10 11:30:00 UTC,cool,auto,714,719,691,WI,Madison,75,False,False,False,Gas
2,da452ce6bcf69215d9652ace124d2f80d22f5f04,2018-07-27 13:05:00 UTC,auto,hold,699,695,645,WI,Beaver Dam,16,False,False,False,Gas
3,6a653c4a1ddfecdcb7647576e7a8707102fdb5b9,2018-07-10 19:35:00 UTC,cool,hold,721,727,706,WI,Manitowoc,37,False,False,False,Gas
4,4949e2207e8f59c14d94cb0938fa6195ab0c455a,2018-07-22 19:10:00 UTC,cool,hold,666,657,657,WI,Oak Creek,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
782690,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-07-13 19:45:00 UTC,cool,hold,755,750,750,WI,Blanchardville,110,False,False,False,Gas
782692,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-07-13 19:10:00 UTC,cool,hold,754,750,750,WI,Blanchardville,110,False,False,False,Gas
782693,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-07-01 18:50:00 UTC,cool,hold,751,750,750,WI,Blanchardville,110,False,False,False,Gas
782696,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-07-01 18:55:00 UTC,cool,hold,749,750,750,WI,Blanchardville,110,False,False,False,Gas


In [109]:
# Tag every row with its source year and month (both stored as strings)
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jul_2018 = jul_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [111]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jul").mkdir(parents=True, exist_ok=True)
jul_2018_ave.to_csv("data/day/WI/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the 2019 July day CSV for WI
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2019-jul-day-WI.csv")

In [114]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2019.drop(
    index=jul_2019.index[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2019-07-15 17:20:00 UTC,cool,hold,708,707,707,WI,Greenville,116,False,False,False,Gas
1,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2019-07-20 16:25:00 UTC,cool,hold,756,777,777,WI,Greenville,116,False,False,False,Gas
2,8414c7039f40df33552b8e4f61cc457312381267,2019-07-15 11:40:00 UTC,auto,hold,721,723,633,WI,East Troy,39,True,False,False,Gas
3,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2019-07-15 11:50:00 UTC,auto,hold,737,723,660,WI,Chippewa Falls,118,False,False,False,Gas
4,4949e2207e8f59c14d94cb0938fa6195ab0c455a,2019-07-21 11:50:00 UTC,cool,hold,754,840,790,WI,Oak Creek,120,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1055587,887d9b204689b445962473bb540ed1735f2e033d,2019-07-27 17:30:00 UTC,cool,hold,731,760,760,WI,Madison,110,False,False,False,Gas
1055588,887d9b204689b445962473bb540ed1735f2e033d,2019-07-28 14:15:00 UTC,cool,hold,744,760,760,WI,Madison,110,False,False,False,Gas
1055589,887d9b204689b445962473bb540ed1735f2e033d,2019-07-27 15:10:00 UTC,cool,hold,726,760,760,WI,Madison,110,False,False,False,Gas
1055590,887d9b204689b445962473bb540ed1735f2e033d,2019-07-28 16:15:00 UTC,cool,hold,742,760,760,WI,Madison,110,False,False,False,Gas


In [115]:
# Tag every row with its source year and month (both stored as strings)
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jul_2019 = jul_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [117]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jul").mkdir(parents=True, exist_ok=True)
jul_2019_ave.to_csv("data/day/WI/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the 2020 July day CSV for WI
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2020-jul-day-WI.csv")

In [120]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2020.drop(
    index=jul_2020.index[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ],
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8f28a68cdcc9d63ca0e65d8b3b52e1b927e7d0f2,2020-07-12 18:40:00 UTC,cool,hold,756,743,720,WI,Rio,105,False,False,False,Gas
1,da452ce6bcf69215d9652ace124d2f80d22f5f04,2020-07-22 19:40:00 UTC,auto,hold,744,742,672,WI,Beaver Dam,16,False,False,False,Gas
2,4f2f6ce44d66eb3721db595349d78bb95187b528,2020-07-01 13:00:00 UTC,cool,auto,697,693,693,WI,Port Washington,49,True,False,False,Gas
4,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2020-07-02 16:50:00 UTC,auto,hold,728,723,660,WI,Chippewa Falls,118,False,False,False,Gas
5,95e52770f4078992d60cfe0338c6d58b4f59fceb,2020-07-12 13:55:00 UTC,cool,auto,707,702,652,WI,Milwaukee,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
996369,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-07-26 17:30:00 UTC,auto,hold,754,750,680,WI,Pardeeville,110,False,False,False,Gas
996370,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-07-25 19:45:00 UTC,auto,hold,752,750,680,WI,Pardeeville,110,False,False,False,Gas
996371,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-07-25 18:30:00 UTC,auto,hold,751,750,680,WI,Pardeeville,110,False,False,False,Gas
996372,8e8440101775a9cf9b250b0ce7f54846d67082af,2020-07-21 15:20:00 UTC,auto,hold,747,750,680,WI,Pardeeville,110,False,False,False,Gas


In [121]:
# Tag every row with its source year and month (both stored as strings)
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jul_2020 = jul_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [123]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jul").mkdir(parents=True, exist_ok=True)
jul_2020_ave.to_csv("data/day/WI/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the 2021 July day CSV for WI
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2021-jul-day-WI.csv")

In [126]:
# Remove predetermined outliers before aggregating: drop, in place, every row
# whose TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
jul_2021.drop(
    index=jul_2021.index[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ],
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5acc1ae4f351133a5c0bb3bb2bbba38b0094214d,2021-07-05 14:05:00 UTC,auto,hold,719,723,673,WI,Milwaukee,99,True,False,False,Gas
1,da452ce6bcf69215d9652ace124d2f80d22f5f04,2021-07-20 18:00:00 UTC,auto,hold,701,702,652,WI,Beaver Dam,16,False,False,False,Gas
2,d5143d8120197d68837c583741a0dabb2c5ad5c8,2021-07-02 17:15:00 UTC,cool,hold,721,729,729,WI,Kenosha,49,True,False,False,Gas
3,4f623e2f8f740a4f1cbf71e9bf1c68f47b67f1e2,2021-07-06 17:45:00 UTC,cool,hold,725,722,722,WI,Whitefish Bay,69,True,False,False,Gas
4,da452ce6bcf69215d9652ace124d2f80d22f5f04,2021-07-21 19:35:00 UTC,auto,hold,706,702,652,WI,Beaver Dam,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
578739,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-07-28 14:55:00 UTC,auto,hold,762,760,660,WI,Pardeeville,110,False,False,False,Gas
578740,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-07-23 17:30:00 UTC,auto,hold,750,760,660,WI,Pardeeville,110,False,False,False,Gas
578741,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-07-19 14:45:00 UTC,auto,hold,745,760,660,WI,Pardeeville,110,False,False,False,Gas
578742,8e8440101775a9cf9b250b0ce7f54846d67082af,2021-07-25 15:00:00 UTC,auto,hold,763,760,660,WI,Pardeeville,110,False,False,False,Gas


In [127]:
# Tag every row with its source year and month (both stored as strings)
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame is aggregated
jul_2021 = jul_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [129]:
# Average the numeric columns per Identifier / Month / Year / HvacMode / CalendarEvent / City.
# numeric_only=True keeps the text columns (date_time, ProvinceState, ...) out of the mean;
# pandas >= 2.0 raises TypeError on them instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Export CSV file (index=True writes the group-by keys as leading columns).
# Create the output folder first: to_csv does not create missing directories.
Path("data/day/WI/jul").mkdir(parents=True, exist_ok=True)
jul_2021_ave.to_csv("data/day/WI/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read the per-year average CSV files from this month's folder for the state and concatenate them
2. Export the result as one combined CSV

In [131]:
# List the CSV files in the month folder. Sorted because os.listdir returns
# entries in arbitrary order and the combined frame should be reproducible.
files = sorted(f for f in os.listdir("data/day/WI/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file, then concatenate once: this avoids re-copying the accumulated
# frame on each iteration and avoids concatenating onto an empty DataFrame
# (deprecated behaviour in recent pandas).
frames = [pd.read_csv("data/day/WI/jul/" + file) for file in files]

# An empty folder still yields an empty DataFrame, as before
WI_jul = pd.concat(frames) if frames else pd.DataFrame()

WI_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,jul,2017,cool,hold,Neenah,748.000000,740.000000,730.000000,39.0,True,False,False
1,01c43c92ae2bf2038791831e614db5677d273720,jul,2017,cool,hold,Baldwin,730.687500,739.562500,738.437500,0.0,False,False,False
2,025ac253e71fd687111cb40439615119a305d1a4,jul,2017,cool,hold,Madison,789.700000,772.100000,765.100000,40.0,False,False,False
3,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,jul,2017,cool,auto,Madison,741.081967,741.418033,692.139344,55.0,False,False,False
4,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,jul,2017,cool,hold,Madison,761.673913,760.000000,760.000000,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
871,fe4982376bb431e289339d4f9804c92dc9ea49e7,jul,2021,cool,hold,Siren,767.791980,798.015038,780.583960,15.0,False,False,False
872,fe68f76d1a80f5e224b490e319cbb3c9bf0fb02b,jul,2021,auto,hold,Randall,681.904899,680.003458,660.003458,40.0,False,False,False
873,fea426f1bff9c145a33c7c482abad0ec83345310,jul,2021,cool,hold,Verona,751.013889,750.000000,750.000000,10.0,False,False,False
874,ff888f11d47874cf41fce2311e94a188eb31fb0c,jul,2021,cool,hold,Appleton,743.750000,740.000000,740.000000,40.0,False,False,False


In [133]:
# Save the combined month table; index=False leaves the row index out of the file.
WI_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WI/WI_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the raw 2017 August "day" CSV for WI
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2017-aug-day-WI.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (aug_2017[expected_column] >= 850) | (aug_2017[expected_column] <= 600)
    aug_2017.drop(index=aug_2017[out_of_range].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,403a731c7c4ccd121c8c46541d5762c23afc1f59,2017-08-05 17:30:00 UTC,auto,hold,756,765,635,WI,Hudson,37,True,False,False,Gas
1,4704aa75adca66b356382bf169b191212069f359,2017-08-09 12:15:00 UTC,cool,auto,718,810,680,WI,Milwaukee,115,False,False,False,Gas
2,403a731c7c4ccd121c8c46541d5762c23afc1f59,2017-08-06 12:45:00 UTC,auto,hold,756,765,635,WI,Hudson,37,True,False,False,Gas
6,403a731c7c4ccd121c8c46541d5762c23afc1f59,2017-08-05 16:05:00 UTC,auto,hold,750,765,635,WI,Hudson,37,True,False,False,Gas
8,9d6154f6a92164b3c8dd560268efc5194270e8f2,2017-08-14 12:25:00 UTC,cool,auto,744,791,658,WI,Kenosha,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
260283,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-08-22 11:50:00 UTC,cool,auto,704,730,640,WI,Delavan,110,False,False,False,Gas
260284,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-08-11 15:25:00 UTC,cool,auto,724,730,640,WI,Delavan,110,False,False,False,Gas
260285,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-08-22 12:10:00 UTC,cool,auto,696,730,640,WI,Delavan,110,False,False,False,Gas
260286,8de00609008982909f5d83b10c7f2d2793249f05,2017-08-13 19:55:00 UTC,cool,hold,725,730,670,WI,Fredonia,110,False,False,False,Gas


In [136]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2017"), ("Month", "aug")):
    aug_2017[period_column] = period_label

In [137]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
aug_2017_ave.to_csv(
    path_or_buf="data/day/WI/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the raw 2018 August "day" CSV for WI
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2018-aug-day-WI.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (aug_2018[expected_column] >= 850) | (aug_2018[expected_column] <= 600)
    aug_2018.drop(index=aug_2018[out_of_range].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0460dbf9ccce69c23f2981ee23cb6bc770deda67,2018-08-03 12:25:00 UTC,cool,auto,723,756,664,WI,Sauk City,105,False,False,False,Gas
1,25e3c802e27436281431d850def2fb6f2913bc81,2018-08-07 12:50:00 UTC,cool,hold,744,742,742,WI,Green Bay,16,False,False,False,Gas
2,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2018-08-06 17:15:00 UTC,cool,hold,729,717,717,WI,Greenville,116,False,False,False,Gas
3,70a2e50264bc74c4a36474b6d96c44b06a674676,2018-08-19 19:45:00 UTC,cool,hold,731,735,735,WI,Wauwatosa,75,True,False,False,Gas
4,70a2e50264bc74c4a36474b6d96c44b06a674676,2018-08-21 12:50:00 UTC,cool,hold,737,735,735,WI,Wauwatosa,75,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
793886,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-08-05 15:25:00 UTC,cool,hold,751,750,750,WI,Blanchardville,110,False,False,False,Gas
793887,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-08-04 19:10:00 UTC,cool,hold,761,750,750,WI,Blanchardville,110,False,False,False,Gas
793888,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-08-04 18:30:00 UTC,cool,hold,768,750,750,WI,Blanchardville,110,False,False,False,Gas
793889,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2018-08-04 18:15:00 UTC,cool,hold,771,750,750,WI,Blanchardville,110,False,False,False,Gas


In [142]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2018"), ("Month", "aug")):
    aug_2018[period_column] = period_label

In [143]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
aug_2018_ave.to_csv(
    path_or_buf="data/day/WI/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the raw 2019 August "day" CSV for WI
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2019-aug-day-WI.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (aug_2019[expected_column] >= 850) | (aug_2019[expected_column] <= 600)
    aug_2019.drop(index=aug_2019[out_of_range].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0815f2a59ca659a1bed3f985e34ac03b2c6bca72,2019-08-28 16:50:00 UTC,cool,hold,702,810,790,WI,Mountain,37,False,False,False,Gas
1,0460dbf9ccce69c23f2981ee23cb6bc770deda67,2019-08-11 18:40:00 UTC,cool,auto,740,755,705,WI,Sauk City,105,False,False,False,Gas
2,c98ea2c5d537be3f67fb50a91277ad07b7e0d1ac,2019-08-11 13:20:00 UTC,cool,hold,691,689,689,WI,Madison,75,False,False,False,Gas
3,0815f2a59ca659a1bed3f985e34ac03b2c6bca72,2019-08-21 17:55:00 UTC,cool,hold,744,810,790,WI,Mountain,37,False,False,False,Gas
7,da452ce6bcf69215d9652ace124d2f80d22f5f04,2019-08-24 10:35:00 UTC,auto,hold,687,694,644,WI,Beaver Dam,16,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886207,8e8440101775a9cf9b250b0ce7f54846d67082af,2019-08-06 15:30:00 UTC,auto,hold,734,750,680,WI,Pardeeville,110,False,False,False,Gas
886208,887d9b204689b445962473bb540ed1735f2e033d,2019-08-04 17:30:00 UTC,cool,hold,733,750,750,WI,Madison,110,False,False,False,Gas
886209,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2019-08-03 19:40:00 UTC,cool,auto,754,750,750,WI,Blanchardville,110,False,False,False,Gas
886210,8e8440101775a9cf9b250b0ce7f54846d67082af,2019-08-09 16:30:00 UTC,auto,hold,719,750,680,WI,Pardeeville,110,False,False,False,Gas


In [148]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2019"), ("Month", "aug")):
    aug_2019[period_column] = period_label

In [149]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
aug_2019_ave.to_csv(
    path_or_buf="data/day/WI/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the raw 2020 August "day" CSV for WI
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2020-aug-day-WI.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (aug_2020[expected_column] >= 850) | (aug_2020[expected_column] <= 600)
    aug_2020.drop(index=aug_2020[out_of_range].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2020-08-27 19:40:00 UTC,auto,hold,731,733,660,WI,Chippewa Falls,118,False,False,False,Gas
1,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2020-08-24 15:55:00 UTC,auto,hold,726,723,660,WI,Chippewa Falls,118,False,False,False,Gas
2,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2020-08-10 13:40:00 UTC,cool,hold,738,736,736,WI,Greenville,116,False,False,False,Gas
4,4f2f6ce44d66eb3721db595349d78bb95187b528,2020-08-31 17:50:00 UTC,cool,auto,696,691,691,WI,Port Washington,49,True,False,False,Gas
5,d814475f2cc86e747abc03966f166487992ea78f,2020-08-28 13:30:00 UTC,cool,hold,773,810,790,WI,Elm Grove,68,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
906924,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-08-25 16:50:00 UTC,cool,auto,732,720,720,WI,Blanchardville,110,False,False,False,Gas
906925,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-08-24 16:55:00 UTC,cool,auto,720,720,720,WI,Blanchardville,110,False,False,False,Gas
906926,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-08-23 17:45:00 UTC,cool,auto,724,720,720,WI,Blanchardville,110,False,False,False,Gas
906927,4df6632a0c91430afa0e72ff0fbe3b8d9bb049ea,2020-08-26 15:40:00 UTC,cool,auto,726,720,720,WI,Blanchardville,110,False,False,False,Gas


In [154]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2020"), ("Month", "aug")):
    aug_2020[period_column] = period_label

In [155]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
aug_2020_ave.to_csv(
    path_or_buf="data/day/WI/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the yearly aggregate CSV files from the month folder
2. Export as combined CSV

In [158]:
# List the CSV files in the month folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/WI/aug/") if f.endswith(".csv"))

# files

In [159]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call: growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on each iteration.
frames = [pd.read_csv("data/day/WI/aug/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (what the loop-based version produced) when no CSV files exist.
WI_aug = pd.concat(frames) if frames else pd.DataFrame()

WI_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,aug,2017,auto,hold,Neenah,744.300000,720.700000,670.200000,39.0,True,False,False
1,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,aug,2017,cool,hold,Neenah,715.333333,720.000000,720.000000,39.0,True,False,False
2,03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,aug,2017,cool,hold,Janesville,731.436090,740.000000,740.000000,70.0,False,False,False
3,03745e88650dff906c44cbccdcbb56d420ecc414,aug,2017,cool,hold,Delafield,727.275986,730.072879,730.045002,45.0,False,False,False
4,042ae46ec1ea6310991ced5d7d0616ee6b5d4fad,aug,2017,cool,hold,Janesville,749.353741,777.129252,772.231293,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1437,fe68f76d1a80f5e224b490e319cbb3c9bf0fb02b,aug,2020,auto,hold,Randall,683.300689,680.000000,660.000000,40.0,False,False,False
1438,fed9c607ae0642dd8cb57fba241281b76ff5c2cf,aug,2020,auto,auto,Spring Valley,763.015748,780.000000,700.000000,15.0,False,False,False
1439,ff912c4c4d0c33c0ef22bbb0f6fd4ef1312c4fbe,aug,2020,cool,hold,Mount Pleasant,741.497630,733.872038,719.715640,29.0,False,False,False
1440,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,aug,2020,cool,auto,Kenosha,742.756395,741.025579,741.132765,100.0,False,False,False


In [160]:
# Save the combined month table; index=False leaves the row index out of the file.
WI_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WI/WI_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the raw 2017 December "day" CSV for WI
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2017-dec-day-WI.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (dec_2017[expected_column] >= 850) | (dec_2017[expected_column] <= 600)
    dec_2017.drop(index=dec_2017[out_of_range].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a33d3f9f86ad1159dea44bab93ca1c371fd917a8,2017-12-04 13:25:00 UTC,heat,auto,676,810,680,WI,Wausau,115,True,False,False,Gas
1,a33d3f9f86ad1159dea44bab93ca1c371fd917a8,2017-12-03 17:20:00 UTC,heat,auto,684,810,680,WI,Wausau,115,True,False,False,Gas
2,ac3f5312ef690e2eac486b3b348a8c41cbbad386,2017-12-21 12:15:00 UTC,auto,hold,706,765,715,WI,Milwaukee,118,False,False,False,Gas
3,ac3f5312ef690e2eac486b3b348a8c41cbbad386,2017-12-30 14:15:00 UTC,auto,hold,713,765,715,WI,Milwaukee,118,False,False,False,Gas
4,ac3f5312ef690e2eac486b3b348a8c41cbbad386,2017-12-22 12:10:00 UTC,auto,hold,704,765,715,WI,Milwaukee,118,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
466056,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-12-19 17:00:00 UTC,auto,hold,693,750,700,WI,Delavan,110,False,False,False,Gas
466057,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-12-07 15:05:00 UTC,auto,hold,659,750,700,WI,Delavan,110,False,False,False,Gas
466058,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-12-25 14:45:00 UTC,auto,hold,671,750,680,WI,Delavan,110,False,False,False,Gas
466059,ba121a002a566e4ee11b9a727bb2c7cb42d0fca0,2017-12-29 15:45:00 UTC,auto,hold,681,750,700,WI,Delavan,110,False,False,False,Gas


In [163]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2017"), ("Month", "dec")):
    dec_2017[period_column] = period_label

In [164]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
dec_2017_ave.to_csv(
    path_or_buf="data/day/WI/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the raw 2018 December "day" CSV for WI
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2018-dec-day-WI.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (dec_2018[expected_column] >= 850) | (dec_2018[expected_column] <= 600)
    dec_2018.drop(index=dec_2018[out_of_range].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,f5b5e023c99de91a5f722e418aa7761c4f519021,2018-12-11 14:00:00 UTC,heat,auto,719,756,730,WI,Fort Atkinson,68,False,False,False,Gas
4,8414c7039f40df33552b8e4f61cc457312381267,2018-12-14 14:05:00 UTC,heat,auto,683,683,680,WI,East Troy,39,True,False,False,Gas
6,f5b5e023c99de91a5f722e418aa7761c4f519021,2018-12-31 19:25:00 UTC,heat,hold,675,676,676,WI,Fort Atkinson,68,False,False,False,Gas
7,8414c7039f40df33552b8e4f61cc457312381267,2018-12-19 16:25:00 UTC,heat,hold,673,683,680,WI,East Troy,39,True,False,False,Gas
8,f5b5e023c99de91a5f722e418aa7761c4f519021,2018-12-24 13:30:00 UTC,heat,auto,718,756,720,WI,Fort Atkinson,68,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
835739,a640d0786b08190e76d3a35785285753eb355e1e,2018-12-13 17:40:00 UTC,heat,hold,723,730,730,WI,Fairchild,110,False,False,False,Gas
835740,a640d0786b08190e76d3a35785285753eb355e1e,2018-12-13 13:05:00 UTC,heat,hold,728,730,730,WI,Fairchild,110,False,False,False,Gas
835741,a640d0786b08190e76d3a35785285753eb355e1e,2018-12-14 18:35:00 UTC,heat,hold,728,730,730,WI,Fairchild,110,False,False,False,Gas
835742,a640d0786b08190e76d3a35785285753eb355e1e,2018-12-13 12:35:00 UTC,heat,hold,730,730,730,WI,Fairchild,110,False,False,False,Gas


In [169]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2018"), ("Month", "dec")):
    dec_2018[period_column] = period_label

In [170]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
dec_2018_ave.to_csv(
    path_or_buf="data/day/WI/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the raw 2019 December "day" CSV for WI
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2019-dec-day-WI.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (dec_2019[expected_column] >= 850) | (dec_2019[expected_column] <= 600)
    dec_2019.drop(index=dec_2019[out_of_range].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,95e52770f4078992d60cfe0338c6d58b4f59fceb,2019-12-31 19:45:00 UTC,auto,hold,686,737,687,WI,Milwaukee,49,True,False,False,Gas
1,57e2f779d2f453140d591582577b27a1ab57fb36,2019-12-15 16:50:00 UTC,heat,hold,732,738,738,WI,West Allis,97,False,False,False,Gas
2,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2019-12-07 13:25:00 UTC,auto,hold,690,753,690,WI,Chippewa Falls,118,False,False,False,Gas
3,8cc73e6307b0799468bfdc2a9885a37b3c84a027,2019-12-27 15:50:00 UTC,auto,hold,687,753,680,WI,Chippewa Falls,118,False,False,False,Gas
4,0cf2a5a56c29efbd7b4b297e58576c69a9eeb786,2019-12-25 13:25:00 UTC,heat,auto,642,640,640,WI,Elkhorn,69,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
952542,a640d0786b08190e76d3a35785285753eb355e1e,2019-12-27 13:15:00 UTC,heat,auto,710,720,720,WI,Fairchild,110,False,False,False,Gas
952543,a640d0786b08190e76d3a35785285753eb355e1e,2019-12-14 18:50:00 UTC,heat,auto,712,720,720,WI,Fairchild,110,False,False,False,Gas
952544,a640d0786b08190e76d3a35785285753eb355e1e,2019-12-27 12:50:00 UTC,heat,auto,718,720,720,WI,Fairchild,110,False,False,False,Gas
952545,a640d0786b08190e76d3a35785285753eb355e1e,2019-12-26 16:55:00 UTC,heat,auto,721,720,720,WI,Fairchild,110,False,False,False,Gas


In [175]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2019"), ("Month", "dec")):
    dec_2019[period_column] = period_label

In [176]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
dec_2019_ave.to_csv(
    path_or_buf="data/day/WI/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the raw 2020 December "day" CSV for WI
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/WI-day/2020-dec-day-WI.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is discarded (in place)
# when either expected temperature column (cool or heat) is >= 850 or <= 600.
for expected_column in ('TemperatureExpectedCool', 'TemperatureExpectedHeat'):
    out_of_range = (dec_2020[expected_column] >= 850) | (dec_2020[expected_column] <= 600)
    dec_2020.drop(index=dec_2020[out_of_range].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,14bb247a62f2f8e19e9bc3a8313856d0d3d4505a,2020-12-15 19:00:00 UTC,heat,hold,704,701,701,WI,Greenville,116,False,False,False,Gas
6,4fea448ed33f31ef63a5775faf4a8b73c8dbdf86,2020-12-24 19:00:00 UTC,heat,hold,680,725,725,WI,Sturgeon Bay,98,False,False,False,Gas
7,4fea448ed33f31ef63a5775faf4a8b73c8dbdf86,2020-12-13 14:05:00 UTC,heat,hold,646,650,697,WI,Sturgeon Bay,98,False,False,False,Gas
8,95e52770f4078992d60cfe0338c6d58b4f59fceb,2020-12-08 16:35:00 UTC,heat,auto,722,782,732,WI,Milwaukee,49,True,False,False,Gas
9,4a48653da1ad9a45d99775d1e9a519fa26926b0a,2020-12-28 17:05:00 UTC,heat,hold,658,685,647,WI,West Bend,49,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
848080,5ad5fe9be5c421b804af9f8c50896990b610dba8,2020-12-11 19:35:00 UTC,heat,auto,723,720,720,WI,Mayville,110,False,False,False,Gas
848081,5ad5fe9be5c421b804af9f8c50896990b610dba8,2020-12-11 19:05:00 UTC,heat,auto,716,720,720,WI,Mayville,110,False,False,False,Gas
848082,5ad5fe9be5c421b804af9f8c50896990b610dba8,2020-12-11 19:20:00 UTC,heat,auto,722,720,720,WI,Mayville,110,False,False,False,Gas
848083,5ad5fe9be5c421b804af9f8c50896990b610dba8,2020-12-11 18:40:00 UTC,heat,auto,719,720,720,WI,Mayville,110,False,False,False,Gas


In [181]:
# Stamp every row with its period labels (stored as strings); the later
# groupby cell uses both columns as group keys.
for period_column, period_label in (("Year", "2020"), ("Month", "dec")):
    dec_2020[period_column] = period_label

In [182]:
# Give the three temperature columns an "_ave" suffix so they are labelled as
# averages once the frame is aggregated.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns per thermostat, period, HVAC mode, calendar event and city.
# numeric_only=True is explicit because the frame still holds text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a
# TypeError on them, whereas older releases dropped them silently.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the per-group averages to disk; index=True keeps the group keys
# (the frame's index) as the leading columns of the CSV.
dec_2020_ave.to_csv(
    path_or_buf="data/day/WI/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the yearly aggregate CSV files from the month folder
2. Export as combined CSV

In [185]:
# List the CSV files in the month folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("data/day/WI/dec/") if f.endswith(".csv"))

# files

In [186]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call: growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on each iteration.
frames = [pd.read_csv("data/day/WI/dec/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (what the loop-based version produced) when no CSV files exist.
WI_dec = pd.concat(frames) if frames else pd.DataFrame()

WI_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,00c3de29f54d18652f1a52917ca911150381b87e,dec,2017,heat,auto,Elroy,670.000000,673.175573,672.503817,37.0,False,False,False
1,00c3de29f54d18652f1a52917ca911150381b87e,dec,2017,heat,hold,Elroy,673.756098,676.512195,675.634146,37.0,False,False,False
2,00c6e6440353848d5f98854b8e0470a8762f7b89,dec,2017,heat,auto,Minocqua,710.621600,717.105600,717.076000,40.0,False,False,False
3,01d210acbcd27062b6c0a9696c19a0e9b774a81d,dec,2017,heat,hold,Madison,652.971429,650.000000,650.000000,27.0,False,False,False
4,027d1ae6b66bece659dd61aadfbd8f7cd43c187a,dec,2017,auto,auto,Madison,703.666667,770.000000,690.000000,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1389,ff912c4c4d0c33c0ef22bbb0f6fd4ef1312c4fbe,dec,2020,heat,hold,Mount Pleasant,653.727273,675.090909,657.045455,29.0,False,False,False
1390,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,dec,2020,heat,auto,Kenosha,687.823190,677.997438,688.200512,100.0,False,False,False
1391,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,dec,2020,heat,hold,Kenosha,668.409449,663.228346,661.291339,100.0,False,False,False
1392,ffb8a83f9b1c575b586c53b5d1512383d9cb63d6,dec,2020,heat,hold,Milwaukee,681.576923,689.369231,687.461538,90.0,False,False,False


In [187]:
# Save the combined month table; index=False leaves the row index out of the file.
WI_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WI/WI_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state folder
2. Export as combined CSV

In [188]:
# List the combined month CSV files in the state folder.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined table reproducible across machines and runs.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/WI/") if f.endswith(".csv"))

# files

In [189]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call: growing a DataFrame
# with pd.concat inside the loop re-copies all earlier rows on each iteration.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/WI/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame (what the loop-based version produced) when no CSV files exist.
WI_all = pd.concat(frames) if frames else pd.DataFrame()

WI_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,aug,2017,auto,hold,Neenah,744.300000,720.700000,670.200000,39.0,True,False,False
1,019822d5aa2b17d3e3737a0bed7d2c04ac03b1c0,aug,2017,cool,hold,Neenah,715.333333,720.000000,720.000000,39.0,True,False,False
2,03049cbda96a4f44aa6a7a317fbbbe7f7f8b4192,aug,2017,cool,hold,Janesville,731.436090,740.000000,740.000000,70.0,False,False,False
3,03745e88650dff906c44cbccdcbb56d420ecc414,aug,2017,cool,hold,Delafield,727.275986,730.072879,730.045002,45.0,False,False,False
4,042ae46ec1ea6310991ced5d7d0616ee6b5d4fad,aug,2017,cool,hold,Janesville,749.353741,777.129252,772.231293,30.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6905,fe68f76d1a80f5e224b490e319cbb3c9bf0fb02b,jun,2021,auto,hold,Randall,681.587923,680.000000,660.000000,40.0,False,False,False
6906,fea426f1bff9c145a33c7c482abad0ec83345310,jun,2021,cool,hold,Verona,742.583333,744.787037,744.787037,10.0,False,False,False
6907,ff888f11d47874cf41fce2311e94a188eb31fb0c,jun,2021,cool,hold,Appleton,743.282051,741.692308,741.692308,40.0,False,False,False
6908,ffab5e87800a1a1d1a0f87bf473d27fd44ab0f5e,jun,2021,cool,hold,Kenosha,750.623100,746.768997,746.747720,100.0,False,False,False


In [190]:
# Save the combined state table; index=False leaves the row index out of the file.
WI_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WI_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in every monthly frame.
frames_to_check = (
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
)

for frame_name, frame in frames_to_check:
    print(f"Unique {frame_name}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['WI']
Unique jan_2018: ['WI']
Unique jan_2019: ['WI']
Unique jan_2020: ['WI']
Unique jan_2021: ['WI']
Unique feb_2017: ['WI']
Unique feb_2018: ['WI']
Unique feb_2019: ['WI']
Unique feb_2020: ['WI']
Unique feb_2021: ['WI']
Unique jun_2017: ['WI']
Unique jun_2018: ['WI']
Unique jun_2019: ['WI']
Unique jun_2020: ['WI']
Unique jun_2021: ['WI']
Unique jul_2017: ['WI']
Unique jul_2018: ['WI']
Unique jul_2019: ['WI']
Unique jul_2020: ['WI']
Unique jul_2021: ['WI']
Unique aug_2017: ['WI']
Unique aug_2018: ['WI']
Unique aug_2019: ['WI']
Unique aug_2020: ['WI']
Unique dec_2017: ['WI']
Unique dec_2018: ['WI']
Unique dec_2019: ['WI']
Unique dec_2020: ['WI']
