# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine the yearly files (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Read the 2017 January MN "day" CSV into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2017-jan-day-MN.csv")

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = jan_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
jan_2017.drop(index=jan_2017.index[outlier_mask], inplace=True)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6cb6f8cc3b20aaf02c3c9f4c0eb4341991952faf,2017-01-17 14:45:00 UTC,heat,hold,658,664,664,MN,Robbinsdale,76,False,False,False,Gas
1,45b0869f90f4e72f3584a2bb549c9bdd3fe6f794,2017-01-15 19:20:00 UTC,heat,auto,640,640,640,MN,St. Louis Park,76,False,False,False,Gas
2,8195e3c11e9bc633757add05cb49b3261e416978,2017-01-31 12:45:00 UTC,auto,auto,704,755,705,MN,Shorewood,26,False,False,False,Gas
3,8195e3c11e9bc633757add05cb49b3261e416978,2017-01-30 15:00:00 UTC,auto,auto,704,755,705,MN,Shorewood,26,False,False,False,Gas
4,8195e3c11e9bc633757add05cb49b3261e416978,2017-01-31 18:50:00 UTC,auto,auto,703,755,705,MN,Shorewood,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
219284,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-01-09 15:20:00 UTC,heat,hold,695,700,700,MN,Hastings,120,False,False,False,Gas
219285,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-01-09 19:30:00 UTC,heat,hold,696,700,700,MN,Hastings,120,False,False,False,Gas
219286,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-01-09 17:45:00 UTC,heat,hold,694,700,700,MN,Hastings,120,False,False,False,Gas
219287,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-01-14 14:25:00 UTC,heat,hold,692,700,700,MN,Hastings,120,False,False,False,Gas


In [4]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01cbc83700158311cb3594824a8c3db5ee300914,Jan,2017,heat,auto,Cottage Grove,682.375000,728.250000,722.625000,40.0,False,False,False
01cbc83700158311cb3594824a8c3db5ee300914,Jan,2017,heat,hold,Cottage Grove,671.556420,676.085603,675.042802,40.0,False,False,False
0391f60712af17c3706935f5ca5c601dd4870506,Jan,2017,heat,auto,Plymouth,720.062500,722.375000,722.281250,35.0,False,False,False
03f6e79d6093959a1f0220772d5170d643304f46,Jan,2017,heat,hold,Inver Grove Heights,692.920000,707.320000,705.906667,25.0,False,False,False
04058bb8f16852f34d0b2360ae68660683e9d705,Jan,2017,auto,auto,Minnetonka,681.958333,751.666667,686.222222,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
fcb38d5deb84004fc841a63127632ca96a7f64d0,Jan,2017,heat,hold,South St Paul,693.791667,702.083333,697.958333,60.0,False,False,False
fcf8370d3ec2547e4db00b999bc63cdffa33ad00,Jan,2017,heat,hold,Maple Grove,680.189655,683.284483,683.176724,5.0,False,False,False
fd9bf04199d9fd8d3d74084fb2eb9e41f35f213f,Jan,2017,heat,auto,Woodbury,676.638158,687.697368,680.013158,15.0,False,False,False
ff72a50c09967eb4ccf4d5cdb2d9421bc8202aa7,Jan,2017,heat,auto,Saint Paul,713.528846,716.286243,716.262574,70.0,False,False,False


In [7]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
jan_2017_ave.to_csv(
    path_or_buf="data/day/MN/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Read the 2018 January MN "day" CSV into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2018-jan-day-MN.csv")

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = jan_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
jan_2018.drop(index=jan_2018.index[outlier_mask], inplace=True)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2d83a1ace42dcd177a1ac9d6fd8ec2b01ee04bd8,2018-01-21 14:50:00 UTC,heat,auto,695,771,699,MN,Minneapolis,99,True,False,False,Gas
1,28292f2f2834c99d4934720183afebd3f2b872c5,2018-01-08 12:55:00 UTC,heat,auto,685,729,675,MN,Chisago City,27,False,False,False,Gas
2,9a116f845822dbefdbdbb146ecf2e912ae8663f4,2018-01-31 12:45:00 UTC,auto,auto,635,685,635,MN,Minneapolis,110,False,False,False,Gas
3,9a116f845822dbefdbdbb146ecf2e912ae8663f4,2018-01-13 13:25:00 UTC,auto,auto,571,765,715,MN,Minneapolis,110,False,False,False,Gas
4,ee95ec7c5fdb5e3dc4e32a47c3a562b7f057fe26,2018-01-30 12:35:00 UTC,heat,auto,707,713,713,MN,Plymouth,7,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
604327,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-01-20 15:40:00 UTC,heat,hold,749,750,750,MN,St Paul,120,False,False,False,Gas
604328,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-01-12 18:15:00 UTC,heat,hold,753,750,750,MN,St Paul,120,False,False,False,Gas
604329,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-01-20 16:45:00 UTC,heat,hold,749,750,750,MN,St Paul,120,False,False,False,Gas
604330,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-01-12 16:00:00 UTC,heat,hold,747,750,750,MN,St Paul,120,False,False,False,Gas


In [10]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
jan_2018_ave.to_csv(
    path_or_buf="data/day/MN/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Read the 2019 January MN "day" CSV into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2019-jan-day-MN.csv")

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = jan_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
jan_2019.drop(index=jan_2019.index[outlier_mask], inplace=True)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6cb6f8cc3b20aaf02c3c9f4c0eb4341991952faf,2019-01-19 18:15:00 UTC,heat,hold,705,712,712,MN,Robbinsdale,76,False,False,False,Gas
1,be75e2dfe9f47bbe7bebfaf35d69bef780e8e771,2019-01-28 14:20:00 UTC,heat,hold,680,676,676,MN,Richfield,69,True,False,False,Gas
2,6cb6f8cc3b20aaf02c3c9f4c0eb4341991952faf,2019-01-12 15:55:00 UTC,heat,hold,663,662,662,MN,Robbinsdale,76,False,False,False,Gas
3,be75e2dfe9f47bbe7bebfaf35d69bef780e8e771,2019-01-28 18:15:00 UTC,heat,hold,670,676,676,MN,Richfield,69,True,False,False,Gas
4,6cb6f8cc3b20aaf02c3c9f4c0eb4341991952faf,2019-01-19 19:20:00 UTC,heat,hold,710,712,712,MN,Robbinsdale,76,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
941580,34d7823bddf5613a67e4627f74910aaa31120b10,2019-01-04 16:35:00 UTC,heat,auto,689,760,690,MN,Saint Paul,120,False,False,False,Gas
941581,34d7823bddf5613a67e4627f74910aaa31120b10,2019-01-04 15:40:00 UTC,heat,auto,681,760,690,MN,Saint Paul,120,False,False,False,Gas
941582,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2019-01-17 11:55:00 UTC,heat,auto,707,760,750,MN,Minneapolis,120,False,False,False,Gas
941583,34d7823bddf5613a67e4627f74910aaa31120b10,2019-01-04 17:20:00 UTC,heat,auto,688,760,690,MN,Saint Paul,120,False,False,False,Gas


In [16]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
jan_2019_ave.to_csv(
    path_or_buf="data/day/MN/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Read the 2020 January MN "day" CSV into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2020-jan-day-MN.csv")

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = jan_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
jan_2020.drop(index=jan_2020.index[outlier_mask], inplace=True)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d4956b154169cbb3b3378a4ca8827aefcdfed777,2020-01-04 16:20:00 UTC,heat,hold,652,655,655,MN,Saint Paul,57,False,False,False,Gas
1,c3b4e82abceae3691bed3d2caa0bf2d667916ef3,2020-01-23 19:50:00 UTC,heat,hold,708,709,709,MN,Champlin,69,False,False,False,Gas
2,d4956b154169cbb3b3378a4ca8827aefcdfed777,2020-01-25 14:15:00 UTC,heat,hold,678,685,685,MN,Saint Paul,57,False,False,False,Gas
3,32f88624c5e27218aba4d25cb09ea77ed576529a,2020-01-20 11:40:00 UTC,heat,hold,676,685,685,MN,Minneapolis,86,False,False,False,Gas
4,d4956b154169cbb3b3378a4ca8827aefcdfed777,2020-01-27 11:40:00 UTC,heat,hold,683,685,685,MN,Saint Paul,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1061786,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2020-01-06 12:55:00 UTC,heat,auto,716,760,720,MN,Minneapolis,120,False,False,False,Gas
1061787,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2020-01-29 13:30:00 UTC,heat,auto,697,760,720,MN,Minneapolis,120,False,False,False,Gas
1061788,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2020-01-01 13:20:00 UTC,heat,auto,696,760,700,MN,Minneapolis,120,False,False,False,Gas
1061789,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2020-01-18 15:05:00 UTC,heat,auto,712,760,720,MN,Minneapolis,120,False,False,False,Gas


In [22]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
jan_2020_ave.to_csv(
    path_or_buf="data/day/MN/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Read the 2021 January MN "day" CSV into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2021-jan-day-MN.csv")

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = jan_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
jan_2021.drop(index=jan_2021.index[outlier_mask], inplace=True)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2021-01-31 16:25:00 UTC,heat,hold,691,692,692,MN,Arden Hills,46,False,False,False,Gas
1,7ac125e391a1fe2bbef55b70b25c652ac3e60196,2021-01-23 15:40:00 UTC,heat,hold,698,707,707,MN,Duluth,99,True,False,False,Gas
2,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2021-01-11 19:45:00 UTC,heat,hold,696,702,702,MN,Arden Hills,46,False,False,False,Gas
3,c3b4e82abceae3691bed3d2caa0bf2d667916ef3,2021-01-26 18:05:00 UTC,heat,hold,687,689,689,MN,Champlin,69,False,False,False,Gas
4,45b0869f90f4e72f3584a2bb549c9bdd3fe6f794,2021-01-18 17:40:00 UTC,heat,hold,675,685,685,MN,St. Louis Park,76,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
664042,76b9a00be648b09df094868f1db636902c346ebd,2021-01-24 14:35:00 UTC,heat,hold,732,740,740,MN,Minneapolis,120,False,False,False,Gas
664043,76b9a00be648b09df094868f1db636902c346ebd,2021-01-24 07:40:00 UTC,heat,hold,737,740,740,MN,Minneapolis,120,False,False,False,Gas
664044,76b9a00be648b09df094868f1db636902c346ebd,2021-01-25 18:50:00 UTC,heat,hold,737,740,740,MN,Minneapolis,120,False,False,False,Gas
664045,76b9a00be648b09df094868f1db636902c346ebd,2021-01-24 10:05:00 UTC,heat,hold,736,740,740,MN,Minneapolis,120,False,False,False,Gas


In [28]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
jan_2021_ave.to_csv(
    path_or_buf="data/day/MN/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the per-year January CSV exports found in the output folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MN/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once. Growing a frame with
# pd.concat inside the loop starts from an empty DataFrame (concatenating
# empty frames is deprecated in newer pandas) and re-copies all rows each pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/MN/jan/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list; fall back to an empty frame so
# the cell still yields a DataFrame when no CSV files were found.
MN_jan = pd.concat(frames) if frames else pd.DataFrame()

MN_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01cbc83700158311cb3594824a8c3db5ee300914,Jan,2017,heat,auto,Cottage Grove,682.375000,728.250000,722.625000,40.0,False,False,False
1,01cbc83700158311cb3594824a8c3db5ee300914,Jan,2017,heat,hold,Cottage Grove,671.556420,676.085603,675.042802,40.0,False,False,False
2,0391f60712af17c3706935f5ca5c601dd4870506,Jan,2017,heat,auto,Plymouth,720.062500,722.375000,722.281250,35.0,False,False,False
3,03f6e79d6093959a1f0220772d5170d643304f46,Jan,2017,heat,hold,Inver Grove Heights,692.920000,707.320000,705.906667,25.0,False,False,False
4,04058bb8f16852f34d0b2360ae68660683e9d705,Jan,2017,auto,auto,Minnetonka,681.958333,751.666667,686.222222,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
866,fd3a0d3ba745db19b544336963f31da8970aa40e,Jan,2021,heat,hold,Minneapolis,692.400000,696.000000,696.000000,120.0,False,False,False
867,fd8e69f04e13bc6274e4c1b8165f56811986bba9,Jan,2021,heat,hold,Lakeville,687.595238,690.503663,690.503663,10.0,True,False,False
868,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,Jan,2021,heat,hold,Minneapolis,657.047368,673.189474,673.189474,0.0,False,False,False
869,ff1241a71824ad577de19d1aa0d46b50646e3b60,Jan,2021,auto,hold,Minneapolis,672.408284,739.322485,675.019231,0.0,False,False,False


In [34]:
# Write the combined January file. index=False leaves out the row labels,
# which restart for every source file after concatenation.
MN_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MN/MN_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Read the 2017 February MN "day" CSV into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2017-feb-day-MN.csv")

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = feb_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
feb_2017.drop(index=feb_2017.index[outlier_mask], inplace=True)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cdb5b8f006364af38c53be363e454900c2e1f046,2017-02-24 13:50:00 UTC,heat,hold,673,675,675,MN,St. Cloud,36,False,False,False,Gas
1,8195e3c11e9bc633757add05cb49b3261e416978,2017-02-02 14:10:00 UTC,auto,auto,699,755,705,MN,Shorewood,26,False,False,False,Gas
2,8195e3c11e9bc633757add05cb49b3261e416978,2017-02-03 19:55:00 UTC,auto,auto,701,755,705,MN,Shorewood,26,False,False,False,Gas
3,cdb5b8f006364af38c53be363e454900c2e1f046,2017-02-24 13:00:00 UTC,heat,hold,665,675,675,MN,St. Cloud,36,False,False,False,Gas
4,8195e3c11e9bc633757add05cb49b3261e416978,2017-02-03 17:10:00 UTC,auto,auto,701,755,705,MN,Shorewood,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192897,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-02-04 13:40:00 UTC,heat,hold,689,690,690,MN,Hastings,120,False,False,False,Gas
192898,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-02-02 13:40:00 UTC,heat,hold,690,690,690,MN,Hastings,120,False,False,False,Gas
192899,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2017-02-18 15:00:00 UTC,heat,auto,698,740,700,MN,Minneapolis,120,False,False,False,Gas
192900,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2017-02-26 15:15:00 UTC,heat,hold,691,740,700,MN,Minneapolis,120,False,False,False,Gas


In [37]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
# NOTE(review): the January cells label Month as "Jan" (capitalized) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
feb_2017_ave.to_csv(
    path_or_buf="data/day/MN/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Read the 2018 February MN "day" CSV into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2018-feb-day-MN.csv")

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = feb_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
feb_2018.drop(index=feb_2018.index[outlier_mask], inplace=True)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,872c4ddc97b2e33017dc4979288bcfa3cbbd07e4,2018-02-15 15:20:00 UTC,heat,hold,705,702,702,MN,Maple Grove,46,False,False,False,Gas
2,a757b3557d27ef19db100240f6a3e1a08f00c05e,2018-02-17 16:25:00 UTC,heat,hold,694,688,688,MN,Maple Grove,8,True,False,False,Gas
5,872c4ddc97b2e33017dc4979288bcfa3cbbd07e4,2018-02-01 15:20:00 UTC,heat,hold,696,704,704,MN,Maple Grove,46,False,False,False,Gas
9,3ac5410d4f644bb56736cdc1d80cbdac4b8ffba5,2018-02-27 14:15:00 UTC,heat,hold,695,738,689,MN,Minneapolis,97,False,False,False,Gas
14,ad0ab9c3194ffab2885c105afb5622b7e9d9fd14,2018-02-17 17:15:00 UTC,heat,hold,611,708,641,MN,Duluth,97,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
571195,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2018-02-03 16:20:00 UTC,heat,auto,680,760,690,MN,Minneapolis,120,False,False,False,Gas
571196,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-02-06 15:00:00 UTC,heat,hold,759,760,760,MN,St Paul,120,False,False,False,Gas
571197,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-02-06 15:15:00 UTC,heat,hold,750,760,760,MN,St Paul,120,False,False,False,Gas
571198,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2018-02-03 16:05:00 UTC,heat,auto,690,760,690,MN,Minneapolis,120,False,False,False,Gas


In [43]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
# NOTE(review): the January cells label Month as "Jan" (capitalized) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
feb_2018_ave.to_csv(
    path_or_buf="data/day/MN/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Read the 2019 February MN "day" CSV into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2019-feb-day-MN.csv")

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = feb_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
feb_2019.drop(index=feb_2019.index[outlier_mask], inplace=True)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d4956b154169cbb3b3378a4ca8827aefcdfed777,2019-02-16 18:00:00 UTC,heat,hold,682,685,685,MN,Saint Paul,57,False,False,False,Gas
1,e65417109e371292e4d043abebc73c09f4e609e0,2019-02-19 18:00:00 UTC,heat,auto,710,737,710,MN,Glenwood,7,False,False,False,Gas
2,ae0b372e89a4f61d5bed1db1a38147b8aabb8fe4,2019-02-16 18:45:00 UTC,heat,auto,611,830,610,MN,Minneapolis,117,True,False,False,Gas
3,1e38a1c91900e05849ee21fe9cad3b2db2211e56,2019-02-20 12:55:00 UTC,heat,hold,630,640,630,MN,Plymouth,26,False,False,False,Gas
4,be75e2dfe9f47bbe7bebfaf35d69bef780e8e771,2019-02-04 16:05:00 UTC,heat,hold,674,676,676,MN,Richfield,69,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
647998,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2019-02-04 13:00:00 UTC,heat,auto,711,760,710,MN,Minneapolis,120,False,False,False,Gas
647999,ea7a0f4bf3439fc7b71de2a6cd22e027f4722328,2019-02-16 15:35:00 UTC,heat,auto,664,760,680,MN,Minneapolis,120,False,False,False,Gas
648000,34d7823bddf5613a67e4627f74910aaa31120b10,2019-02-20 19:05:00 UTC,heat,auto,685,760,690,MN,Saint Paul,120,False,False,False,Gas
648001,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2019-02-04 13:45:00 UTC,heat,auto,706,760,710,MN,Minneapolis,120,False,False,False,Gas


In [49]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
# NOTE(review): the January cells label Month as "Jan" (capitalized) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
feb_2019_ave.to_csv(
    path_or_buf="data/day/MN/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Read the 2020 February MN "day" CSV into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2020-feb-day-MN.csv")

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = feb_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
feb_2020.drop(index=feb_2020.index[outlier_mask], inplace=True)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d4956b154169cbb3b3378a4ca8827aefcdfed777,2020-02-24 12:50:00 UTC,heat,hold,678,685,685,MN,Saint Paul,57,False,False,False,Gas
1,32f88624c5e27218aba4d25cb09ea77ed576529a,2020-02-16 14:05:00 UTC,heat,hold,698,707,707,MN,Minneapolis,86,False,False,False,Gas
2,3b8cf6054707a947c74a64834cfb1f130f6fc733,2020-02-05 15:15:00 UTC,heat,auto,672,651,650,MN,Hutchinson,69,False,False,False,Gas
3,d4956b154169cbb3b3378a4ca8827aefcdfed777,2020-02-18 13:30:00 UTC,heat,hold,684,685,685,MN,Saint Paul,57,False,False,False,Gas
4,3b8cf6054707a947c74a64834cfb1f130f6fc733,2020-02-05 16:15:00 UTC,heat,auto,661,651,650,MN,Hutchinson,69,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
935355,ab2d15482a16d5a86384137c2b9b5b7fc4ab480d,2020-02-22 16:55:00 UTC,heat,auto,698,760,740,MN,Minneapolis,120,False,False,False,Gas
935356,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2020-02-05 14:25:00 UTC,heat,auto,718,760,720,MN,Minneapolis,120,False,False,False,Gas
935357,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2020-02-27 13:15:00 UTC,heat,auto,683,760,710,MN,Minneapolis,120,False,False,False,Gas
935358,ab2d15482a16d5a86384137c2b9b5b7fc4ab480d,2020-02-24 15:10:00 UTC,heat,auto,732,760,750,MN,Minneapolis,120,False,False,False,Gas


In [55]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
# NOTE(review): the January cells label Month as "Jan" (capitalized) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
feb_2020_ave.to_csv(
    path_or_buf="data/day/MN/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Read the 2021 February MN "day" CSV into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2021-feb-day-MN.csv")

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER setpoint column (TemperatureExpectedCool or
# TemperatureExpectedHeat) is >= 850 or <= 600. Missing values compare as
# False, so rows with a missing setpoint are kept.
setpoints = feb_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_mask = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# Drop in place by index label so the frame keeps its original row labels.
feb_2021.drop(index=feb_2021.index[outlier_mask], inplace=True)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ed4ca22f15f009cba2451150c82fbdc1980511b,2021-02-18 19:10:00 UTC,heat,hold,697,706,706,MN,Stillwater,115,False,False,False,Gas
1,7ac125e391a1fe2bbef55b70b25c652ac3e60196,2021-02-16 16:45:00 UTC,heat,hold,695,707,707,MN,Duluth,99,True,False,False,Gas
2,45b0869f90f4e72f3584a2bb549c9bdd3fe6f794,2021-02-08 19:20:00 UTC,heat,hold,663,665,665,MN,St. Louis Park,76,False,False,False,Gas
3,da2c61dbb7eb6cf11955c79efd03d901f39954e3,2021-02-11 12:20:00 UTC,heat,hold,654,655,655,MN,Apple Valley,27,False,False,False,Gas
4,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2021-02-12 14:55:00 UTC,heat,hold,716,722,722,MN,Arden Hills,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
608618,3cf6a477c363180362031171fec1235767275824,2021-02-17 18:40:00 UTC,auto,hold,686,760,690,MN,St Paul,120,False,False,False,Gas
608619,3cf6a477c363180362031171fec1235767275824,2021-02-04 13:20:00 UTC,auto,hold,664,760,670,MN,St Paul,120,False,False,False,Gas
608620,3cf6a477c363180362031171fec1235767275824,2021-02-17 19:10:00 UTC,auto,hold,688,760,690,MN,St Paul,120,False,False,False,Gas
608621,3cf6a477c363180362031171fec1235767275824,2021-02-18 17:15:00 UTC,auto,hold,681,760,710,MN,St Paul,120,False,False,False,Gas


In [61]:
# Add year and month
# Both labels are stored as strings; they are used as grouping keys when the
# frame is aggregated.
# NOTE(review): the January cells label Month as "Jan" (capitalized) while
# February uses lowercase "feb" — confirm which casing downstream code expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Rename columns to label the aggregates
# The "_ave" suffix is applied before the groupby mean so the averaged
# columns already carry their final names.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns for every
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True explicitly skips the text columns still in the frame
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type). Older pandas dropped
# them silently; pandas >= 2.0 raises TypeError without this argument.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export CSV file
# index=True writes the groupby keys (held in the index) as columns of the file.
feb_2021_ave.to_csv(
    path_or_buf="data/day/MN/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the per-year February CSV exports found in the output folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MN/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once. Growing a frame with
# pd.concat inside the loop starts from an empty DataFrame (concatenating
# empty frames is deprecated in newer pandas) and re-copies all rows each pass.
frames = []
for file in files:
    df = pd.read_csv("data/day/MN/feb/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list; fall back to an empty frame so
# the cell still yields a DataFrame when no CSV files were found.
MN_feb = pd.concat(frames) if frames else pd.DataFrame()

MN_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01cbc83700158311cb3594824a8c3db5ee300914,feb,2017,heat,auto,Cottage Grove,682.166667,700.966667,698.200000,40.0,False,False,False
1,01cbc83700158311cb3594824a8c3db5ee300914,feb,2017,heat,hold,Cottage Grove,680.556923,679.652308,678.461538,40.0,False,False,False
2,01e1619793696fb693b082ea3d26b964d4d2c5ff,feb,2017,heat,auto,Minneapolis,712.093458,714.890966,714.890966,55.0,False,False,False
3,01e1619793696fb693b082ea3d26b964d4d2c5ff,feb,2017,heat,hold,Minneapolis,681.200000,670.000000,670.000000,55.0,False,False,False
4,0391f60712af17c3706935f5ca5c601dd4870506,feb,2017,heat,auto,Plymouth,707.566667,710.000000,710.000000,35.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
882,fd8e69f04e13bc6274e4c1b8165f56811986bba9,feb,2021,heat,hold,Lakeville,685.876682,690.000000,690.000000,10.0,True,False,False
883,fe7636f125dc072104d039d7f9c7e7db881738e0,feb,2021,auto,hold,Wayzata,685.662835,736.111111,686.111111,5.0,True,False,False
884,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,feb,2021,heat,hold,Minneapolis,635.316626,677.854523,677.519560,0.0,False,False,False
885,ff1241a71824ad577de19d1aa0d46b50646e3b60,feb,2021,auto,hold,Minneapolis,678.629921,753.283879,682.248653,0.0,False,False,False


In [67]:
# Write the combined month file (without the row index).
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("Scraper_Output/State_Month_Day/MN/", exist_ok=True)
MN_feb.to_csv("Scraper_Output/State_Month_Day/MN/MN_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the monthly source CSV for this state and year
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2017-jun-day-MN.csv")

In [69]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2017['TemperatureExpectedCool']
expected_heat = jun_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jun_2017.drop(index=jun_2017.index[is_outlier], inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d839828eb38dc8c388f63fb0c651dd188e20afde,2017-06-05 15:00:00 UTC,auto,hold,680,685,635,MN,New Hope,57,False,False,False,Gas
1,d839828eb38dc8c388f63fb0c651dd188e20afde,2017-06-04 18:45:00 UTC,auto,hold,689,685,635,MN,New Hope,57,False,False,False,Gas
2,d839828eb38dc8c388f63fb0c651dd188e20afde,2017-06-05 14:30:00 UTC,auto,hold,688,685,635,MN,New Hope,57,False,False,False,Gas
3,0689734d40f4386ff9cd1055e6bde7d7be33fd40,2017-06-29 18:05:00 UTC,cool,hold,754,778,772,MN,Cottage Grove,26,False,False,False,Gas
4,d839828eb38dc8c388f63fb0c651dd188e20afde,2017-06-24 14:10:00 UTC,cool,hold,670,685,635,MN,New Hope,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
320726,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-06-27 13:00:00 UTC,cool,hold,672,750,750,MN,Hastings,120,False,False,False,Gas
320727,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-06-19 12:10:00 UTC,cool,hold,687,750,750,MN,Hastings,120,False,False,False,Gas
320728,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-06-19 14:30:00 UTC,cool,hold,695,750,750,MN,Hastings,120,False,False,False,Gas
320729,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-06-19 19:40:00 UTC,cool,hold,705,750,750,MN,Hastings,120,False,False,False,Gas


In [70]:
# Stamp each row with its year and month (both stored as strings)
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=ave_column_names)

In [72]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jun/", exist_ok=True)
jun_2017_ave.to_csv("data/day/MN/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the monthly source CSV for this state and year
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2018-jun-day-MN.csv")

In [75]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2018['TemperatureExpectedCool']
expected_heat = jun_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jun_2018.drop(index=jun_2018.index[is_outlier], inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7b9c43360d9a2d1751afbb5ec54d1616347d5a96,2018-06-02 19:05:00 UTC,heat,hold,711,712,712,MN,Mound,38,False,False,False,Gas
1,94bbab10b4e5a9227094cabb2106590c62f427b9,2018-06-23 15:55:00 UTC,cool,hold,700,696,696,MN,St. Cloud,26,False,False,False,Gas
10,7b9c43360d9a2d1751afbb5ec54d1616347d5a96,2018-06-24 13:40:00 UTC,heat,hold,712,722,722,MN,Mound,38,False,False,False,Gas
12,a9bb433e9f19b657021ae09e60a851879d18c680,2018-06-24 16:35:00 UTC,cool,auto,771,701,653,MN,Saint Paul,97,False,False,False,Gas
13,f18172d3368f392c5dce6842185440ff1e781fff,2018-06-01 16:45:00 UTC,cool,hold,731,732,732,MN,Cottage Grove,8,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
769472,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2018-06-13 09:45:00 UTC,cool,hold,709,760,700,MN,Bayport,120,False,False,False,Gas
769473,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-06-16 14:05:00 UTC,cool,auto,765,760,750,MN,St Paul,120,False,False,False,Gas
769474,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-06-16 16:50:00 UTC,cool,auto,754,760,750,MN,St Paul,120,False,False,False,Gas
769475,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2018-06-16 14:35:00 UTC,cool,auto,756,760,750,MN,St Paul,120,False,False,False,Gas


In [76]:
# Stamp each row with its year and month (both stored as strings)
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=ave_column_names)

In [78]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jun/", exist_ok=True)
jun_2018_ave.to_csv("data/day/MN/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the monthly source CSV for this state and year
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2019-jun-day-MN.csv")

In [81]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2019['TemperatureExpectedCool']
expected_heat = jun_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jun_2019.drop(index=jun_2019.index[is_outlier], inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3c6b19b32c099285ff3d0af9ab1e722404d4dc66,2019-06-29 18:15:00 UTC,cool,hold,738,736,736,MN,Plymouth,6,False,False,False,Gas
1,529425585c3bab5be856bf744d349c022294cc84,2019-06-29 19:15:00 UTC,cool,hold,776,775,775,MN,Saint Paul,77,False,False,False,Gas
2,32f88624c5e27218aba4d25cb09ea77ed576529a,2019-06-08 15:15:00 UTC,cool,auto,682,640,695,MN,Minneapolis,86,False,False,False,Gas
3,adc0c371a7fba491a39bc8709534398926caa24e,2019-06-30 14:40:00 UTC,cool,hold,743,752,700,MN,St. Cloud,6,False,False,False,Gas
4,c3861613d9fed35e20877252a0b8c540b1f45c78,2019-06-30 15:20:00 UTC,auto,auto,736,737,687,MN,Saint Paul,115,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
949435,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-06-26 16:15:00 UTC,cool,hold,761,760,730,MN,St Paul,120,False,False,False,Gas
949436,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-06-04 18:15:00 UTC,cool,auto,759,760,710,MN,St Paul,120,False,False,False,Gas
949437,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-06-26 15:00:00 UTC,cool,hold,749,760,730,MN,St Paul,120,False,False,False,Gas
949438,545c5910cff6b2872ffb60a12b5e69edf0103f5f,2019-06-23 17:35:00 UTC,cool,hold,708,760,760,MN,Stillwater,120,True,False,False,Gas


In [82]:
# Stamp each row with its year and month (both stored as strings)
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=ave_column_names)

In [84]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jun/", exist_ok=True)
jun_2019_ave.to_csv("data/day/MN/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the monthly source CSV for this state and year
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2020-jun-day-MN.csv")

In [87]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2020['TemperatureExpectedCool']
expected_heat = jun_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jun_2020.drop(index=jun_2020.index[is_outlier], inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,88c1f9b57773bcdb7283fdd98607d3604df51cb5,2020-06-14 19:40:00 UTC,cool,hold,687,686,686,MN,East Grand Forks,117,True,False,False,Gas
1,ae0b372e89a4f61d5bed1db1a38147b8aabb8fe4,2020-06-23 12:35:00 UTC,cool,hold,713,810,790,MN,Minneapolis,117,True,False,False,Gas
2,ae0b372e89a4f61d5bed1db1a38147b8aabb8fe4,2020-06-19 19:35:00 UTC,cool,hold,752,810,790,MN,Minneapolis,117,True,False,False,Gas
3,4aa6405a8ed1d128d0bc4fccc096a2a217224ed3,2020-06-28 12:50:00 UTC,cool,hold,616,620,620,MN,St. Cloud,87,False,False,False,Gas
5,ae0b372e89a4f61d5bed1db1a38147b8aabb8fe4,2020-06-23 15:10:00 UTC,cool,hold,714,810,790,MN,Minneapolis,117,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1043249,34d7823bddf5613a67e4627f74910aaa31120b10,2020-06-02 17:10:00 UTC,cool,hold,752,760,760,MN,Saint Paul,120,False,False,False,Gas
1043250,34d7823bddf5613a67e4627f74910aaa31120b10,2020-06-02 17:10:00 UTC,cool,hold,752,760,760,MN,Saint Paul,120,False,False,False,Gas
1043251,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2020-06-05 15:10:00 UTC,cool,hold,722,760,760,MN,Bayport,120,False,False,False,Gas
1043252,3cf6a477c363180362031171fec1235767275824,2020-06-07 15:10:00 UTC,auto,hold,700,760,620,MN,St Paul,120,False,False,False,Gas


In [88]:
# Stamp each row with its year and month (both stored as strings)
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=ave_column_names)

In [90]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jun/", exist_ok=True)
jun_2020_ave.to_csv("data/day/MN/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the monthly source CSV for this state and year
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2021-jun-day-MN.csv")

In [93]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jun_2021['TemperatureExpectedCool']
expected_heat = jun_2021['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jun_2021.drop(index=jun_2021.index[is_outlier], inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6cb03f233f2bda952021cac79e8f643da4d2970a,2021-06-16 18:20:00 UTC,cool,hold,763,731,731,MN,Minneapolis,97,False,False,False,Gas
1,c3b4e82abceae3691bed3d2caa0bf2d667916ef3,2021-06-09 19:50:00 UTC,cool,hold,745,739,739,MN,Champlin,69,False,False,False,Gas
2,da2c61dbb7eb6cf11955c79efd03d901f39954e3,2021-06-09 17:00:00 UTC,cool,hold,765,765,765,MN,Apple Valley,27,False,False,False,Gas
3,94bbab10b4e5a9227094cabb2106590c62f427b9,2021-06-30 18:45:00 UTC,cool,hold,764,755,755,MN,St. Cloud,26,False,False,False,Gas
4,adc0c371a7fba491a39bc8709534398926caa24e,2021-06-12 16:55:00 UTC,cool,hold,753,746,746,MN,St. Cloud,6,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
734929,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2021-06-23 14:35:00 UTC,cool,hold,706,750,750,MN,Bayport,120,False,False,False,Gas
734930,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2021-06-18 18:10:00 UTC,cool,hold,731,750,750,MN,Hastings,120,False,False,False,Gas
734931,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2021-06-23 18:35:00 UTC,cool,hold,720,750,750,MN,Bayport,120,False,False,False,Gas
734932,3cf6a477c363180362031171fec1235767275824,2021-06-16 16:35:00 UTC,auto,hold,695,750,630,MN,St Paul,120,False,False,False,Gas


In [94]:
# Stamp each row with its year and month (both stored as strings)
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=ave_column_names)

In [96]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jun/", exist_ok=True)
jun_2021_ave.to_csv("data/day/MN/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# Names of every .csv entry in the month's folder (os.listdir order is arbitrary)
files = [
    name
    for name in os.listdir("data/day/MN/jun/")
    if name.endswith(".csv")
]

In [99]:
# Adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each yearly file, then concatenate once. Files are visited in sorted name
# order because os.listdir() returns entries in arbitrary order, and a single
# concat avoids re-copying the accumulated frame on every iteration (and avoids
# concatenating with an empty DataFrame, which recent pandas versions warn about).
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/MN/jun/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the folder holds no CSV files.
MN_jun = pd.concat(frames) if frames else pd.DataFrame()

MN_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,003694c41cad5e9b6fd22ab076ac82741f649155,jun,2017,cool,hold,Cottage Grove,713.673913,718.898551,683.757246,0.0,False,False,False
1,01cbc83700158311cb3594824a8c3db5ee300914,jun,2017,auto,hold,Cottage Grove,680.552632,736.763158,674.131579,40.0,False,False,False
2,01cbc83700158311cb3594824a8c3db5ee300914,jun,2017,cool,hold,Cottage Grove,709.720430,731.460215,702.879570,40.0,False,False,False
3,01e1619793696fb693b082ea3d26b964d4d2c5ff,jun,2017,cool,auto,Minneapolis,719.266586,777.164053,732.934258,55.0,False,False,False
4,01e1619793696fb693b082ea3d26b964d4d2c5ff,jun,2017,cool,hold,Minneapolis,731.461444,741.585067,741.507956,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1123,fe7636f125dc072104d039d7f9c7e7db881738e0,jun,2021,cool,hold,Wayzata,742.021563,751.830189,751.789757,5.0,True,False,False
1124,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,jun,2021,cool,hold,Minneapolis,703.413534,711.992481,711.992481,0.0,False,False,False
1125,ff1241a71824ad577de19d1aa0d46b50646e3b60,jun,2021,auto,hold,Minneapolis,707.571665,716.311516,629.106877,0.0,False,False,False
1126,ff72a50c09967eb4ccf4d5cdb2d9421bc8202aa7,jun,2021,auto,hold,Saint Paul,725.694010,727.915365,668.835938,70.0,False,False,False


In [100]:
# Write the combined month file (without the row index).
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("Scraper_Output/State_Month_Day/MN/", exist_ok=True)
MN_jun.to_csv("Scraper_Output/State_Month_Day/MN/MN_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the monthly source CSV for this state and year
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2017-jul-day-MN.csv")

In [102]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2017['TemperatureExpectedCool']
expected_heat = jul_2017['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jul_2017.drop(index=jul_2017.index[is_outlier], inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f4551477415292e42f8419e58ae36474c56e4902,2017-07-17 18:50:00 UTC,auto,hold,805,830,660,MN,White Bear,6,False,False,False,Gas
1,f4551477415292e42f8419e58ae36474c56e4902,2017-07-31 18:10:00 UTC,auto,auto,763,810,660,MN,White Bear,6,False,False,False,Gas
2,32f88624c5e27218aba4d25cb09ea77ed576529a,2017-07-23 13:05:00 UTC,cool,hold,750,755,755,MN,Minneapolis,86,False,False,False,Gas
3,32f88624c5e27218aba4d25cb09ea77ed576529a,2017-07-24 16:45:00 UTC,cool,hold,728,755,755,MN,Minneapolis,86,False,False,False,Gas
4,3c6b19b32c099285ff3d0af9ab1e722404d4dc66,2017-07-23 16:35:00 UTC,cool,auto,708,712,641,MN,Plymouth,6,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415037,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-07-31 19:35:00 UTC,cool,hold,743,740,740,MN,Hastings,120,False,False,False,Gas
415038,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-07-28 15:30:00 UTC,cool,hold,744,760,760,MN,Hastings,120,False,False,False,Gas
415039,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-07-28 19:10:00 UTC,cool,hold,753,760,760,MN,Hastings,120,False,False,False,Gas
415040,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-07-28 16:20:00 UTC,cool,hold,749,760,760,MN,Hastings,120,False,False,False,Gas


In [103]:
# Stamp each row with its year and month (both stored as strings)
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=ave_column_names)

In [105]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jul/", exist_ok=True)
jul_2017_ave.to_csv("data/day/MN/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the monthly source CSV for this state and year
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2018-jul-day-MN.csv")

In [108]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2018['TemperatureExpectedCool']
expected_heat = jul_2018['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jul_2018.drop(index=jul_2018.index[is_outlier], inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
3,4347288e4bc01595036443af7dca41bfc4bbf6cc,2018-07-17 15:45:00 UTC,cool,hold,731,830,770,MN,Lowry,110,False,False,False,Gas
5,ee95ec7c5fdb5e3dc4e32a47c3a562b7f057fe26,2018-07-06 12:25:00 UTC,cool,hold,724,805,685,MN,Plymouth,7,False,False,False,Gas
6,4347288e4bc01595036443af7dca41bfc4bbf6cc,2018-07-17 15:00:00 UTC,cool,hold,731,830,770,MN,Lowry,110,False,False,False,Gas
9,1cda7373e661844a266fee96c91f2132e236ef42,2018-07-15 16:55:00 UTC,auto,hold,760,738,675,MN,Minneapolis,115,False,False,False,Gas
11,1e38a1c91900e05849ee21fe9cad3b2db2211e56,2018-07-19 12:00:00 UTC,cool,hold,725,748,644,MN,Plymouth,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
860518,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2018-07-01 16:55:00 UTC,cool,auto,740,760,740,MN,Hastings,120,False,False,False,Gas
860519,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2018-07-01 11:40:00 UTC,cool,auto,746,760,740,MN,Hastings,120,False,False,False,Gas
860520,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2018-07-01 13:45:00 UTC,cool,auto,745,760,740,MN,Hastings,120,False,False,False,Gas
860521,34d7823bddf5613a67e4627f74910aaa31120b10,2018-07-23 15:30:00 UTC,cool,auto,730,760,676,MN,Saint Paul,120,False,False,False,Gas


In [109]:
# Stamp each row with its year and month (both stored as strings)
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=ave_column_names)

In [111]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jul/", exist_ok=True)
jul_2018_ave.to_csv("data/day/MN/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the monthly source CSV for this state and year
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2019-jul-day-MN.csv")

In [114]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2019['TemperatureExpectedCool']
expected_heat = jul_2019['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jul_2019.drop(index=jul_2019.index[is_outlier], inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,01112bda59dc47b1ca327145792441aef251738c,2019-07-03 19:00:00 UTC,auto,hold,766,761,680,MN,Saint Paul,115,False,False,False,Gas
2,24fbf860ae4d2b19e2a67e4122973e3b1570c124,2019-07-20 11:25:00 UTC,cool,hold,770,810,790,MN,Rochester,8,False,False,False,Gas
5,7b9c43360d9a2d1751afbb5ec54d1616347d5a96,2019-07-27 17:45:00 UTC,cool,hold,728,711,711,MN,Mound,38,False,False,False,Gas
6,ee67de1db07eef8fcfb56801be52901153acaebf,2019-07-27 16:05:00 UTC,auto,auto,763,810,700,MN,Edina,69,False,False,False,Gas
7,0fb6802e7d46e4861488a532038126dc8f3deb63,2019-07-01 13:25:00 UTC,auto,auto,736,732,682,MN,Saint Paul,69,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1170663,34d7823bddf5613a67e4627f74910aaa31120b10,2019-07-05 18:30:00 UTC,cool,auto,730,760,630,MN,Saint Paul,120,False,False,False,Gas
1170664,34d7823bddf5613a67e4627f74910aaa31120b10,2019-07-27 16:10:00 UTC,cool,hold,725,760,760,MN,Saint Paul,120,False,False,False,Gas
1170665,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-07-18 11:45:00 UTC,cool,hold,758,760,730,MN,St Paul,120,False,False,False,Gas
1170666,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-07-16 16:25:00 UTC,cool,hold,762,760,730,MN,St Paul,120,False,False,False,Gas


In [115]:
# Stamp each row with its year and month (both stored as strings)
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=ave_column_names)

In [117]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jul/", exist_ok=True)
jul_2019_ave.to_csv("data/day/MN/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the monthly source CSV for this state and year
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2020-jul-day-MN.csv")

In [120]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2020['TemperatureExpectedCool']
expected_heat = jul_2020['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jul_2020.drop(index=jul_2020.index[is_outlier], inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be29fb3892489608c5697898cdcb17fc796e764a,2020-07-03 16:40:00 UTC,cool,hold,701,697,697,MN,Woodbury,27,False,False,False,Gas
1,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2020-07-21 14:30:00 UTC,cool,hold,709,742,742,MN,Arden Hills,46,False,False,False,Gas
2,6aa32b8ff0beefbdd52db89fcc70150a0c140b98,2020-07-02 18:10:00 UTC,auto,hold,753,743,643,MN,Minneapolis,56,False,False,False,Gas
3,da2c61dbb7eb6cf11955c79efd03d901f39954e3,2020-07-27 16:15:00 UTC,cool,hold,759,775,775,MN,Apple Valley,27,False,False,False,Gas
4,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2020-07-17 14:30:00 UTC,cool,hold,719,742,742,MN,Arden Hills,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1168881,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2020-07-29 12:50:00 UTC,cool,auto,745,760,760,MN,Hastings,120,False,False,False,Gas
1168882,3cf6a477c363180362031171fec1235767275824,2020-07-07 17:30:00 UTC,cool,hold,723,760,760,MN,St Paul,120,False,False,False,Gas
1168883,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2020-07-18 14:00:00 UTC,cool,auto,752,760,760,MN,Hastings,120,False,False,False,Gas
1168884,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2020-07-18 12:35:00 UTC,cool,auto,755,760,760,MN,Hastings,120,False,False,False,Gas


In [121]:
# Stamp each row with its year and month (both stored as strings)
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=ave_column_names)

In [123]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jul/", exist_ok=True)
jul_2020_ave.to_csv("data/day/MN/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the monthly source CSV for this state and year
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2021-jul-day-MN.csv")

In [126]:
# Remove predetermined outliers before aggregating: drop every row where
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
expected_cool = jul_2021['TemperatureExpectedCool']
expected_heat = jul_2021['TemperatureExpectedHeat']
is_outlier = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
jul_2021.drop(index=jul_2021.index[is_outlier], inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,529425585c3bab5be856bf744d349c022294cc84,2021-07-22 11:55:00 UTC,cool,hold,774,778,625,MN,Saint Paul,77,False,False,False,Gas
1,f4551477415292e42f8419e58ae36474c56e4902,2021-07-10 11:35:00 UTC,cool,hold,687,685,685,MN,White Bear,6,False,False,False,Gas
2,37eb3477328acfe5b2676ad60185620eb9a0e257,2021-07-14 15:15:00 UTC,cool,hold,689,630,630,MN,Edina,59,True,False,False,Gas
3,94bbab10b4e5a9227094cabb2106590c62f427b9,2021-07-21 19:40:00 UTC,cool,hold,755,755,755,MN,St. Cloud,26,False,False,False,Gas
4,94bbab10b4e5a9227094cabb2106590c62f427b9,2021-07-23 16:00:00 UTC,cool,hold,759,755,755,MN,St. Cloud,26,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
711315,4a660b38ac98956aba2f8c77b9e5a3b387ac94c4,2021-07-16 12:25:00 UTC,cool,hold,757,760,760,MN,Saint Paul,120,False,False,False,Gas
711316,3cf6a477c363180362031171fec1235767275824,2021-07-19 19:25:00 UTC,auto,hold,757,760,640,MN,St Paul,120,False,False,False,Gas
711317,4a660b38ac98956aba2f8c77b9e5a3b387ac94c4,2021-07-16 18:40:00 UTC,cool,hold,760,760,760,MN,Saint Paul,120,False,False,False,Gas
711318,4a660b38ac98956aba2f8c77b9e5a3b387ac94c4,2021-07-16 15:00:00 UTC,cool,hold,766,760,760,MN,Saint Paul,120,False,False,False,Gas


In [127]:
# Stamp each row with its year and month (both stored as strings)
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Give the temperature columns the "_ave" names used for the aggregated output
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=ave_column_names)

In [129]:
# Average the numeric columns for each
# (Identifier, Month, Year, HvacMode, CalendarEvent, City) group.
# numeric_only=True is explicit: pandas >= 2.0 no longer drops the string
# columns (e.g. date_time, ProvinceState) silently and raises TypeError instead.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Export CSV file. The group keys live in the index, so the index is written too.
# Create the output folder first: to_csv fails when the directory is missing.
os.makedirs("data/day/MN/jul/", exist_ok=True)
jul_2021_ave.to_csv("data/day/MN/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# Names of every .csv entry in the month's folder (os.listdir order is arbitrary)
files = [
    name
    for name in os.listdir("data/day/MN/jul/")
    if name.endswith(".csv")
]

In [132]:
# Adapted from:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each yearly file, then concatenate once. Files are visited in sorted name
# order because os.listdir() returns entries in arbitrary order, and a single
# concat avoids re-copying the accumulated frame on every iteration (and avoids
# concatenating with an empty DataFrame, which recent pandas versions warn about).
frames = []
for file in sorted(files):
    df = pd.read_csv("data/day/MN/jul/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty
# DataFrame when the folder holds no CSV files.
MN_jul = pd.concat(frames) if frames else pd.DataFrame()

MN_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,003694c41cad5e9b6fd22ab076ac82741f649155,jul,2017,cool,auto,Cottage Grove,720.388889,720.000000,680.000000,0.0,False,False,False
1,003694c41cad5e9b6fd22ab076ac82741f649155,jul,2017,cool,hold,Cottage Grove,716.931034,717.357759,682.668103,0.0,False,False,False
2,01cbc83700158311cb3594824a8c3db5ee300914,jul,2017,cool,hold,Cottage Grove,715.728365,717.843750,717.403846,40.0,False,False,False
3,01e1619793696fb693b082ea3d26b964d4d2c5ff,jul,2017,cool,auto,Minneapolis,720.214477,751.461126,734.538874,55.0,False,False,False
4,01e1619793696fb693b082ea3d26b964d4d2c5ff,jul,2017,cool,hold,Minneapolis,727.498202,733.652817,733.547743,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
953,fe7636f125dc072104d039d7f9c7e7db881738e0,jul,2021,cool,hold,Wayzata,747.571429,749.942857,749.600000,5.0,True,False,False
954,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,jul,2021,cool,hold,Minneapolis,708.334884,728.116279,728.116279,0.0,False,False,False
955,ff1241a71824ad577de19d1aa0d46b50646e3b60,jul,2021,auto,hold,Minneapolis,718.073171,728.967480,637.710569,0.0,False,False,False
956,ff72a50c09967eb4ccf4d5cdb2d9421bc8202aa7,jul,2021,auto,hold,Saint Paul,726.743459,727.656669,670.007658,70.0,False,False,False


In [133]:
# Write the combined July table. to_csv does not create missing folders,
# so ensure the output directory exists first.
os.makedirs("Scraper_Output/State_Month_Day/MN", exist_ok=True)
MN_jul.to_csv("Scraper_Output/State_Month_Day/MN/MN_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 CSV for MN.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2017-aug-day-MN.csv")

In [135]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = aug_2017['TemperatureExpectedCool']
expected_heat = aug_2017['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
aug_2017.drop(aug_2017[out_of_range].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6869983c3f35b5675a0d51f4919a2037ed1dc33a,2017-08-02 12:55:00 UTC,cool,hold,758,770,770,MN,Coon Rapids,57,False,False,False,Gas
1,6869983c3f35b5675a0d51f4919a2037ed1dc33a,2017-08-21 15:30:00 UTC,cool,hold,778,770,770,MN,Coon Rapids,57,False,False,False,Gas
2,6869983c3f35b5675a0d51f4919a2037ed1dc33a,2017-08-22 11:50:00 UTC,cool,hold,748,770,770,MN,Coon Rapids,57,False,False,False,Gas
3,6869983c3f35b5675a0d51f4919a2037ed1dc33a,2017-08-02 16:45:00 UTC,cool,hold,774,770,770,MN,Coon Rapids,57,False,False,False,Gas
4,6869983c3f35b5675a0d51f4919a2037ed1dc33a,2017-08-02 19:50:00 UTC,cool,hold,769,770,770,MN,Coon Rapids,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
349000,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-08-12 11:40:00 UTC,cool,hold,701,760,760,MN,Hastings,120,False,False,False,Gas
349001,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-08-11 13:05:00 UTC,cool,hold,698,760,760,MN,Hastings,120,False,False,False,Gas
349002,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-08-09 19:30:00 UTC,cool,hold,719,760,760,MN,Hastings,120,False,False,False,Gas
349003,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2017-08-09 14:45:00 UTC,cool,hold,723,760,760,MN,Hastings,120,False,False,False,Gas


In [136]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Label the three temperature columns as averages ahead of the groupby mean.
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
aug_2017_ave = (
    aug_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2017_ave

In [139]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/aug", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
aug_2017_ave.to_csv("data/day/MN/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Load the August 2018 CSV for MN.
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2018-aug-day-MN.csv")

In [141]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = aug_2018['TemperatureExpectedCool']
expected_heat = aug_2018['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
aug_2018.drop(aug_2018[out_of_range].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,d22a64307f7e81bf648e0620dcb9378d43701370,2018-08-04 14:45:00 UTC,auto,hold,683,703,653,MN,Minneapolis,58,True,False,False,Gas
6,6ccdceee51c1f27e12211c1cd000f2fff42ca58f,2018-08-04 15:55:00 UTC,cool,auto,729,739,739,MN,Minneapolis,7,False,False,False,Gas
11,d22a64307f7e81bf648e0620dcb9378d43701370,2018-08-04 14:35:00 UTC,auto,hold,681,703,653,MN,Minneapolis,58,True,False,False,Gas
13,a05f7f839217122731438cb9f3e46b100cdf827f,2018-08-18 18:35:00 UTC,cool,hold,780,777,777,MN,minneapolis,78,False,False,False,Gas
16,d679d21d790c3c502d55b57da0627a249db06ba2,2018-08-13 14:30:00 UTC,cool,auto,788,787,750,MN,Minneapolis,78,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
847485,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2018-08-11 15:45:00 UTC,cool,auto,761,760,680,MN,Minneapolis,120,False,False,False,Gas
847486,3cf6a477c363180362031171fec1235767275824,2018-08-08 16:30:00 UTC,auto,hold,756,760,650,MN,St Paul,120,False,False,False,Gas
847487,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2018-08-15 13:20:00 UTC,cool,auto,754,760,740,MN,Hastings,120,False,False,False,Gas
847488,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2018-08-16 12:40:00 UTC,cool,auto,751,760,740,MN,Hastings,120,False,False,False,Gas


In [142]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Label the three temperature columns as averages ahead of the groupby mean.
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
aug_2018_ave = (
    aug_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2018_ave

In [145]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/aug", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
aug_2018_ave.to_csv("data/day/MN/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Load the August 2019 CSV for MN.
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2019-aug-day-MN.csv")

In [147]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = aug_2019['TemperatureExpectedCool']
expected_heat = aug_2019['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
aug_2019.drop(aug_2019[out_of_range].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6cb6f8cc3b20aaf02c3c9f4c0eb4341991952faf,2019-08-04 13:35:00 UTC,cool,hold,703,665,665,MN,Robbinsdale,76,False,False,False,Gas
2,6cb6f8cc3b20aaf02c3c9f4c0eb4341991952faf,2019-08-04 13:25:00 UTC,cool,hold,703,665,665,MN,Robbinsdale,76,False,False,False,Gas
3,7fa25f9af0c28ae144cce853ad27d85e4c193f78,2019-08-18 12:30:00 UTC,cool,hold,750,810,790,MN,Luverne,58,False,False,False,Gas
4,94bbab10b4e5a9227094cabb2106590c62f427b9,2019-08-25 15:40:00 UTC,cool,hold,703,709,709,MN,St. Cloud,26,False,False,False,Gas
5,6aa32b8ff0beefbdd52db89fcc70150a0c140b98,2019-08-01 12:10:00 UTC,cool,auto,749,810,790,MN,Minneapolis,56,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1008712,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-08-07 19:30:00 UTC,cool,hold,761,760,730,MN,St Paul,120,False,False,False,Gas
1008713,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-08-04 11:50:00 UTC,cool,hold,761,760,730,MN,St Paul,120,False,False,False,Gas
1008714,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-08-07 19:55:00 UTC,cool,hold,761,760,730,MN,St Paul,120,False,False,False,Gas
1008715,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2019-08-02 14:25:00 UTC,cool,hold,762,760,730,MN,St Paul,120,False,False,False,Gas


In [148]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Label the three temperature columns as averages ahead of the groupby mean.
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
aug_2019_ave = (
    aug_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2019_ave

In [151]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/aug", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
aug_2019_ave.to_csv("data/day/MN/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Load the August 2020 CSV for MN.
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2020-aug-day-MN.csv")

In [153]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = aug_2020['TemperatureExpectedCool']
expected_heat = aug_2020['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
aug_2020.drop(aug_2020[out_of_range].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a05f7f839217122731438cb9f3e46b100cdf827f,2020-08-16 17:15:00 UTC,cool,hold,749,810,750,MN,minneapolis,78,False,False,False,Gas
1,da2c61dbb7eb6cf11955c79efd03d901f39954e3,2020-08-26 18:50:00 UTC,cool,hold,753,755,755,MN,Apple Valley,27,False,False,False,Gas
2,0c8baa2d333bd02d5d50629503bb495a402d95b2,2020-08-25 12:35:00 UTC,cool,hold,776,775,775,MN,Bloomington,59,False,False,False,Gas
3,be29fb3892489608c5697898cdcb17fc796e764a,2020-08-10 17:00:00 UTC,cool,hold,713,717,677,MN,Woodbury,27,False,False,False,Gas
4,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2020-08-24 17:10:00 UTC,cool,hold,748,742,702,MN,Arden Hills,46,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1049035,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2020-08-11 19:10:00 UTC,cool,hold,729,760,760,MN,Hastings,120,False,False,False,Gas
1049036,3cf6a477c363180362031171fec1235767275824,2020-08-10 19:10:00 UTC,cool,hold,709,760,760,MN,St Paul,120,False,False,False,Gas
1049037,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2020-08-10 18:40:00 UTC,cool,hold,703,760,760,MN,Hastings,120,False,False,False,Gas
1049038,fc37f548ab83d8685b412b6893a5a0e0ba2f3d5f,2020-08-10 14:55:00 UTC,cool,hold,690,760,760,MN,Hastings,120,False,False,False,Gas


In [154]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Label the three temperature columns as averages ahead of the groupby mean.
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
aug_2020_ave = (
    aug_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# aug_2020_ave

In [157]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/aug", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
aug_2020_ave.to_csv("data/day/MN/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from this month's folder
2. Export as combined CSV

In [158]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order; sorting keeps the combined
# row order reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/MN/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV in the folder, then concatenate once. Growing the frame with
# pd.concat inside the loop re-copies all earlier rows on every pass, and
# concatenating onto an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/MN/aug/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
MN_aug = pd.concat(frames) if frames else pd.DataFrame()

MN_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,003694c41cad5e9b6fd22ab076ac82741f649155,aug,2017,cool,auto,Cottage Grove,718.693989,720.357923,679.530055,0.0,False,False,False
1,003694c41cad5e9b6fd22ab076ac82741f649155,aug,2017,cool,hold,Cottage Grove,715.220820,716.517350,714.372240,0.0,False,False,False
2,01cbc83700158311cb3594824a8c3db5ee300914,aug,2017,auto,hold,Cottage Grove,694.520833,750.229167,695.375000,40.0,False,False,False
3,01cbc83700158311cb3594824a8c3db5ee300914,aug,2017,cool,hold,Cottage Grove,702.419214,713.956332,686.441048,40.0,False,False,False
4,01e1619793696fb693b082ea3d26b964d4d2c5ff,aug,2017,cool,auto,Minneapolis,704.666667,720.222222,739.777778,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1731,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,aug,2020,cool,hold,Minneapolis,713.636364,724.370629,724.370629,0.0,False,False,False
1732,ff1241a71824ad577de19d1aa0d46b50646e3b60,aug,2020,auto,auto,Minneapolis,748.414097,751.977974,623.171806,0.0,False,False,False
1733,ff1241a71824ad577de19d1aa0d46b50646e3b60,aug,2020,auto,hold,Minneapolis,741.774374,743.384386,625.172208,0.0,False,False,False
1734,ff72a50c09967eb4ccf4d5cdb2d9421bc8202aa7,aug,2020,cool,auto,Saint Paul,740.833333,740.000000,720.000000,70.0,False,False,False


In [160]:
# Write the combined August table. to_csv does not create missing folders,
# so ensure the output directory exists first.
os.makedirs("Scraper_Output/State_Month_Day/MN", exist_ok=True)
MN_aug.to_csv("Scraper_Output/State_Month_Day/MN/MN_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 CSV for MN.
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2017-dec-day-MN.csv")

In [162]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = dec_2017['TemperatureExpectedCool']
expected_heat = dec_2017['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
dec_2017.drop(dec_2017[out_of_range].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,01112bda59dc47b1ca327145792441aef251738c,2017-12-22 18:40:00 UTC,auto,auto,678,724,674,MN,Saint Paul,115,False,False,False,Gas
1,9a116f845822dbefdbdbb146ecf2e912ae8663f4,2017-12-15 13:50:00 UTC,heat,hold,707,716,716,MN,Minneapolis,110,False,False,False,Gas
2,01112bda59dc47b1ca327145792441aef251738c,2017-12-26 15:15:00 UTC,auto,auto,674,724,674,MN,Saint Paul,115,False,False,False,Gas
3,01112bda59dc47b1ca327145792441aef251738c,2017-12-24 12:40:00 UTC,auto,auto,673,724,674,MN,Saint Paul,115,False,False,False,Gas
4,01112bda59dc47b1ca327145792441aef251738c,2017-12-28 18:40:00 UTC,auto,auto,675,724,674,MN,Saint Paul,115,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
603561,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2017-12-29 18:40:00 UTC,heat,hold,742,750,750,MN,St Paul,120,False,False,False,Gas
603562,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2017-12-06 15:10:00 UTC,heat,hold,749,750,750,MN,St Paul,120,False,False,False,Gas
603563,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2017-12-28 18:45:00 UTC,heat,hold,749,750,750,MN,St Paul,120,False,False,False,Gas
603564,a08916b21cf1f066ad2771ec769f4cb53b63abb5,2017-12-28 17:10:00 UTC,heat,hold,742,750,750,MN,St Paul,120,False,False,False,Gas


In [163]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Label the three temperature columns as averages ahead of the groupby mean.
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
dec_2017_ave = (
    dec_2017
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2017_ave

In [166]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/dec", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
dec_2017_ave.to_csv("data/day/MN/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Load the December 2018 CSV for MN.
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2018-dec-day-MN.csv")

In [168]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = dec_2018['TemperatureExpectedCool']
expected_heat = dec_2018['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
dec_2018.drop(dec_2018[out_of_range].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be75e2dfe9f47bbe7bebfaf35d69bef780e8e771,2018-12-05 15:50:00 UTC,heat,hold,678,679,679,MN,Richfield,69,True,False,False,Gas
1,1e38a1c91900e05849ee21fe9cad3b2db2211e56,2018-12-22 19:25:00 UTC,heat,hold,617,640,610,MN,Plymouth,26,False,False,False,Gas
2,4c30637e96a38c7482181a46348396e4f78689d5,2018-12-26 14:55:00 UTC,heat,auto,699,722,688,MN,Minneapolis,110,False,False,False,Gas
3,1e38a1c91900e05849ee21fe9cad3b2db2211e56,2018-12-09 17:05:00 UTC,heat,hold,649,640,640,MN,Plymouth,26,False,False,False,Gas
5,d22a64307f7e81bf648e0620dcb9378d43701370,2018-12-10 18:25:00 UTC,heat,auto,675,655,680,MN,Minneapolis,58,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
898996,34d7823bddf5613a67e4627f74910aaa31120b10,2018-12-26 17:30:00 UTC,heat,auto,642,760,650,MN,Saint Paul,120,False,False,False,Gas
898997,34d7823bddf5613a67e4627f74910aaa31120b10,2018-12-07 17:15:00 UTC,heat,auto,627,760,650,MN,Saint Paul,120,False,False,False,Gas
898998,34d7823bddf5613a67e4627f74910aaa31120b10,2018-12-07 17:20:00 UTC,heat,auto,634,760,650,MN,Saint Paul,120,False,False,False,Gas
898999,34d7823bddf5613a67e4627f74910aaa31120b10,2018-12-12 19:45:00 UTC,heat,auto,649,760,650,MN,Saint Paul,120,False,False,False,Gas


In [169]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Label the three temperature columns as averages ahead of the groupby mean.
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
dec_2018_ave = (
    dec_2018
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2018_ave

In [172]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/dec", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
dec_2018_ave.to_csv("data/day/MN/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Load the December 2019 CSV for MN.
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2019-dec-day-MN.csv")

In [174]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = dec_2019['TemperatureExpectedCool']
expected_heat = dec_2019['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
dec_2019.drop(dec_2019[out_of_range].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3b8cf6054707a947c74a64834cfb1f130f6fc733,2019-12-27 15:30:00 UTC,heat,hold,654,651,620,MN,Hutchinson,69,False,False,False,Gas
1,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2019-12-25 14:55:00 UTC,heat,hold,682,722,690,MN,Arden Hills,46,False,False,False,Gas
3,5901861da900925f39dedb164e38956f3f7c96f6,2019-12-01 14:15:00 UTC,heat,hold,651,675,675,MN,Minneapolis,110,False,False,False,Gas
5,3b8cf6054707a947c74a64834cfb1f130f6fc733,2019-12-27 15:10:00 UTC,heat,hold,658,651,620,MN,Hutchinson,69,False,False,False,Gas
6,d4956b154169cbb3b3378a4ca8827aefcdfed777,2019-12-04 12:35:00 UTC,heat,hold,672,675,675,MN,Saint Paul,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1077214,34d7823bddf5613a67e4627f74910aaa31120b10,2019-12-01 18:00:00 UTC,heat,auto,700,760,630,MN,Saint Paul,120,False,False,False,Gas
1077215,6689ccb1a4faed0a3efb38dd184ae2d3bdd15362,2019-12-14 17:25:00 UTC,heat,auto,725,760,750,MN,Minneapolis,120,False,False,False,Gas
1077216,34d7823bddf5613a67e4627f74910aaa31120b10,2019-12-01 18:40:00 UTC,heat,auto,690,760,630,MN,Saint Paul,120,False,False,False,Gas
1077217,34d7823bddf5613a67e4627f74910aaa31120b10,2019-12-01 18:55:00 UTC,heat,auto,688,760,630,MN,Saint Paul,120,False,False,False,Gas


In [175]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Label the three temperature columns as averages ahead of the groupby mean.
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
dec_2019_ave = (
    dec_2019
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2019_ave

In [178]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/dec", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
dec_2019_ave.to_csv("data/day/MN/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Load the December 2020 CSV for MN.
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/MN-day/2020-dec-day-MN.csv")

In [180]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or expected heat temperature is >= 850 or <= 600.
expected_cool = dec_2020['TemperatureExpectedCool']
expected_heat = dec_2020['TemperatureExpectedHeat']
out_of_range = (
    (expected_cool >= 850) | (expected_heat >= 850)
    | (expected_cool <= 600) | (expected_heat <= 600)
)
dec_2020.drop(dec_2020[out_of_range].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2020-12-04 19:20:00 UTC,heat,hold,701,702,702,MN,Arden Hills,46,False,False,False,Gas
1,3f634f2d32e3794678c0e0a9bd9b397b284dd5af,2020-12-09 19:10:00 UTC,heat,hold,698,702,702,MN,Arden Hills,46,False,False,False,Gas
2,0c8baa2d333bd02d5d50629503bb495a402d95b2,2020-12-29 15:35:00 UTC,auto,hold,721,775,725,MN,Bloomington,59,False,False,False,Gas
3,69a1c92947e81a273d17266d9dbed206b1d3335d,2020-12-28 17:20:00 UTC,heat,hold,698,721,721,MN,New Brighton,59,False,False,False,Gas
4,be29fb3892489608c5697898cdcb17fc796e764a,2020-12-23 19:55:00 UTC,heat,hold,693,696,696,MN,Woodbury,27,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
900568,76b9a00be648b09df094868f1db636902c346ebd,2020-12-28 19:15:00 UTC,heat,hold,721,720,720,MN,Minneapolis,120,False,False,False,Gas
900569,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2020-12-15 18:15:00 UTC,heat,auto,716,720,720,MN,Bayport,120,False,False,False,Gas
900570,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2020-12-09 18:10:00 UTC,heat,auto,723,730,730,MN,Bayport,120,False,False,False,Gas
900571,3f46872d3ced214755fb86ed41c90b4e6e3c577d,2020-12-08 16:15:00 UTC,heat,auto,725,730,730,MN,Bayport,120,False,False,False,Gas


In [181]:
# Tag every row with its source year and month (both stored as strings)
# so they can serve as grouping keys in the aggregation below.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Label the three temperature columns as averages ahead of the groupby mean.
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); without it pandas >= 2.0 raises TypeError on them.
dec_2020_ave = (
    dec_2020
    .groupby(['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City'])
    .mean(numeric_only=True)
)

# dec_2020_ave

In [184]:
# Export CSV file
# DataFrame.to_csv does not create missing folders, so make sure the target
# directory exists before writing (no-op when it is already there).
os.makedirs("data/day/MN/dec", exist_ok=True)
# index=True keeps the group keys, which live in the index after the groupby.
dec_2020_ave.to_csv("data/day/MN/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year average files from this month's folder
2. Export as combined CSV

In [185]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order; sorting keeps the combined
# row order reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/MN/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every CSV in the folder, then concatenate once. Growing the frame with
# pd.concat inside the loop re-copies all earlier rows on every pass, and
# concatenating onto an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/MN/dec/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
MN_dec = pd.concat(frames) if frames else pd.DataFrame()

MN_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,003694c41cad5e9b6fd22ab076ac82741f649155,dec,2017,heat,auto,Cottage Grove,689.262376,720.108911,689.863861,0.0,False,False,False
1,003694c41cad5e9b6fd22ab076ac82741f649155,dec,2017,heat,hold,Cottage Grove,688.592466,693.068493,679.147260,0.0,False,False,False
2,01112bda59dc47b1ca327145792441aef251738c,dec,2017,auto,auto,Saint Paul,674.594444,731.000000,674.750000,115.0,False,False,False
3,01b76f222272ad708ad27deccbd0612d4e1664d0,dec,2017,heat,auto,Rosemount,697.342857,719.371429,709.314286,10.0,False,False,False
4,01cbc83700158311cb3594824a8c3db5ee300914,dec,2017,heat,auto,Cottage Grove,678.525773,686.948454,685.494845,40.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1543,fe9a1858e649bee93e9a796cdeb7dd1aef612de9,dec,2020,heat,hold,Dayton,716.000000,725.000000,725.000000,40.0,False,False,False
1544,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,dec,2020,heat,hold,Minneapolis,655.210158,669.493870,669.493870,0.0,False,False,False
1545,ff1241a71824ad577de19d1aa0d46b50646e3b60,dec,2020,auto,auto,Minneapolis,684.094340,743.271226,687.825472,0.0,False,False,False
1546,ff1241a71824ad577de19d1aa0d46b50646e3b60,dec,2020,auto,hold,Minneapolis,671.748299,738.095238,675.152575,0.0,False,False,False


In [187]:
# Write the combined December table. to_csv does not create missing folders,
# so ensure the output directory exists first.
os.makedirs("Scraper_Output/State_Month_Day/MN", exist_ok=True)
MN_dec.to_csv("Scraper_Output/State_Month_Day/MN/MN_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files 
1. Read in the combined monthly files from the state's folder
2. Export as combined CSV

In [188]:
# Create variable for files in directory
# os.listdir returns names in arbitrary order; sorting keeps the combined
# row order reproducible from run to run.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/MN/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every monthly CSV for the state, then concatenate once. Growing the
# frame with pd.concat inside the loop re-copies all earlier rows on every pass,
# and concatenating onto an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("Scraper_Output/State_Month_Day/MN/" + file)
    frames.append(df)

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
MN_all = pd.concat(frames) if frames else pd.DataFrame()

MN_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,003694c41cad5e9b6fd22ab076ac82741f649155,aug,2017,cool,auto,Cottage Grove,718.693989,720.357923,679.530055,0.0,False,False,False
1,003694c41cad5e9b6fd22ab076ac82741f649155,aug,2017,cool,hold,Cottage Grove,715.220820,716.517350,714.372240,0.0,False,False,False
2,01cbc83700158311cb3594824a8c3db5ee300914,aug,2017,auto,hold,Cottage Grove,694.520833,750.229167,695.375000,40.0,False,False,False
3,01cbc83700158311cb3594824a8c3db5ee300914,aug,2017,cool,hold,Cottage Grove,702.419214,713.956332,686.441048,40.0,False,False,False
4,01e1619793696fb693b082ea3d26b964d4d2c5ff,aug,2017,cool,auto,Minneapolis,704.666667,720.222222,739.777778,55.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7487,fe7636f125dc072104d039d7f9c7e7db881738e0,jun,2021,cool,hold,Wayzata,742.021563,751.830189,751.789757,5.0,True,False,False
7488,fe9c6ace8431413b3c3d2d4a8d17b24ae6074d16,jun,2021,cool,hold,Minneapolis,703.413534,711.992481,711.992481,0.0,False,False,False
7489,ff1241a71824ad577de19d1aa0d46b50646e3b60,jun,2021,auto,hold,Minneapolis,707.571665,716.311516,629.106877,0.0,False,False,False
7490,ff72a50c09967eb4ccf4d5cdb2d9421bc8202aa7,jun,2021,auto,hold,Saint Paul,725.694010,727.915365,668.835938,70.0,False,False,False


In [190]:
# Write the all-months table for the state. to_csv does not create missing
# folders, so ensure the output directory exists first.
os.makedirs("Scraper_Output/State_Month_Day", exist_ok=True)
MN_all.to_csv("Scraper_Output/State_Month_Day/MN_all_day.csv", header=True, index=False)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in each monthly frame.
# The frames are listed in the same order as the original one-per-line prints,
# so the printed output is unchanged.
monthly_frames = [
    ("jan_2017", jan_2017), ("jan_2018", jan_2018), ("jan_2019", jan_2019),
    ("jan_2020", jan_2020), ("jan_2021", jan_2021),
    ("feb_2017", feb_2017), ("feb_2018", feb_2018), ("feb_2019", feb_2019),
    ("feb_2020", feb_2020), ("feb_2021", feb_2021),
    ("jun_2017", jun_2017), ("jun_2018", jun_2018), ("jun_2019", jun_2019),
    ("jun_2020", jun_2020), ("jun_2021", jun_2021),
    ("jul_2017", jul_2017), ("jul_2018", jul_2018), ("jul_2019", jul_2019),
    ("jul_2020", jul_2020), ("jul_2021", jul_2021),
    ("aug_2017", aug_2017), ("aug_2018", aug_2018), ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017), ("dec_2018", dec_2018), ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
]

for label, frame in monthly_frames:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['MN']
Unique jan_2018: ['MN']
Unique jan_2019: ['MN']
Unique jan_2020: ['MN']
Unique jan_2021: ['MN']
Unique feb_2017: ['MN']
Unique feb_2018: ['MN']
Unique feb_2019: ['MN']
Unique feb_2020: ['MN']
Unique feb_2021: ['MN']
Unique jun_2017: ['MN']
Unique jun_2018: ['MN']
Unique jun_2019: ['MN']
Unique jun_2020: ['MN']
Unique jun_2021: ['MN']
Unique jul_2017: ['MN']
Unique jul_2018: ['MN']
Unique jul_2019: ['MN']
Unique jul_2020: ['MN']
Unique jul_2021: ['MN']
Unique aug_2017: ['MN']
Unique aug_2018: ['MN']
Unique aug_2019: ['MN']
Unique aug_2020: ['MN']
Unique dec_2017: ['MN']
Unique dec_2018: ['MN']
Unique dec_2019: ['MN']
Unique dec_2020: ['MN']
