# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" CSV for MS into a DataFrame
jan_2017 = pd.read_csv(Path("../data_large/MS-day/2017-jan-day-MS.csv"))

# jan_2017

In [3]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
jan_2017.drop(
    index=jan_2017[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,4fe01e07e4cc830b6998642dd0327a85d13b615a,2017-01-28 14:10:00 UTC,heat,hold,707,704,704,MS,Jackson,20,False,False,False,Gas
1,4fe01e07e4cc830b6998642dd0327a85d13b615a,2017-01-08 19:50:00 UTC,heat,hold,734,738,738,MS,Jackson,20,False,False,False,Gas
2,6a4e92ba22307727b97e8d8deef28f5373f93ade,2017-01-29 14:55:00 UTC,auto,auto,674,737,686,MS,Oxford,35,True,False,True,Electric
3,e46f291ea1b93a73e8c0dbfd7a0ac8c1fd1f0ad7,2017-01-15 18:35:00 UTC,auto,auto,686,685,635,MS,Brandon,25,False,False,False,Gas
4,7e94fec7feacae0a160256474e0b2b5b4fc73598,2017-01-13 16:50:00 UTC,auto,auto,703,754,704,MS,Olive Branch,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63484,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-01-29 12:50:00 UTC,heat,auto,761,760,760,MS,Mooreville,10,False,False,False,Gas
63485,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-01-06 15:35:00 UTC,heat,auto,761,760,760,MS,Mooreville,10,False,False,False,Gas
63486,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-01-24 17:45:00 UTC,heat,auto,760,760,760,MS,Mooreville,10,False,False,False,Gas
63487,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-01-10 12:50:00 UTC,heat,auto,759,760,760,MS,Mooreville,10,False,False,False,Gas


In [4]:
# Tag every row with its year and month (both stored as strings)
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,cool,auto,Ocean Springs,704.883117,737.571429,683.116883,0.0,False,False,True
01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,cool,hold,Ocean Springs,721.615385,732.977667,732.977667,0.0,False,False,True
01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,heat,auto,Ocean Springs,692.216667,708.183333,705.283333,0.0,False,False,True
01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,heat,hold,Ocean Springs,714.270330,691.597802,691.597802,0.0,False,False,True
024d1624a424d57f4cdc19a5fa9aa6d90739ef33,Jan,2017,auto,hold,Horn Lake,712.636364,755.000000,695.000000,26.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...
fe9bc276b524605ee152e4051285e4e080e77a3f,Jan,2017,heat,auto,Tupelo,700.236842,714.005263,702.510526,75.0,False,False,False
fe9bc276b524605ee152e4051285e4e080e77a3f,Jan,2017,heat,hold,Tupelo,693.190871,688.796680,688.796680,75.0,False,False,False
ffe938566049e7ad2eb8c1726ec2215bfda979d7,Jan,2017,cool,auto,Mooreville,755.084906,744.207547,747.547170,10.0,False,False,False
ffe938566049e7ad2eb8c1726ec2215bfda979d7,Jan,2017,heat,auto,Mooreville,756.862367,748.020823,748.158964,10.0,False,False,False


In [7]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
jan_2017_ave.to_csv(
    Path("data/day/MS/jan/jan_2017_ave.csv"),
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the 2018 January "day" CSV for MS into a DataFrame
jan_2018 = pd.read_csv(Path("../data_large/MS-day/2018-jan-day-MS.csv"))

# jan_2018

In [9]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
jan_2018.drop(
    index=jan_2018[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,273ce90afa5e8188e6ff7544f58ac1484611ee8f,2018-01-12 10:05:00 UTC,heat,hold,700,699,699,MS,Madison,10,False,False,False,Gas
1,83a4d060a27b70bfeb5a64d07cdc779aa2201fca,2018-01-10 15:20:00 UTC,heat,hold,711,716,716,MS,Brandon,10,False,False,False,Gas
3,273ce90afa5e8188e6ff7544f58ac1484611ee8f,2018-01-22 17:50:00 UTC,heat,hold,684,684,684,MS,Madison,10,False,False,False,Gas
4,9b718599ed03efd98e2b15948c86641a7cd4b249,2018-01-07 16:30:00 UTC,heat,hold,717,745,745,MS,Madison,0,False,False,False,Gas
5,83a4d060a27b70bfeb5a64d07cdc779aa2201fca,2018-01-10 17:45:00 UTC,heat,hold,712,716,716,MS,Brandon,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
213849,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-01-20 15:15:00 UTC,heat,auto,766,760,760,MS,Mooreville,10,False,False,False,Gas
213850,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-01-29 19:30:00 UTC,heat,auto,759,760,760,MS,Mooreville,10,False,False,False,Gas
213851,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-01-12 18:35:00 UTC,heat,hold,773,760,760,MS,Mooreville,10,False,False,False,Gas
213852,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-01-12 18:15:00 UTC,heat,hold,778,760,760,MS,Mooreville,10,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as strings)
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2018_ave

In [13]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
jan_2018_ave.to_csv(
    Path("data/day/MS/jan/jan_2018_ave.csv"),
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the 2019 January "day" CSV for MS into a DataFrame
jan_2019 = pd.read_csv(Path("../data_large/MS-day/2019-jan-day-MS.csv"))

# jan_2019

In [15]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
jan_2019.drop(
    index=jan_2019[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d23049697dc10f7bda2995dcd45a259452e331c5,2019-01-13 14:35:00 UTC,auto,hold,667,674,624,MS,Belden,20,False,False,False,Gas
1,d23049697dc10f7bda2995dcd45a259452e331c5,2019-01-12 12:55:00 UTC,auto,hold,617,668,618,MS,Belden,20,False,False,False,Gas
2,7a0b2f12434fc5c99328cd9d20d8d54b0cfe335a,2019-01-12 18:15:00 UTC,heat,hold,649,650,626,MS,Ecru,30,False,False,False,Gas
3,71fc691a191d2bbea3ea5b823d23477631bb8ea4,2019-01-20 14:35:00 UTC,heat,hold,658,676,643,MS,Ecru,30,False,False,False,Gas
4,d23049697dc10f7bda2995dcd45a259452e331c5,2019-01-12 14:45:00 UTC,auto,hold,655,668,618,MS,Belden,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
300815,f918d405d012b701c0cb10dd807539f0a731ceea,2019-01-27 13:05:00 UTC,heat,auto,743,740,740,MS,Pass Christian,10,False,False,True,Electric
300816,f918d405d012b701c0cb10dd807539f0a731ceea,2019-01-28 16:10:00 UTC,heat,hold,738,740,740,MS,Pass Christian,10,False,False,True,Electric
300817,f918d405d012b701c0cb10dd807539f0a731ceea,2019-01-27 13:25:00 UTC,heat,auto,736,740,740,MS,Pass Christian,10,False,False,True,Electric
300818,f918d405d012b701c0cb10dd807539f0a731ceea,2019-01-28 12:40:00 UTC,heat,hold,736,740,740,MS,Pass Christian,10,False,False,True,Electric


In [16]:
# Tag every row with its year and month (both stored as strings)
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2019_ave

In [19]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
jan_2019_ave.to_csv(
    Path("data/day/MS/jan/jan_2019_ave.csv"),
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the 2020 January "day" CSV for MS into a DataFrame
jan_2020 = pd.read_csv(Path("../data_large/MS-day/2020-jan-day-MS.csv"))

# jan_2020

In [21]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
jan_2020.drop(
    index=jan_2020[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,96161eebbbcaaadc0c47b24f4815c16e9f7518ce,2020-01-27 13:45:00 UTC,auxHeatOnly,auto,677,699,662,MS,Kiln,18,True,False,True,Electric
3,ddc9cd51c44a01e89ea06266d7d641a4bd735de2,2020-01-05 19:45:00 UTC,auto,auto,648,744,677,MS,Booneville,60,False,False,True,Electric
4,16daf0ca4bfcc11cc2e4721db2c14c38559c0a92,2020-01-06 12:40:00 UTC,heat,hold,688,687,687,MS,Amory,10,True,False,True,Electric
6,f918d405d012b701c0cb10dd807539f0a731ceea,2020-01-22 18:05:00 UTC,heat,auto,715,743,717,MS,Pass Christian,10,False,False,True,Electric
7,f918d405d012b701c0cb10dd807539f0a731ceea,2020-01-22 13:25:00 UTC,heat,auto,712,743,717,MS,Pass Christian,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
317683,7cc2be730bbe938e66cd541effe371e022c894d3,2020-01-23 16:35:00 UTC,cool,hold,735,740,740,MS,Ocean Springs,0,False,False,True,Electric
317684,7cc2be730bbe938e66cd541effe371e022c894d3,2020-01-23 18:25:00 UTC,cool,hold,710,740,740,MS,Ocean Springs,0,False,False,True,Electric
317687,748c069371486cb31498d4fa5bec2a5949d72e83,2020-01-14 12:55:00 UTC,auto,hold,739,810,760,MS,Ocean Springs,19,False,False,False,Gas
317688,748c069371486cb31498d4fa5bec2a5949d72e83,2020-01-14 12:35:00 UTC,auto,hold,718,810,760,MS,Ocean Springs,19,False,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as strings)
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2020_ave

In [25]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
jan_2020_ave.to_csv(
    Path("data/day/MS/jan/jan_2020_ave.csv"),
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the 2021 January "day" CSV for MS into a DataFrame
jan_2021 = pd.read_csv(Path("../data_large/MS-day/2021-jan-day-MS.csv"))

# jan_2021

In [27]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
jan_2021.drop(
    index=jan_2021[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,cd632477aa8e4b117fe3fefc978149512e38a1be,2021-01-10 17:50:00 UTC,auto,hold,683,742,682,MS,Clinton,10,False,False,False,Gas
1,1acb7c5c1a18c3a6baf8917710d9b20eab9e2ac4,2021-01-06 15:55:00 UTC,auto,hold,678,734,684,MS,Tupelo,20,False,False,False,Gas
2,e1d6f2c384d4e72dcc273a1d3d9625f298294878,2021-01-17 16:55:00 UTC,heat,hold,678,688,648,MS,Starkville,0,False,False,False,Gas
3,1acb7c5c1a18c3a6baf8917710d9b20eab9e2ac4,2021-01-07 17:40:00 UTC,auto,hold,739,794,744,MS,Tupelo,20,False,False,False,Gas
5,cd632477aa8e4b117fe3fefc978149512e38a1be,2021-01-24 12:40:00 UTC,auto,hold,684,742,682,MS,Clinton,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192780,dc1e40c795a411c773c78d408555dc894f11c103,2021-01-11 15:30:00 UTC,heat,hold,696,760,760,MS,New Albany,10,False,False,False,Gas
192781,dc1e40c795a411c773c78d408555dc894f11c103,2021-01-11 16:10:00 UTC,heat,hold,710,760,760,MS,New Albany,10,False,False,False,Gas
192782,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-01-10 18:25:00 UTC,heat,hold,757,760,760,MS,Madison,10,False,False,False,Gas
192783,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-01-10 18:15:00 UTC,heat,hold,767,760,760,MS,Madison,10,False,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as strings)
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jan_2021_ave

In [31]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
jan_2021_ave.to_csv(
    Path("data/day/MS/jan/jan_2021_ave.csv"),
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the CSV files in the January output directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the concatenation order reproducible across runs and platforms.
files = sorted(f for f in os.listdir("data/day/MS/jan/") if f.endswith(".csv"))

# files

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first, then concatenate once. Growing the frame with
# pd.concat inside the loop re-copies all accumulated rows on each iteration
# and concatenating onto an empty DataFrame emits a FutureWarning on recent
# pandas. Each file keeps its own row index, exactly as before.
frames = [pd.read_csv("data/day/MS/jan/" + file) for file in files]

# pd.concat raises ValueError on an empty list; keep the original result
# (an empty DataFrame) when the directory holds no CSV files.
MS_jan = pd.concat(frames) if frames else pd.DataFrame()

MS_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,cool,auto,Ocean Springs,704.883117,737.571429,683.116883,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,cool,hold,Ocean Springs,721.615385,732.977667,732.977667,0.0,False,False,True
2,01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,heat,auto,Ocean Springs,692.216667,708.183333,705.283333,0.0,False,False,True
3,01194744830da7bbc9603d51735c58faef66f72e,Jan,2017,heat,hold,Ocean Springs,714.270330,691.597802,691.597802,0.0,False,False,True
4,024d1624a424d57f4cdc19a5fa9aa6d90739ef33,Jan,2017,auto,hold,Horn Lake,712.636364,755.000000,695.000000,26.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
301,fdfe0112380a8ee36c3dfe16899298a72c886d49,Jan,2021,heat,hold,Jackson,676.365079,665.999142,665.999142,30.0,False,False,False
302,fe9bc276b524605ee152e4051285e4e080e77a3f,Jan,2021,cool,hold,Tupelo,673.536585,687.585366,687.585366,75.0,False,False,False
303,fe9bc276b524605ee152e4051285e4e080e77a3f,Jan,2021,heat,hold,Tupelo,714.558621,709.366897,708.497931,75.0,False,False,False
304,ff5551a641dc7446df1914428ded425dde839080,Jan,2021,heat,hold,Madison,654.030303,651.151515,650.484848,30.0,False,False,False


In [34]:
# Export the combined January frame; index=False leaves the row index
# out of the file
MS_jan.to_csv(
    Path("Scraper_Output/State_Month_Day/MS/MS_jan.csv"),
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" CSV for MS into a DataFrame
feb_2017 = pd.read_csv(Path("../data_large/MS-day/2017-feb-day-MS.csv"))

# feb_2017

In [36]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
feb_2017.drop(
    index=feb_2017[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,83d9dc866298389693c96dd26cbe0bd6e3ad3725,2017-02-01 19:35:00 UTC,auto,hold,729,724,694,MS,Brandon,15,False,False,False,Gas
1,c46244df34259c2508f601fd1ab7e084acce2f78,2017-02-10 19:35:00 UTC,cool,hold,724,708,708,MS,Hernando,5,True,False,False,Gas
2,e2a5d9e8bff94f9fc2f5a8a8ed4dc57a80e0c7ef,2017-02-19 14:40:00 UTC,auto,hold,734,780,709,MS,Olive Branch,0,False,False,False,Gas
3,8bb37505af1f12286cb960fed0eb3177c49e121b,2017-02-25 16:45:00 UTC,heat,auto,690,721,671,MS,Brandon,10,False,False,False,Gas
5,0cc861d81926d70c036828385eb0e61b6e98dfaa,2017-02-09 12:50:00 UTC,auto,hold,746,834,762,MS,Madison,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63035,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-02-04 14:15:00 UTC,heat,auto,759,760,760,MS,Mooreville,10,False,False,False,Gas
63036,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-02-09 13:20:00 UTC,heat,auto,766,760,760,MS,Mooreville,10,False,False,False,Gas
63037,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-02-03 13:10:00 UTC,heat,auto,754,760,760,MS,Mooreville,10,False,False,False,Gas
63038,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-02-09 16:20:00 UTC,heat,auto,758,760,760,MS,Mooreville,10,False,False,False,Gas


In [37]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2017_ave

In [40]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
feb_2017_ave.to_csv(
    Path("data/day/MS/feb/feb_2017_ave.csv"),
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the 2018 February "day" CSV for MS into a DataFrame
feb_2018 = pd.read_csv(Path("../data_large/MS-day/2018-feb-day-MS.csv"))

# feb_2018

In [42]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
feb_2018.drop(
    index=feb_2018[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d74d5111738daa0acc3caa06e4e99ce488c26d14,2018-02-27 15:20:00 UTC,cool,auto,704,778,646,MS,Vancleave,8,False,False,False,Gas
1,ad506c1a54f94de229685985f025564ac449d6a1,2018-02-04 15:00:00 UTC,heat,hold,721,718,718,MS,Madison,10,False,False,False,Gas
2,22bb4d85ac19f25ec081e494857ca94dc84f13c8,2018-02-03 14:05:00 UTC,heat,auto,644,800,800,MS,Tupelo,10,False,False,False,Gas
5,96161eebbbcaaadc0c47b24f4815c16e9f7518ce,2018-02-17 17:25:00 UTC,auto,auto,688,685,635,MS,Kiln,18,True,False,True,Electric
6,273ce90afa5e8188e6ff7544f58ac1484611ee8f,2018-02-02 08:40:00 UTC,heat,hold,665,666,666,MS,Madison,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
176451,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-02-13 19:25:00 UTC,heat,auto,764,760,760,MS,Mooreville,10,False,False,False,Gas
176452,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-02-13 15:05:00 UTC,heat,auto,759,760,760,MS,Mooreville,10,False,False,False,Gas
176453,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-02-13 17:15:00 UTC,heat,auto,759,760,760,MS,Mooreville,10,False,False,False,Gas
176454,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-02-13 16:15:00 UTC,heat,auto,758,760,760,MS,Mooreville,10,False,False,False,Gas


In [43]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2018_ave

In [46]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
feb_2018_ave.to_csv(
    Path("data/day/MS/feb/feb_2018_ave.csv"),
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the 2019 February "day" CSV for MS into a DataFrame
feb_2019 = pd.read_csv(Path("../data_large/MS-day/2019-feb-day-MS.csv"))

# feb_2019

In [48]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
feb_2019.drop(
    index=feb_2019[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d23049697dc10f7bda2995dcd45a259452e331c5,2019-02-03 14:40:00 UTC,auto,hold,677,742,652,MS,Belden,20,False,False,False,Gas
1,d23049697dc10f7bda2995dcd45a259452e331c5,2019-02-23 18:20:00 UTC,auto,hold,668,675,625,MS,Belden,20,False,False,False,Gas
2,d23049697dc10f7bda2995dcd45a259452e331c5,2019-02-02 17:45:00 UTC,auto,hold,670,742,652,MS,Belden,20,False,False,False,Gas
3,d23049697dc10f7bda2995dcd45a259452e331c5,2019-02-03 12:55:00 UTC,auto,hold,658,742,652,MS,Belden,20,False,False,False,Gas
4,d23049697dc10f7bda2995dcd45a259452e331c5,2019-02-03 17:15:00 UTC,auto,hold,664,742,652,MS,Belden,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192407,e9457c34b2cd92265d34d254523d8cbc1f11f06c,2019-02-19 13:05:00 UTC,heat,auto,698,770,700,MS,Crystal Springs,45,False,False,False,Gas
192408,e9457c34b2cd92265d34d254523d8cbc1f11f06c,2019-02-15 13:00:00 UTC,heat,auto,706,770,700,MS,Crystal Springs,45,False,False,False,Gas
192409,e9457c34b2cd92265d34d254523d8cbc1f11f06c,2019-02-15 14:05:00 UTC,heat,auto,706,770,700,MS,Crystal Springs,45,False,False,False,Gas
192410,e9457c34b2cd92265d34d254523d8cbc1f11f06c,2019-02-17 16:45:00 UTC,heat,auto,697,770,700,MS,Crystal Springs,45,False,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2019_ave

In [52]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
feb_2019_ave.to_csv(
    Path("data/day/MS/feb/feb_2019_ave.csv"),
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the 2020 February "day" CSV for MS into a DataFrame
feb_2020 = pd.read_csv(Path("../data_large/MS-day/2020-feb-day-MS.csv"))

# feb_2020

In [54]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
feb_2020.drop(
    index=feb_2020[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,18b69a558a6686ecef17b11bebcfa1a6c2379817,2020-02-06 18:30:00 UTC,auto,hold,687,752,702,MS,Amory,10,True,False,True,Electric
1,f918d405d012b701c0cb10dd807539f0a731ceea,2020-02-08 14:45:00 UTC,heat,auto,715,743,717,MS,Pass Christian,10,False,False,True,Electric
3,f918d405d012b701c0cb10dd807539f0a731ceea,2020-02-06 19:30:00 UTC,heat,auto,711,743,717,MS,Pass Christian,10,False,False,True,Electric
4,18b69a558a6686ecef17b11bebcfa1a6c2379817,2020-02-10 17:55:00 UTC,auto,hold,703,752,702,MS,Amory,10,True,False,True,Electric
5,f918d405d012b701c0cb10dd807539f0a731ceea,2020-02-05 17:15:00 UTC,heat,auto,716,743,717,MS,Pass Christian,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
285438,748c069371486cb31498d4fa5bec2a5949d72e83,2020-02-20 17:10:00 UTC,auto,hold,742,800,750,MS,Ocean Springs,19,False,False,False,Gas
285439,748c069371486cb31498d4fa5bec2a5949d72e83,2020-02-20 17:30:00 UTC,auto,hold,745,800,750,MS,Ocean Springs,19,False,False,False,Gas
285440,748c069371486cb31498d4fa5bec2a5949d72e83,2020-02-20 17:25:00 UTC,auto,hold,742,800,750,MS,Ocean Springs,19,False,False,False,Gas
285441,748c069371486cb31498d4fa5bec2a5949d72e83,2020-02-07 17:00:00 UTC,auto,hold,709,800,750,MS,Ocean Springs,19,False,False,False,Gas


In [55]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2020_ave

In [58]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
feb_2020_ave.to_csv(
    Path("data/day/MS/feb/feb_2020_ave.csv"),
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the 2021 February "day" CSV for MS into a DataFrame
feb_2021 = pd.read_csv(Path("../data_large/MS-day/2021-feb-day-MS.csv"))

# feb_2021

In [60]:
# Remove predetermined outliers before aggregating: drop every row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows with missing setpoints are kept (the comparisons are False for NaN).
feb_2021.drop(
    index=feb_2021[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8eb3b0d0f522fe3eaa94e40ced1bd19de9792e8c,2021-02-12 13:55:00 UTC,auto,hold,678,732,682,MS,Madison,10,False,False,False,Gas
1,2c6ba90605f8141dbf6c975dd5852d70d875f842,2021-02-18 14:15:00 UTC,heat,hold,691,696,696,MS,Tupelo,0,True,False,False,Gas
2,2c6ba90605f8141dbf6c975dd5852d70d875f842,2021-02-18 15:35:00 UTC,heat,hold,699,696,696,MS,Tupelo,0,True,False,False,Gas
3,bc23f35349d4bb4d810c89fa75667f22b5170e4f,2021-02-26 15:40:00 UTC,cool,hold,668,666,666,MS,Madison,5,False,False,False,Gas
4,79e8f5fa36fa0cb86d9cc37149ddcc769e701c59,2021-02-04 19:40:00 UTC,auto,hold,724,779,729,MS,Brandon,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184996,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2021-02-08 19:15:00 UTC,heat,hold,764,760,760,MS,Mooreville,10,False,False,False,Gas
184997,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2021-02-08 14:20:00 UTC,heat,hold,747,760,760,MS,Mooreville,10,False,False,False,Gas
184998,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2021-02-08 18:30:00 UTC,heat,hold,754,760,760,MS,Mooreville,10,False,False,False,Gas
184999,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2021-02-08 14:30:00 UTC,heat,hold,762,760,760,MS,Mooreville,10,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as strings)
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# this one uses lowercase "feb" - confirm which casing downstream code expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so they are labelled
# as averages once the frame has been aggregated
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric/boolean columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True is passed explicitly so the text columns (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type) are skipped on every pandas
# version; on pandas >= 2.0 the bare .mean() raises TypeError on them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# feb_2021_ave

In [64]:
# Write the aggregated frame to CSV; index=True writes the index
# alongside the data columns
feb_2021_ave.to_csv(
    Path("data/day/MS/feb/feb_2021_ave.csv"),
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the CSV files in the February output directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the concatenation order reproducible across runs and platforms.
files = sorted(f for f in os.listdir("data/day/MS/feb/") if f.endswith(".csv"))

# files

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file first, then concatenate once. Growing the frame with
# pd.concat inside the loop re-copies all accumulated rows on each iteration
# and concatenating onto an empty DataFrame emits a FutureWarning on recent
# pandas. Each file keeps its own row index, exactly as before.
frames = [pd.read_csv("data/day/MS/feb/" + file) for file in files]

# pd.concat raises ValueError on an empty list; keep the original result
# (an empty DataFrame) when the directory holds no CSV files.
MS_feb = pd.concat(frames) if frames else pd.DataFrame()

MS_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,feb,2017,cool,auto,Ocean Springs,750.444444,760.000000,740.000000,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,feb,2017,cool,hold,Ocean Springs,724.678068,735.730382,735.730382,0.0,False,False,True
2,01194744830da7bbc9603d51735c58faef66f72e,feb,2017,heat,auto,Ocean Springs,706.048000,710.456000,709.464000,0.0,False,False,True
3,01194744830da7bbc9603d51735c58faef66f72e,feb,2017,heat,hold,Ocean Springs,726.640401,711.283668,711.283668,0.0,False,False,True
4,024d1624a424d57f4cdc19a5fa9aa6d90739ef33,feb,2017,auto,auto,Horn Lake,702.869565,761.369565,711.206522,26.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
304,fe9bc276b524605ee152e4051285e4e080e77a3f,feb,2021,heat,hold,Tupelo,713.960854,711.400356,709.423488,75.0,False,False,False
305,ff5551a641dc7446df1914428ded425dde839080,feb,2021,cool,hold,Madison,643.541667,690.000000,690.000000,30.0,False,False,False
306,ff5551a641dc7446df1914428ded425dde839080,feb,2021,heat,hold,Madison,634.937500,650.000000,640.000000,30.0,False,False,False
307,ffe938566049e7ad2eb8c1726ec2215bfda979d7,feb,2021,cool,hold,Mooreville,786.000000,740.000000,740.000000,10.0,False,False,False


In [67]:
# Save the combined February table; the per-file row index is not written
MS_feb.to_csv(
    "Scraper_Output/State_Month_Day/MS/MS_feb.csv",
    index=False,
    header=True,
)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 readings for MS
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2017-jun-day-MS.csv")

# jun_2017

In [69]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jun_2017.drop(
    jun_2017[
        (jun_2017['TemperatureExpectedCool'] >= 850)
        | (jun_2017['TemperatureExpectedHeat'] >= 850)
        | (jun_2017['TemperatureExpectedCool'] <= 600)
        | (jun_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,77045d611c3cf60d028145a458d955bd64f29192,2017-06-28 11:35:00 UTC,cool,hold,734,735,735,MS,Biloxi,15,False,False,False,Gas
1,a8324dbb55ef5e62f81ebac499a34cad487b2be0,2017-06-10 11:35:00 UTC,cool,hold,718,719,719,MS,Gautier,0,False,False,False,Gas
2,22bb4d85ac19f25ec081e494857ca94dc84f13c8,2017-06-25 19:55:00 UTC,cool,auto,675,670,800,MS,Tupelo,10,False,False,False,Gas
3,2185b8c26725cbbe15a37357dfd1e210a118ff9a,2017-06-03 16:15:00 UTC,auto,hold,718,718,656,MS,Byhalia,15,False,False,False,Gas
6,22bb4d85ac19f25ec081e494857ca94dc84f13c8,2017-06-25 16:50:00 UTC,cool,auto,675,670,800,MS,Tupelo,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94626,be35204453ce52fb60900381376b731c0192106b,2017-06-21 13:10:00 UTC,cool,hold,759,760,760,MS,Meridian,0,True,False,True,Electric
94627,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-06-18 15:05:00 UTC,cool,auto,793,790,760,MS,Madison,10,False,False,False,Gas
94628,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-06-18 13:20:00 UTC,cool,auto,784,790,760,MS,Madison,10,False,False,False,Gas
94629,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-06-18 11:50:00 UTC,cool,auto,794,790,760,MS,Madison,10,False,False,False,Gas


In [70]:
# Label each row with its period; both values are stored as strings
jun_2017["Year"], jun_2017["Month"] = "2017", "jun"

In [71]:
# Rename the temperature columns so they are labelled as averages once aggregated
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2017_ave

In [73]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jun_2017_ave.to_csv(
    "data/day/MS/jun/jun_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 June Day

In [74]:
# Load the raw June 2018 readings for MS
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2018-jun-day-MS.csv")

# jun_2018

In [75]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jun_2018.drop(
    jun_2018[
        (jun_2018['TemperatureExpectedCool'] >= 850)
        | (jun_2018['TemperatureExpectedHeat'] >= 850)
        | (jun_2018['TemperatureExpectedCool'] <= 600)
        | (jun_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
4,cb0a84fc29ae0cb0bff3a61328f2c40d1a61df53,2018-06-06 19:20:00 UTC,cool,hold,740,720,718,MS,Tupelo,20,True,False,False,Gas
5,a60a0a3e848c4d4eb8396c7cf700b5c1a9093412,2018-06-07 11:35:00 UTC,auto,hold,688,685,635,MS,Newton,0,False,False,True,Electric
6,a60a0a3e848c4d4eb8396c7cf700b5c1a9093412,2018-06-18 15:45:00 UTC,auto,hold,681,685,635,MS,Newton,0,False,False,True,Electric
10,1acb7c5c1a18c3a6baf8917710d9b20eab9e2ac4,2018-06-13 16:05:00 UTC,auto,hold,725,723,663,MS,Tupelo,20,False,False,False,Gas
11,ea10cf5c507d618900db08565bee4a242fd87134,2018-06-10 18:25:00 UTC,cool,hold,775,775,775,MS,Madison,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
254350,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-06-27 16:50:00 UTC,cool,hold,762,760,760,MS,Mooreville,10,False,False,False,Gas
254351,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-06-06 19:55:00 UTC,cool,hold,763,760,760,MS,Mooreville,10,False,False,False,Gas
254352,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-06-27 18:05:00 UTC,cool,hold,767,760,760,MS,Mooreville,10,False,False,False,Gas
254353,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-06-27 17:00:00 UTC,cool,hold,766,760,760,MS,Mooreville,10,False,False,False,Gas


In [76]:
# Label each row with its period; both values are stored as strings
jun_2018["Year"], jun_2018["Month"] = "2018", "jun"

In [77]:
# Rename the temperature columns so they are labelled as averages once aggregated
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2018_ave

In [79]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jun_2018_ave.to_csv(
    "data/day/MS/jun/jun_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 June Day

In [80]:
# Load the raw June 2019 readings for MS
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2019-jun-day-MS.csv")

# jun_2019

In [81]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jun_2019.drop(
    jun_2019[
        (jun_2019['TemperatureExpectedCool'] >= 850)
        | (jun_2019['TemperatureExpectedHeat'] >= 850)
        | (jun_2019['TemperatureExpectedCool'] <= 600)
        | (jun_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,6958304af4064a7f1e351b21c0ee3d85f30d02cb,2019-06-07 13:05:00 UTC,auto,auto,670,703,673,MS,Oxford,5,False,False,False,Gas
2,eaee34a616466326eadd77bee921f8d969137b7d,2019-06-07 18:30:00 UTC,auto,auto,703,703,653,MS,Southaven,20,True,False,False,Gas
3,b97e267eaef40bd27c8fff6a536a0deb3ea69821,2019-06-24 18:10:00 UTC,cool,auto,685,680,669,MS,Purvis,10,True,False,True,Electric
4,a599456ea9be8ddee6ee185ef3f0e7527c656bf5,2019-06-23 18:35:00 UTC,cool,auto,723,707,707,MS,Moss Point,9,False,False,False,Gas
6,eaee34a616466326eadd77bee921f8d969137b7d,2019-06-03 19:05:00 UTC,auto,auto,720,703,653,MS,Southaven,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
348838,e59b56ae8d1b4b366f01f1ea46606f2c366a775d,2019-06-22 09:20:00 UTC,cool,auto,701,700,700,MS,Starkville,10,False,False,False,Gas
348839,e59b56ae8d1b4b366f01f1ea46606f2c366a775d,2019-06-22 07:30:00 UTC,cool,auto,700,700,700,MS,Starkville,10,False,False,False,Gas
348840,efcfa7e140b4d02ab30082b745226397c0bf4895,2019-06-15 11:45:00 UTC,cool,auto,703,700,700,MS,Horn Lake,30,False,False,False,Gas
348841,efcfa7e140b4d02ab30082b745226397c0bf4895,2019-06-13 12:25:00 UTC,cool,auto,704,700,700,MS,Horn Lake,30,False,False,False,Gas


In [82]:
# Label each row with its period; both values are stored as strings
jun_2019["Year"], jun_2019["Month"] = "2019", "jun"

In [83]:
# Rename the temperature columns so they are labelled as averages once aggregated
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2019_ave

In [85]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jun_2019_ave.to_csv(
    "data/day/MS/jun/jun_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 June Day

In [86]:
# Load the raw June 2020 readings for MS
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2020-jun-day-MS.csv")

# jun_2020

In [87]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jun_2020.drop(
    jun_2020[
        (jun_2020['TemperatureExpectedCool'] >= 850)
        | (jun_2020['TemperatureExpectedHeat'] >= 850)
        | (jun_2020['TemperatureExpectedCool'] <= 600)
        | (jun_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,98903dec3dd9e9c36b342c55aac6231d1f256211,2020-06-20 14:10:00 UTC,cool,auto,702,716,684,MS,Olive Branch,30,False,False,False,Gas
1,3e4287b86d0da46f17303aedd61e8f5399e30cc3,2020-06-20 14:25:00 UTC,cool,hold,751,775,775,MS,Madison,19,True,False,False,Gas
2,575c2720bda08f9c686a150357caf94d2a8447e7,2020-06-22 10:50:00 UTC,cool,hold,700,718,718,MS,Ovett,9,False,False,False,Gas
3,16daf0ca4bfcc11cc2e4721db2c14c38559c0a92,2020-06-28 19:15:00 UTC,cool,hold,731,727,727,MS,Amory,10,True,False,True,Electric
4,1621d17dd769a418287ee8a1702e662d6d7fe4a1,2020-06-13 18:15:00 UTC,cool,hold,687,685,625,MS,Oxford,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
332072,641bbf7373a2e07e201cf1f4fd3c5e6abd37de54,2020-06-04 15:35:00 UTC,auto,hold,693,695,665,MS,Oxford,10,False,False,False,Gas
332073,641bbf7373a2e07e201cf1f4fd3c5e6abd37de54,2020-06-04 19:50:00 UTC,auto,hold,686,695,665,MS,Oxford,10,False,False,False,Gas
332074,641bbf7373a2e07e201cf1f4fd3c5e6abd37de54,2020-06-23 18:40:00 UTC,auto,hold,701,695,665,MS,Oxford,10,False,False,False,Gas
332075,641bbf7373a2e07e201cf1f4fd3c5e6abd37de54,2020-06-15 14:30:00 UTC,auto,hold,701,695,665,MS,Oxford,10,False,False,False,Gas


In [88]:
# Label each row with its period; both values are stored as strings
jun_2020["Year"], jun_2020["Month"] = "2020", "jun"

In [89]:
# Rename the temperature columns so they are labelled as averages once aggregated
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2020_ave

In [91]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jun_2020_ave.to_csv(
    "data/day/MS/jun/jun_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 June Day

In [92]:
# Load the raw June 2021 readings for MS
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2021-jun-day-MS.csv")

# jun_2021

In [93]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jun_2021.drop(
    jun_2021[
        (jun_2021['TemperatureExpectedCool'] >= 850)
        | (jun_2021['TemperatureExpectedHeat'] >= 850)
        | (jun_2021['TemperatureExpectedCool'] <= 600)
        | (jun_2021['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,eb6fcfbb6678f3fbff59c8951af2325210523719,2021-06-17 19:20:00 UTC,cool,hold,746,749,749,MS,Columbus,39,False,False,True,Electric
1,a27e9562d509962fa19524e68a06dec21f2503fb,2021-06-04 15:20:00 UTC,cool,hold,717,722,722,MS,Oxford,30,False,False,False,Gas
2,8bb37505af1f12286cb960fed0eb3177c49e121b,2021-06-01 19:00:00 UTC,auto,hold,708,702,652,MS,Brandon,10,False,False,False,Gas
3,6f4cf8588e5f3242d3ccd0fd933c7f4b6764c5b1,2021-06-06 18:30:00 UTC,cool,hold,740,735,735,MS,Moss Point,20,False,False,False,Gas
4,eb6fcfbb6678f3fbff59c8951af2325210523719,2021-06-17 18:00:00 UTC,cool,hold,751,749,749,MS,Columbus,39,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
205399,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-06-05 14:25:00 UTC,cool,hold,763,760,760,MS,Madison,10,False,False,False,Gas
205400,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-06-06 09:50:00 UTC,cool,hold,751,760,760,MS,Madison,10,False,False,False,Gas
205401,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-06-06 09:30:00 UTC,cool,hold,754,760,760,MS,Madison,10,False,False,False,Gas
205402,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-06-06 07:00:00 UTC,cool,hold,752,760,760,MS,Madison,10,False,False,False,Gas


In [94]:
# Label each row with its period; both values are stored as strings
jun_2021["Year"], jun_2021["Month"] = "2021", "jun"

In [95]:
# Rename the temperature columns so they are labelled as averages once aggregated
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jun_2021_ave

In [97]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jun_2021_ave.to_csv(
    "data/day/MS/jun/jun_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [98]:
# List the per-year aggregate CSVs for June. os.listdir returns entries in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/MS/jun/") if f.endswith(".csv"))

# files

In [99]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once: calling pd.concat
# inside the loop re-copies the accumulated rows on each pass, and seeding it
# with an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/MS/jun/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the empty-frame result when no files were found
MS_jun = pd.concat(frames) if frames else pd.DataFrame()

MS_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,jun,2017,cool,auto,Ocean Springs,738.008032,736.922691,676.021084,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,jun,2017,cool,hold,Ocean Springs,735.143590,736.944872,736.944872,0.0,False,False,True
2,022c67a9cc7538740068f7248330a614c0ddb96f,jun,2017,auto,auto,Biloxi,708.250000,719.375000,669.375000,5.0,True,False,True
3,022c67a9cc7538740068f7248330a614c0ddb96f,jun,2017,cool,auto,Biloxi,710.983607,714.721311,684.721311,5.0,True,False,True
4,0442aded7dc2237382d8074027fe1db6823268d0,jun,2017,auto,hold,Oxford,697.459306,698.123142,633.970276,26.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
232,fdfe0112380a8ee36c3dfe16899298a72c886d49,jun,2021,cool,hold,Jackson,731.842051,730.814017,730.814017,30.0,False,False,False
233,fe9bc276b524605ee152e4051285e4e080e77a3f,jun,2021,cool,hold,Tupelo,709.118644,705.403955,705.403955,75.0,False,False,False
234,ff5551a641dc7446df1914428ded425dde839080,jun,2021,cool,hold,Madison,708.410112,716.943820,715.078652,30.0,False,False,False
235,ffe938566049e7ad2eb8c1726ec2215bfda979d7,jun,2021,cool,hold,Mooreville,733.231047,731.041742,731.041742,10.0,False,False,False


In [100]:
# Save the combined June table; the per-file row index is not written
MS_jun.to_csv(
    "Scraper_Output/State_Month_Day/MS/MS_jun.csv",
    index=False,
    header=True,
)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 readings for MS
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2017-jul-day-MS.csv")

# jul_2017

In [102]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jul_2017.drop(
    jul_2017[
        (jul_2017['TemperatureExpectedCool'] >= 850)
        | (jul_2017['TemperatureExpectedHeat'] >= 850)
        | (jul_2017['TemperatureExpectedCool'] <= 600)
        | (jul_2017['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,53a9f3f8f5ec8a83916689c53046deb93598bb85,2017-07-15 12:25:00 UTC,cool,hold,742,745,745,MS,Madison,15,False,False,False,Gas
1,d23049697dc10f7bda2995dcd45a259452e331c5,2017-07-16 17:55:00 UTC,cool,hold,739,735,735,MS,Belden,20,False,False,False,Gas
2,d23049697dc10f7bda2995dcd45a259452e331c5,2017-07-19 11:45:00 UTC,cool,hold,730,735,735,MS,Belden,20,False,False,False,Gas
3,782aa187364048e83a195c76583bda416532d39d,2017-07-19 15:20:00 UTC,auto,hold,731,765,735,MS,Madison,25,False,False,False,Gas
4,a8324dbb55ef5e62f81ebac499a34cad487b2be0,2017-07-04 17:25:00 UTC,cool,hold,723,718,718,MS,Gautier,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
103968,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-07-09 19:40:00 UTC,cool,auto,796,790,760,MS,Madison,10,False,False,False,Gas
103969,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-07-09 14:45:00 UTC,cool,auto,787,790,760,MS,Madison,10,False,False,False,Gas
103970,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-07-09 12:15:00 UTC,cool,auto,776,790,760,MS,Madison,10,False,False,False,Gas
103971,f0674498c898912150ff0fd0cff9c42ea053a96c,2017-07-09 17:30:00 UTC,cool,auto,791,790,760,MS,Madison,10,False,False,False,Gas


In [103]:
# Label each row with its period; both values are stored as strings
jul_2017["Year"], jul_2017["Month"] = "2017", "jul"

In [104]:
# Rename the temperature columns so they are labelled as averages once aggregated
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2017_ave

In [106]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jul_2017_ave.to_csv(
    "data/day/MS/jul/jul_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 July Day

In [107]:
# Load the raw July 2018 readings for MS
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2018-jul-day-MS.csv")

# jul_2018

In [108]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jul_2018.drop(
    jul_2018[
        (jul_2018['TemperatureExpectedCool'] >= 850)
        | (jul_2018['TemperatureExpectedHeat'] >= 850)
        | (jul_2018['TemperatureExpectedCool'] <= 600)
        | (jul_2018['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b5173564363b407ac06c9701134b97a7c42f3d03,2018-07-13 19:30:00 UTC,cool,hold,742,692,692,MS,Tupelo,48,True,False,False,Gas
2,d2da7494cf18bad8fe144eccffcce8d1ff3d9817,2018-07-18 11:50:00 UTC,cool,hold,743,745,745,MS,Flowood,0,False,False,False,Gas
3,79e8f5fa36fa0cb86d9cc37149ddcc769e701c59,2018-07-18 18:30:00 UTC,cool,hold,750,752,752,MS,Brandon,30,False,False,False,Gas
4,f918d405d012b701c0cb10dd807539f0a731ceea,2018-07-22 15:25:00 UTC,cool,hold,779,762,762,MS,Pass Christian,10,False,False,True,Electric
5,d2da7494cf18bad8fe144eccffcce8d1ff3d9817,2018-07-17 14:05:00 UTC,cool,hold,754,765,765,MS,Flowood,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
264915,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-07-27 10:10:00 UTC,cool,auto,751,760,760,MS,Mooreville,10,False,False,False,Gas
264916,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-07-25 13:55:00 UTC,cool,auto,763,760,760,MS,Mooreville,10,False,False,False,Gas
264917,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-07-27 09:20:00 UTC,cool,auto,765,760,760,MS,Mooreville,10,False,False,False,Gas
264918,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-07-27 10:40:00 UTC,cool,auto,764,760,760,MS,Mooreville,10,False,False,False,Gas


In [109]:
# Label each row with its period; both values are stored as strings
jul_2018["Year"], jul_2018["Month"] = "2018", "jul"

In [110]:
# Rename the temperature columns so they are labelled as averages once aggregated
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2018_ave

In [112]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jul_2018_ave.to_csv(
    "data/day/MS/jul/jul_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 July Day

In [113]:
# Load the raw July 2019 readings for MS
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2019-jul-day-MS.csv")

# jul_2019

In [114]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jul_2019.drop(
    jul_2019[
        (jul_2019['TemperatureExpectedCool'] >= 850)
        | (jul_2019['TemperatureExpectedHeat'] >= 850)
        | (jul_2019['TemperatureExpectedCool'] <= 600)
        | (jul_2019['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
3,b97e267eaef40bd27c8fff6a536a0deb3ea69821,2019-07-22 15:15:00 UTC,cool,auto,711,719,679,MS,Purvis,10,True,False,True,Electric
4,1acb7c5c1a18c3a6baf8917710d9b20eab9e2ac4,2019-07-29 18:25:00 UTC,auto,hold,727,703,653,MS,Tupelo,20,False,False,False,Gas
6,eaee34a616466326eadd77bee921f8d969137b7d,2019-07-12 18:45:00 UTC,auto,auto,696,693,643,MS,Southaven,20,True,False,False,Gas
7,3c67e64a200d91a6a19384106382ec07affa3475,2019-07-15 19:50:00 UTC,auto,hold,730,717,667,MS,Starkville,5,False,False,False,Gas
10,e61f7de1790ed6e52ac20dcde3492c8b9d485a1a,2019-07-14 17:15:00 UTC,auto,hold,730,730,673,MS,Long Beach,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
358022,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2019-07-14 16:30:00 UTC,cool,auto,765,760,760,MS,Mooreville,10,False,False,False,Gas
358023,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2019-07-14 07:10:00 UTC,cool,auto,764,760,760,MS,Mooreville,10,False,False,False,Gas
358024,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2019-07-14 16:05:00 UTC,cool,auto,763,760,760,MS,Mooreville,10,False,False,False,Gas
358025,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2019-07-14 08:40:00 UTC,cool,auto,766,760,760,MS,Mooreville,10,False,False,False,Gas


In [115]:
# Label each row with its period; both values are stored as strings
jul_2019["Year"], jul_2019["Month"] = "2019", "jul"

In [116]:
# Rename the temperature columns so they are labelled as averages once aggregated
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2019_ave

In [118]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jul_2019_ave.to_csv(
    "data/day/MS/jul/jul_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 July Day

In [119]:
# Load the raw July 2020 readings for MS
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2020-jul-day-MS.csv")

# jul_2020

In [120]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jul_2020.drop(
    jul_2020[
        (jul_2020['TemperatureExpectedCool'] >= 850)
        | (jul_2020['TemperatureExpectedHeat'] >= 850)
        | (jul_2020['TemperatureExpectedCool'] <= 600)
        | (jul_2020['TemperatureExpectedHeat'] <= 600)
    ].index,
    inplace=True,
)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e7fa8035e7ddf75bc0bf5f4a597278afa95d29f3,2020-07-06 12:30:00 UTC,cool,hold,720,719,719,MS,Gulfport,20,False,False,True,Electric
1,e7fa8035e7ddf75bc0bf5f4a597278afa95d29f3,2020-07-02 13:40:00 UTC,cool,hold,723,719,719,MS,Gulfport,20,False,False,True,Electric
2,eb6fcfbb6678f3fbff59c8951af2325210523719,2020-07-27 12:15:00 UTC,cool,hold,737,739,739,MS,Columbus,39,False,False,True,Electric
3,b97e267eaef40bd27c8fff6a536a0deb3ea69821,2020-07-21 13:35:00 UTC,cool,hold,674,677,677,MS,Purvis,10,True,False,True,Electric
5,d24c955854398897407bb1cc7e8470981db392c1,2020-07-26 16:45:00 UTC,auto,hold,698,697,647,MS,Oxford,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
327954,fdfe0112380a8ee36c3dfe16899298a72c886d49,2020-07-02 18:40:00 UTC,cool,hold,759,760,760,MS,Jackson,30,False,False,False,Gas
327955,fdfe0112380a8ee36c3dfe16899298a72c886d49,2020-07-01 19:50:00 UTC,cool,hold,761,760,760,MS,Jackson,30,False,False,False,Gas
327956,fdfe0112380a8ee36c3dfe16899298a72c886d49,2020-07-01 15:05:00 UTC,cool,hold,761,760,760,MS,Jackson,30,False,False,False,Gas
327957,fe9bc276b524605ee152e4051285e4e080e77a3f,2020-07-16 19:15:00 UTC,cool,auto,770,760,760,MS,Tupelo,75,False,False,False,Gas


In [121]:
# Label each row with its period; both values are stored as strings
jul_2020["Year"], jul_2020["Month"] = "2020", "jul"

In [122]:
# Rename the temperature columns so they are labelled as averages once aggregated
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2020_ave

In [124]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jul_2020_ave.to_csv(
    "data/day/MS/jul/jul_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 July Day

In [125]:
# Load the raw July 2021 readings for MS
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2021-jul-day-MS.csv")

# jul_2021

In [126]:
# Remove predetermined outliers before aggregating: drop every row whose
# expected cool or heat setpoint is >= 850 or <= 600.
jul_2021.drop(
    jul_2021[
        (jul_2021['TemperatureExpectedCool'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] >= 850)
        | (jul_2021['TemperatureExpectedHeat'] <= 600)
        | (jul_2021['TemperatureExpectedCool'] <= 600)
    ].index,
    inplace=True,
)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8d5497d8eeb1d76fa99ce5d78b2fa338d679d805,2021-07-10 16:45:00 UTC,cool,hold,747,755,755,MS,Ocean Springs,10,False,False,True,Electric
1,8bb37505af1f12286cb960fed0eb3177c49e121b,2021-07-24 12:50:00 UTC,auto,hold,697,686,636,MS,Brandon,10,False,False,False,Gas
2,a27e9562d509962fa19524e68a06dec21f2503fb,2021-07-29 12:55:00 UTC,cool,hold,709,712,712,MS,Oxford,30,False,False,False,Gas
3,2bc31afd13f246dba0252b321a1b4e32381e39ad,2021-07-11 19:00:00 UTC,auto,hold,710,713,653,MS,Clinton,10,False,False,False,Gas
4,c5b7a9591a03cc5fc6d0be096d68324779eb3088,2021-07-21 10:40:00 UTC,cool,hold,798,800,800,MS,Gulfport,55,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203024,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-07-01 08:55:00 UTC,cool,hold,766,760,760,MS,Madison,10,False,False,False,Gas
203025,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-07-22 08:55:00 UTC,cool,hold,754,760,760,MS,Madison,10,False,False,False,Gas
203026,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-07-18 19:25:00 UTC,cool,hold,768,760,760,MS,Madison,10,False,False,False,Gas
203027,f0674498c898912150ff0fd0cff9c42ea053a96c,2021-07-29 14:25:00 UTC,cool,hold,757,760,760,MS,Madison,10,False,False,False,Gas


In [127]:
# Label each row with its period; both values are stored as strings
jul_2021["Year"], jul_2021["Month"] = "2021", "jul"

In [128]:
# Rename the temperature columns so they are labelled as averages once aggregated
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is explicit because the frame still holds text columns
# (e.g. date_time, ProvinceState); pandas >= 2.0 raises on them otherwise.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# jul_2021_ave

In [130]:
# Write the monthly averages to CSV; index=True writes the index (the group-by keys) into the file
jul_2021_ave.to_csv(
    "data/day/MS/jul/jul_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [131]:
# List the per-year aggregate CSVs for July. os.listdir returns entries in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/MS/jul/") if f.endswith(".csv"))

# files

In [132]:
# Adapted from https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first, then concatenate once: calling pd.concat
# inside the loop re-copies the accumulated rows on each pass, and seeding it
# with an empty DataFrame is deprecated in recent pandas.
frames = []
for file in files:
    df = pd.read_csv("data/day/MS/jul/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep the empty-frame result when no files were found
MS_jul = pd.concat(frames) if frames else pd.DataFrame()

MS_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,jul,2017,cool,auto,Ocean Springs,738.582902,737.663212,738.264249,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,jul,2017,cool,hold,Ocean Springs,742.113695,743.492248,743.490956,0.0,False,False,True
2,024d1624a424d57f4cdc19a5fa9aa6d90739ef33,jul,2017,auto,hold,Horn Lake,721.333333,720.000000,670.000000,26.0,True,False,True
3,0442aded7dc2237382d8074027fe1db6823268d0,jul,2017,auto,auto,Oxford,689.952381,690.857143,640.000000,26.0,False,False,False
4,0442aded7dc2237382d8074027fe1db6823268d0,jul,2017,auto,hold,Oxford,698.350642,698.817979,639.062535,26.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
214,faa211ab8d7004a4f676ea83f1169cda20554042,jul,2021,auto,hold,Moss Point,690.867577,689.625939,630.000000,20.0,True,False,True
215,fdfe0112380a8ee36c3dfe16899298a72c886d49,jul,2021,cool,hold,Jackson,730.270657,729.854939,729.854939,30.0,False,False,False
216,fe9bc276b524605ee152e4051285e4e080e77a3f,jul,2021,cool,hold,Tupelo,725.833333,720.000000,720.000000,75.0,False,False,False
217,ff5551a641dc7446df1914428ded425dde839080,jul,2021,cool,hold,Madison,721.467980,724.310345,721.541872,30.0,False,False,False


In [133]:
# Write the combined July file; create the output folder first so a fresh
# checkout does not fail. index=False leaves out the repeated per-file row labels.
os.makedirs("Scraper_Output/State_Month_Day/MS", exist_ok=True)
MS_jul.to_csv("Scraper_Output/State_Month_Day/MS/MS_jul.csv", header=True, index=False)

---

## August

### 2017 August Day

In [134]:
# Load the 2017 August "day" CSV for MS
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2017-aug-day-MS.csv")

In [135]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2017.drop(aug_2017.loc[is_outlier].index, inplace=True)

# Show the remaining rows
aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a60a0a3e848c4d4eb8396c7cf700b5c1a9093412,2017-08-17 19:05:00 UTC,auto,hold,769,771,651,MS,Newton,0,False,False,True,Electric
1,4fe01e07e4cc830b6998642dd0327a85d13b615a,2017-08-26 16:20:00 UTC,auto,hold,723,739,678,MS,Jackson,20,False,False,False,Gas
2,a60a0a3e848c4d4eb8396c7cf700b5c1a9093412,2017-08-30 14:25:00 UTC,auto,hold,718,721,651,MS,Newton,0,False,False,True,Electric
3,53a9f3f8f5ec8a83916689c53046deb93598bb85,2017-08-11 12:45:00 UTC,cool,hold,690,735,735,MS,Madison,15,False,False,False,Gas
4,a60a0a3e848c4d4eb8396c7cf700b5c1a9093412,2017-08-26 17:40:00 UTC,auto,hold,746,751,651,MS,Newton,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115369,be35204453ce52fb60900381376b731c0192106b,2017-08-05 16:15:00 UTC,cool,hold,763,760,760,MS,Meridian,0,True,False,True,Electric
115370,be35204453ce52fb60900381376b731c0192106b,2017-08-07 12:40:00 UTC,cool,hold,762,760,760,MS,Meridian,0,True,False,True,Electric
115371,be35204453ce52fb60900381376b731c0192106b,2017-08-31 17:40:00 UTC,cool,hold,758,760,760,MS,Meridian,0,True,False,True,Electric
115372,be35204453ce52fb60900381376b731c0192106b,2017-08-05 16:10:00 UTC,cool,hold,762,760,760,MS,Meridian,0,True,False,True,Electric


In [136]:
# Tag every row with the year and month of this extract (both stored as strings)
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
aug_2017 = aug_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [138]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [139]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/aug", exist_ok=True)
aug_2017_ave.to_csv("data/day/MS/aug/aug_2017_ave.csv", header=True, index=True)

### 2018 August Day

In [140]:
# Load the 2018 August "day" CSV for MS
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2018-aug-day-MS.csv")

In [141]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2018.drop(aug_2018.loc[is_outlier].index, inplace=True)

# Show the remaining rows
aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0442aded7dc2237382d8074027fe1db6823268d0,2018-08-21 13:40:00 UTC,auto,hold,686,722,672,MS,Oxford,26,False,False,False,Gas
1,e7fa8035e7ddf75bc0bf5f4a597278afa95d29f3,2018-08-13 11:40:00 UTC,cool,hold,740,743,743,MS,Gulfport,20,False,False,True,Electric
2,3471cc502c2f35682aac256d68cc86083f4bd28f,2018-08-11 18:35:00 UTC,cool,hold,750,742,742,MS,Brandon,20,False,False,False,Gas
3,b92895e7b640505e4844b475ebf1d76483786365,2018-08-26 17:00:00 UTC,cool,auto,711,709,709,MS,Purvis,0,False,False,True,Electric
4,b92895e7b640505e4844b475ebf1d76483786365,2018-08-25 12:50:00 UTC,cool,auto,708,709,709,MS,Purvis,0,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
258252,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-08-05 11:35:00 UTC,cool,auto,752,760,760,MS,Mooreville,10,False,False,False,Gas
258253,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-08-05 13:45:00 UTC,cool,auto,757,760,760,MS,Mooreville,10,False,False,False,Gas
258254,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-08-05 12:30:00 UTC,cool,auto,756,760,760,MS,Mooreville,10,False,False,False,Gas
258255,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-08-18 18:10:00 UTC,cool,auto,757,760,760,MS,Mooreville,10,False,False,False,Gas


In [142]:
# Tag every row with the year and month of this extract (both stored as strings)
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
aug_2018 = aug_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [144]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [145]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/aug", exist_ok=True)
aug_2018_ave.to_csv("data/day/MS/aug/aug_2018_ave.csv", header=True, index=True)

### 2019 August Day

In [146]:
# Load the 2019 August "day" CSV for MS
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2019-aug-day-MS.csv")

In [147]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2019.drop(aug_2019.loc[is_outlier].index, inplace=True)

# Show the remaining rows
aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,95dd14ef17a6a1561dea9b0046b0bc9d11b9b9ff,2019-08-22 12:35:00 UTC,cool,hold,781,779,779,MS,Brandon,9,True,False,False,Gas
1,cd632477aa8e4b117fe3fefc978149512e38a1be,2019-08-25 18:05:00 UTC,auto,hold,721,716,666,MS,Clinton,10,False,False,False,Gas
2,295d5108c3ad59096d41bf12a67714ae31641a85,2019-08-07 14:35:00 UTC,cool,auto,763,759,646,MS,Madison,20,True,False,False,Gas
3,f48ff8af79c722f86b46269df5785ce40223e5b2,2019-08-09 17:20:00 UTC,cool,hold,699,679,679,MS,Southaven,10,False,False,False,Gas
5,95dd14ef17a6a1561dea9b0046b0bc9d11b9b9ff,2019-08-28 17:05:00 UTC,cool,hold,776,779,779,MS,Brandon,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
351468,f0674498c898912150ff0fd0cff9c42ea053a96c,2019-08-25 19:55:00 UTC,cool,hold,764,760,760,MS,Madison,10,False,False,False,Gas
351469,f0674498c898912150ff0fd0cff9c42ea053a96c,2019-08-25 18:30:00 UTC,cool,hold,757,760,760,MS,Madison,10,False,False,False,Gas
351470,f0674498c898912150ff0fd0cff9c42ea053a96c,2019-08-25 16:45:00 UTC,cool,hold,762,760,760,MS,Madison,10,False,False,False,Gas
351471,f0674498c898912150ff0fd0cff9c42ea053a96c,2019-08-25 12:50:00 UTC,cool,hold,762,760,760,MS,Madison,10,False,False,False,Gas


In [148]:
# Tag every row with the year and month of this extract (both stored as strings)
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
aug_2019 = aug_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [150]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [151]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/aug", exist_ok=True)
aug_2019_ave.to_csv("data/day/MS/aug/aug_2019_ave.csv", header=True, index=True)

### 2020 August Day

In [152]:
# Load the 2020 August "day" CSV for MS
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2020-aug-day-MS.csv")

In [153]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
aug_2020.drop(aug_2020.loc[is_outlier].index, inplace=True)

# Show the remaining rows
aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,675453cae9b4ab73fefa9dfcc356a214facb645f,2020-08-08 12:55:00 UTC,cool,hold,700,693,693,MS,Madison,10,False,False,False,Gas
2,12b757b138fde9ee0e12c487691e154ecc131712,2020-08-16 19:05:00 UTC,cool,auto,754,750,745,MS,Jackson,0,False,False,False,Gas
3,f352ad15d20ecc6e685bfa9284c295b5d942d43f,2020-08-31 14:10:00 UTC,cool,hold,700,700,653,MS,Madison,0,False,False,False,Gas
4,9ecfff8a196424ef9e7409a9f04ddcb676c4866a,2020-08-29 17:35:00 UTC,cool,hold,729,714,696,MS,Gulfport,45,False,False,False,Gas
5,1c771224a928c91a10a647dc8296505bed7b71fd,2020-08-02 19:55:00 UTC,auto,hold,775,803,673,MS,Clinton,10,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
336776,dc1e40c795a411c773c78d408555dc894f11c103,2020-08-03 19:05:00 UTC,cool,auto,704,700,700,MS,New Albany,10,False,False,False,Gas
336777,dc1e40c795a411c773c78d408555dc894f11c103,2020-08-20 11:40:00 UTC,cool,auto,699,700,700,MS,New Albany,10,False,False,False,Gas
336778,dc1e40c795a411c773c78d408555dc894f11c103,2020-08-03 12:45:00 UTC,cool,auto,704,700,700,MS,New Albany,10,False,False,False,Gas
336779,dc1e40c795a411c773c78d408555dc894f11c103,2020-08-20 19:50:00 UTC,cool,auto,704,700,700,MS,New Albany,10,False,False,False,Gas


In [154]:
# Tag every row with the year and month of this extract (both stored as strings)
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
aug_2020 = aug_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [156]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [157]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/aug", exist_ok=True)
aug_2020_ave.to_csv("data/day/MS/aug/aug_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files
1. Read in the per-year CSV files from this month's folder
2. Export them as one combined CSV for the month

In [158]:
# List the per-year CSV files in the August folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MS/aug/") if f.endswith(".csv"))

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed CSV, then stack them with a single concat call instead of
# growing the frame inside a loop (which re-copies all earlier rows on each pass
# and seeds the result with an empty frame).
frames = [pd.read_csv("data/day/MS/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame as before
MS_aug = pd.concat(frames) if frames else pd.DataFrame()

MS_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,aug,2017,cool,auto,Ocean Springs,760.175676,767.140541,746.391892,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,aug,2017,cool,hold,Ocean Springs,744.396632,740.381665,740.364827,0.0,False,False,True
2,022c67a9cc7538740068f7248330a614c0ddb96f,aug,2017,auto,auto,Biloxi,706.000000,739.000000,670.000000,5.0,True,False,True
3,0442aded7dc2237382d8074027fe1db6823268d0,aug,2017,auto,hold,Oxford,702.380866,703.107100,640.000000,26.0,False,False,False
4,04e9b5167752b184a42353cfbf667d3e89d98f33,aug,2017,cool,auto,Long Beach,741.443709,746.624724,680.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
401,fe9bc276b524605ee152e4051285e4e080e77a3f,aug,2020,cool,hold,Tupelo,765.852201,774.937107,774.937107,75.0,False,False,False
402,ff5551a641dc7446df1914428ded425dde839080,aug,2020,cool,auto,Madison,691.906475,690.446043,689.841727,30.0,False,False,False
403,ff5551a641dc7446df1914428ded425dde839080,aug,2020,cool,hold,Madison,726.074627,729.000000,725.604478,30.0,False,False,False
404,ffe938566049e7ad2eb8c1726ec2215bfda979d7,aug,2020,cool,auto,Mooreville,735.812931,733.133015,733.133015,10.0,False,False,False


In [160]:
# Write the combined August file; create the output folder first so a fresh
# checkout does not fail. index=False leaves out the repeated per-file row labels.
os.makedirs("Scraper_Output/State_Month_Day/MS", exist_ok=True)
MS_aug.to_csv("Scraper_Output/State_Month_Day/MS/MS_aug.csv", header=True, index=False)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the 2017 December "day" CSV for MS
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2017-dec-day-MS.csv")

In [162]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2017.drop(dec_2017.loc[is_outlier].index, inplace=True)

# Show the remaining rows
dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,024d1624a424d57f4cdc19a5fa9aa6d90739ef33,2017-12-22 11:50:00 UTC,auto,hold,729,785,735,MS,Horn Lake,26,True,False,True,Electric
1,6958304af4064a7f1e351b21c0ee3d85f30d02cb,2017-12-23 14:35:00 UTC,auto,auto,728,785,735,MS,Oxford,5,False,False,False,Gas
2,9b718599ed03efd98e2b15948c86641a7cd4b249,2017-12-23 19:10:00 UTC,heat,hold,740,795,745,MS,Madison,0,False,False,False,Gas
4,0cc861d81926d70c036828385eb0e61b6e98dfaa,2017-12-06 12:50:00 UTC,heat,auto,731,797,702,MS,Madison,10,False,False,False,Gas
5,6958304af4064a7f1e351b21c0ee3d85f30d02cb,2017-12-03 16:50:00 UTC,auto,hold,722,773,723,MS,Oxford,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
167508,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-12-23 15:45:00 UTC,heat,auto,776,760,760,MS,Mooreville,10,False,False,False,Gas
167509,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-12-23 15:25:00 UTC,heat,auto,771,760,760,MS,Mooreville,10,False,False,False,Gas
167510,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-12-07 16:50:00 UTC,heat,auto,763,760,760,MS,Mooreville,10,False,False,False,Gas
167511,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2017-12-23 15:40:00 UTC,heat,auto,775,760,760,MS,Mooreville,10,False,False,False,Gas


In [163]:
# Tag every row with the year and month of this extract (both stored as strings)
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
dec_2017 = dec_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [165]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [166]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/dec", exist_ok=True)
dec_2017_ave.to_csv("data/day/MS/dec/dec_2017_ave.csv", header=True, index=True)

### 2018 December Day

In [167]:
# Load the 2018 December "day" CSV for MS
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2018-dec-day-MS.csv")

In [168]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2018.drop(dec_2018.loc[is_outlier].index, inplace=True)

# Show the remaining rows
dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1b6fe47c57f6e62dba5ace2d6a2504776c43f87f,2018-12-15 18:55:00 UTC,heat,hold,703,745,677,MS,Olive Branch,0,True,False,False,Gas
1,d23049697dc10f7bda2995dcd45a259452e331c5,2018-12-23 13:05:00 UTC,auto,hold,654,722,642,MS,Belden,20,False,False,False,Gas
2,6958304af4064a7f1e351b21c0ee3d85f30d02cb,2018-12-07 16:45:00 UTC,auto,hold,715,759,659,MS,Oxford,5,False,False,False,Gas
3,d23049697dc10f7bda2995dcd45a259452e331c5,2018-12-09 19:55:00 UTC,auto,hold,684,742,682,MS,Belden,20,False,False,False,Gas
4,db2579b46576227a29f30401612ae2a9f036935c,2018-12-16 14:55:00 UTC,auto,hold,705,762,692,MS,Vicksburg,65,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
295044,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-12-05 10:45:00 UTC,heat,auto,762,760,760,MS,Mooreville,10,False,False,False,Gas
295045,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-12-05 07:45:00 UTC,heat,auto,763,760,760,MS,Mooreville,10,False,False,False,Gas
295046,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-12-09 16:25:00 UTC,heat,auto,758,760,760,MS,Mooreville,10,False,False,False,Gas
295047,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2018-12-09 16:05:00 UTC,heat,auto,755,760,760,MS,Mooreville,10,False,False,False,Gas


In [169]:
# Tag every row with the year and month of this extract (both stored as strings)
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
dec_2018 = dec_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [171]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [172]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/dec", exist_ok=True)
dec_2018_ave.to_csv("data/day/MS/dec/dec_2018_ave.csv", header=True, index=True)

### 2019 December Day

In [173]:
# Load the 2019 December "day" CSV for MS
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2019-dec-day-MS.csv")

In [174]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2019.drop(dec_2019.loc[is_outlier].index, inplace=True)

# Show the remaining rows
dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
2,f918d405d012b701c0cb10dd807539f0a731ceea,2019-12-30 13:30:00 UTC,heat,auto,723,743,717,MS,Pass Christian,10,False,False,True,Electric
3,16daf0ca4bfcc11cc2e4721db2c14c38559c0a92,2019-12-30 13:45:00 UTC,auxHeatOnly,hold,687,677,677,MS,Amory,10,True,False,True,Electric
4,f47b3881ab449632a3dfc5d0b96cc84335174dd3,2019-12-20 15:55:00 UTC,auto,hold,689,750,693,MS,Petal,10,False,False,False,Gas
5,f918d405d012b701c0cb10dd807539f0a731ceea,2019-12-23 14:50:00 UTC,heat,auto,718,743,717,MS,Pass Christian,10,False,False,True,Electric
8,71fc691a191d2bbea3ea5b823d23477631bb8ea4,2019-12-12 17:45:00 UTC,heat,hold,661,669,669,MS,Ecru,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
307340,748c069371486cb31498d4fa5bec2a5949d72e83,2019-12-21 15:30:00 UTC,auto,hold,696,800,750,MS,Ocean Springs,19,False,False,False,Gas
307341,41db72abb39a21e919affae19ff3413510b13633,2019-12-29 16:15:00 UTC,cool,auto,719,750,750,MS,Ocean Springs,15,False,False,False,Gas
307342,8d5497d8eeb1d76fa99ce5d78b2fa338d679d805,2019-12-29 19:00:00 UTC,cool,auto,761,760,760,MS,Ocean Springs,10,False,False,True,Electric
307343,8d5497d8eeb1d76fa99ce5d78b2fa338d679d805,2019-12-29 18:55:00 UTC,cool,auto,761,760,760,MS,Ocean Springs,10,False,False,True,Electric


In [175]:
# Tag every row with the year and month of this extract (both stored as strings)
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
dec_2019 = dec_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [177]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [178]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/dec", exist_ok=True)
dec_2019_ave.to_csv("data/day/MS/dec/dec_2019_ave.csv", header=True, index=True)

### 2020 December Day

In [179]:
# Load the 2020 December "day" CSV for MS
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/MS-day/2020-dec-day-MS.csv")

In [180]:
# Remove predetermined outliers before aggregating.
# A row is discarded when TemperatureExpectedCool or TemperatureExpectedHeat
# is >= 850 or <= 600 (both limits apply to both columns).
expected_temps = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((expected_temps >= 850) | (expected_temps <= 600)).any(axis=1)
dec_2020.drop(dec_2020.loc[is_outlier].index, inplace=True)

# Show the remaining rows
dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,629f03d3cff1523e3f45e61248076717d308fe72,2020-12-27 17:15:00 UTC,cool,hold,689,692,692,MS,Clinton,10,False,False,False,Gas
1,aa17f4db1d39aee255374137e60637b4b46314db,2020-12-13 16:30:00 UTC,auto,hold,661,720,662,MS,Jackson,0,False,False,False,Gas
2,77045d611c3cf60d028145a458d955bd64f29192,2020-12-15 13:00:00 UTC,heat,hold,689,682,682,MS,Biloxi,15,False,False,False,Gas
3,b97e267eaef40bd27c8fff6a536a0deb3ea69821,2020-12-04 11:10:00 UTC,heat,hold,656,661,661,MS,Purvis,10,True,False,True,Electric
5,1acb7c5c1a18c3a6baf8917710d9b20eab9e2ac4,2020-12-08 19:00:00 UTC,auto,hold,715,754,704,MS,Tupelo,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
270866,f0674498c898912150ff0fd0cff9c42ea053a96c,2020-12-13 13:45:00 UTC,heat,auto,756,760,760,MS,Madison,10,False,False,False,Gas
270867,f0674498c898912150ff0fd0cff9c42ea053a96c,2020-12-13 19:20:00 UTC,heat,auto,759,760,760,MS,Madison,10,False,False,False,Gas
270868,f0674498c898912150ff0fd0cff9c42ea053a96c,2020-12-13 18:05:00 UTC,heat,auto,752,760,760,MS,Madison,10,False,False,False,Gas
270869,ffe938566049e7ad2eb8c1726ec2215bfda979d7,2020-12-19 15:45:00 UTC,heat,hold,757,760,760,MS,Mooreville,10,False,False,False,Gas


In [181]:
# Tag every row with the year and month of this extract (both stored as strings)
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Give the three temperature columns an "_ave" suffix so they are already
# labelled as averages once the groupby below has run
dec_2020 = dec_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [183]:
# Average the numeric columns for every Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True is explicit because pandas >= 2.0 no longer drops text columns
# (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type) silently and raises a
# TypeError when asked to average them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [184]:
# Export CSV file
# Create the target folder first so the export also works on a fresh checkout;
# index=True writes the group keys as the leading columns.
os.makedirs("data/day/MS/dec", exist_ok=True)
dec_2020_ave.to_csv("data/day/MS/dec/dec_2020_ave.csv", header=True, index=True)

---

### Combine month CSV Files
1. Read in the per-year CSV files from this month's folder
2. Export them as one combined CSV for the month

In [185]:
# List the per-year CSV files in the December folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the combined file reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/MS/dec/") if f.endswith(".csv"))

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed CSV, then stack them with a single concat call instead of
# growing the frame inside a loop (which re-copies all earlier rows on each pass
# and seeds the result with an empty frame).
frames = [pd.read_csv("data/day/MS/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame as before
MS_dec = pd.concat(frames) if frames else pd.DataFrame()

MS_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,dec,2017,cool,auto,Ocean Springs,732.222222,735.559524,710.000000,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,dec,2017,cool,hold,Ocean Springs,723.419192,735.151515,735.151515,0.0,False,False,True
2,01194744830da7bbc9603d51735c58faef66f72e,dec,2017,heat,auto,Ocean Springs,710.068783,714.227513,710.695767,0.0,False,False,True
3,01194744830da7bbc9603d51735c58faef66f72e,dec,2017,heat,hold,Ocean Springs,697.422996,695.005274,695.005274,0.0,False,False,True
4,022c67a9cc7538740068f7248330a614c0ddb96f,dec,2017,auto,auto,Biloxi,731.164557,780.000000,730.000000,5.0,True,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
481,ff5551a641dc7446df1914428ded425dde839080,dec,2020,heat,auto,Madison,674.104167,680.562500,679.791667,30.0,False,False,False
482,ff5551a641dc7446df1914428ded425dde839080,dec,2020,heat,hold,Madison,659.727273,680.000000,680.000000,30.0,False,False,False
483,ffe938566049e7ad2eb8c1726ec2215bfda979d7,dec,2020,cool,hold,Mooreville,735.111111,703.000000,703.000000,10.0,False,False,False
484,ffe938566049e7ad2eb8c1726ec2215bfda979d7,dec,2020,heat,auto,Mooreville,732.797885,734.270270,734.270270,10.0,False,False,False


In [187]:
# Write the combined December file; create the output folder first so a fresh
# checkout does not fail. index=False leaves out the repeated per-file row labels.
os.makedirs("Scraper_Output/State_Month_Day/MS", exist_ok=True)
MS_dec.to_csv("Scraper_Output/State_Month_Day/MS/MS_dec.csv", header=True, index=False)

----

----

---

### Combine state CSV Files
1. Read in the combined month CSV files from the state's folder
2. Export them as one combined CSV for the state

In [188]:
# List the combined month CSV files in the MS output folder.
# os.listdir returns names in arbitrary order, so sort them to keep the row
# order of the state-level file reproducible between runs and machines.
files = sorted(
    f for f in os.listdir("Scraper_Output/State_Month_Day/MS/") if f.endswith(".csv")
)

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed month CSV, then stack them with a single concat call instead
# of growing the frame inside a loop (which re-copies all earlier rows on each
# pass and seeds the result with an empty frame).
frames = [pd.read_csv("Scraper_Output/State_Month_Day/MS/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame as before
MS_all = pd.concat(frames) if frames else pd.DataFrame()

MS_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,01194744830da7bbc9603d51735c58faef66f72e,aug,2017,cool,auto,Ocean Springs,760.175676,767.140541,746.391892,0.0,False,False,True
1,01194744830da7bbc9603d51735c58faef66f72e,aug,2017,cool,hold,Ocean Springs,744.396632,740.381665,740.364827,0.0,False,False,True
2,022c67a9cc7538740068f7248330a614c0ddb96f,aug,2017,auto,auto,Biloxi,706.000000,739.000000,670.000000,5.0,True,False,True
3,0442aded7dc2237382d8074027fe1db6823268d0,aug,2017,auto,hold,Oxford,702.380866,703.107100,640.000000,26.0,False,False,False
4,04e9b5167752b184a42353cfbf667d3e89d98f33,aug,2017,cool,auto,Long Beach,741.443709,746.624724,680.000000,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1603,fdfe0112380a8ee36c3dfe16899298a72c886d49,jun,2021,cool,hold,Jackson,731.842051,730.814017,730.814017,30.0,False,False,False
1604,fe9bc276b524605ee152e4051285e4e080e77a3f,jun,2021,cool,hold,Tupelo,709.118644,705.403955,705.403955,75.0,False,False,False
1605,ff5551a641dc7446df1914428ded425dde839080,jun,2021,cool,hold,Madison,708.410112,716.943820,715.078652,30.0,False,False,False
1606,ffe938566049e7ad2eb8c1726ec2215bfda979d7,jun,2021,cool,hold,Mooreville,733.231047,731.041742,731.041742,10.0,False,False,False


In [190]:
# Write the state-level file; the row labels are left out (index=False)
MS_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/MS_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each month/year frame.
frames_to_check = [
    ("jan_2017", jan_2017),
    ("jan_2018", jan_2018),
    ("jan_2019", jan_2019),
    ("jan_2020", jan_2020),
    ("jan_2021", jan_2021),
    ("feb_2017", feb_2017),
    ("feb_2018", feb_2018),
    ("feb_2019", feb_2019),
    ("feb_2020", feb_2020),
    ("feb_2021", feb_2021),
    ("jun_2017", jun_2017),
    ("jun_2018", jun_2018),
    ("jun_2019", jun_2019),
    ("jun_2020", jun_2020),
    ("jun_2021", jun_2021),
    ("jul_2017", jul_2017),
    ("jul_2018", jul_2018),
    ("jul_2019", jul_2019),
    ("jul_2020", jul_2020),
    ("jul_2021", jul_2021),
    ("aug_2017", aug_2017),
    ("aug_2018", aug_2018),
    ("aug_2019", aug_2019),
    ("aug_2020", aug_2020),
    ("dec_2017", dec_2017),
    ("dec_2018", dec_2018),
    ("dec_2019", dec_2019),
    ("dec_2020", dec_2020),
]

for label, frame in frames_to_check:
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['MS']
Unique jan_2018: ['MS']
Unique jan_2019: ['MS']
Unique jan_2020: ['MS']
Unique jan_2021: ['MS']
Unique feb_2017: ['MS']
Unique feb_2018: ['MS']
Unique feb_2019: ['MS']
Unique feb_2020: ['MS']
Unique feb_2021: ['MS']
Unique jun_2017: ['MS']
Unique jun_2018: ['MS']
Unique jun_2019: ['MS']
Unique jun_2020: ['MS']
Unique jun_2021: ['MS']
Unique jul_2017: ['MS']
Unique jul_2018: ['MS']
Unique jul_2019: ['MS']
Unique jul_2020: ['MS']
Unique jul_2021: ['MS']
Unique aug_2017: ['MS']
Unique aug_2018: ['MS']
Unique aug_2019: ['MS']
Unique aug_2020: ['MS']
Unique dec_2017: ['MS']
Unique dec_2018: ['MS']
Unique dec_2019: ['MS']
Unique dec_2020: ['MS']
