# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the raw ND "day" extract for January 2017.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2017-jan-day-ND.csv")

In [3]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
jan_2017.drop(
    index=jan_2017[
        (jan_2017["TemperatureExpectedCool"] >= 850)
        | (jan_2017["TemperatureExpectedHeat"] >= 850)
        | (jan_2017["TemperatureExpectedCool"] <= 600)
        | (jan_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,591651a8eb29731436279c2cab4ed8735e2bb4b9,2017-01-01T16:15:00Z,heat,hold,664,708,666,ND,Fargo,65,False,False,False,Gas
1,d89f6b4e9059b9f5a54136b034c24b93284de323,2017-01-14T14:20:00Z,heat,hold,670,678,678,ND,Grand Forks,5,False,False,False,Gas
2,d89f6b4e9059b9f5a54136b034c24b93284de323,2017-01-01T14:45:00Z,heat,auto,715,738,720,ND,Grand Forks,5,False,False,False,Gas
3,d89f6b4e9059b9f5a54136b034c24b93284de323,2017-01-14T18:00:00Z,heat,auto,711,719,706,ND,Grand Forks,5,False,False,False,Gas
4,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,2017-01-31T18:25:00Z,auto,auto,666,728,660,ND,Bismarck,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12532,7704704fb68e3b5388328d60bc0d285590698ad5,2017-01-02T16:05:00Z,heat,hold,704,720,720,ND,Lincoln,5,False,False,False,Gas
12533,7704704fb68e3b5388328d60bc0d285590698ad5,2017-01-02T19:25:00Z,heat,hold,715,720,720,ND,Lincoln,5,False,False,False,Gas
12534,7704704fb68e3b5388328d60bc0d285590698ad5,2017-01-03T11:30:00Z,heat,hold,719,720,720,ND,Lincoln,5,False,False,False,Gas
12535,7704704fb68e3b5388328d60bc0d285590698ad5,2017-01-02T18:40:00Z,heat,hold,720,720,720,ND,Lincoln,5,False,False,False,Gas


In [4]:
# Tag every row with its source year and month (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
jan_2017 = jan_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [6]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
jan_2017_ave = jan_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
078b48dc9011fc73ec28a0464a6e0b260e5a8715,Jan,2017,heat,hold,Williston,702.746835,703.227848,703.227848,5.0,False,False,False
0c0bd0b708210af527c4acd1652566b20b9e1113,Jan,2017,heat,auto,Casselton,689.096774,691.645161,690.548387,70.0,False,False,False
0c0bd0b708210af527c4acd1652566b20b9e1113,Jan,2017,heat,hold,Casselton,710.290043,713.056277,712.935065,70.0,False,False,False
0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,Jan,2017,auto,auto,Bismarck,693.557823,786.394558,693.210884,15.0,False,False,False
0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,Jan,2017,auto,hold,Bismarck,691.993802,799.917355,695.762397,15.0,False,False,False
0f282db9c216fb43ec7241fe29cd5c947102a773,Jan,2017,heat,auto,Grand Forks,708.0,720.0,720.0,0.0,False,False,False
0f282db9c216fb43ec7241fe29cd5c947102a773,Jan,2017,heat,hold,Grand Forks,671.588362,674.790948,674.715517,0.0,False,False,False
31bab83a250783cc0d48f1509a6fdfeb059990b7,Jan,2017,auxHeatOnly,auto,Argusville,651.053659,654.132927,654.132927,0.0,False,False,True
31bab83a250783cc0d48f1509a6fdfeb059990b7,Jan,2017,auxHeatOnly,hold,Argusville,661.013305,665.210218,665.210218,0.0,False,False,True
3e1895a4447b3d22741a2a18a91fa946c1562773,Jan,2017,heat,auto,Lincoln,684.343284,701.313433,701.313433,45.0,False,False,False


In [7]:
# Export the January 2017 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/ND/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the raw ND "day" extract for January 2018.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2018-jan-day-ND.csv")

In [9]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
jan_2018.drop(
    index=jan_2018[
        (jan_2018["TemperatureExpectedCool"] >= 850)
        | (jan_2018["TemperatureExpectedHeat"] >= 850)
        | (jan_2018["TemperatureExpectedCool"] <= 600)
        | (jan_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d89f6b4e9059b9f5a54136b034c24b93284de323,2018-01-27 15:40:00 UTC,heat,auto,727,736,712,ND,Grand Forks,5,False,False,False,Gas
1,d89f6b4e9059b9f5a54136b034c24b93284de323,2018-01-10 15:25:00 UTC,heat,hold,719,810,639,ND,Grand Forks,5,False,False,False,Gas
2,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2018-01-12 18:05:00 UTC,heat,auto,616,774,645,ND,Bismarck,57,False,False,False,Gas
3,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2018-01-26 16:35:00 UTC,heat,auto,677,755,670,ND,Bismarck,57,False,False,False,Gas
4,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2018-01-22 17:25:00 UTC,heat,auto,712,742,684,ND,Bismarck,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41874,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2018-01-09 15:30:00 UTC,auto,auto,689,760,690,ND,West Fargo,7,True,False,False,Gas
41875,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2018-01-06 14:20:00 UTC,auto,auto,687,760,690,ND,West Fargo,7,True,False,False,Gas
41876,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2018-01-13 15:20:00 UTC,auto,auto,686,760,690,ND,West Fargo,7,True,False,False,Gas
41877,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2018-01-17 15:10:00 UTC,auto,auto,689,760,690,ND,West Fargo,7,True,False,False,Gas


In [10]:
# Tag every row with its source year and month (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
jan_2018 = jan_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [12]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
jan_2018_ave = jan_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [13]:
# Export the January 2018 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/ND/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the raw ND "day" extract for January 2019.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2019-jan-day-ND.csv")

In [15]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
jan_2019.drop(
    index=jan_2019[
        (jan_2019["TemperatureExpectedCool"] >= 850)
        | (jan_2019["TemperatureExpectedHeat"] >= 850)
        | (jan_2019["TemperatureExpectedCool"] <= 600)
        | (jan_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d705ace877c5b9fe820415e5ccce4f7d92a7ae6a,2019-01-22 19:40:00 UTC,heat,hold,694,700,700,ND,Grand Forks,90,False,False,False,Gas
1,d705ace877c5b9fe820415e5ccce4f7d92a7ae6a,2019-01-05 19:35:00 UTC,heat,hold,697,700,700,ND,Grand Forks,90,False,False,False,Gas
2,d705ace877c5b9fe820415e5ccce4f7d92a7ae6a,2019-01-31 17:35:00 UTC,heat,hold,694,700,700,ND,Grand Forks,90,False,False,False,Gas
3,d705ace877c5b9fe820415e5ccce4f7d92a7ae6a,2019-01-17 10:35:00 UTC,heat,hold,690,700,700,ND,Grand Forks,90,False,False,False,Gas
4,d705ace877c5b9fe820415e5ccce4f7d92a7ae6a,2019-01-23 08:05:00 UTC,heat,hold,692,700,700,ND,Grand Forks,90,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55776,c171e9566395e753d002ead8edbfacf80563248c,2019-01-12 14:10:00 UTC,heat,hold,706,700,700,ND,Minot,40,False,False,False,Gas
55777,c171e9566395e753d002ead8edbfacf80563248c,2019-01-12 13:45:00 UTC,heat,hold,716,700,700,ND,Minot,40,False,False,False,Gas
55778,c171e9566395e753d002ead8edbfacf80563248c,2019-01-12 14:30:00 UTC,heat,hold,700,700,700,ND,Minot,40,False,False,False,Gas
55779,c171e9566395e753d002ead8edbfacf80563248c,2019-01-12 14:15:00 UTC,heat,hold,706,700,700,ND,Minot,40,False,False,False,Gas


In [16]:
# Tag every row with its source year and month (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
jan_2019 = jan_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [18]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
jan_2019_ave = jan_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [19]:
# Export the January 2019 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/ND/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the raw ND "day" extract for January 2020.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2020-jan-day-ND.csv")

In [21]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
jan_2020.drop(
    index=jan_2020[
        (jan_2020["TemperatureExpectedCool"] >= 850)
        | (jan_2020["TemperatureExpectedHeat"] >= 850)
        | (jan_2020["TemperatureExpectedCool"] <= 600)
        | (jan_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-01-25 17:00:00 UTC,auto,auto,685,747,697,ND,Bismarck,50,False,False,False,Gas
1,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-01-02 12:40:00 UTC,auto,auto,692,747,697,ND,Bismarck,50,False,False,False,Gas
2,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-01-04 16:55:00 UTC,auto,auto,697,747,697,ND,Bismarck,50,False,False,False,Gas
3,26023071a26724ad4835896554801910a124909a,2020-01-05 19:20:00 UTC,heat,hold,700,684,680,ND,Mandan,9,True,False,False,Gas
4,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-01-25 14:55:00 UTC,auto,auto,700,747,697,ND,Bismarck,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62034,fac4be949f9b030bcaedc0c75666f90d246e942d,2020-01-08 14:00:00 UTC,heat,hold,756,760,760,ND,Bismarck,0,False,False,False,Gas
62035,fac4be949f9b030bcaedc0c75666f90d246e942d,2020-01-13 14:05:00 UTC,heat,hold,755,760,760,ND,Bismarck,0,False,False,False,Gas
62036,fac4be949f9b030bcaedc0c75666f90d246e942d,2020-01-21 17:20:00 UTC,heat,hold,754,760,760,ND,Bismarck,0,False,False,False,Gas
62037,fac4be949f9b030bcaedc0c75666f90d246e942d,2020-01-23 17:30:00 UTC,heat,hold,759,760,760,ND,Bismarck,0,False,False,False,Gas


In [22]:
# Tag every row with its source year and month (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
jan_2020 = jan_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [24]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
jan_2020_ave = jan_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [25]:
# Export the January 2020 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/ND/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the raw ND "day" extract for January 2021.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2021-jan-day-ND.csv")

In [27]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
jan_2021.drop(
    index=jan_2021[
        (jan_2021["TemperatureExpectedCool"] >= 850)
        | (jan_2021["TemperatureExpectedHeat"] >= 850)
        | (jan_2021["TemperatureExpectedCool"] <= 600)
        | (jan_2021["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2021-01-30 14:40:00 UTC,heat,hold,704,706,706,ND,Horace,5,False,False,False,Gas
1,8f20c0d306b7503704799f8515d37da8e6892a01,2021-01-20 13:20:00 UTC,heat,hold,693,729,686,ND,Fargo,0,False,False,False,Gas
2,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2021-01-12 16:10:00 UTC,heat,hold,708,706,706,ND,Horace,5,False,False,False,Gas
3,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2021-01-03 16:15:00 UTC,heat,hold,675,669,669,ND,Dickinson,5,False,False,False,Gas
4,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2021-01-21 19:40:00 UTC,heat,hold,699,713,713,ND,Bismarck,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40926,fac4be949f9b030bcaedc0c75666f90d246e942d,2021-01-28 12:50:00 UTC,heat,hold,759,760,760,ND,Bismarck,0,False,False,False,Gas
40927,fac4be949f9b030bcaedc0c75666f90d246e942d,2021-01-27 18:25:00 UTC,heat,hold,756,760,760,ND,Bismarck,0,False,False,False,Gas
40928,fac4be949f9b030bcaedc0c75666f90d246e942d,2021-01-27 13:35:00 UTC,heat,hold,755,760,760,ND,Bismarck,0,False,False,False,Gas
40929,fac4be949f9b030bcaedc0c75666f90d246e942d,2021-01-27 14:35:00 UTC,heat,hold,756,760,760,ND,Bismarck,0,False,False,False,Gas


In [28]:
# Tag every row with its source year and month (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
jan_2021 = jan_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [30]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
jan_2021_ave = jan_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [31]:
# Export the January 2021 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/ND/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the per-year January CSV file names.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/ND/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once; calling pd.concat inside a
# loop re-copies all previously accumulated rows on each iteration.
# Each file keeps its own 0..n row labels, so index values repeat across years.
jan_frames = [pd.read_csv(os.path.join("data/day/ND/jan/", name)) for name in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
ND_jan = pd.concat(jan_frames) if jan_frames else pd.DataFrame()

ND_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,078b48dc9011fc73ec28a0464a6e0b260e5a8715,Jan,2017,heat,hold,Williston,702.746835,703.227848,703.227848,5.0,False,False,False
1,0c0bd0b708210af527c4acd1652566b20b9e1113,Jan,2017,heat,auto,Casselton,689.096774,691.645161,690.548387,70.0,False,False,False
2,0c0bd0b708210af527c4acd1652566b20b9e1113,Jan,2017,heat,hold,Casselton,710.290043,713.056277,712.935065,70.0,False,False,False
3,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,Jan,2017,auto,auto,Bismarck,693.557823,786.394558,693.210884,15.0,False,False,False
4,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,Jan,2017,auto,hold,Bismarck,691.993802,799.917355,695.762397,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
41,e15510388d74d5085bc80018605965cd82bbbe24,Jan,2021,auto,hold,Arthur,701.000000,746.000000,686.000000,60.0,False,False,False
42,e9680bd2d018dc7dd7f2f673832572f5767a9b11,Jan,2021,heat,hold,Horace,707.037879,709.901515,709.393939,5.0,False,False,False
43,f0824b3f9c6d20fb3772043d37ed892bb295771f,Jan,2021,heat,hold,West Fargo,666.711538,660.576923,660.576923,0.0,False,False,False
44,f739c79ec2a0e3e14e490aac58fac311385e7ec5,Jan,2021,heat,hold,Fargo,702.561069,696.893130,696.893130,0.0,False,False,False


In [34]:
# Write the combined January table; index=False leaves out the repeated
# per-file row labels.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/ND", exist_ok=True)
ND_jan.to_csv("Scraper_Output/State_Month_Day/ND/ND_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the raw ND "day" extract for February 2017.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2017-feb-day-ND.csv")

In [36]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
feb_2017.drop(
    index=feb_2017[
        (feb_2017["TemperatureExpectedCool"] >= 850)
        | (feb_2017["TemperatureExpectedHeat"] >= 850)
        | (feb_2017["TemperatureExpectedCool"] <= 600)
        | (feb_2017["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,31bab83a250783cc0d48f1509a6fdfeb059990b7,2017-02-05T19:25:00Z,auxHeatOnly,auto,649,657,657,ND,Argusville,0,False,False,True,Electric
1,3e1895a4447b3d22741a2a18a91fa946c1562773,2017-02-07T13:30:00Z,heat,hold,693,724,724,ND,Lincoln,45,False,False,False,Gas
2,3e1895a4447b3d22741a2a18a91fa946c1562773,2017-02-15T13:45:00Z,heat,auto,722,707,707,ND,Lincoln,45,False,False,False,Gas
3,3e1895a4447b3d22741a2a18a91fa946c1562773,2017-02-06T13:30:00Z,heat,hold,692,713,713,ND,Lincoln,45,False,False,False,Gas
4,3e1895a4447b3d22741a2a18a91fa946c1562773,2017-02-26T15:15:00Z,heat,hold,696,747,665,ND,Lincoln,45,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17131,5e1bf9472cda935cc1d054d72cc0b8adddfed5df,2017-02-28T15:35:00Z,heat,auto,683,750,680,ND,Fargo,30,False,False,False,Gas
17132,5e1bf9472cda935cc1d054d72cc0b8adddfed5df,2017-02-06T19:00:00Z,heat,auto,687,750,680,ND,Fargo,30,False,False,False,Gas
17133,5e1bf9472cda935cc1d054d72cc0b8adddfed5df,2017-02-28T13:45:00Z,heat,auto,672,750,680,ND,Fargo,30,False,False,False,Gas
17134,6e3464542da5e275eebd42444f08f74e6e90d535,2017-02-07T13:55:00Z,heat,auto,658,750,670,ND,Fargo,56,False,False,False,Gas


In [37]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January section uses
# "Jan"; confirm which casing downstream consumers expect before unifying.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
feb_2017 = feb_2017.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [39]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
feb_2017_ave = feb_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [40]:
# Export the February 2017 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/ND/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the raw ND "day" extract for February 2018.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2018-feb-day-ND.csv")

In [42]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
feb_2018.drop(
    index=feb_2018[
        (feb_2018["TemperatureExpectedCool"] >= 850)
        | (feb_2018["TemperatureExpectedHeat"] >= 850)
        | (feb_2018["TemperatureExpectedCool"] <= 600)
        | (feb_2018["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2018-02-16 17:45:00 UTC,heat,auto,641,729,698,ND,Bismarck,57,False,False,False,Gas
1,bc815e8955cd332e60edc455a1e15c9cba832102,2018-02-18 18:20:00 UTC,auto,hold,713,781,721,ND,Minot,58,False,False,False,Gas
2,888b6ac298ffde44db4d670dec3030c5ee6d1c0e,2018-02-03 19:05:00 UTC,heat,auto,655,737,652,ND,Fargo,5,False,False,False,Gas
3,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2018-02-16 19:30:00 UTC,heat,auto,682,787,635,ND,Bismarck,57,False,False,False,Gas
4,fe1292241e5cca5462f177eaa54ec9b6256acc87,2018-02-02 12:05:00 UTC,heat,hold,675,704,699,ND,Fargo,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40864,7d0c43eb4ebe84b3f303c289778db00d73e6709c,2018-02-17 14:15:00 UTC,heat,auto,669,720,670,ND,Fargo,5,False,False,False,Gas
40865,7d0c43eb4ebe84b3f303c289778db00d73e6709c,2018-02-24 13:20:00 UTC,heat,auto,669,720,670,ND,Fargo,5,False,False,False,Gas
40866,7d0c43eb4ebe84b3f303c289778db00d73e6709c,2018-02-17 14:40:00 UTC,heat,auto,670,720,670,ND,Fargo,5,False,False,False,Gas
40867,7d0c43eb4ebe84b3f303c289778db00d73e6709c,2018-02-11 14:20:00 UTC,heat,auto,669,720,670,ND,Fargo,5,False,False,False,Gas


In [43]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January section uses
# "Jan"; confirm which casing downstream consumers expect before unifying.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
feb_2018 = feb_2018.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [45]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
feb_2018_ave = feb_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [46]:
# Export the February 2018 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/ND/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the raw ND "day" extract for February 2019.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2019-feb-day-ND.csv")

In [48]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
feb_2019.drop(
    index=feb_2019[
        (feb_2019["TemperatureExpectedCool"] >= 850)
        | (feb_2019["TemperatureExpectedHeat"] >= 850)
        | (feb_2019["TemperatureExpectedCool"] <= 600)
        | (feb_2019["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2019-02-24 16:55:00 UTC,heat,hold,677,687,680,ND,Dickinson,5,False,False,False,Gas
1,8f20c0d306b7503704799f8515d37da8e6892a01,2019-02-19 11:30:00 UTC,heat,hold,704,702,702,ND,Fargo,0,False,False,False,Gas
2,007c248ef28f2dca0dd638319a0d779a8fa9cdb8,2019-02-04 13:10:00 UTC,auto,hold,734,731,681,ND,Mandan,5,False,False,False,Gas
3,8f20c0d306b7503704799f8515d37da8e6892a01,2019-02-19 12:25:00 UTC,heat,hold,697,702,702,ND,Fargo,0,False,False,False,Gas
4,c4f189b415502f607767b851154dd62f95335fb3,2019-02-21 13:55:00 UTC,heat,auto,633,655,652,ND,Bismarck,10,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
42312,79803592d3696a6e3ebcb31291a8a005e52edd2e,2019-02-13 13:30:00 UTC,heat,auto,760,760,760,ND,Grand Forks,20,False,False,False,Gas
42313,79803592d3696a6e3ebcb31291a8a005e52edd2e,2019-02-13 16:20:00 UTC,heat,auto,757,760,760,ND,Grand Forks,20,False,False,False,Gas
42314,79803592d3696a6e3ebcb31291a8a005e52edd2e,2019-02-09 18:55:00 UTC,heat,auto,758,760,760,ND,Grand Forks,20,False,False,False,Gas
42315,79803592d3696a6e3ebcb31291a8a005e52edd2e,2019-02-14 12:05:00 UTC,heat,auto,752,760,760,ND,Grand Forks,20,False,False,False,Gas


In [49]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January section uses
# "Jan"; confirm which casing downstream consumers expect before unifying.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
feb_2019 = feb_2019.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [51]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
feb_2019_ave = feb_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [52]:
# Export the February 2019 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/ND/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the raw ND "day" extract for February 2020.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2020-feb-day-ND.csv")

In [54]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
feb_2020.drop(
    index=feb_2020[
        (feb_2020["TemperatureExpectedCool"] >= 850)
        | (feb_2020["TemperatureExpectedHeat"] >= 850)
        | (feb_2020["TemperatureExpectedCool"] <= 600)
        | (feb_2020["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3e99a1ed6d48bd710de3ab46d9cd2030a4beec8d,2020-02-20 14:40:00 UTC,auto,hold,695,765,695,ND,Fargo,80,False,False,False,Gas
1,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-02-09 18:45:00 UTC,auto,auto,700,747,697,ND,Bismarck,50,False,False,False,Gas
2,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-02-02 16:45:00 UTC,auto,auto,692,747,697,ND,Bismarck,50,False,False,False,Gas
3,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2020-02-21 19:05:00 UTC,heat,auto,714,723,710,ND,Bismarck,57,False,False,False,Gas
4,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2020-02-15 16:30:00 UTC,heat,auto,700,723,710,ND,Bismarck,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56616,0f282db9c216fb43ec7241fe29cd5c947102a773,2020-02-17 15:25:00 UTC,heat,auto,724,730,730,ND,Grand Forks,0,False,False,False,Gas
56617,0f282db9c216fb43ec7241fe29cd5c947102a773,2020-02-18 13:20:00 UTC,heat,auto,730,730,730,ND,Grand Forks,0,False,False,False,Gas
56618,0f282db9c216fb43ec7241fe29cd5c947102a773,2020-02-11 16:10:00 UTC,heat,auto,739,730,730,ND,Grand Forks,0,False,False,False,Gas
56619,0f282db9c216fb43ec7241fe29cd5c947102a773,2020-02-07 15:00:00 UTC,heat,auto,725,730,730,ND,Grand Forks,0,False,False,False,Gas


In [55]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January section uses
# "Jan"; confirm which casing downstream consumers expect before unifying.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
feb_2020 = feb_2020.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [57]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
feb_2020_ave = feb_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [58]:
# Export the February 2020 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/ND/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the raw ND "day" extract for February 2021.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2021-feb-day-ND.csv")

In [60]:
# Remove predetermined outliers before aggregating.
# A row is dropped when EITHER expected-temperature column is >= 850 or <= 600
# (both columns are checked against both bounds). Rows with missing values
# fail every comparison and are therefore kept.
feb_2021.drop(
    index=feb_2021[
        (feb_2021["TemperatureExpectedCool"] >= 850)
        | (feb_2021["TemperatureExpectedHeat"] >= 850)
        | (feb_2021["TemperatureExpectedCool"] <= 600)
        | (feb_2021["TemperatureExpectedHeat"] <= 600)
    ].index,
    inplace=True,
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8f20c0d306b7503704799f8515d37da8e6892a01,2021-02-19 12:20:00 UTC,heat,hold,684,685,685,ND,Fargo,0,False,False,False,Gas
1,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2021-02-13 18:30:00 UTC,heat,hold,689,716,716,ND,Horace,5,False,False,False,Gas
2,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2021-02-13 19:40:00 UTC,heat,hold,714,716,716,ND,Horace,5,False,False,False,Gas
3,26023071a26724ad4835896554801910a124909a,2021-02-02 17:45:00 UTC,heat,hold,700,684,680,ND,Mandan,9,True,False,False,Gas
4,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2021-02-05 14:10:00 UTC,heat,hold,703,706,706,ND,Horace,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
35906,6a5cc37df16f3cc0a24d466864b7235faacdbf7e,2021-02-07 19:50:00 UTC,heat,hold,705,720,720,ND,West Fargo,5,False,False,False,Gas
35907,6a5cc37df16f3cc0a24d466864b7235faacdbf7e,2021-02-07 19:35:00 UTC,heat,hold,701,720,720,ND,West Fargo,5,False,False,False,Gas
35908,6a5cc37df16f3cc0a24d466864b7235faacdbf7e,2021-02-07 19:40:00 UTC,heat,hold,702,720,720,ND,West Fargo,5,False,False,False,Gas
35909,6a5cc37df16f3cc0a24d466864b7235faacdbf7e,2021-02-07 19:20:00 UTC,heat,hold,697,720,720,ND,West Fargo,5,False,False,False,Gas


In [61]:
# Tag every row with its source year and month (both stored as strings).
# NOTE(review): the label is lowercase "feb" while the January section uses
# "Jan"; confirm which casing downstream consumers expect before unifying.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Append "_ave" to the three temperature columns so the averaged output is
# labelled as an aggregate.
feb_2021 = feb_2021.rename(
    columns={
        name: name + "_ave"
        for name in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [63]:
# Average the numeric/boolean columns for every
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True keeps text columns (e.g. date_time) out of the mean;
# pandas >= 2.0 raises TypeError on them when the flag is left at its default.
feb_2021_ave = feb_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [64]:
# Export the February 2021 averages. The grouping keys live in the index, so
# index=True writes them out as the leading CSV columns.
# Create the output folder first so the notebook runs on a fresh checkout.
os.makedirs("data/day/ND/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/ND/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the per-year February CSV file names.
# os.listdir returns entries in arbitrary order, so sort them to make the row
# order of the combined file reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/ND/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file, then concatenate once; calling pd.concat inside a
# loop re-copies all previously accumulated rows on each iteration.
# Each file keeps its own 0..n row labels, so index values repeat across years.
feb_frames = [pd.read_csv(os.path.join("data/day/ND/feb/", name)) for name in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
ND_feb = pd.concat(feb_frames) if feb_frames else pd.DataFrame()

ND_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0c0bd0b708210af527c4acd1652566b20b9e1113,feb,2017,heat,auto,Casselton,712.750000,820.000000,640.000000,70.0,False,False,False
1,0c0bd0b708210af527c4acd1652566b20b9e1113,feb,2017,heat,hold,Casselton,709.623596,712.359551,712.247191,70.0,False,False,False
2,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,feb,2017,auto,auto,Bismarck,671.000000,800.000000,687.333333,15.0,False,False,False
3,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,feb,2017,auto,hold,Bismarck,693.785124,800.000000,696.884298,15.0,False,False,False
4,0f282db9c216fb43ec7241fe29cd5c947102a773,feb,2017,heat,auto,Grand Forks,721.969697,732.727273,731.545455,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
46,e5c1c80206a081adcb88eb34bf136d8ad0e1054b,feb,2021,auto,hold,Minot,698.810526,770.000000,694.157895,15.0,False,False,False
47,e9680bd2d018dc7dd7f2f673832572f5767a9b11,feb,2021,heat,hold,Horace,700.515152,709.015152,708.530303,5.0,False,False,False
48,ed1b757670903b64ecd8d5d09ed17607fdc3dc23,feb,2021,heat,hold,Thompson,674.285714,680.000000,680.000000,50.0,False,False,False
49,f0824b3f9c6d20fb3772043d37ed892bb295771f,feb,2021,heat,hold,West Fargo,661.909091,668.090909,661.545455,0.0,False,False,False


In [67]:
# Save the combined February frame; index=False leaves the row labels out of the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/ND", exist_ok=True)
ND_feb.to_csv("Scraper_Output/State_Month_Day/ND/ND_feb.csv", header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw June 2017 "day" readings for ND from the large-data folder.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2017-jun-day-ND.csv")

In [69]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jun_2017_setpoints = jun_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jun_2017_outliers = (jun_2017_setpoints.ge(850) | jun_2017_setpoints.le(600)).any(axis=1)
jun_2017.drop(index=jun_2017.loc[jun_2017_outliers].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,72feb8ad8aa140755f1aeca1b83738a54c67001c,2017-06-04T19:55:00Z,cool,auto,821,810,720,ND,Bismarck,40,True,False,False,Gas
1,078b48dc9011fc73ec28a0464a6e0b260e5a8715,2017-06-10T18:05:00Z,cool,hold,765,685,685,ND,Williston,5,False,False,False,Gas
2,72feb8ad8aa140755f1aeca1b83738a54c67001c,2017-06-04T18:35:00Z,cool,auto,811,810,720,ND,Bismarck,40,True,False,False,Gas
3,5cd961d7fce7850ef803f04c65462e23ec55fbc9,2017-06-03T19:55:00Z,cool,hold,731,734,717,ND,Fargo,5,False,False,False,Gas
4,72feb8ad8aa140755f1aeca1b83738a54c67001c,2017-06-04T18:30:00Z,cool,auto,809,810,720,ND,Bismarck,40,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25013,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-06-07T11:45:00Z,cool,hold,748,760,760,ND,Fargo,90,False,False,False,Gas
25014,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-06-07T12:30:00Z,cool,hold,746,760,760,ND,Fargo,90,False,False,False,Gas
25015,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-06-12T12:40:00Z,cool,hold,750,760,760,ND,Fargo,90,False,False,False,Gas
25016,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-06-07T12:20:00Z,cool,hold,746,760,760,ND,Fargo,90,False,False,False,Gas


In [70]:
# Tag every reading with its source year and month (both stored as text).
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jun_2017 = jun_2017.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [72]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jun_2017_ave = jun_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [73]:
# Export the June 2017 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jun", exist_ok=True)
jun_2017_ave.to_csv("data/day/ND/jun/jun_2017_ave.csv", header=True, index=True)

### 2018 June Day

In [74]:
# Load the raw June 2018 "day" readings for ND from the large-data folder.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2018-jun-day-ND.csv")

In [75]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jun_2018_setpoints = jun_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jun_2018_outliers = (jun_2018_setpoints.ge(850) | jun_2018_setpoints.le(600)).any(axis=1)
jun_2018.drop(index=jun_2018.loc[jun_2018_outliers].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,591651a8eb29731436279c2cab4ed8735e2bb4b9,2018-06-16 16:40:00 UTC,cool,hold,701,709,664,ND,Fargo,65,False,False,False,Gas
1,8f20c0d306b7503704799f8515d37da8e6892a01,2018-06-14 11:10:00 UTC,cool,hold,737,742,742,ND,Fargo,0,False,False,False,Gas
2,b5ead3d59ed0d42960c316b72ad99df780e8ea50,2018-06-13 11:20:00 UTC,auto,hold,694,742,682,ND,Fargo,60,False,False,False,Gas
3,f739c79ec2a0e3e14e490aac58fac311385e7ec5,2018-06-02 19:35:00 UTC,cool,hold,720,749,697,ND,Fargo,0,False,False,False,Gas
4,b5ead3d59ed0d42960c316b72ad99df780e8ea50,2018-06-13 10:00:00 UTC,auto,hold,698,742,682,ND,Fargo,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54372,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-06-15 18:15:00 UTC,cool,hold,764,760,760,ND,Fargo,15,False,False,False,Gas
54373,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-06-21 17:50:00 UTC,cool,hold,765,760,760,ND,Fargo,15,False,False,False,Gas
54374,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-06-22 14:00:00 UTC,cool,hold,760,760,760,ND,Fargo,15,False,False,False,Gas
54375,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-06-22 11:10:00 UTC,cool,hold,755,760,760,ND,Fargo,15,False,False,False,Gas


In [76]:
# Tag every reading with its source year and month (both stored as text).
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jun_2018 = jun_2018.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [78]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jun_2018_ave = jun_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [79]:
# Export the June 2018 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jun", exist_ok=True)
jun_2018_ave.to_csv("data/day/ND/jun/jun_2018_ave.csv", header=True, index=True)

### 2019 June Day

In [80]:
# Load the raw June 2019 "day" readings for ND from the large-data folder.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2019-jun-day-ND.csv")

In [81]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jun_2019_setpoints = jun_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jun_2019_outliers = (jun_2019_setpoints.ge(850) | jun_2019_setpoints.le(600)).any(axis=1)
jun_2019.drop(index=jun_2019.loc[jun_2019_outliers].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2019-06-27 18:35:00 UTC,cool,hold,714,706,706,ND,Dickinson,5,False,False,False,Gas
1,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-06-03 12:05:00 UTC,cool,hold,699,712,712,ND,Fargo,0,True,False,False,Gas
2,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-06-01 19:25:00 UTC,cool,hold,695,712,712,ND,Fargo,0,True,False,False,Gas
3,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-06-01 15:10:00 UTC,cool,hold,686,712,712,ND,Fargo,0,True,False,False,Gas
4,46c6cf7aa119b224a55cf4ce661634d3de1b8aa4,2019-06-27 12:20:00 UTC,auto,hold,707,722,672,ND,Fargo,68,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72627,c171e9566395e753d002ead8edbfacf80563248c,2019-06-17 14:20:00 UTC,auto,hold,701,700,650,ND,Minot,40,False,False,False,Gas
72628,c171e9566395e753d002ead8edbfacf80563248c,2019-06-28 19:05:00 UTC,auto,hold,699,700,630,ND,Minot,40,False,False,False,Gas
72629,c171e9566395e753d002ead8edbfacf80563248c,2019-06-18 14:05:00 UTC,auto,auto,689,700,630,ND,Minot,40,False,False,False,Gas
72630,c171e9566395e753d002ead8edbfacf80563248c,2019-06-27 14:00:00 UTC,auto,hold,703,700,630,ND,Minot,40,False,False,False,Gas


In [82]:
# Tag every reading with its source year and month (both stored as text).
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jun_2019 = jun_2019.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [84]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jun_2019_ave = jun_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [85]:
# Export the June 2019 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jun", exist_ok=True)
jun_2019_ave.to_csv("data/day/ND/jun/jun_2019_ave.csv", header=True, index=True)

### 2020 June Day

In [86]:
# Load the raw June 2020 "day" readings for ND from the large-data folder.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2020-jun-day-ND.csv")

In [87]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jun_2020_setpoints = jun_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jun_2020_outliers = (jun_2020_setpoints.ge(850) | jun_2020_setpoints.le(600)).any(axis=1)
jun_2020.drop(index=jun_2020.loc[jun_2020_outliers].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2020-06-07 10:05:00 UTC,cool,hold,680,716,671,ND,Dickinson,5,False,False,False,Gas
1,72feb8ad8aa140755f1aeca1b83738a54c67001c,2020-06-14 19:55:00 UTC,cool,auto,771,810,730,ND,Bismarck,40,True,False,False,Gas
2,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2020-06-02 19:55:00 UTC,cool,hold,721,722,715,ND,Dickinson,5,False,False,False,Gas
3,46c6cf7aa119b224a55cf4ce661634d3de1b8aa4,2020-06-01 13:10:00 UTC,heat,auto,705,742,680,ND,Fargo,68,False,False,False,Gas
4,26023071a26724ad4835896554801910a124909a,2020-06-06 12:45:00 UTC,cool,hold,693,704,693,ND,Mandan,9,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52667,ed1b757670903b64ecd8d5d09ed17607fdc3dc23,2020-06-02 19:45:00 UTC,cool,auto,716,730,610,ND,Thompson,50,False,False,False,Gas
52668,51530537bb399a04d6facf1744c24ce32e0df7a2,2020-06-05 19:05:00 UTC,cool,auto,740,730,690,ND,Fargo,0,True,False,False,Gas
52669,ed1b757670903b64ecd8d5d09ed17607fdc3dc23,2020-06-03 16:10:00 UTC,cool,auto,694,730,610,ND,Thompson,50,False,False,False,Gas
52670,51530537bb399a04d6facf1744c24ce32e0df7a2,2020-06-20 19:05:00 UTC,cool,auto,731,730,690,ND,Fargo,0,True,False,False,Gas


In [88]:
# Tag every reading with its source year and month (both stored as text).
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jun_2020 = jun_2020.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [90]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jun_2020_ave = jun_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [91]:
# Export the June 2020 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jun", exist_ok=True)
jun_2020_ave.to_csv("data/day/ND/jun/jun_2020_ave.csv", header=True, index=True)

### 2021 June Day

In [92]:
# Load the raw June 2021 "day" readings for ND from the large-data folder.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2021-jun-day-ND.csv")

In [93]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jun_2021_setpoints = jun_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jun_2021_outliers = (jun_2021_setpoints.ge(850) | jun_2021_setpoints.le(600)).any(axis=1)
jun_2021.drop(index=jun_2021.loc[jun_2021_outliers].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,8f20c0d306b7503704799f8515d37da8e6892a01,2021-06-28 10:50:00 UTC,cool,hold,728,729,729,ND,Fargo,0,False,False,False,Gas
2,99d5c43427ce0444928113e9ca8afeb6da53c26c,2021-06-04 18:45:00 UTC,auto,hold,748,746,693,ND,Dickinson,10,False,False,False,Gas
3,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2021-06-26 18:05:00 UTC,cool,hold,688,685,685,ND,Dickinson,5,False,False,False,Gas
4,8890426dc46cda2ec46610bb45860cbaceb4e9d3,2021-06-03 19:05:00 UTC,cool,hold,677,708,670,ND,,0,True,False,False,Gas
5,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2021-06-03 12:45:00 UTC,auto,hold,701,686,636,ND,Horace,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46593,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-06-10 16:40:00 UTC,cool,hold,769,760,760,ND,Fargo,15,False,False,False,Gas
46594,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-06-17 19:40:00 UTC,cool,hold,761,760,760,ND,Fargo,15,False,False,False,Gas
46595,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-06-10 13:50:00 UTC,cool,hold,779,760,760,ND,Fargo,15,False,False,False,Gas
46596,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-06-10 15:05:00 UTC,cool,hold,776,760,760,ND,Fargo,15,False,False,False,Gas


In [94]:
# Tag every reading with its source year and month (both stored as text).
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jun_2021 = jun_2021.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [96]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
# NOTE(review): groupby leaves out rows whose key is NaN by default, so readings
# with a missing City (one shows up in the June 2021 preview) are not
# aggregated - confirm this is intended.
jun_2021_ave = jun_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [97]:
# Export the June 2021 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jun", exist_ok=True)
jun_2021_ave.to_csv("data/day/ND/jun/jun_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this state's folder for the month
2. Export them as one combined CSV

In [98]:
# Collect the names of the CSV files found in the June output folder.
files = list(filter(lambda name: name.endswith(".csv"), os.listdir("data/day/ND/jun/")))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year CSV and stack them with a single concat instead of
# growing a DataFrame inside the loop. The names are sorted because
# os.listdir returns them in arbitrary order.
frames = [pd.read_csv("data/day/ND/jun/" + file) for file in sorted(files)]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
ND_jun = pd.concat(frames) if frames else pd.DataFrame()

ND_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,078b48dc9011fc73ec28a0464a6e0b260e5a8715,jun,2017,cool,hold,Williston,716.398496,700.789474,700.789474,5.0,False,False,False
1,0c0bd0b708210af527c4acd1652566b20b9e1113,jun,2017,cool,hold,Casselton,726.313725,730.000000,730.000000,70.0,False,False,False
2,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,jun,2017,auto,auto,Bismarck,702.068966,749.586207,672.275862,15.0,False,False,False
3,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,jun,2017,auto,hold,Bismarck,729.784810,737.113924,671.278481,15.0,False,False,False
4,0f282db9c216fb43ec7241fe29cd5c947102a773,jun,2017,auto,hold,Grand Forks,702.583333,754.750000,704.750000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
54,f0824b3f9c6d20fb3772043d37ed892bb295771f,jun,2021,auto,hold,West Fargo,696.061069,685.022901,635.022901,0.0,False,False,False
55,f2ed998b0e9787c31305d2f800360eb9edbee19d,jun,2021,cool,hold,Fargo,750.583333,750.000000,750.000000,5.0,False,False,False
56,f874cb818c5db31bdf731cf69f70e68bf9fc33e4,jun,2021,cool,hold,Fargo,700.857143,670.142857,670.000000,6.0,False,False,False
57,fac4be949f9b030bcaedc0c75666f90d246e942d,jun,2021,cool,hold,Bismarck,751.908479,751.515478,751.516375,0.0,False,False,False


In [100]:
# Save the combined June frame; index=False leaves the row labels out of the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("Scraper_Output/State_Month_Day/ND", exist_ok=True)
ND_jun.to_csv("Scraper_Output/State_Month_Day/ND/ND_jun.csv", header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the raw July 2017 "day" readings for ND from the large-data folder.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2017-jul-day-ND.csv")

In [102]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jul_2017_setpoints = jul_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jul_2017_outliers = (jul_2017_setpoints.ge(850) | jul_2017_setpoints.le(600)).any(axis=1)
jul_2017.drop(index=jul_2017.loc[jul_2017_outliers].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,70b5208069f2b85787ae39de4f5dd378765b2e06,2017-07-10T13:45:00Z,cool,hold,729,795,745,ND,Thompson,37,True,False,False,Gas
1,70b5208069f2b85787ae39de4f5dd378765b2e06,2017-07-10T18:30:00Z,cool,hold,740,795,745,ND,Thompson,37,True,False,False,Gas
2,70b5208069f2b85787ae39de4f5dd378765b2e06,2017-07-10T17:00:00Z,cool,hold,737,795,745,ND,Thompson,37,True,False,False,Gas
3,70b5208069f2b85787ae39de4f5dd378765b2e06,2017-07-10T18:15:00Z,cool,hold,739,795,745,ND,Thompson,37,True,False,False,Gas
4,70b5208069f2b85787ae39de4f5dd378765b2e06,2017-07-10T15:30:00Z,cool,hold,735,795,745,ND,Thompson,37,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23422,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-07-07T13:10:00Z,cool,auto,749,760,620,ND,Fargo,90,False,False,False,Gas
23423,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-07-07T13:45:00Z,cool,auto,751,760,620,ND,Fargo,90,False,False,False,Gas
23424,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-07-07T11:40:00Z,cool,auto,737,760,620,ND,Fargo,90,False,False,False,Gas
23425,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-07-06T12:05:00Z,cool,hold,724,760,760,ND,Fargo,90,False,False,False,Gas


In [103]:
# Tag every reading with its source year and month (both stored as text).
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jul_2017 = jul_2017.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [105]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jul_2017_ave = jul_2017.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [106]:
# Export the July 2017 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jul", exist_ok=True)
jul_2017_ave.to_csv("data/day/ND/jul/jul_2017_ave.csv", header=True, index=True)

### 2018 July Day

In [107]:
# Load the raw July 2018 "day" readings for ND from the large-data folder.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2018-jul-day-ND.csv")

In [108]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jul_2018_setpoints = jul_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jul_2018_outliers = (jul_2018_setpoints.ge(850) | jul_2018_setpoints.le(600)).any(axis=1)
jul_2018.drop(index=jul_2018.loc[jul_2018_outliers].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2018-07-15T15:50:00Z,cool,hold,745,733,733,ND,Bismarck,57,False,False,False,Gas
1,b5ead3d59ed0d42960c316b72ad99df780e8ea50,2018-07-24T11:45:00Z,auto,hold,711,722,642,ND,Fargo,60,False,False,False,Gas
2,b5ead3d59ed0d42960c316b72ad99df780e8ea50,2018-07-25T10:15:00Z,auto,hold,717,722,642,ND,Fargo,60,False,False,False,Gas
3,b5ead3d59ed0d42960c316b72ad99df780e8ea50,2018-07-27T12:35:00Z,auto,hold,703,732,642,ND,Fargo,60,False,False,False,Gas
4,b5ead3d59ed0d42960c316b72ad99df780e8ea50,2018-07-26T10:05:00Z,auto,hold,709,722,642,ND,Fargo,60,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
33783,6c8433aa1db0389cd3a0214904e1c8dbe3513686,2018-07-22T16:45:00Z,cool,hold,733,730,730,ND,Dickinson,0,False,False,False,Gas
33784,6c8433aa1db0389cd3a0214904e1c8dbe3513686,2018-07-12T16:55:00Z,cool,hold,733,730,730,ND,Dickinson,0,False,False,False,Gas
33785,6c8433aa1db0389cd3a0214904e1c8dbe3513686,2018-07-27T17:10:00Z,cool,hold,727,730,730,ND,Dickinson,0,False,False,False,Gas
33786,6c8433aa1db0389cd3a0214904e1c8dbe3513686,2018-07-22T17:40:00Z,cool,hold,730,730,730,ND,Dickinson,0,False,False,False,Gas


In [109]:
# Tag every reading with its source year and month (both stored as text).
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jul_2018 = jul_2018.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [111]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jul_2018_ave = jul_2018.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [112]:
# Export the July 2018 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jul", exist_ok=True)
jul_2018_ave.to_csv("data/day/ND/jul/jul_2018_ave.csv", header=True, index=True)

### 2019 July Day

In [113]:
# Load the raw July 2019 "day" readings for ND from the large-data folder.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2019-jul-day-ND.csv")

In [114]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jul_2019_setpoints = jul_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jul_2019_outliers = (jul_2019_setpoints.ge(850) | jul_2019_setpoints.le(600)).any(axis=1)
jul_2019.drop(index=jul_2019.loc[jul_2019_outliers].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,46c6cf7aa119b224a55cf4ce661634d3de1b8aa4,2019-07-25 11:35:00 UTC,auto,hold,717,722,672,ND,Fargo,68,False,False,False,Gas
1,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-07-15 13:20:00 UTC,cool,hold,718,712,712,ND,Fargo,0,True,False,False,Gas
2,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-07-26 17:40:00 UTC,cool,hold,714,712,712,ND,Fargo,0,True,False,False,Gas
3,46c6cf7aa119b224a55cf4ce661634d3de1b8aa4,2019-07-27 14:55:00 UTC,auto,hold,711,722,672,ND,Fargo,68,False,False,False,Gas
4,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-07-20 13:50:00 UTC,cool,hold,716,712,712,ND,Fargo,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
82812,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-07-24 13:50:00 UTC,cool,auto,734,730,730,ND,Grand Forks,0,False,False,False,Gas
82813,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-07-30 10:30:00 UTC,cool,auto,731,730,730,ND,Grand Forks,0,False,False,False,Gas
82814,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-07-12 15:20:00 UTC,cool,hold,736,730,730,ND,Grand Forks,0,False,False,False,Gas
82815,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-07-26 17:20:00 UTC,cool,auto,733,730,730,ND,Grand Forks,0,False,False,False,Gas


In [115]:
# Tag every reading with its source year and month (both stored as text).
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jul_2019 = jul_2019.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [117]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jul_2019_ave = jul_2019.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [118]:
# Export the July 2019 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jul", exist_ok=True)
jul_2019_ave.to_csv("data/day/ND/jul/jul_2019_ave.csv", header=True, index=True)

### 2020 July Day

In [119]:
# Load the raw July 2020 "day" readings for ND from the large-data folder.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2020-jul-day-ND.csv")

In [120]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jul_2020_setpoints = jul_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jul_2020_outliers = (jul_2020_setpoints.ge(850) | jul_2020_setpoints.le(600)).any(axis=1)
jul_2020.drop(index=jul_2020.loc[jul_2020_outliers].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-07-04 19:35:00 UTC,auto,auto,735,733,683,ND,Bismarck,50,False,False,False,Gas
1,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-07-01 12:35:00 UTC,auto,auto,709,733,683,ND,Bismarck,50,False,False,False,Gas
2,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-07-26 15:00:00 UTC,auto,auto,719,733,683,ND,Bismarck,50,False,False,False,Gas
3,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-07-03 11:40:00 UTC,auto,auto,715,733,683,ND,Bismarck,50,False,False,False,Gas
4,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-07-01 11:15:00 UTC,auto,auto,710,733,683,ND,Bismarck,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60005,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2020-07-23 19:50:00 UTC,cool,hold,772,760,760,ND,Fargo,15,False,False,False,Gas
60006,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2020-07-16 17:10:00 UTC,cool,hold,771,760,760,ND,Fargo,15,False,False,False,Gas
60007,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2020-07-02 16:35:00 UTC,cool,hold,771,760,760,ND,Fargo,15,False,False,False,Gas
60008,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2020-07-30 14:50:00 UTC,cool,hold,762,760,760,ND,Fargo,15,False,False,False,Gas


In [121]:
# Tag every reading with its source year and month (both stored as text).
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jul_2020 = jul_2020.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [123]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jul_2020_ave = jul_2020.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [124]:
# Export the July 2020 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jul", exist_ok=True)
jul_2020_ave.to_csv("data/day/ND/jul/jul_2020_ave.csv", header=True, index=True)

### 2021 July Day

In [125]:
# Load the raw July 2021 "day" readings for ND from the large-data folder.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2021-jul-day-ND.csv")

In [126]:
# Drop out-of-range setpoints before aggregating: any row whose expected
# cooling or heating temperature is >= 850 or <= 600 is removed in place.
jul_2021_setpoints = jul_2021[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
jul_2021_outliers = (jul_2021_setpoints.ge(850) | jul_2021_setpoints.le(600)).any(axis=1)
jul_2021.drop(index=jul_2021.loc[jul_2021_outliers].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2021-07-10 17:25:00 UTC,cool,hold,726,711,711,ND,Bismarck,57,False,False,False,Gas
1,0fac6afda912cd175202e8e2925e028b7d22bbec,2021-07-22 19:10:00 UTC,cool,hold,737,728,728,ND,Thompson,69,True,False,False,Gas
2,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2021-07-09 19:00:00 UTC,cool,hold,686,685,685,ND,Dickinson,5,False,False,False,Gas
3,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2021-07-09 18:00:00 UTC,cool,hold,699,685,685,ND,Dickinson,5,False,False,False,Gas
4,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2021-07-10 18:40:00 UTC,cool,hold,718,711,711,ND,Bismarck,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
54490,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-07-22 15:05:00 UTC,cool,hold,764,760,760,ND,Fargo,15,False,False,False,Gas
54491,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-07-23 18:00:00 UTC,cool,hold,779,760,760,ND,Fargo,15,False,False,False,Gas
54492,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-07-26 19:35:00 UTC,cool,hold,760,760,760,ND,Fargo,15,False,False,False,Gas
54493,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2021-07-28 15:45:00 UTC,cool,hold,763,760,760,ND,Fargo,15,False,False,False,Gas


In [127]:
# Tag every reading with its source year and month (both stored as text).
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave" so they are already
# labelled as averages once the frame is aggregated.
jul_2021 = jul_2021.rename(
    columns={
        column: column + "_ave"
        for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
    }
)

In [129]:
# Average the numeric columns per thermostat, month, year, HVAC mode,
# calendar event and city.
# numeric_only=True skips the remaining text columns (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type); pandas >= 2.0 raises a TypeError on them instead
# of silently dropping them.
jul_2021_ave = jul_2021.groupby(['Identifier', 'Month', 'Year','HvacMode','CalendarEvent', 'City']).mean(numeric_only=True)

In [130]:
# Export the July 2021 averages; index=True keeps the group keys in the file.
# Create the output folder first so the export also works on a fresh checkout.
os.makedirs("data/day/ND/jul", exist_ok=True)
jul_2021_ave.to_csv("data/day/ND/jul/jul_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this state's folder for the month
2. Export them as one combined CSV

In [131]:
# Collect the names of the CSV files found in the July output folder.
files = list(filter(lambda name: name.endswith(".csv"), os.listdir("data/day/ND/jul/")))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year CSV and stack them with a single concat instead of
# growing a DataFrame inside the loop. The names are sorted because
# os.listdir returns them in arbitrary order.
frames = [pd.read_csv("data/day/ND/jul/" + file) for file in sorted(files)]

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
ND_jul = pd.concat(frames) if frames else pd.DataFrame()

ND_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,078b48dc9011fc73ec28a0464a6e0b260e5a8715,jul,2017,cool,hold,Williston,729.179310,718.834483,718.613793,5.0,False,False,False
1,0c0bd0b708210af527c4acd1652566b20b9e1113,jul,2017,cool,auto,Casselton,748.318627,735.000000,685.000000,70.0,False,False,False
2,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,jul,2017,auto,auto,Bismarck,744.750000,740.000000,670.000000,15.0,False,False,False
3,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,jul,2017,auto,hold,Bismarck,746.151020,745.297959,670.000000,15.0,False,False,False
4,0f282db9c216fb43ec7241fe29cd5c947102a773,jul,2017,cool,hold,Grand Forks,735.337593,740.558040,740.407881,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,f0824b3f9c6d20fb3772043d37ed892bb295771f,jul,2021,cool,hold,West Fargo,706.318584,695.265487,684.451327,0.0,False,False,False
51,f739c79ec2a0e3e14e490aac58fac311385e7ec5,jul,2021,cool,hold,Fargo,733.693182,729.272727,729.045455,0.0,False,False,False
52,f874cb818c5db31bdf731cf69f70e68bf9fc33e4,jul,2021,cool,hold,Fargo,671.321429,670.000000,670.000000,6.0,False,False,False
53,fac4be949f9b030bcaedc0c75666f90d246e942d,jul,2021,cool,hold,Bismarck,761.876298,761.469291,761.467993,0.0,False,False,False


In [133]:
# Save the combined July table, written without the row index.
ND_jul.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/ND/ND_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Read the 2017 August "day" extract for ND into a DataFrame
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2017-aug-day-ND.csv")

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2017.drop(index=aug_2017[outlier_rows].index, inplace=True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,d29e97bfe3f461113c8a6c12ba8bd1ebb2e2f0a9,2017-08-21T16:25:00Z,auto,hold,697,756,614,ND,Grand Forks,80,False,False,False,Gas
1,d29e97bfe3f461113c8a6c12ba8bd1ebb2e2f0a9,2017-08-21T12:15:00Z,auto,hold,709,737,620,ND,Grand Forks,80,False,False,False,Gas
2,591651a8eb29731436279c2cab4ed8735e2bb4b9,2017-08-20T16:20:00Z,cool,hold,703,712,676,ND,Fargo,65,False,False,False,Gas
3,5ab66c52e0eac96c1c089419be40dc89262c3cfc,2017-08-06T17:05:00Z,cool,auto,678,708,667,ND,Dickinson,5,False,False,False,Gas
4,0f282db9c216fb43ec7241fe29cd5c947102a773,2017-08-07T18:45:00Z,cool,hold,740,765,765,ND,Grand Forks,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
24746,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-08-01T12:30:00Z,cool,hold,761,760,760,ND,Fargo,90,False,False,False,Gas
24747,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-08-01T13:20:00Z,cool,hold,761,760,760,ND,Fargo,90,False,False,False,Gas
24748,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-08-01T12:45:00Z,cool,hold,760,760,760,ND,Fargo,90,False,False,False,Gas
24749,a5dbba7ac3ef4e6339f4127b526d8973991b6745,2017-08-01T12:40:00Z,cool,hold,760,760,760,ND,Fargo,90,False,False,False,Gas


In [136]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
aug_2017 = aug_2017.assign(Year="2017", Month="aug")

In [137]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_labels)

In [138]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Export CSV file: write the August 2017 averages; index=True keeps the group
# keys (held in the index) as leading columns.
aug_2017_ave.to_csv(
    path_or_buf="data/day/ND/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Read the 2018 August "day" extract for ND into a DataFrame
aug_2018 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2018-aug-day-ND.csv")

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2018.drop(index=aug_2018[outlier_rows].index, inplace=True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,60f5f1ed28f8e421d6dc8d57266a9e5e34393685,2018-08-15 11:35:00 UTC,cool,hold,674,680,680,ND,West Fargo,7,False,False,False,Gas
1,60f5f1ed28f8e421d6dc8d57266a9e5e34393685,2018-08-15 12:00:00 UTC,cool,hold,675,680,680,ND,West Fargo,7,False,False,False,Gas
2,60f5f1ed28f8e421d6dc8d57266a9e5e34393685,2018-08-15 11:40:00 UTC,cool,hold,674,680,680,ND,West Fargo,7,False,False,False,Gas
3,60f5f1ed28f8e421d6dc8d57266a9e5e34393685,2018-08-15 12:15:00 UTC,cool,hold,674,680,680,ND,West Fargo,7,False,False,False,Gas
4,8f4d43dff926ace6ea477d8ccec8a89f32b21c83,2018-08-30 17:50:00 UTC,cool,auto,679,680,680,ND,Gackle,70,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69130,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-08-19 15:20:00 UTC,cool,hold,746,760,760,ND,Fargo,15,False,False,False,Gas
69131,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-08-19 18:45:00 UTC,cool,hold,748,760,760,ND,Fargo,15,False,False,False,Gas
69132,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-08-17 14:55:00 UTC,cool,hold,750,760,760,ND,Fargo,15,False,False,False,Gas
69133,feb83f2c66430dae260de2c2c5e688ff4ab0e013,2018-08-17 19:05:00 UTC,cool,hold,768,760,760,ND,Fargo,15,False,False,False,Gas


In [142]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
aug_2018 = aug_2018.assign(Year="2018", Month="aug")

In [143]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_labels)

In [144]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Export CSV file: write the August 2018 averages; index=True keeps the group
# keys (held in the index) as leading columns.
aug_2018_ave.to_csv(
    path_or_buf="data/day/ND/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Read the 2019 August "day" extract for ND into a DataFrame
aug_2019 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2019-aug-day-ND.csv")

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2019.drop(index=aug_2019[outlier_rows].index, inplace=True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-08-02 14:45:00 UTC,cool,hold,701,702,702,ND,Fargo,0,True,False,False,Gas
1,8217745f571abdaa223df2cbeebcfbf00f3f9557,2019-08-17 18:20:00 UTC,heat,hold,718,733,683,ND,Bismarck,50,False,False,False,Gas
2,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-08-01 16:30:00 UTC,cool,hold,705,702,702,ND,Fargo,0,True,False,False,Gas
3,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-08-01 14:55:00 UTC,cool,hold,705,702,702,ND,Fargo,0,True,False,False,Gas
4,51530537bb399a04d6facf1744c24ce32e0df7a2,2019-08-02 13:05:00 UTC,cool,hold,700,702,702,ND,Fargo,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71910,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-08-23 17:40:00 UTC,cool,hold,733,730,730,ND,Grand Forks,0,False,False,False,Gas
71911,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-08-25 18:00:00 UTC,cool,hold,730,730,730,ND,Grand Forks,0,False,False,False,Gas
71912,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-08-20 18:30:00 UTC,cool,hold,732,730,730,ND,Grand Forks,0,False,False,False,Gas
71913,0f282db9c216fb43ec7241fe29cd5c947102a773,2019-08-27 11:00:00 UTC,cool,hold,716,730,730,ND,Grand Forks,0,False,False,False,Gas


In [148]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
aug_2019 = aug_2019.assign(Year="2019", Month="aug")

In [149]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_labels)

In [150]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Export CSV file: write the August 2019 averages; index=True keeps the group
# keys (held in the index) as leading columns.
aug_2019_ave.to_csv(
    path_or_buf="data/day/ND/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Read the 2020 August "day" extract for ND into a DataFrame
aug_2020 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2020-aug-day-ND.csv")

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
aug_2020.drop(index=aug_2020[outlier_rows].index, inplace=True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-08-10 11:20:00 UTC,auto,auto,713,733,683,ND,Bismarck,50,False,False,False,Gas
1,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-08-10 11:30:00 UTC,auto,auto,713,733,683,ND,Bismarck,50,False,False,False,Gas
2,8890426dc46cda2ec46610bb45860cbaceb4e9d3,2020-08-14 19:40:00 UTC,cool,auto,685,698,674,ND,,0,True,False,False,Gas
3,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-08-10 12:50:00 UTC,auto,auto,712,733,683,ND,Bismarck,50,False,False,False,Gas
4,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-08-08 15:00:00 UTC,auto,auto,720,733,683,ND,Bismarck,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57512,c171e9566395e753d002ead8edbfacf80563248c,2020-08-14 16:00:00 UTC,cool,hold,705,700,700,ND,Minot,40,False,False,False,Gas
57513,c171e9566395e753d002ead8edbfacf80563248c,2020-08-10 12:10:00 UTC,cool,hold,692,700,700,ND,Minot,40,False,False,False,Gas
57514,c171e9566395e753d002ead8edbfacf80563248c,2020-08-10 18:20:00 UTC,cool,hold,707,700,700,ND,Minot,40,False,False,False,Gas
57515,c171e9566395e753d002ead8edbfacf80563248c,2020-08-14 16:25:00 UTC,cool,hold,702,700,700,ND,Minot,40,False,False,False,Gas


In [154]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
aug_2020 = aug_2020.assign(Year="2020", Month="aug")

In [155]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_labels)

In [156]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): this extract contains readings with a blank City; groupby's
# default dropna=True leaves rows with a missing grouping key out of the
# result -- confirm that is intended.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Export CSV file: write the August 2020 averages; index=True keeps the group
# keys (held in the index) as leading columns.
aug_2020_ave.to_csv(
    path_or_buf="data/day/ND/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export as combined CSV

In [158]:
# Collect the per-year CSV files for August. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/ND/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file (in sorted order, so the result does not depend on the
# order the names were listed in) and concatenate once; concatenating onto a
# growing frame inside the loop re-copies the accumulated rows on every pass.
yearly_frames = [pd.read_csv("data/day/ND/aug/" + file) for file in sorted(files)]

# pd.concat raises on an empty list, so fall back to an empty frame.
# ignore_index gives the combined rows one continuous index instead of
# repeating each file's own 0..n labels.
ND_aug = pd.concat(yearly_frames, ignore_index=True) if yearly_frames else pd.DataFrame()

ND_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,078b48dc9011fc73ec28a0464a6e0b260e5a8715,aug,2017,cool,hold,Williston,732.026178,705.471204,705.471204,5.0,False,False,False
1,0c0bd0b708210af527c4acd1652566b20b9e1113,aug,2017,cool,auto,Casselton,740.534314,735.000000,685.000000,70.0,False,False,False
2,0c0bd0b708210af527c4acd1652566b20b9e1113,aug,2017,cool,hold,Casselton,719.039216,725.000000,725.000000,70.0,False,False,False
3,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,aug,2017,auto,auto,Bismarck,717.913043,720.521739,650.260870,15.0,False,False,False
4,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,aug,2017,auto,hold,Bismarck,702.000000,700.772727,650.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
83,f874cb818c5db31bdf731cf69f70e68bf9fc33e4,aug,2020,cool,hold,Fargo,715.137931,690.000000,690.000000,6.0,False,False,False
84,fac4be949f9b030bcaedc0c75666f90d246e942d,aug,2020,cool,auto,Bismarck,760.163728,771.821159,768.887909,0.0,False,False,False
85,fac4be949f9b030bcaedc0c75666f90d246e942d,aug,2020,cool,hold,Bismarck,761.228753,762.468076,762.475687,0.0,False,False,False
86,feb83f2c66430dae260de2c2c5e688ff4ab0e013,aug,2020,cool,auto,Fargo,755.558824,750.000000,750.000000,15.0,False,False,False


In [160]:
# Save the combined August table, written without the row index.
ND_aug.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/ND/ND_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Read the 2017 December "day" extract for ND into a DataFrame
dec_2017 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2017-dec-day-ND.csv")

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2017.drop(index=dec_2017[outlier_rows].index, inplace=True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a7d3bcf73e0768b4a1e8a9e952594a1f67d7998d,2017-12-04 13:25:00 UTC,heat,hold,661,713,643,ND,Fargo,40,False,False,False,Gas
1,5cd961d7fce7850ef803f04c65462e23ec55fbc9,2017-12-23 17:30:00 UTC,heat,hold,700,709,709,ND,Fargo,5,False,False,False,Gas
2,0f282db9c216fb43ec7241fe29cd5c947102a773,2017-12-14 12:50:00 UTC,heat,hold,720,721,721,ND,Grand Forks,0,False,False,False,Gas
3,0f282db9c216fb43ec7241fe29cd5c947102a773,2017-12-15 17:50:00 UTC,heat,hold,714,729,694,ND,Grand Forks,0,False,False,False,Gas
4,0f282db9c216fb43ec7241fe29cd5c947102a773,2017-12-10 13:10:00 UTC,heat,auto,714,721,721,ND,Grand Forks,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32928,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2017-12-31 12:55:00 UTC,auto,hold,699,760,700,ND,West Fargo,7,True,False,False,Gas
32929,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2017-12-30 16:30:00 UTC,auto,hold,698,760,700,ND,West Fargo,7,True,False,False,Gas
32930,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2017-12-30 19:50:00 UTC,auto,hold,698,760,700,ND,West Fargo,7,True,False,False,Gas
32931,c1f3119e2131dc3a7d8869ad6d7d7357b6f0bb82,2017-12-30 16:10:00 UTC,auto,hold,696,760,700,ND,West Fargo,7,True,False,False,Gas


In [163]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
dec_2017 = dec_2017.assign(Year="2017", Month="dec")

In [164]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_labels)

In [165]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Export CSV file: write the December 2017 averages; index=True keeps the group
# keys (held in the index) as leading columns.
dec_2017_ave.to_csv(
    path_or_buf="data/day/ND/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Read the 2018 December "day" extract for ND into a DataFrame
dec_2018 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2018-dec-day-ND.csv")

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2018.drop(index=dec_2018[outlier_rows].index, inplace=True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,46c6cf7aa119b224a55cf4ce661634d3de1b8aa4,2018-12-29 17:45:00 UTC,heat,auto,618,748,637,ND,Fargo,68,False,False,False,Gas
2,6f248a70ab5815dab3ebb2e6ca657d4fbad93a75,2018-12-29 13:15:00 UTC,heat,hold,655,679,671,ND,Fargo,5,False,False,False,Gas
4,3e99a1ed6d48bd710de3ab46d9cd2030a4beec8d,2018-12-03 19:25:00 UTC,heat,hold,668,685,685,ND,Fargo,80,False,False,False,Gas
5,46c6cf7aa119b224a55cf4ce661634d3de1b8aa4,2018-12-09 17:20:00 UTC,heat,auto,694,731,700,ND,Fargo,68,False,False,False,Gas
6,3e99a1ed6d48bd710de3ab46d9cd2030a4beec8d,2018-12-03 17:55:00 UTC,heat,hold,680,685,685,ND,Fargo,80,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56310,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2018-12-31 14:45:00 UTC,auto,hold,708,760,710,ND,Horace,5,False,False,False,Gas
56311,9180931f861b3cf7b2c0594ee52f5114427acf02,2018-12-30 19:05:00 UTC,heat,auto,698,760,700,ND,Bismarck,10,False,False,False,Gas
56312,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2018-12-31 15:30:00 UTC,auto,hold,703,760,710,ND,Horace,5,False,False,False,Gas
56313,e9680bd2d018dc7dd7f2f673832572f5767a9b11,2018-12-31 14:05:00 UTC,auto,hold,693,760,710,ND,Horace,5,False,False,False,Gas


In [169]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
dec_2018 = dec_2018.assign(Year="2018", Month="dec")

In [170]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_labels)

In [171]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Export CSV file: write the December 2018 averages; index=True keeps the group
# keys (held in the index) as leading columns.
dec_2018_ave.to_csv(
    path_or_buf="data/day/ND/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Read the 2019 December "day" extract for ND into a DataFrame
dec_2019 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2019-dec-day-ND.csv")

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2019.drop(index=dec_2019[outlier_rows].index, inplace=True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8217745f571abdaa223df2cbeebcfbf00f3f9557,2019-12-21 15:05:00 UTC,auto,auto,699,747,697,ND,Bismarck,50,False,False,False,Gas
1,8217745f571abdaa223df2cbeebcfbf00f3f9557,2019-12-21 16:30:00 UTC,auto,auto,691,747,697,ND,Bismarck,50,False,False,False,Gas
2,8217745f571abdaa223df2cbeebcfbf00f3f9557,2019-12-01 17:00:00 UTC,auto,auto,700,747,697,ND,Bismarck,50,False,False,False,Gas
3,8217745f571abdaa223df2cbeebcfbf00f3f9557,2019-12-21 15:00:00 UTC,auto,auto,694,747,697,ND,Bismarck,50,False,False,False,Gas
4,8217745f571abdaa223df2cbeebcfbf00f3f9557,2019-12-01 16:45:00 UTC,auto,auto,689,747,697,ND,Bismarck,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
64644,fac4be949f9b030bcaedc0c75666f90d246e942d,2019-12-29 14:55:00 UTC,heat,hold,759,760,760,ND,Bismarck,0,False,False,False,Gas
64645,fac4be949f9b030bcaedc0c75666f90d246e942d,2019-12-16 16:30:00 UTC,heat,hold,760,760,760,ND,Bismarck,0,False,False,False,Gas
64646,fac4be949f9b030bcaedc0c75666f90d246e942d,2019-12-14 18:30:00 UTC,heat,hold,756,760,760,ND,Bismarck,0,False,False,False,Gas
64647,fac4be949f9b030bcaedc0c75666f90d246e942d,2019-12-12 18:35:00 UTC,heat,hold,758,760,760,ND,Bismarck,0,False,False,False,Gas


In [175]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
dec_2019 = dec_2019.assign(Year="2019", Month="dec")

In [176]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_labels)

In [177]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Export CSV file: write the December 2019 averages; index=True keeps the group
# keys (held in the index) as leading columns.
dec_2019_ave.to_csv(
    path_or_buf="data/day/ND/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Read the 2020 December "day" extract for ND into a DataFrame
dec_2020 = pd.read_csv(filepath_or_buffer="../data_large/ND-day/2020-dec-day-ND.csv")

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: discard every reading whose
# cooling or heating setpoint is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
outlier_rows = (setpoints.ge(850) | setpoints.le(600)).any(axis=1)
dec_2020.drop(index=dec_2020[outlier_rows].index, inplace=True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-12-06 18:00:00 UTC,auto,auto,685,746,696,ND,Bismarck,50,False,False,False,Gas
1,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2020-12-10 15:00:00 UTC,heat,hold,698,713,713,ND,Bismarck,57,False,False,False,Gas
2,8217745f571abdaa223df2cbeebcfbf00f3f9557,2020-12-06 17:25:00 UTC,auto,auto,682,746,696,ND,Bismarck,50,False,False,False,Gas
3,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2020-12-15 18:20:00 UTC,heat,hold,695,703,703,ND,Bismarck,57,False,False,False,Gas
4,ac4e1ef26fc2901cd9d1bdd1c24a31f90878e363,2020-12-18 18:50:00 UTC,heat,hold,687,703,703,ND,Bismarck,57,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57229,7704704fb68e3b5388328d60bc0d285590698ad5,2020-12-31 13:20:00 UTC,heat,hold,718,720,720,ND,Lincoln,5,False,False,False,Gas
57230,7704704fb68e3b5388328d60bc0d285590698ad5,2020-12-30 19:35:00 UTC,heat,hold,721,720,720,ND,Lincoln,5,False,False,False,Gas
57231,7704704fb68e3b5388328d60bc0d285590698ad5,2020-12-31 17:10:00 UTC,heat,hold,720,720,720,ND,Lincoln,5,False,False,False,Gas
57232,7704704fb68e3b5388328d60bc0d285590698ad5,2020-12-31 15:45:00 UTC,heat,hold,718,720,720,ND,Lincoln,5,False,False,False,Gas


In [181]:
# Add year and month: constant labels for this extract, used as grouping keys
# when the readings are averaged.
dec_2020 = dec_2020.assign(Year="2020", Month="dec")

In [182]:
# Rename columns to label the aggregates: the three temperature columns get an
# "_ave" suffix ahead of the averaging step.
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_labels)

In [183]:
# Average every numeric column per thermostat, period, HVAC mode, calendar
# event and city. numeric_only=True is explicit because pandas >= 2.0 no longer
# drops the text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type)
# silently and raises a TypeError instead.
# NOTE(review): rows with a missing grouping key (e.g. a blank City) are left
# out by groupby's default dropna=True -- confirm that is intended.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Export CSV file: write the December 2020 averages; index=True keeps the group
# keys (held in the index) as leading columns.
dec_2020_ave.to_csv(
    path_or_buf="data/day/ND/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in the per-year average CSV files from the month's folder
2. Export as combined CSV

In [185]:
# Collect the per-year CSV files for December. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/ND/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file (in sorted order, so the result does not depend on the
# order the names were listed in) and concatenate once; concatenating onto a
# growing frame inside the loop re-copies the accumulated rows on every pass.
yearly_frames = [pd.read_csv("data/day/ND/dec/" + file) for file in sorted(files)]

# pd.concat raises on an empty list, so fall back to an empty frame.
# ignore_index gives the combined rows one continuous index instead of
# repeating each file's own 0..n labels.
ND_dec = pd.concat(yearly_frames, ignore_index=True) if yearly_frames else pd.DataFrame()

ND_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0f282db9c216fb43ec7241fe29cd5c947102a773,dec,2017,heat,auto,Grand Forks,719.867220,721.774550,721.764869,0.0,False,False,False
1,0f282db9c216fb43ec7241fe29cd5c947102a773,dec,2017,heat,hold,Grand Forks,718.236986,720.876054,720.818542,0.0,False,False,False
2,31bab83a250783cc0d48f1509a6fdfeb059990b7,dec,2017,heat,hold,Argusville,657.741344,660.237271,660.232179,0.0,False,False,True
3,3e1895a4447b3d22741a2a18a91fa946c1562773,dec,2017,heat,auto,Lincoln,705.857143,720.779221,720.779221,45.0,False,False,False
4,3e1895a4447b3d22741a2a18a91fa946c1562773,dec,2017,heat,hold,Lincoln,715.217647,729.629412,729.476471,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,f2ed998b0e9787c31305d2f800360eb9edbee19d,dec,2020,heat,auto,Fargo,671.722222,680.000000,680.000000,5.0,False,False,False
78,f739c79ec2a0e3e14e490aac58fac311385e7ec5,dec,2020,heat,hold,Fargo,691.150000,695.000000,695.000000,0.0,False,False,False
79,fac4be949f9b030bcaedc0c75666f90d246e942d,dec,2020,heat,auto,Bismarck,755.408642,750.000000,758.343210,0.0,False,False,False
80,fac4be949f9b030bcaedc0c75666f90d246e942d,dec,2020,heat,hold,Bismarck,747.345370,749.776852,749.776852,0.0,False,False,False


In [187]:
# Save the combined December table, written without the row index.
ND_dec.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/ND/ND_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in the combined month CSV files from the state's folder
2. Export as combined CSV

In [188]:
# Collect the combined month CSV files for ND. os.listdir returns names in
# arbitrary order, so sort them to keep the combined row order reproducible.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/ND/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file (in sorted order, so the result does not depend on the
# order the names were listed in) and concatenate once; concatenating onto a
# growing frame inside the loop re-copies the accumulated rows on every pass.
monthly_frames = [
    pd.read_csv("Scraper_Output/State_Month_Day/ND/" + file) for file in sorted(files)
]

# pd.concat raises on an empty list, so fall back to an empty frame.
# ignore_index gives the combined rows one continuous index instead of
# repeating each file's own 0..n labels.
ND_all = pd.concat(monthly_frames, ignore_index=True) if monthly_frames else pd.DataFrame()

ND_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,078b48dc9011fc73ec28a0464a6e0b260e5a8715,aug,2017,cool,hold,Williston,732.026178,705.471204,705.471204,5.0,False,False,False
1,0c0bd0b708210af527c4acd1652566b20b9e1113,aug,2017,cool,auto,Casselton,740.534314,735.000000,685.000000,70.0,False,False,False
2,0c0bd0b708210af527c4acd1652566b20b9e1113,aug,2017,cool,hold,Casselton,719.039216,725.000000,725.000000,70.0,False,False,False
3,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,aug,2017,auto,auto,Bismarck,717.913043,720.521739,650.260870,15.0,False,False,False
4,0d43d8c1d0fa6575efa5d2901593535d7c2f7daa,aug,2017,auto,hold,Bismarck,702.000000,700.772727,650.000000,15.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
438,f0824b3f9c6d20fb3772043d37ed892bb295771f,jun,2021,auto,hold,West Fargo,696.061069,685.022901,635.022901,0.0,False,False,False
439,f2ed998b0e9787c31305d2f800360eb9edbee19d,jun,2021,cool,hold,Fargo,750.583333,750.000000,750.000000,5.0,False,False,False
440,f874cb818c5db31bdf731cf69f70e68bf9fc33e4,jun,2021,cool,hold,Fargo,700.857143,670.142857,670.000000,6.0,False,False,False
441,fac4be949f9b030bcaedc0c75666f90d246e942d,jun,2021,cool,hold,Bismarck,751.908479,751.515478,751.516375,0.0,False,False,False


In [190]:
# Save the all-months table for ND, written without the row index.
ND_all.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/ND_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL
# queries: print the distinct ProvinceState values found in each monthly frame.
frames_by_label = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# Dict insertion order keeps the report in the same month/year sequence.
for label, frame in frames_by_label.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['ND']
Unique jan_2018: ['ND']
Unique jan_2019: ['ND']
Unique jan_2020: ['ND']
Unique jan_2021: ['ND']
Unique feb_2017: ['ND']
Unique feb_2018: ['ND']
Unique feb_2019: ['ND']
Unique feb_2020: ['ND']
Unique feb_2021: ['ND']
Unique jun_2017: ['ND']
Unique jun_2018: ['ND']
Unique jun_2019: ['ND']
Unique jun_2020: ['ND']
Unique jun_2021: ['ND']
Unique jul_2017: ['ND']
Unique jul_2018: ['ND']
Unique jul_2019: ['ND']
Unique jul_2020: ['ND']
Unique jul_2021: ['ND']
Unique aug_2017: ['ND']
Unique aug_2018: ['ND']
Unique aug_2019: ['ND']
Unique aug_2020: ['ND']
Unique dec_2017: ['ND']
Unique dec_2018: ['ND']
Unique dec_2019: ['ND']
Unique dec_2020: ['ND']
