# DYD Thermostat Data 

## Preprocess

1. Generated CSV files from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the January 2017 "day" extract for AK into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2017-jan-day-AK.csv")

In [3]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
jan_2017 = jan_2017.loc[
    ~(
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-01-23T08:50:00Z,heat,hold,697,688,688,AK,Anchorage,0,False,False,False,Gas
1,14a6f6af23d454bda9e5b09021f63af861c9ab83,2017-01-16T19:45:00Z,heat,auto,642,806,628,AK,Anchorage,20,False,False,False,Gas
2,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-01-20T09:05:00Z,heat,auto,683,685,681,AK,Anchorage,0,False,False,False,Gas
3,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-01-13T08:40:00Z,heat,auto,665,700,684,AK,Anchorage,0,False,False,False,Gas
4,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-01-01T08:45:00Z,heat,auto,637,741,669,AK,Anchorage,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6598,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2017-01-07T07:00:00Z,heat,auto,738,740,740,AK,Wasilla,5,False,False,False,Gas
6599,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2017-01-07T17:25:00Z,heat,auto,737,740,740,AK,Wasilla,5,False,False,False,Gas
6600,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2017-01-07T16:45:00Z,heat,auto,744,740,740,AK,Wasilla,5,False,False,False,Gas
6601,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2017-01-07T17:20:00Z,heat,auto,739,740,740,AK,Wasilla,5,False,False,False,Gas


In [4]:
# Stamp every row with constant Year and Month labels (both stored as strings).
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
jan_2017 = jan_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the text columns still in the frame (date_time,
# ProvinceState, Auxilliary_Heat_Fuel_Type): pandas >= 2.0 raises a TypeError
# when asked to average them, whereas older versions dropped them silently.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
14a6f6af23d454bda9e5b09021f63af861c9ab83,Jan,2017,heat,auto,Anchorage,642.0,806.0,628.0,20.0,False,False,False
18c482b133fba9f05f7edac61214d7cd1082ff1a,Jan,2017,heat,auto,Anchorage,642.055556,780.0,650.0,5.0,False,False,False
364d90d4305e64cf8a54685a3669aca9ba7667ed,Jan,2017,heat,hold,Palmer,644.954545,710.0,710.0,0.0,False,False,False
6085162893ee544ece3ce04e242a68d7a8afe24d,Jan,2017,heat,auto,Russian Jack Park,629.122517,654.688742,634.831126,20.0,False,False,False
6085162893ee544ece3ce04e242a68d7a8afe24d,Jan,2017,heat,hold,Russian Jack Park,619.231492,650.07168,620.521739,20.0,False,False,False
81a7bd30ff87a8f606e1378fe9117abbded76a9c,Jan,2017,heat,auto,Chugiak,635.125,655.208333,638.916667,10.0,True,False,False
81a7bd30ff87a8f606e1378fe9117abbded76a9c,Jan,2017,heat,hold,Chugiak,635.212766,650.0,640.0,10.0,True,False,False
a799538555e2fb23d035a9aa0004122110571484,Jan,2017,heat,hold,Anchorage,669.842105,672.631579,672.631579,40.0,False,False,False
b2926c336f48d0bdbcd5343bb68676b3a9364748,Jan,2017,heat,auto,Healy,682.669565,700.008696,685.182609,10.0,False,False,False
b2926c336f48d0bdbcd5343bb68676b3a9364748,Jan,2017,heat,hold,Healy,678.985507,687.724638,686.710145,10.0,False,False,False


In [7]:
# Export the January 2017 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/jan", exist_ok=True)
jan_2017_ave.to_csv("data/day/AK/jan/jan_2017_ave.csv", header=True, index=True)

### 2018 January Day

In [8]:
# Load the January 2018 "day" extract for AK into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2018-jan-day-AK.csv")

In [9]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
jan_2018 = jan_2018.loc[
    ~(
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a799538555e2fb23d035a9aa0004122110571484,2018-01-13T18:40:00Z,heat,hold,698,720,695,AK,Anchorage,40,False,False,False,Gas
1,de96ad05755fc1e2697dff5f2548474733554565,2018-01-14T08:25:00Z,heat,auto,675,798,608,AK,Anchorage,30,False,False,False,Gas
2,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-01-22T18:30:00Z,heat,hold,668,669,669,AK,Homer,0,True,False,False,Gas
3,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-01-22T18:00:00Z,heat,auto,646,658,658,AK,Homer,0,True,False,False,Gas
4,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-01-19T19:35:00Z,heat,hold,679,666,661,AK,Homer,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13251,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-01-28T19:30:00Z,heat,auto,731,740,740,AK,Wasilla,5,False,False,False,Gas
13252,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-01-25T07:15:00Z,heat,auto,731,740,740,AK,Wasilla,5,False,False,False,Gas
13253,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-01-26T07:35:00Z,heat,auto,735,740,740,AK,Wasilla,5,False,False,False,Gas
13254,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-01-25T16:00:00Z,heat,auto,744,740,740,AK,Wasilla,5,False,False,False,Gas


In [10]:
# Stamp every row with constant Year and Month labels (both stored as strings).
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
jan_2018 = jan_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the January 2018 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/jan", exist_ok=True)
jan_2018_ave.to_csv("data/day/AK/jan/jan_2018_ave.csv", header=True, index=True)

### 2019 January Day

In [14]:
# Load the January 2019 "day" extract for AK into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2019-jan-day-AK.csv")

In [15]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
jan_2019 = jan_2019.loc[
    ~(
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a799538555e2fb23d035a9aa0004122110571484,2019-01-07T18:30:00Z,heat,hold,647,773,678,AK,Anchorage,40,False,False,False,Gas
1,bdc58f9e6d012d051afe58172dc008bebf29a356,2019-01-06T08:25:00Z,heat,hold,715,724,719,AK,Huffman/O'Malley,40,False,False,False,Gas
2,bdc58f9e6d012d051afe58172dc008bebf29a356,2019-01-05T08:25:00Z,heat,hold,730,743,738,AK,Huffman/O'Malley,40,False,False,False,Gas
3,de96ad05755fc1e2697dff5f2548474733554565,2019-01-20T07:10:00Z,heat,hold,672,720,668,AK,Anchorage,30,False,False,False,Gas
4,de96ad05755fc1e2697dff5f2548474733554565,2019-01-06T08:10:00Z,heat,hold,654,714,662,AK,Anchorage,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23727,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-01-05T08:10:00Z,heat,hold,740,740,740,AK,Wasilla,5,False,False,False,Gas
23728,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-01-05T07:10:00Z,heat,hold,736,740,740,AK,Wasilla,5,False,False,False,Gas
23729,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-01-05T07:20:00Z,heat,hold,732,740,740,AK,Wasilla,5,False,False,False,Gas
23730,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-01-04T19:25:00Z,heat,hold,738,740,740,AK,Wasilla,5,False,False,False,Gas


In [16]:
# Stamp every row with constant Year and Month labels (both stored as strings).
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
jan_2019 = jan_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the January 2019 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/jan", exist_ok=True)
jan_2019_ave.to_csv("data/day/AK/jan/jan_2019_ave.csv", header=True, index=True)

### 2020 January Day

In [20]:
# Load the January 2020 "day" extract for AK into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2020-jan-day-AK.csv")

In [21]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
jan_2020 = jan_2020.loc[
    ~(
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3f31b8a9942ce423b863741438287d127c6bc39f,2020-01-10T19:10:00Z,heat,auto,696,780,721,AK,Anchorage,47,False,False,False,Gas
1,391a6ce2e4f243b7965dfc6004d6e8fc4cc7936e,2020-01-28T07:45:00Z,heat,hold,704,773,683,AK,Palmer,25,False,False,False,Gas
2,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,2020-01-26T08:10:00Z,heat,hold,689,750,750,AK,Anchorage,15,False,False,False,Gas
3,391a6ce2e4f243b7965dfc6004d6e8fc4cc7936e,2020-01-03T07:20:00Z,heat,hold,714,752,692,AK,Palmer,25,False,False,False,Gas
4,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,2020-01-26T08:15:00Z,heat,hold,690,750,750,AK,Anchorage,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16162,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-01-27T16:00:00Z,heat,hold,737,740,740,AK,Anchorage,35,False,False,False,Gas
16163,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-01-27T17:05:00Z,heat,hold,731,740,740,AK,Anchorage,35,False,False,False,Gas
16164,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-01-27T15:45:00Z,heat,hold,730,740,740,AK,Anchorage,35,False,False,False,Gas
16165,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-01-27T15:30:00Z,heat,hold,737,740,740,AK,Anchorage,35,False,False,False,Gas


In [22]:
# Stamp every row with constant Year and Month labels (both stored as strings).
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
jan_2020 = jan_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the January 2020 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/jan", exist_ok=True)
jan_2020_ave.to_csv("data/day/AK/jan/jan_2020_ave.csv", header=True, index=True)

### 2021 January Day

In [26]:
# Load the January 2021 "day" extract for AK into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2021-jan-day-AK.csv")

In [27]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
jan_2021 = jan_2021.loc[
    ~(
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    )
].copy()

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1934269f8f3a7a065318ec9a4e7984c0d9216332,2021-01-26T15:55:00Z,heat,hold,618,650,622,AK,Wasilla,0,False,False,False,Gas
1,1934269f8f3a7a065318ec9a4e7984c0d9216332,2021-01-26T19:20:00Z,heat,hold,649,682,682,AK,Wasilla,0,False,False,False,Gas
2,1934269f8f3a7a065318ec9a4e7984c0d9216332,2021-01-27T18:10:00Z,heat,hold,621,682,682,AK,Wasilla,0,False,False,False,Gas
3,1934269f8f3a7a065318ec9a4e7984c0d9216332,2021-01-26T17:25:00Z,heat,hold,622,650,622,AK,Wasilla,0,False,False,False,Gas
4,1934269f8f3a7a065318ec9a4e7984c0d9216332,2021-01-27T18:35:00Z,heat,hold,641,682,682,AK,Wasilla,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11593,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-01-08T19:25:00Z,heat,hold,682,690,690,AK,Anchorage,35,False,False,False,Gas
11594,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-01-08T17:15:00Z,heat,hold,680,690,690,AK,Anchorage,35,False,False,False,Gas
11595,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-01-08T18:05:00Z,heat,hold,688,690,690,AK,Anchorage,35,False,False,False,Gas
11596,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-01-08T16:15:00Z,heat,hold,683,690,690,AK,Anchorage,35,False,False,False,Gas


In [28]:
# Stamp every row with constant Year and Month labels (both stored as strings).
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
jan_2021 = jan_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the January 2021 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/jan", exist_ok=True)
jan_2021_ave.to_csv("data/day/AK/jan/jan_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# Collect the names of the per-year January CSV files.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/AK/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year January CSV, then stack them with a single concat call.
# Concatenating onto a growing DataFrame inside the loop re-copies every
# accumulated row on each pass.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/AK/jan/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame, which is
# what the loop-based version produced when no CSV files were found.
# Each file's own row labels are kept (no ignore_index), as before.
AK_jan = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AK_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,14a6f6af23d454bda9e5b09021f63af861c9ab83,Jan,2017,heat,auto,Anchorage,642.000000,806.000000,628.000000,20.0,False,False,False
1,18c482b133fba9f05f7edac61214d7cd1082ff1a,Jan,2017,heat,auto,Anchorage,642.055556,780.000000,650.000000,5.0,False,False,False
2,364d90d4305e64cf8a54685a3669aca9ba7667ed,Jan,2017,heat,hold,Palmer,644.954545,710.000000,710.000000,0.0,False,False,False
3,6085162893ee544ece3ce04e242a68d7a8afe24d,Jan,2017,heat,auto,Russian Jack Park,629.122517,654.688742,634.831126,20.0,False,False,False
4,6085162893ee544ece3ce04e242a68d7a8afe24d,Jan,2017,heat,hold,Russian Jack Park,619.231492,650.071680,620.521739,20.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
17,bdc58f9e6d012d051afe58172dc008bebf29a356,Jan,2021,heat,hold,Huffman/O'Malley,702.300000,708.233333,706.500000,40.0,False,False,False
18,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,Jan,2021,heat,hold,Wasilla,528.027778,654.166667,642.500000,5.0,False,False,False
19,e189043abcdc363a66270079341e45b9be50fe29,Jan,2021,heat,hold,Anchorage,651.500000,664.000000,660.000000,30.0,False,False,False
20,e607ef283e0f21c02dcd7df409ee3e764d229b79,Jan,2021,heat,hold,Juneau,688.586387,717.314136,716.219895,50.0,False,False,False


In [34]:
# Write the combined January frame without its row labels (index=False).
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("Scraper_Output/State_Month_Day/AK", exist_ok=True)
AK_jan.to_csv("Scraper_Output/State_Month_Day/AK/AK_jan.csv", header=True, index=False)

---

## February

### 2017 February Day

In [35]:
# Load the February 2017 "day" extract for AK into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2017-feb-day-AK.csv")

In [36]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
feb_2017 = feb_2017.loc[
    ~(
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-02-04T19:05:00Z,heat,hold,629,650,620,AK,Palmer,0,False,False,False,Gas
1,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-02-04T19:45:00Z,heat,hold,620,650,620,AK,Palmer,0,False,False,False,Gas
2,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-02-04T19:35:00Z,heat,hold,623,650,620,AK,Palmer,0,False,False,False,Gas
3,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-02-04T19:40:00Z,heat,hold,621,650,620,AK,Palmer,0,False,False,False,Gas
4,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-02-04T18:50:00Z,heat,hold,634,650,620,AK,Palmer,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6178,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-02-21T10:30:00Z,heat,hold,700,740,740,AK,Anchorage,0,False,False,False,Gas
6179,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-02-21T10:50:00Z,heat,hold,704,740,740,AK,Anchorage,0,False,False,False,Gas
6180,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-02-21T11:10:00Z,heat,hold,708,740,740,AK,Anchorage,0,False,False,False,Gas
6181,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-02-21T11:00:00Z,heat,hold,707,740,740,AK,Anchorage,0,False,False,False,Gas


In [37]:
# Stamp every row with constant Year and Month labels (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - confirm which casing downstream code expects.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
feb_2017 = feb_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the February 2017 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/feb", exist_ok=True)
feb_2017_ave.to_csv("data/day/AK/feb/feb_2017_ave.csv", header=True, index=True)

### 2018 February Day

In [41]:
# Load the February 2018 "day" extract for AK into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2018-feb-day-AK.csv")

In [42]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
feb_2018 = feb_2018.loc[
    ~(
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,31ec2a66ddeac26df7dbbeffa7f98c4e2de26aff,2018-02-09T07:05:00Z,heat,auto,689,721,683,AK,Anchorage,25,False,False,False,Gas
1,dccf5d5620ccd2fb708408998d809914ea61e341,2018-02-18T08:20:00Z,heat,auto,676,720,617,AK,Anchorage,45,False,False,False,Gas
2,31ec2a66ddeac26df7dbbeffa7f98c4e2de26aff,2018-02-17T07:00:00Z,heat,hold,671,750,673,AK,Anchorage,25,False,False,False,Gas
3,bd4080916565ff1b12e94f584799f455347b5815,2018-02-03T07:00:00Z,heat,hold,752,771,749,AK,Palmer,8,False,False,False,Gas
4,a799538555e2fb23d035a9aa0004122110571484,2018-02-21T16:15:00Z,heat,auto,695,780,693,AK,Anchorage,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12581,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-02-01T19:50:00Z,heat,auto,741,740,740,AK,Wasilla,5,False,False,False,Gas
12582,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-02-04T16:45:00Z,heat,auto,742,740,740,AK,Wasilla,5,False,False,False,Gas
12583,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-02-04T18:25:00Z,heat,auto,741,740,740,AK,Wasilla,5,False,False,False,Gas
12584,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2018-02-02T07:25:00Z,heat,auto,734,740,740,AK,Wasilla,5,False,False,False,Gas


In [43]:
# Stamp every row with constant Year and Month labels (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - confirm which casing downstream code expects.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
feb_2018 = feb_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the February 2018 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/feb", exist_ok=True)
feb_2018_ave.to_csv("data/day/AK/feb/feb_2018_ave.csv", header=True, index=True)

### 2019 February Day

In [47]:
# Load the February 2019 "day" extract for AK into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2019-feb-day-AK.csv")

In [48]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
feb_2019 = feb_2019.loc[
    ~(
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bdc58f9e6d012d051afe58172dc008bebf29a356,2019-02-03T08:25:00Z,heat,hold,704,728,718,AK,Huffman/O'Malley,40,False,False,False,Gas
2,de96ad05755fc1e2697dff5f2548474733554565,2019-02-17T07:20:00Z,heat,hold,678,673,646,AK,Anchorage,30,False,False,False,Gas
11,de96ad05755fc1e2697dff5f2548474733554565,2019-02-24T08:25:00Z,heat,hold,616,695,605,AK,Anchorage,30,False,False,False,Gas
13,a799538555e2fb23d035a9aa0004122110571484,2019-02-14T18:00:00Z,heat,hold,649,747,677,AK,Anchorage,40,False,False,False,Gas
16,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2019-02-02T18:45:00Z,heat,hold,694,726,693,AK,Homer,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14691,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-02-22T07:45:00Z,heat,auto,733,740,740,AK,Wasilla,5,False,False,False,Gas
14692,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-02-23T19:15:00Z,heat,auto,732,740,740,AK,Wasilla,5,False,False,False,Gas
14693,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-02-22T17:15:00Z,heat,auto,741,740,740,AK,Wasilla,5,False,False,False,Gas
14694,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-02-21T15:45:00Z,heat,auto,728,740,740,AK,Wasilla,5,False,False,False,Gas


In [49]:
# Stamp every row with constant Year and Month labels (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - confirm which casing downstream code expects.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
feb_2019 = feb_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the February 2019 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/feb", exist_ok=True)
feb_2019_ave.to_csv("data/day/AK/feb/feb_2019_ave.csv", header=True, index=True)

### 2020 February Day

In [53]:
# Load the February 2020 "day" extract for AK into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2020-feb-day-AK.csv")

In [54]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
feb_2020 = feb_2020.loc[
    ~(
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a46459975fa2d0e4a3736887ad3a0e3963c8feaf,2020-02-16T07:25:00Z,heat,hold,701,794,671,AK,Juneau,50,False,False,False,Gas
1,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,2020-02-03T08:25:00Z,heat,hold,705,764,674,AK,Anchorage,15,False,False,False,Gas
2,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,2020-02-24T08:25:00Z,heat,auto,698,800,657,AK,Anchorage,15,False,False,False,Gas
3,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,2020-02-23T07:25:00Z,heat,hold,708,694,688,AK,Anchorage,15,False,False,False,Gas
4,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,2020-02-02T08:25:00Z,heat,auto,708,768,671,AK,Anchorage,15,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12875,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2020-02-02T07:30:00Z,heat,auto,732,720,740,AK,Wasilla,5,False,False,False,Gas
12876,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2020-02-02T07:40:00Z,heat,auto,728,720,740,AK,Wasilla,5,False,False,False,Gas
12877,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2020-02-02T07:35:00Z,heat,auto,726,720,740,AK,Wasilla,5,False,False,False,Gas
12878,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2020-02-02T08:10:00Z,heat,auto,739,720,740,AK,Wasilla,5,False,False,False,Gas


In [55]:
# Stamp every row with constant Year and Month labels (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - confirm which casing downstream code expects.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
feb_2020 = feb_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the February 2020 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/feb", exist_ok=True)
feb_2020_ave.to_csv("data/day/AK/feb/feb_2020_ave.csv", header=True, index=True)

### 2021 February Day

In [59]:
# Load the February 2021 "day" extract for AK into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2021-feb-day-AK.csv")

In [60]:
# Remove predetermined outliers before aggregating: discard every row where
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# .copy() gives an independent frame so later column assignments are safe.
feb_2021 = feb_2021.loc[
    ~(
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    )
].copy()

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
423,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-02-24T16:45:00Z,heat,hold,627,650,630,AK,Anchorage,35,False,False,False,Gas
424,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-02-24T16:15:00Z,heat,hold,630,650,630,AK,Anchorage,35,False,False,False,Gas
425,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-02-24T16:10:00Z,heat,hold,633,650,630,AK,Anchorage,35,False,False,False,Gas
426,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-02-24T16:40:00Z,heat,hold,631,650,630,AK,Anchorage,35,False,False,False,Gas
427,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-02-24T16:55:00Z,heat,hold,624,650,630,AK,Anchorage,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11873,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-02-12T16:50:00Z,heat,hold,668,740,740,AK,Juneau,50,False,False,False,Gas
11874,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-02-14T19:00:00Z,heat,hold,660,740,740,AK,Juneau,50,False,False,False,Gas
11875,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-02-10T17:55:00Z,heat,hold,633,740,740,AK,Juneau,50,False,False,False,Gas
11876,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-02-14T17:25:00Z,heat,hold,668,740,740,AK,Juneau,50,False,False,False,Gas


In [61]:
# Stamp every row with constant Year and Month labels (both stored as strings).
# NOTE(review): the January cells label the month "Jan" (capitalised) while
# February uses lowercase "feb" - confirm which casing downstream code expects.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages once the frame has been aggregated.
feb_2021 = feb_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City combination.
# numeric_only=True skips the non-numeric columns still in the frame (e.g.
# date_time): pandas >= 2.0 raises a TypeError when asked to average them,
# whereas older versions dropped them silently.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the February 2021 averages. The group keys live in the index, so
# index=True writes them out as the leading CSV columns.
# to_csv does not create missing folders, so make sure the target exists.
os.makedirs("data/day/AK/feb", exist_ok=True)
feb_2021_ave.to_csv("data/day/AK/feb/feb_2021_ave.csv", header=True, index=True)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# Collect the names of the per-year February CSV files.
# os.listdir returns entries in arbitrary order, so sort them to keep the row
# order of the combined file reproducible across runs and machines.
files = sorted(f for f in os.listdir("data/day/AK/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year February CSV, then stack them with a single concat call.
# Concatenating onto a growing DataFrame inside the loop re-copies every
# accumulated row on each pass.
yearly_frames = []
for file in files:
    df = pd.read_csv("data/day/AK/feb/" + file)
    yearly_frames.append(df)

# pd.concat raises on an empty list; fall back to an empty frame, which is
# what the loop-based version produced when no CSV files were found.
# Each file's own row labels are kept (no ignore_index), as before.
AK_feb = pd.concat(yearly_frames) if yearly_frames else pd.DataFrame()

AK_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,feb,2017,heat,auto,Homer,683.000000,710.000000,677.000000,0.0,True,False,False
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,feb,2017,heat,hold,Homer,685.013699,686.835616,686.109589,0.0,True,False,False
2,18c482b133fba9f05f7edac61214d7cd1082ff1a,feb,2017,heat,auto,Anchorage,629.166667,780.000000,650.000000,5.0,False,False,False
3,364d90d4305e64cf8a54685a3669aca9ba7667ed,feb,2017,heat,auto,Palmer,593.161905,759.380952,655.857143,0.0,False,False,False
4,364d90d4305e64cf8a54685a3669aca9ba7667ed,feb,2017,heat,hold,Palmer,628.294118,650.000000,620.000000,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,bdc58f9e6d012d051afe58172dc008bebf29a356,feb,2021,heat,hold,Huffman/O'Malley,693.233333,695.500000,694.733333,40.0,False,False,False
13,cd0c6dacc16ca9fd87a2258af6fe8291f9279be7,feb,2021,heat,hold,Anchorage,676.222222,683.666667,679.055556,15.0,False,False,False
14,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,feb,2021,heat,hold,Wasilla,620.636364,659.272727,657.090909,5.0,False,False,False
15,e607ef283e0f21c02dcd7df409ee3e764d229b79,feb,2021,heat,hold,Juneau,671.847737,727.781893,726.921811,50.0,False,False,False


In [67]:
# Write the combined February table; the row index is omitted from the file.
AK_feb.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AK/AK_feb.csv", index=False, header=True)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the June 2017 "day" extract for AK.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2017-jun-day-AK.csv")

In [69]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jun_2017 = jun_2017.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-06-24T19:15:00Z,heat,hold,672,650,630,AK,Homer,0,True,False,False,Gas
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-06-24T19:00:00Z,heat,hold,676,650,630,AK,Homer,0,True,False,False,Gas
2,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-06-24T18:35:00Z,heat,hold,678,650,630,AK,Homer,0,True,False,False,Gas
3,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-06-24T19:05:00Z,heat,hold,675,650,630,AK,Homer,0,True,False,False,Gas
4,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-06-24T19:35:00Z,heat,hold,672,650,630,AK,Homer,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5626,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-06-27T13:35:00Z,heat,hold,717,690,690,AK,juneau,27,False,False,False,Gas
5627,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-06-27T10:45:00Z,heat,hold,709,690,690,AK,juneau,27,False,False,False,Gas
5628,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-06-27T12:25:00Z,heat,hold,691,690,690,AK,juneau,27,False,False,False,Gas
5629,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-06-27T14:00:00Z,heat,hold,712,690,690,AK,juneau,27,False,False,False,Gas


In [70]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jun_2017 = jun_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [72]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [73]:
# Save the June 2017 averages; the group keys live in the index, so the index is written too.
jun_2017_ave.to_csv(path_or_buf="data/day/AK/jun/jun_2017_ave.csv", index=True, header=True)

### 2018 June Day

In [74]:
# Load the June 2018 "day" extract for AK.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2018-jun-day-AK.csv")

In [75]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jun_2018 = jun_2018.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-06-29T17:10:00Z,heat,hold,685,690,678,AK,Homer,0,True,False,False,Gas
2,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-06-13T18:15:00Z,heat,hold,657,767,655,AK,Homer,0,True,False,False,Gas
3,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-06-23T19:20:00Z,heat,hold,678,720,672,AK,Homer,0,True,False,False,Gas
4,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-06-16T18:55:00Z,heat,hold,677,681,669,AK,Homer,0,True,False,False,Gas
5,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2018-06-08T18:30:00Z,heat,hold,673,663,633,AK,Homer,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10747,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2018-06-01T15:10:00Z,heat,hold,653,680,680,AK,Anchorage,35,False,False,False,Gas
10748,b2926c336f48d0bdbcd5343bb68676b3a9364748,2018-06-16T17:05:00Z,heat,hold,698,680,680,AK,Healy,10,False,False,False,Gas
10749,b2926c336f48d0bdbcd5343bb68676b3a9364748,2018-06-16T17:15:00Z,heat,hold,690,680,680,AK,Healy,10,False,False,False,Gas
10750,b2926c336f48d0bdbcd5343bb68676b3a9364748,2018-06-16T17:10:00Z,heat,hold,693,680,680,AK,Healy,10,False,False,False,Gas


In [76]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jun_2018 = jun_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [78]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [79]:
# Save the June 2018 averages; the group keys live in the index, so the index is written too.
jun_2018_ave.to_csv(path_or_buf="data/day/AK/jun/jun_2018_ave.csv", index=True, header=True)

### 2019 June Day

In [80]:
# Load the June 2019 "day" extract for AK.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2019-jun-day-AK.csv")

In [81]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jun_2019 = jun_2019.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
77,4cf15a20ba9d9a39d09e7b978b351754c3dfbe6e,2019-06-01T17:55:00Z,heat,auto,615,780,620,AK,Anchorage,15,True,False,False,Gas
78,4cf15a20ba9d9a39d09e7b978b351754c3dfbe6e,2019-06-01T16:30:00Z,heat,auto,616,780,620,AK,Anchorage,15,True,False,False,Gas
79,4cf15a20ba9d9a39d09e7b978b351754c3dfbe6e,2019-06-01T19:15:00Z,heat,auto,618,780,620,AK,Anchorage,15,True,False,False,Gas
80,4cf15a20ba9d9a39d09e7b978b351754c3dfbe6e,2019-06-01T16:35:00Z,heat,auto,616,780,620,AK,Anchorage,15,True,False,False,Gas
81,4cf15a20ba9d9a39d09e7b978b351754c3dfbe6e,2019-06-01T18:20:00Z,heat,auto,615,780,620,AK,Anchorage,15,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7056,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2019-06-12T15:55:00Z,heat,hold,660,690,690,AK,juneau,27,False,False,False,Gas
7057,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2019-06-12T15:05:00Z,heat,hold,668,690,690,AK,juneau,27,False,False,False,Gas
7058,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2019-06-12T15:00:00Z,heat,hold,671,690,690,AK,juneau,27,False,False,False,Gas
7059,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2019-06-12T15:35:00Z,heat,hold,662,690,690,AK,juneau,27,False,False,False,Gas


In [82]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jun_2019 = jun_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [84]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [85]:
# Save the June 2019 averages; the group keys live in the index, so the index is written too.
jun_2019_ave.to_csv(path_or_buf="data/day/AK/jun/jun_2019_ave.csv", index=True, header=True)

### 2020 June Day

In [86]:
# Load the June 2020 "day" extract for AK.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2020-jun-day-AK.csv")

In [87]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jun_2020 = jun_2020.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
470,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-06-16T13:25:00Z,heat,hold,700,650,620,AK,Wasilla,0,False,False,False,Gas
471,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-06-16T13:35:00Z,heat,hold,698,650,620,AK,Wasilla,0,False,False,False,Gas
472,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-06-16T13:10:00Z,heat,hold,700,650,620,AK,Wasilla,0,False,False,False,Gas
473,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-06-16T13:55:00Z,heat,hold,697,650,620,AK,Wasilla,0,False,False,False,Gas
474,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-06-16T13:30:00Z,heat,hold,699,650,620,AK,Wasilla,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10478,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-06-26T17:05:00Z,heat,hold,732,730,730,AK,Juneau,50,False,False,False,Gas
10479,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-06-25T19:45:00Z,heat,hold,714,730,730,AK,Juneau,50,False,False,False,Gas
10480,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-06-25T19:25:00Z,heat,hold,721,730,730,AK,Juneau,50,False,False,False,Gas
10481,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-06-26T16:45:00Z,heat,hold,724,730,730,AK,Juneau,50,False,False,False,Gas


In [88]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jun_2020 = jun_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [90]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [91]:
# Save the June 2020 averages; the group keys live in the index, so the index is written too.
jun_2020_ave.to_csv(path_or_buf="data/day/AK/jun/jun_2020_ave.csv", index=True, header=True)

### 2021 June Day

In [92]:
# Load the June 2021 "day" extract for AK.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2021-jun-day-AK.csv")

In [93]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jun_2021 = jun_2021.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,bdc58f9e6d012d051afe58172dc008bebf29a356,2021-06-09T07:25:00Z,heat,hold,680,696,666,AK,Huffman/O'Malley,40,False,False,False,Gas
1,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-06-11T17:50:00Z,heat,hold,712,715,715,AK,Anchorage,20,False,False,False,Gas
2,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-06-11T19:20:00Z,heat,hold,712,715,715,AK,Anchorage,20,False,False,False,Gas
3,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-06-11T19:05:00Z,heat,hold,715,715,715,AK,Anchorage,20,False,False,False,Gas
4,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-06-18T19:55:00Z,heat,hold,712,715,715,AK,Anchorage,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7057,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-06-03T15:15:00Z,heat,hold,693,720,720,AK,Juneau,50,False,False,False,Gas
7058,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-06-03T15:35:00Z,heat,hold,697,720,720,AK,Juneau,50,False,False,False,Gas
7059,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-06-03T15:50:00Z,heat,hold,700,720,720,AK,Juneau,50,False,False,False,Gas
7060,e607ef283e0f21c02dcd7df409ee3e764d229b79,2021-06-03T15:25:00Z,heat,hold,695,720,720,AK,Juneau,50,False,False,False,Gas


In [94]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jun_2021 = jun_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [96]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [97]:
# Save the June 2021 averages; the group keys live in the index, so the index is written too.
jun_2021_ave.to_csv(path_or_buf="data/day/AK/jun/jun_2021_ave.csv", index=True, header=True)

---

### Combine Month CSV Files
1. Read in every per-year average CSV in this month's folder for the state
2. Export them as one combined CSV

In [98]:
# List the per-year average CSVs for this month. os.listdir returns names in
# arbitrary order, so sort them to keep the combined output reproducible.
files = sorted(f for f in os.listdir("data/day/AK/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and concatenate in a single call; calling
# pd.concat inside the loop re-copies every previously loaded row on each pass.
jun_frames = [pd.read_csv("data/day/AK/jun/" + file) for file in files]

# No matching files -> empty frame, matching the previous result.
AK_jun = pd.concat(jun_frames) if jun_frames else pd.DataFrame()

AK_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,jun,2017,heat,auto,Homer,647.000000,747.000000,637.000000,0.0,True,False,False
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,jun,2017,heat,hold,Homer,668.244094,667.244094,660.929134,0.0,True,False,False
2,1544f3165814ae017488964fc29ae302bba75bb7,jun,2017,heat,auto,juneau,692.543478,681.369565,680.652174,27.0,False,False,False
3,1544f3165814ae017488964fc29ae302bba75bb7,jun,2017,heat,hold,juneau,693.415094,689.490566,688.471698,27.0,False,False,False
4,364d90d4305e64cf8a54685a3669aca9ba7667ed,jun,2017,heat,auto,Palmer,705.428571,736.285714,710.571429,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,995e1c96a74ee2cb3509db751eb6310b63e5aa94,jun,2021,heat,hold,Anchorage,680.368490,672.600260,656.790365,35.0,False,False,False
11,bdc58f9e6d012d051afe58172dc008bebf29a356,jun,2021,heat,hold,Huffman/O'Malley,679.000000,674.333333,669.333333,40.0,False,False,False
12,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,jun,2021,heat,hold,juneau,702.277778,653.194444,653.166667,27.0,False,False,False
13,e189043abcdc363a66270079341e45b9be50fe29,jun,2021,heat,hold,Anchorage,650.250000,780.250000,649.062500,30.0,False,False,False


In [100]:
# Write the combined June table; the row index is omitted from the file.
AK_jun.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AK/AK_jun.csv", index=False, header=True)

---

## July

### 2017 July Day

In [101]:
# Load the July 2017 "day" extract for AK.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2017-jul-day-AK.csv")

In [102]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jul_2017 = jul_2017.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-07-26T18:05:00Z,heat,hold,671,741,657,AK,Homer,0,True,False,False,Gas
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-07-29T17:20:00Z,heat,hold,673,752,655,AK,Homer,0,True,False,False,Gas
2,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-07-13T17:50:00Z,heat,hold,709,702,638,AK,Homer,0,True,False,False,Gas
3,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-07-17T15:40:00Z,heat,hold,656,714,655,AK,Homer,0,True,False,False,Gas
4,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-07-09T19:00:00Z,heat,hold,676,720,668,AK,Homer,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6184,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-07-04T15:50:00Z,heat,auto,704,740,740,AK,Palmer,0,False,False,False,Gas
6185,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-07-04T15:45:00Z,heat,auto,698,740,740,AK,Palmer,0,False,False,False,Gas
6186,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-07-04T15:55:00Z,heat,auto,708,740,740,AK,Palmer,0,False,False,False,Gas
6187,364d90d4305e64cf8a54685a3669aca9ba7667ed,2017-07-04T15:50:00Z,heat,auto,704,740,740,AK,Palmer,0,False,False,False,Gas


In [103]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jul_2017 = jul_2017.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [105]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [106]:
# Save the July 2017 averages; the group keys live in the index, so the index is written too.
jul_2017_ave.to_csv(path_or_buf="data/day/AK/jul/jul_2017_ave.csv", index=True, header=True)

### 2018 July Day

In [107]:
# Load the July 2018 "day" extract for AK.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2018-jul-day-AK.csv")

In [108]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jul_2018 = jul_2018.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
168,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-07-14T18:30:00Z,heat,hold,634,650,610,AK,Anchorage,0,False,False,False,Gas
169,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-07-28T16:15:00Z,heat,hold,663,650,610,AK,Anchorage,0,False,False,False,Gas
170,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-07-29T17:10:00Z,heat,hold,667,650,610,AK,Anchorage,0,False,False,False,Gas
171,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-07-17T14:50:00Z,heat,hold,655,650,610,AK,Anchorage,0,False,False,False,Gas
172,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-07-26T14:55:00Z,heat,hold,666,650,610,AK,Anchorage,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9840,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2018-07-30T11:50:00Z,heat,auto,683,720,690,AK,juneau,27,False,False,False,Gas
9841,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2018-07-30T11:55:00Z,heat,auto,683,720,690,AK,juneau,27,False,False,False,Gas
9842,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2018-07-29T11:45:00Z,heat,auto,684,720,690,AK,juneau,27,False,False,False,Gas
9843,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2018-07-13T10:25:00Z,heat,auto,674,690,690,AK,juneau,27,False,False,False,Gas


In [109]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jul_2018 = jul_2018.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [111]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [112]:
# Save the July 2018 averages; the group keys live in the index, so the index is written too.
jul_2018_ave.to_csv(path_or_buf="data/day/AK/jul/jul_2018_ave.csv", index=True, header=True)

### 2019 July Day

In [113]:
# Load the July 2019 "day" extract for AK.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2019-jul-day-AK.csv")

In [114]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jul_2019 = jul_2019.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
66,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2019-07-28T19:25:00Z,heat,hold,678,687,676,AK,Anchorage,20,False,False,False,Gas
67,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2019-07-25T17:05:00Z,heat,hold,702,732,688,AK,Anchorage,20,False,False,False,Gas
68,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2019-07-28T18:00:00Z,heat,hold,658,689,678,AK,Anchorage,20,False,False,False,Gas
69,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2019-07-11T17:45:00Z,heat,hold,698,792,627,AK,Anchorage,20,False,False,False,Gas
70,68a1a0651c36267368651a504b1a50737727928a,2019-07-16T17:15:00Z,heat,auto,697,792,656,AK,Anchorage,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10286,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-07-29T19:05:00Z,heat,auto,684,710,680,AK,Wasilla,5,False,False,False,Gas
10287,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-07-20T15:05:00Z,heat,auto,727,710,680,AK,Wasilla,5,False,False,False,Gas
10288,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-07-31T15:00:00Z,heat,auto,675,710,680,AK,Wasilla,5,False,False,False,Gas
10289,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,2019-07-12T16:25:00Z,heat,auto,697,710,680,AK,Wasilla,5,False,False,False,Gas


In [115]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jul_2019 = jul_2019.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [117]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [118]:
# Save the July 2019 averages; the group keys live in the index, so the index is written too.
jul_2019_ave.to_csv(path_or_buf="data/day/AK/jul/jul_2019_ave.csv", index=True, header=True)

### 2020 July Day

In [119]:
# Load the July 2020 "day" extract for AK.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2020-jul-day-AK.csv")

In [120]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jul_2020 = jul_2020.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedCool'].le(600)
        | frame['TemperatureExpectedHeat'].le(600)
    )
].copy()

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
48,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-07-03T16:50:00Z,heat,hold,698,752,620,AK,Anchorage,20,False,False,False,Gas
49,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-07-06T19:10:00Z,heat,hold,704,795,630,AK,Anchorage,20,False,False,False,Gas
50,a988392d2cf43e32b9b458db27ccb5cc50f335ac,2020-07-09T18:35:00Z,heat,auto,638,650,640,AK,Anchorage,30,False,False,False,Gas
51,a988392d2cf43e32b9b458db27ccb5cc50f335ac,2020-07-09T18:45:00Z,heat,auto,636,650,640,AK,Anchorage,30,False,False,False,Gas
52,a988392d2cf43e32b9b458db27ccb5cc50f335ac,2020-07-09T18:30:00Z,heat,auto,634,650,640,AK,Anchorage,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5157,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-07-14T19:30:00Z,heat,hold,730,740,740,AK,Anchorage,35,False,False,False,Gas
5158,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-07-21T17:20:00Z,heat,hold,732,740,740,AK,Anchorage,35,False,False,False,Gas
5159,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-07-07T16:05:00Z,heat,hold,738,740,740,AK,Anchorage,35,False,False,False,Gas
5160,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2020-07-08T18:40:00Z,heat,hold,742,740,740,AK,Anchorage,35,False,False,False,Gas


In [121]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jul_2020 = jul_2020.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [123]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [124]:
# Save the July 2020 averages; the group keys live in the index, so the index is written too.
jul_2020_ave.to_csv(path_or_buf="data/day/AK/jul/jul_2020_ave.csv", index=True, header=True)

### 2021 July Day

In [125]:
# Load the July 2021 "day" extract for AK.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2021-jul-day-AK.csv")

In [126]:
# Remove predetermined outliers before aggregating: drop any row whose
# TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Comparisons with NaN are False, so rows with a missing value are retained.
jul_2021 = jul_2021.loc[
    lambda frame: ~(
        frame['TemperatureExpectedCool'].ge(850)
        | frame['TemperatureExpectedHeat'].ge(850)
        | frame['TemperatureExpectedHeat'].le(600)
        | frame['TemperatureExpectedCool'].le(600)
    )
].copy()

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-07-28T19:15:00Z,heat,hold,716,715,715,AK,Anchorage,20,False,False,False,Gas
1,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-07-28T18:00:00Z,heat,hold,710,715,715,AK,Anchorage,20,False,False,False,Gas
2,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-07-28T19:20:00Z,heat,hold,715,715,715,AK,Anchorage,20,False,False,False,Gas
3,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-07-23T17:50:00Z,heat,hold,713,715,715,AK,Anchorage,20,False,False,False,Gas
4,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2021-07-06T19:45:00Z,heat,hold,697,715,715,AK,Anchorage,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6138,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-07-28T19:05:00Z,heat,hold,683,690,690,AK,Anchorage,35,False,False,False,Gas
6139,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-07-28T17:10:00Z,heat,hold,685,690,690,AK,Anchorage,35,False,False,False,Gas
6140,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-07-28T19:10:00Z,heat,hold,682,690,690,AK,Anchorage,35,False,False,False,Gas
6141,995e1c96a74ee2cb3509db751eb6310b63e5aa94,2021-07-28T18:15:00Z,heat,hold,686,690,690,AK,Anchorage,35,False,False,False,Gas


In [127]:
# Tag every row with its source period so the yearly files stay distinguishable once combined.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Suffix the three temperature columns with "_ave"; they are averaged in the next step.
jul_2021 = jul_2021.rename(
    {
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [129]:
# Average the numeric columns per thermostat / period / mode / event / city.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises TypeError
# for the string columns still in the frame (date_time, ProvinceState,
# Auxilliary_Heat_Fuel_Type) instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [130]:
# Save the July 2021 averages; the group keys live in the index, so the index is written too.
jul_2021_ave.to_csv(path_or_buf="data/day/AK/jul/jul_2021_ave.csv", index=True, header=True)

---

### Combine Month CSV Files
1. Read in every per-year average CSV in this month's folder for the state
2. Export them as one combined CSV

In [131]:
# List the per-year average CSVs for this month. os.listdir returns names in
# arbitrary order, so sort them to keep the combined output reproducible.
files = sorted(f for f in os.listdir("data/day/AK/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read each per-year file once and concatenate in a single call; calling
# pd.concat inside the loop re-copies every previously loaded row on each pass.
jul_frames = [pd.read_csv("data/day/AK/jul/" + file) for file in files]

# No matching files -> empty frame, matching the previous result.
AK_jul = pd.concat(jul_frames) if jul_frames else pd.DataFrame()

AK_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,jul,2017,heat,hold,Homer,675.649123,673.70614,668.429825,0.0,True,False,False
1,1544f3165814ae017488964fc29ae302bba75bb7,jul,2017,heat,hold,juneau,694.796296,690.0,690.0,27.0,False,False,False
2,364d90d4305e64cf8a54685a3669aca9ba7667ed,jul,2017,heat,auto,Palmer,689.735294,705.0,704.411765,0.0,False,False,False
3,364d90d4305e64cf8a54685a3669aca9ba7667ed,jul,2017,heat,hold,Palmer,694.370968,659.693548,652.322581,0.0,False,False,False
4,bdad2c2f85c6d00c7ceea6df8246135cb8da01ae,jul,2017,heat,auto,Anchorage,693.472906,780.083744,619.871921,40.0,False,False,False
5,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,jul,2017,heat,auto,Anchorage,698.142857,700.0,700.0,0.0,False,False,False
6,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,jul,2017,heat,hold,Anchorage,739.740741,740.0,740.0,0.0,False,False,False
7,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,jul,2017,heat,auto,Wasilla,692.985644,650.0,650.0,5.0,False,False,False
8,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,jul,2017,heat,hold,juneau,687.289157,682.771084,682.771084,27.0,False,False,False
9,fda1f67e8423ed49933c45617ee96aef3bae04ba,jul,2017,heat,hold,Wasilla,679.433362,652.52831,644.569251,0.0,False,False,False


In [133]:
# Write the combined July table; the row index is omitted from the file.
AK_jul.to_csv(path_or_buf="Scraper_Output/State_Month_Day/AK/AK_jul.csv", index=False, header=True)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 "day" extract for AK.
aug_2017 = pd.read_csv(filepath_or_buffer="../data_large/AK-day/2017-aug-day-AK.csv")

In [135]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = aug_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
aug_2017.drop(aug_2017.index[is_outlier.values], inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-06T07:00:00Z,heat,hold,729,773,695,AK,Anchorage,0,False,False,False,Gas
1,31ec2a66ddeac26df7dbbeffa7f98c4e2de26aff,2017-08-04T15:40:00Z,heat,auto,693,712,692,AK,Anchorage,25,False,False,False,Gas
2,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-21T08:40:00Z,heat,auto,721,726,718,AK,Anchorage,0,False,False,False,Gas
3,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-05T19:00:00Z,heat,hold,716,721,659,AK,Anchorage,0,False,False,False,Gas
4,0404ace1bcdf88d917bd860a04cd6c66fb51679f,2017-08-30T18:05:00Z,heat,hold,671,775,651,AK,Homer,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6018,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-01T07:40:00Z,heat,hold,733,740,740,AK,Anchorage,0,False,False,False,Gas
6019,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-01T16:25:00Z,heat,hold,738,740,740,AK,Anchorage,0,False,False,False,Gas
6020,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-01T15:40:00Z,heat,hold,734,740,740,AK,Anchorage,0,False,False,False,Gas
6021,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-08-01T07:10:00Z,heat,hold,736,740,740,AK,Anchorage,0,False,False,False,Gas


In [136]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2017"), ("Month", "aug")):
    aug_2017[label] = value

In [137]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
aug_2017 = aug_2017.rename(columns=ave_labels)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
aug_2017_ave.to_csv(
    "data/day/AK/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the AK "day" CSV for August 2018 into a DataFrame
aug_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2018-aug-day-AK.csv"
)

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = aug_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
aug_2018.drop(aug_2018.index[is_outlier.values], inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-08-05T19:15:00Z,heat,hold,688,650,610,AK,Anchorage,0,False,False,False,Gas
1,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-08-04T16:00:00Z,heat,hold,682,650,610,AK,Anchorage,0,False,False,False,Gas
2,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-08-05T16:40:00Z,heat,hold,690,650,610,AK,Anchorage,0,False,False,False,Gas
3,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-08-05T16:45:00Z,heat,hold,690,650,610,AK,Anchorage,0,False,False,False,Gas
4,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,2018-08-03T14:50:00Z,heat,hold,670,650,610,AK,Anchorage,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12786,e189043abcdc363a66270079341e45b9be50fe29,2018-08-31T17:15:00Z,heat,hold,658,690,690,AK,Anchorage,30,False,False,False,Gas
12787,e189043abcdc363a66270079341e45b9be50fe29,2018-08-31T17:30:00Z,heat,hold,661,690,690,AK,Anchorage,30,False,False,False,Gas
12788,e189043abcdc363a66270079341e45b9be50fe29,2018-08-31T17:20:00Z,heat,hold,658,690,690,AK,Anchorage,30,False,False,False,Gas
12789,e189043abcdc363a66270079341e45b9be50fe29,2018-08-31T17:25:00Z,heat,hold,658,690,690,AK,Anchorage,30,False,False,False,Gas


In [142]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2018"), ("Month", "aug")):
    aug_2018[label] = value

In [143]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
aug_2018 = aug_2018.rename(columns=ave_labels)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
aug_2018_ave.to_csv(
    "data/day/AK/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the AK "day" CSV for August 2019 into a DataFrame
aug_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2019-aug-day-AK.csv"
)

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = aug_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
aug_2019.drop(aug_2019.index[is_outlier.values], inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3c569abab42071063670722a0b326b0da8738423,2019-08-20T15:25:00Z,heat,hold,707,714,698,AK,Anchorage,25,False,False,False,Gas
1,3c569abab42071063670722a0b326b0da8738423,2019-08-20T15:55:00Z,heat,hold,700,781,669,AK,Anchorage,25,False,False,False,Gas
2,68a1a0651c36267368651a504b1a50737727928a,2019-08-27T17:10:00Z,heat,auto,695,804,632,AK,Anchorage,40,False,False,False,Gas
3,68a1a0651c36267368651a504b1a50737727928a,2019-08-22T19:25:00Z,heat,auto,683,802,636,AK,Anchorage,40,False,False,False,Gas
4,68a1a0651c36267368651a504b1a50737727928a,2019-08-20T16:05:00Z,heat,auto,684,806,628,AK,Anchorage,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9874,b2926c336f48d0bdbcd5343bb68676b3a9364748,2019-08-18T17:25:00Z,heat,hold,666,690,690,AK,Healy,10,False,False,False,Gas
9875,b2926c336f48d0bdbcd5343bb68676b3a9364748,2019-08-18T17:35:00Z,heat,hold,676,690,690,AK,Healy,10,False,False,False,Gas
9876,b2926c336f48d0bdbcd5343bb68676b3a9364748,2019-08-22T15:55:00Z,heat,hold,687,690,690,AK,Healy,10,False,False,False,Gas
9877,b2926c336f48d0bdbcd5343bb68676b3a9364748,2019-08-22T14:50:00Z,heat,hold,677,690,690,AK,Healy,10,False,False,False,Gas


In [148]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2019"), ("Month", "aug")):
    aug_2019[label] = value

In [149]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
aug_2019 = aug_2019.rename(columns=ave_labels)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
aug_2019_ave.to_csv(
    "data/day/AK/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the AK "day" CSV for August 2020 into a DataFrame
aug_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2020-aug-day-AK.csv"
)

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = aug_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
aug_2020.drop(aug_2020.index[is_outlier.values], inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-08-06T19:25:00Z,heat,hold,715,715,715,AK,Anchorage,20,False,False,False,Gas
1,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-08-06T17:55:00Z,heat,hold,704,715,715,AK,Anchorage,20,False,False,False,Gas
2,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-08-11T19:05:00Z,heat,hold,681,776,644,AK,Anchorage,20,False,False,False,Gas
3,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-08-06T18:15:00Z,heat,hold,713,715,715,AK,Anchorage,20,False,False,False,Gas
4,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,2020-08-06T18:35:00Z,heat,hold,714,715,715,AK,Anchorage,20,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7777,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-08-12T18:05:00Z,heat,hold,710,690,690,AK,Juneau,50,False,False,False,Gas
7778,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-08-12T17:45:00Z,heat,hold,708,690,690,AK,Juneau,50,False,False,False,Gas
7779,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-08-12T17:40:00Z,heat,hold,706,690,690,AK,Juneau,50,False,False,False,Gas
7780,e607ef283e0f21c02dcd7df409ee3e764d229b79,2020-08-12T18:30:00Z,heat,hold,705,690,690,AK,Juneau,50,False,False,False,Gas


In [154]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2020"), ("Month", "aug")):
    aug_2020[label] = value

In [155]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
aug_2020 = aug_2020.rename(columns=ave_labels)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
aug_2020_ave.to_csv(
    "data/day/AK/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV files
1. Read in the yearly average CSV files saved in this month's folder
2. Export them as one combined CSV for the month

In [158]:
# List the CSV files in the August folder. os.listdir returns names in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/AK/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files` first, then concatenate once: calling
# pd.concat inside the loop re-copies all accumulated rows on each pass.
aug_frames = [pd.read_csv("data/day/AK/aug/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
AK_aug = pd.concat(aug_frames) if aug_frames else pd.DataFrame()

AK_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,aug,2017,heat,auto,Homer,669.857143,684.642857,682.285714,0.0,True,False,False
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,aug,2017,heat,hold,Homer,666.275641,673.346154,669.134615,0.0,True,False,False
2,1544f3165814ae017488964fc29ae302bba75bb7,aug,2017,heat,auto,juneau,690.291667,690.000000,690.000000,27.0,False,False,False
3,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,aug,2017,heat,auto,Anchorage,689.200000,688.500000,670.900000,0.0,False,False,False
4,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,aug,2017,heat,hold,Anchorage,689.015873,650.000000,630.492063,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
15,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,aug,2020,heat,auto,juneau,693.087500,720.000000,698.500000,27.0,False,False,False
16,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,aug,2020,heat,hold,juneau,682.761905,690.000000,690.000000,27.0,False,False,False
17,e189043abcdc363a66270079341e45b9be50fe29,aug,2020,heat,hold,Anchorage,611.307692,660.000000,660.000000,30.0,False,False,False
18,e607ef283e0f21c02dcd7df409ee3e764d229b79,aug,2020,heat,hold,Juneau,705.327869,707.950820,707.434426,50.0,False,False,False


In [160]:
# Save the combined August frame; index=False leaves the row index out of the CSV
AK_aug.to_csv(
    "Scraper_Output/State_Month_Day/AK/AK_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the AK "day" CSV for December 2017 into a DataFrame
dec_2017 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2017-dec-day-AK.csv"
)

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = dec_2017[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
dec_2017.drop(dec_2017.index[is_outlier.values], inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,391a6ce2e4f243b7965dfc6004d6e8fc4cc7936e,2017-12-19T16:20:00Z,heat,hold,654,691,666,AK,Palmer,25,False,False,False,Gas
1,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,2017-12-15T09:40:00Z,heat,auto,664,686,674,AK,Anchorage,0,False,False,False,Gas
2,391a6ce2e4f243b7965dfc6004d6e8fc4cc7936e,2017-12-22T07:50:00Z,heat,auto,661,650,609,AK,Palmer,25,False,False,False,Gas
3,391a6ce2e4f243b7965dfc6004d6e8fc4cc7936e,2017-12-22T07:20:00Z,heat,hold,658,677,659,AK,Palmer,25,False,False,False,Gas
4,18c482b133fba9f05f7edac61214d7cd1082ff1a,2017-12-09T17:55:00Z,heat,hold,672,695,678,AK,Anchorage,5,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11703,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-12-28T11:50:00Z,heat,hold,722,730,730,AK,juneau,27,False,False,False,Gas
11704,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-12-28T13:20:00Z,heat,hold,727,730,730,AK,juneau,27,False,False,False,Gas
11705,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-12-28T11:40:00Z,heat,hold,726,730,730,AK,juneau,27,False,False,False,Gas
11706,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,2017-12-28T12:35:00Z,heat,hold,724,730,730,AK,juneau,27,False,False,False,Gas


In [163]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2017"), ("Month", "dec")):
    dec_2017[label] = value

In [164]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
dec_2017 = dec_2017.rename(columns=ave_labels)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
dec_2017_ave.to_csv(
    "data/day/AK/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the AK "day" CSV for December 2018 into a DataFrame
dec_2018 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2018-dec-day-AK.csv"
)

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = dec_2018[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
dec_2018.drop(dec_2018.index[is_outlier.values], inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a799538555e2fb23d035a9aa0004122110571484,2018-12-26T19:05:00Z,heat,hold,688,736,677,AK,Anchorage,40,False,False,False,Gas
1,bdc58f9e6d012d051afe58172dc008bebf29a356,2018-12-15T08:05:00Z,heat,hold,696,727,727,AK,Huffman/O'Malley,40,False,False,False,Gas
2,31ec2a66ddeac26df7dbbeffa7f98c4e2de26aff,2018-12-05T07:20:00Z,heat,hold,695,740,685,AK,Anchorage,25,False,False,False,Gas
3,bdc58f9e6d012d051afe58172dc008bebf29a356,2018-12-04T15:55:00Z,heat,auto,700,700,668,AK,Huffman/O'Malley,40,False,False,False,Gas
4,bdc58f9e6d012d051afe58172dc008bebf29a356,2018-12-10T14:35:00Z,heat,auto,666,662,662,AK,Huffman/O'Malley,40,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17957,c810efa572eb24b685d4ea14eb98d7f79cac0bd3,2018-12-22T07:10:00Z,heat,hold,674,680,680,AK,North Pole,0,False,False,False,Gas
17958,c810efa572eb24b685d4ea14eb98d7f79cac0bd3,2018-12-23T07:10:00Z,heat,hold,671,680,680,AK,North Pole,0,False,False,False,Gas
17959,c810efa572eb24b685d4ea14eb98d7f79cac0bd3,2018-12-24T07:45:00Z,heat,hold,618,680,680,AK,North Pole,0,False,False,False,Gas
17960,c810efa572eb24b685d4ea14eb98d7f79cac0bd3,2018-12-22T17:30:00Z,heat,hold,676,680,680,AK,North Pole,0,False,False,False,Gas


In [169]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2018"), ("Month", "dec")):
    dec_2018[label] = value

In [170]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
dec_2018 = dec_2018.rename(columns=ave_labels)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
dec_2018_ave.to_csv(
    "data/day/AK/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the AK "day" CSV for December 2019 into a DataFrame
dec_2019 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2019-dec-day-AK.csv"
)

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = dec_2019[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
dec_2019.drop(dec_2019.index[is_outlier.values], inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,31ec2a66ddeac26df7dbbeffa7f98c4e2de26aff,2019-12-09T18:30:00Z,heat,hold,667,763,653,AK,Anchorage,25,False,False,False,Gas
1,1934269f8f3a7a065318ec9a4e7984c0d9216332,2019-12-15T19:35:00Z,heat,hold,671,672,672,AK,Wasilla,0,False,False,False,Gas
2,1934269f8f3a7a065318ec9a4e7984c0d9216332,2019-12-15T19:30:00Z,heat,hold,671,672,672,AK,Wasilla,0,False,False,False,Gas
3,1934269f8f3a7a065318ec9a4e7984c0d9216332,2019-12-15T19:45:00Z,heat,hold,671,672,672,AK,Wasilla,0,False,False,False,Gas
4,391a6ce2e4f243b7965dfc6004d6e8fc4cc7936e,2019-12-23T14:00:00Z,heat,hold,697,746,686,AK,Palmer,25,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17913,e607ef283e0f21c02dcd7df409ee3e764d229b79,2019-12-21T18:50:00Z,heat,hold,721,740,740,AK,Juneau,50,False,False,False,Gas
17914,e607ef283e0f21c02dcd7df409ee3e764d229b79,2019-12-21T16:25:00Z,heat,hold,729,740,740,AK,Juneau,50,False,False,False,Gas
17915,e607ef283e0f21c02dcd7df409ee3e764d229b79,2019-12-21T18:45:00Z,heat,hold,721,740,740,AK,Juneau,50,False,False,False,Gas
17916,e607ef283e0f21c02dcd7df409ee3e764d229b79,2019-12-22T19:45:00Z,heat,hold,746,740,740,AK,Juneau,50,False,False,False,Gas


In [175]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2019"), ("Month", "dec")):
    dec_2019[label] = value

In [176]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
dec_2019 = dec_2019.rename(columns=ave_labels)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
dec_2019_ave.to_csv(
    "data/day/AK/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the AK "day" CSV for December 2020 into a DataFrame
dec_2020 = pd.read_csv(
    filepath_or_buffer="../data_large/AK-day/2020-dec-day-AK.csv"
)

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either expected setpoint column (cool or heat) is >= 850 or <= 600.
setpoints = dec_2020[['TemperatureExpectedCool', 'TemperatureExpectedHeat']]
is_outlier = ((setpoints >= 850) | (setpoints <= 600)).any(axis=1)

# In-place removal of every flagged row label in one drop
dec_2020.drop(dec_2020.index[is_outlier.values], inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-12-18T18:40:00Z,heat,hold,617,682,682,AK,Wasilla,0,False,False,False,Gas
1,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-12-18T17:25:00Z,heat,hold,617,682,682,AK,Wasilla,0,False,False,False,Gas
2,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-12-05T18:35:00Z,heat,hold,648,679,679,AK,Wasilla,0,False,False,False,Gas
3,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-12-05T18:40:00Z,heat,hold,652,679,679,AK,Wasilla,0,False,False,False,Gas
4,1934269f8f3a7a065318ec9a4e7984c0d9216332,2020-12-18T17:50:00Z,heat,hold,633,682,682,AK,Wasilla,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10903,ea9f45a49e23bc298fc582a4ad9e006b53853255,2020-12-17T15:35:00Z,heat,hold,729,730,730,AK,Bethel,49,False,False,False,Gas
10904,ea9f45a49e23bc298fc582a4ad9e006b53853255,2020-12-17T07:10:00Z,heat,hold,741,730,730,AK,Bethel,49,False,False,False,Gas
10905,ea9f45a49e23bc298fc582a4ad9e006b53853255,2020-12-17T16:55:00Z,heat,hold,725,730,730,AK,Bethel,49,False,False,False,Gas
10906,ea9f45a49e23bc298fc582a4ad9e006b53853255,2020-12-17T15:45:00Z,heat,hold,729,730,730,AK,Bethel,49,False,False,False,Gas


In [181]:
# Tag every row with its year and month (both stored as strings)
for label, value in (("Year", "2020"), ("Month", "dec")):
    dec_2020[label] = value

In [182]:
# Suffix the three temperature columns with "_ave" so they are labelled as
# averages after the aggregation step.
ave_labels = {
    column: column + "_ave"
    for column in ("Temperature_ctrl", "TemperatureExpectedCool", "TemperatureExpectedHeat")
}
dec_2020 = dec_2020.rename(columns=ave_labels)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City. numeric_only=True is spelled out because pandas >= 2.0
# no longer drops text columns (e.g. date_time, ProvinceState) silently and
# raises TypeError instead; on older pandas this matches the previous default.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the averaged frame to disk; index=True keeps the group-by keys
# (held in the index) in the CSV.
dec_2020_ave.to_csv(
    "data/day/AK/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV files
1. Read in the yearly average CSV files saved in this month's folder
2. Export them as one combined CSV for the month

In [185]:
# List the CSV files in the December folder. os.listdir returns names in
# arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("data/day/AK/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files` first, then concatenate once: calling
# pd.concat inside the loop re-copies all accumulated rows on each pass.
dec_frames = [pd.read_csv("data/day/AK/dec/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
AK_dec = pd.concat(dec_frames) if dec_frames else pd.DataFrame()

AK_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,dec,2017,heat,auto,Homer,668.024390,662.439024,622.219512,0.0,True,False,False
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,dec,2017,heat,hold,Homer,671.319797,663.700508,655.451777,0.0,True,False,False
2,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,dec,2017,heat,auto,Anchorage,650.607843,665.882353,661.176471,20.0,False,False,False
3,0e0d17a6d14f69ec9ccaa6e561c3d01e409ee183,dec,2017,heat,hold,Anchorage,643.820755,656.962264,646.698113,20.0,False,False,False
4,18c482b133fba9f05f7edac61214d7cd1082ff1a,dec,2017,heat,auto,Anchorage,661.787879,728.787879,650.303030,5.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
18,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,dec,2020,heat,hold,Anchorage,687.800000,678.800000,671.600000,0.0,False,False,False
19,d52d6af22f2bcad8215f8eec1a710dd00e190c7b,dec,2020,heat,hold,Wasilla,645.871795,650.000000,650.000000,5.0,False,False,False
20,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,dec,2020,heat,auto,juneau,681.916667,700.000000,700.000000,27.0,False,False,False
21,e607ef283e0f21c02dcd7df409ee3e764d229b79,dec,2020,heat,hold,Juneau,692.929825,715.684211,714.149123,50.0,False,False,False


In [187]:
# Save the combined December frame; index=False leaves the row index out of the CSV
AK_dec.to_csv(
    "Scraper_Output/State_Month_Day/AK/AK_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV files
1. Read in the combined monthly CSV files saved in the state's folder
2. Export them as one combined CSV for the state

In [188]:
# List the monthly CSV files in the AK output folder. os.listdir returns names
# in arbitrary order, so sort them to make the combined row order reproducible.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/AK/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file listed in `files` first, then concatenate once: calling
# pd.concat inside the loop re-copies all accumulated rows on each pass.
month_frames = [pd.read_csv("Scraper_Output/State_Month_Day/AK/" + file) for file in files]

# pd.concat raises on an empty list, so fall back to an empty frame when no files were found
AK_all = pd.concat(month_frames) if month_frames else pd.DataFrame()

AK_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,0404ace1bcdf88d917bd860a04cd6c66fb51679f,aug,2017,heat,auto,Homer,669.857143,684.642857,682.285714,0.0,True,False,False
1,0404ace1bcdf88d917bd860a04cd6c66fb51679f,aug,2017,heat,hold,Homer,666.275641,673.346154,669.134615,0.0,True,False,False
2,1544f3165814ae017488964fc29ae302bba75bb7,aug,2017,heat,auto,juneau,690.291667,690.000000,690.000000,27.0,False,False,False
3,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,aug,2017,heat,auto,Anchorage,689.200000,688.500000,670.900000,0.0,False,False,False
4,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,aug,2017,heat,hold,Anchorage,689.015873,650.000000,630.492063,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,995e1c96a74ee2cb3509db751eb6310b63e5aa94,jun,2021,heat,hold,Anchorage,680.368490,672.600260,656.790365,35.0,False,False,False
78,bdc58f9e6d012d051afe58172dc008bebf29a356,jun,2021,heat,hold,Huffman/O'Malley,679.000000,674.333333,669.333333,40.0,False,False,False
79,df7ad0ff0b54c69d3f2cdeda3b12bff79713d572,jun,2021,heat,hold,juneau,702.277778,653.194444,653.166667,27.0,False,False,False
80,e189043abcdc363a66270079341e45b9be50fe29,jun,2021,heat,hold,Anchorage,650.250000,780.250000,649.062500,30.0,False,False,False


In [190]:
# Save the all-months AK frame; index=False leaves the row index out of the CSV
AK_all.to_csv(
    "Scraper_Output/State_Month_Day/AK_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Datacheck to make sure state was selected correctly in BQ sql queries:
# print the distinct ProvinceState values found in each monthly frame.
frames_to_check = {
    "jan_2017": jan_2017,
    "jan_2018": jan_2018,
    "jan_2019": jan_2019,
    "jan_2020": jan_2020,
    "jan_2021": jan_2021,
    "feb_2017": feb_2017,
    "feb_2018": feb_2018,
    "feb_2019": feb_2019,
    "feb_2020": feb_2020,
    "feb_2021": feb_2021,
    "jun_2017": jun_2017,
    "jun_2018": jun_2018,
    "jun_2019": jun_2019,
    "jun_2020": jun_2020,
    "jun_2021": jun_2021,
    "jul_2017": jul_2017,
    "jul_2018": jul_2018,
    "jul_2019": jul_2019,
    "jul_2020": jul_2020,
    "jul_2021": jul_2021,
    "aug_2017": aug_2017,
    "aug_2018": aug_2018,
    "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017,
    "dec_2018": dec_2018,
    "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# Dicts keep insertion order, so the lines print in the order listed above
for frame_name, frame in frames_to_check.items():
    print(f"Unique {frame_name}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['AK']
Unique jan_2018: ['AK']
Unique jan_2019: ['AK']
Unique jan_2020: ['AK']
Unique jan_2021: ['AK']
Unique feb_2017: ['AK']
Unique feb_2018: ['AK']
Unique feb_2019: ['AK']
Unique feb_2020: ['AK']
Unique feb_2021: ['AK']
Unique jun_2017: ['AK']
Unique jun_2018: ['AK']
Unique jun_2019: ['AK']
Unique jun_2020: ['AK']
Unique jun_2021: ['AK']
Unique jul_2017: ['AK']
Unique jul_2018: ['AK']
Unique jul_2019: ['AK']
Unique jul_2020: ['AK']
Unique jul_2021: ['AK']
Unique aug_2017: ['AK']
Unique aug_2018: ['AK']
Unique aug_2019: ['AK']
Unique aug_2020: ['AK']
Unique dec_2017: ['AK']
Unique dec_2018: ['AK']
Unique dec_2019: ['AK']
Unique dec_2020: ['AK']
