# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQuery

2. Data separated into states

3. Aggregated the data in Pandas by month

4. Combine 5 years (2017–2021)

5. Group by Identifier



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---
## January

### 2017 January Day

In [2]:
# Load the 2017 January "day" CSV for WV into a DataFrame.
jan_2017 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2017-jan-day-WV.csv")

In [3]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
jan_2017 = jan_2017.drop(
    index=jan_2017[
        (jan_2017['TemperatureExpectedCool'] >= 850)
        | (jan_2017['TemperatureExpectedHeat'] >= 850)
        | (jan_2017['TemperatureExpectedCool'] <= 600)
        | (jan_2017['TemperatureExpectedHeat'] <= 600)
    ].index
)

jan_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,5b63c7e159b95b51613a8b3c23a6c29d61629a69,2017-01-06 19:45:00 UTC,auto,auto,714,770,690,WV,Huntington,50,False,False,True,Electric
1,60b34ece75cefdc802847ade67821c7f6203de8e,2017-01-07 18:55:00 UTC,heat,auto,716,690,690,WV,Lewisburg,45,True,False,False,Gas
2,60b34ece75cefdc802847ade67821c7f6203de8e,2017-01-08 15:15:00 UTC,heat,hold,691,690,690,WV,Lewisburg,45,True,False,False,Gas
3,60b34ece75cefdc802847ade67821c7f6203de8e,2017-01-02 18:55:00 UTC,heat,hold,719,690,690,WV,Lewisburg,45,True,False,False,Gas
4,60b34ece75cefdc802847ade67821c7f6203de8e,2017-01-07 18:50:00 UTC,heat,auto,719,690,690,WV,Lewisburg,45,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
39281,b96aae920f5ac753b3e6b08a000584c81f361dc1,2017-01-05 19:35:00 UTC,auto,hold,742,790,740,WV,Inwood,15,True,False,True,Electric
39282,b96aae920f5ac753b3e6b08a000584c81f361dc1,2017-01-02 19:35:00 UTC,auto,hold,738,790,740,WV,Inwood,15,True,False,True,Electric
39283,b96aae920f5ac753b3e6b08a000584c81f361dc1,2017-01-06 18:05:00 UTC,auto,hold,737,790,740,WV,Inwood,15,True,False,True,Electric
39284,b96aae920f5ac753b3e6b08a000584c81f361dc1,2017-01-05 15:10:00 UTC,auto,hold,737,790,740,WV,Inwood,15,True,False,True,Electric


In [4]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
jan_2017 = jan_2017.assign(Year="2017", Month="Jan")

In [5]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
jan_2017 = jan_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [6]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
jan_2017_ave = jan_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

jan_2017_ave

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
Identifier,Month,Year,HvacMode,CalendarEvent,City,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
015c5b19f842d6361568129bab20f692ecb5ba98,Jan,2017,heat,auto,South Charleston,679.524510,685.000000,685.000000,10.0,True,False,True
0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,auto,auto,Huntington,723.051348,738.414634,718.566110,0.0,False,False,False
0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,auto,hold,Huntington,729.707006,746.159236,726.433121,0.0,False,False,False
0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,cool,auto,Huntington,715.203046,724.822335,724.822335,0.0,False,False,False
0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,heat,auto,Huntington,724.652985,729.307836,729.108209,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
f3b865a8ea305b2afa22e5280eed5786274340b3,Jan,2017,auto,auto,Lerona,680.205128,750.000000,680.000000,35.0,False,False,True
f3b865a8ea305b2afa22e5280eed5786274340b3,Jan,2017,auto,hold,Lerona,696.716216,750.000000,699.459459,35.0,False,False,True
ffe671086af78d223bc2967b5d8ce02eeb5dbede,Jan,2017,cool,auto,Huntington,682.321429,723.178571,616.785714,20.0,True,True,True
ffe671086af78d223bc2967b5d8ce02eeb5dbede,Jan,2017,heat,auto,Huntington,676.677419,671.645161,669.951613,20.0,True,True,True


In [7]:
# Export the 2017 January averages. The group keys live in the index, so the
# index is written to the file along with the header row.
jan_2017_ave.to_csv(
    path_or_buf="data/day/WV/jan/jan_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 January Day

In [8]:
# Load the 2018 January "day" CSV for WV into a DataFrame.
jan_2018 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2018-jan-day-WV.csv")

In [9]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
jan_2018 = jan_2018.drop(
    index=jan_2018[
        (jan_2018['TemperatureExpectedCool'] >= 850)
        | (jan_2018['TemperatureExpectedHeat'] >= 850)
        | (jan_2018['TemperatureExpectedCool'] <= 600)
        | (jan_2018['TemperatureExpectedHeat'] <= 600)
    ].index
)

jan_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,23ea815e8d635bf755b4f45c8594fba04bf97e35,2018-01-31 11:20:00 UTC,auto,auto,684,750,690,WV,Morgantown,0,False,False,False,Gas
1,23ea815e8d635bf755b4f45c8594fba04bf97e35,2018-01-28 16:00:00 UTC,auto,auto,690,750,690,WV,Morgantown,0,False,False,False,Gas
2,23ea815e8d635bf755b4f45c8594fba04bf97e35,2018-01-30 19:50:00 UTC,auto,auto,661,750,690,WV,Morgantown,0,False,False,False,Gas
3,23ea815e8d635bf755b4f45c8594fba04bf97e35,2018-01-18 18:50:00 UTC,auto,auto,685,750,690,WV,Morgantown,0,False,False,False,Gas
4,23ea815e8d635bf755b4f45c8594fba04bf97e35,2018-01-17 18:55:00 UTC,auto,auto,691,750,690,WV,Morgantown,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77538,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-01-31 17:20:00 UTC,heat,hold,708,710,710,WV,Huntington,40,False,False,False,Gas
77539,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-01-30 18:15:00 UTC,heat,hold,706,710,710,WV,Huntington,40,False,False,False,Gas
77540,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-01-29 18:35:00 UTC,heat,hold,714,710,710,WV,Huntington,40,False,False,False,Gas
77541,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-01-31 14:25:00 UTC,heat,hold,705,710,710,WV,Huntington,40,False,False,False,Gas


In [10]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
jan_2018 = jan_2018.assign(Year="2018", Month="Jan")


In [11]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
jan_2018 = jan_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [12]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
jan_2018_ave = jan_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [13]:
# Export the 2018 January averages. The group keys live in the index, so the
# index is written to the file along with the header row.
jan_2018_ave.to_csv(
    path_or_buf="data/day/WV/jan/jan_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 January Day

In [14]:
# Load the 2019 January "day" CSV for WV into a DataFrame.
jan_2019 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2019-jan-day-WV.csv")

In [15]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
jan_2019 = jan_2019.drop(
    index=jan_2019[
        (jan_2019['TemperatureExpectedCool'] >= 850)
        | (jan_2019['TemperatureExpectedHeat'] >= 850)
        | (jan_2019['TemperatureExpectedCool'] <= 600)
        | (jan_2019['TemperatureExpectedHeat'] <= 600)
    ].index
)

jan_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,0ba5fcecda158565bcbf42d734064ea47ce7cf88,2019-01-26 12:50:00 UTC,auto,hold,687,790,690,WV,Beckley,0,True,False,False,Gas
1,0ba5fcecda158565bcbf42d734064ea47ce7cf88,2019-01-27 14:55:00 UTC,auto,hold,688,790,690,WV,Beckley,0,True,False,False,Gas
2,0ba5fcecda158565bcbf42d734064ea47ce7cf88,2019-01-23 15:30:00 UTC,auto,hold,695,790,690,WV,Beckley,0,True,False,False,Gas
3,0ba5fcecda158565bcbf42d734064ea47ce7cf88,2019-01-29 19:05:00 UTC,auto,hold,684,790,690,WV,Beckley,0,True,False,False,Gas
4,0ba5fcecda158565bcbf42d734064ea47ce7cf88,2019-01-29 11:30:00 UTC,auto,hold,685,790,690,WV,Beckley,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
145739,0a7f75d6e5e9d240f790eab8780e68ffce49e266,2019-01-20 15:20:00 UTC,auto,hold,685,740,690,WV,Martinsburg,15,False,False,True,Electric
145740,9220dd5caed26e23a03d47e2c1c3840a35f30373,2019-01-06 18:35:00 UTC,heat,auto,688,690,690,WV,Morgantown,45,True,False,True,Electric
145741,9220dd5caed26e23a03d47e2c1c3840a35f30373,2019-01-06 16:05:00 UTC,heat,auto,685,690,690,WV,Morgantown,45,True,False,True,Electric
145742,c6d4ab35cb2e64792ebcbe2067181056840b499d,2019-01-09 11:20:00 UTC,heat,auto,691,740,690,WV,Weirton,45,False,False,False,Gas


In [16]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
jan_2019 = jan_2019.assign(Year="2019", Month="Jan")


In [17]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
jan_2019 = jan_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [18]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
jan_2019_ave = jan_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [19]:
# Export the 2019 January averages. The group keys live in the index, so the
# index is written to the file along with the header row.
jan_2019_ave.to_csv(
    path_or_buf="data/day/WV/jan/jan_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 January Day

In [20]:
# Load the 2020 January "day" CSV for WV into a DataFrame.
jan_2020 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2020-jan-day-WV.csv")

In [21]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
jan_2020 = jan_2020.drop(
    index=jan_2020[
        (jan_2020['TemperatureExpectedCool'] >= 850)
        | (jan_2020['TemperatureExpectedHeat'] >= 850)
        | (jan_2020['TemperatureExpectedCool'] <= 600)
        | (jan_2020['TemperatureExpectedHeat'] <= 600)
    ].index
)

jan_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
1,179e7b68e5ee4f361b7af804ec2f82a0a80e3837,2020-01-25 19:10:00 UTC,heat,auto,711,730,719,WV,Scott Depot,19,True,False,False,Gas
2,5f1e1283b79671ac8a3450f3e26b8fceda2237b0,2020-01-01 12:25:00 UTC,heat,hold,699,701,701,WV,Sutton,30,True,False,True,Electric
3,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2020-01-02 15:30:00 UTC,auxHeatOnly,auto,769,770,770,WV,Huntington,90,True,False,True,Electric
4,a171c25bbd2535b4f8ae71c49e51da2108b0915c,2020-01-31 14:40:00 UTC,auto,hold,669,723,663,WV,Charleston,0,False,False,True,Electric
5,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2020-01-22 16:00:00 UTC,heat,auto,770,810,810,WV,Huntington,90,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
169395,e5cf22c5da34b175089028c4e5ad769e7c0ebe41,2020-01-09 15:35:00 UTC,heat,auto,770,760,760,WV,Inwood,10,False,False,False,Gas
169396,e5cf22c5da34b175089028c4e5ad769e7c0ebe41,2020-01-09 14:50:00 UTC,heat,auto,780,760,760,WV,Inwood,10,False,False,False,Gas
169397,e5cf22c5da34b175089028c4e5ad769e7c0ebe41,2020-01-09 13:50:00 UTC,heat,auto,754,760,760,WV,Inwood,10,False,False,False,Gas
169398,e5cf22c5da34b175089028c4e5ad769e7c0ebe41,2020-01-09 13:40:00 UTC,heat,auto,753,760,760,WV,Inwood,10,False,False,False,Gas


In [22]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
jan_2020 = jan_2020.assign(Year="2020", Month="Jan")


In [23]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
jan_2020 = jan_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [24]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
jan_2020_ave = jan_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [25]:
# Export the 2020 January averages. The group keys live in the index, so the
# index is written to the file along with the header row.
jan_2020_ave.to_csv(
    path_or_buf="data/day/WV/jan/jan_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 January Day

In [26]:
# Load the 2021 January "day" CSV for WV into a DataFrame.
jan_2021 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2021-jan-day-WV.csv")

In [27]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
jan_2021 = jan_2021.drop(
    index=jan_2021[
        (jan_2021['TemperatureExpectedCool'] >= 850)
        | (jan_2021['TemperatureExpectedHeat'] >= 850)
        | (jan_2021['TemperatureExpectedCool'] <= 600)
        | (jan_2021['TemperatureExpectedHeat'] <= 600)
    ].index
)

jan_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-01-06 13:55:00 UTC,heat,hold,753,746,746,WV,Charles Town,15,True,False,False,Gas
1,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-01-25 15:10:00 UTC,heat,hold,740,746,746,WV,Charles Town,15,True,False,False,Gas
2,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-01-20 16:20:00 UTC,heat,hold,755,686,686,WV,Charles Town,15,True,False,False,Gas
3,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-01-30 17:00:00 UTC,heat,hold,740,746,746,WV,Charles Town,15,True,False,False,Gas
5,4d2d2b09b53a479ced061b0225d0e510345371bc,2021-01-31 18:10:00 UTC,heat,hold,667,678,678,WV,Huntington,80,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99810,06aa059ee123e439072fb8813fdc5e89c4ab8067,2021-01-24 13:15:00 UTC,heat,hold,684,690,690,WV,Morgantown,60,True,False,False,Gas
99811,06aa059ee123e439072fb8813fdc5e89c4ab8067,2021-01-24 13:45:00 UTC,heat,hold,681,690,690,WV,Morgantown,60,True,False,False,Gas
99812,06aa059ee123e439072fb8813fdc5e89c4ab8067,2021-01-24 12:20:00 UTC,heat,hold,683,690,690,WV,Morgantown,60,True,False,False,Gas
99813,06aa059ee123e439072fb8813fdc5e89c4ab8067,2021-01-24 14:50:00 UTC,heat,hold,680,690,690,WV,Morgantown,60,True,False,False,Gas


In [28]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
jan_2021 = jan_2021.assign(Year="2021", Month="Jan")


In [29]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
jan_2021 = jan_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [30]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
jan_2021_ave = jan_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [31]:
# Export the 2021 January averages. The group keys live in the index, so the
# index is written to the file along with the header row.
jan_2021_ave.to_csv(
    path_or_buf="data/day/WV/jan/jan_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [32]:
# List the CSV files in the January output folder. os.listdir returns entries
# in arbitrary order, so the names are sorted to make the order in which the
# files are combined reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WV/jan/") if f.endswith(".csv"))

In [33]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file, collect the frames, and concatenate them once at the
# end. Concatenating onto a growing DataFrame inside the loop re-copies all
# accumulated rows on each pass and depends on how pandas treats an empty
# frame in concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/WV/jan/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep an empty DataFrame when the
# folder holds no CSV files.
WV_jan = pd.concat(frames) if frames else pd.DataFrame()

WV_jan

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,Jan,2017,heat,auto,South Charleston,679.524510,685.000000,685.000000,10.0,True,False,True
1,0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,auto,auto,Huntington,723.051348,738.414634,718.566110,0.0,False,False,False
2,0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,auto,hold,Huntington,729.707006,746.159236,726.433121,0.0,False,False,False
3,0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,cool,auto,Huntington,715.203046,724.822335,724.822335,0.0,False,False,False
4,0294595998430fa9228ff8b203c54f8378f9560c,Jan,2017,heat,auto,Huntington,724.652985,729.307836,729.108209,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,f6e7e4dd6c51273fccdc55750816b0f89ee83bd7,Jan,2021,heat,hold,Wheeling,743.666667,789.083333,788.208333,20.0,False,False,False
106,f710bb7d9a4a4033f351d3fe907edc4564fe1892,Jan,2021,auto,hold,Morgantown,712.457143,766.314286,716.314286,0.0,False,False,False
107,fa5679199538cdd2ee0a15f69119a2a23a46fe31,Jan,2021,heat,hold,Mathias,696.174603,704.920635,704.920635,10.0,False,False,False
108,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,Jan,2021,heat,hold,Weirton,670.825132,676.108963,676.123023,50.0,False,False,False


In [34]:
# Export the combined January table with a header row and without the row index.
WV_jan.to_csv(
    path_or_buf="Scraper_Output/State_Month_Day/WV/WV_jan.csv",
    index=False,
    header=True,
)

---

## February

### 2017 February Day

In [35]:
# Load the 2017 February "day" CSV for WV into a DataFrame.
feb_2017 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2017-feb-day-WV.csv")

In [36]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
feb_2017 = feb_2017.drop(
    index=feb_2017[
        (feb_2017['TemperatureExpectedCool'] >= 850)
        | (feb_2017['TemperatureExpectedHeat'] >= 850)
        | (feb_2017['TemperatureExpectedCool'] <= 600)
        | (feb_2017['TemperatureExpectedHeat'] <= 600)
    ].index
)

feb_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b96aae920f5ac753b3e6b08a000584c81f361dc1,2017-02-04T18:35:00Z,auto,auto,730,785,735,WV,Inwood,15,True,False,True,Electric
1,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-06T15:25:00Z,heat,hold,724,727,727,WV,Huntington,0,False,False,False,Gas
2,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-06T15:10:00Z,heat,hold,730,727,727,WV,Huntington,0,False,False,False,Gas
3,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-01T16:30:00Z,auto,hold,710,734,714,WV,Huntington,0,False,False,False,Gas
4,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-06T16:50:00Z,heat,hold,726,727,727,WV,Huntington,0,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30701,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-07T19:00:00Z,cool,hold,745,740,740,WV,Huntington,0,False,False,False,Gas
30702,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-07T19:20:00Z,cool,hold,746,740,740,WV,Huntington,0,False,False,False,Gas
30703,0294595998430fa9228ff8b203c54f8378f9560c,2017-02-22T19:15:00Z,heat,auto,729,740,740,WV,Huntington,0,False,False,False,Gas
30704,b96aae920f5ac753b3e6b08a000584c81f361dc1,2017-02-15T19:00:00Z,auto,hold,741,790,740,WV,Inwood,15,True,False,True,Electric


In [37]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2017 = feb_2017.assign(Year="2017", Month="feb")

In [38]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
feb_2017 = feb_2017.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [39]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
feb_2017_ave = feb_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [40]:
# Export the 2017 February averages. The group keys live in the index, so the
# index is written to the file along with the header row.
feb_2017_ave.to_csv(
    path_or_buf="data/day/WV/feb/feb_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 February Day

In [41]:
# Load the 2018 February "day" CSV for WV into a DataFrame.
feb_2018 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2018-feb-day-WV.csv")

In [42]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
feb_2018 = feb_2018.drop(
    index=feb_2018[
        (feb_2018['TemperatureExpectedCool'] >= 850)
        | (feb_2018['TemperatureExpectedHeat'] >= 850)
        | (feb_2018['TemperatureExpectedCool'] <= 600)
        | (feb_2018['TemperatureExpectedHeat'] <= 600)
    ].index
)

feb_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,246860e7d957e2c4ba4215019099373118da02e8,2018-02-09 19:35:00 UTC,heat,hold,713,719,719,WV,Martinsburg,15,False,False,True,Electric
1,96fda217ff45ec5cad56a534cf78e371f1947627,2018-02-17 18:55:00 UTC,heat,auto,720,750,717,WV,Vienna,50,False,False,False,Gas
2,f94ba7ecbabe3b6442f48938bf249136c43bca9e,2018-02-04 17:45:00 UTC,heat,hold,681,780,780,WV,Martinsburg,15,True,False,True,Electric
3,f94ba7ecbabe3b6442f48938bf249136c43bca9e,2018-02-18 12:45:00 UTC,heat,hold,686,688,688,WV,Martinsburg,15,True,False,True,Electric
4,246860e7d957e2c4ba4215019099373118da02e8,2018-02-10 14:35:00 UTC,heat,hold,700,721,701,WV,Martinsburg,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74451,0f606a12afa80283c537ba9a19e7623dec24c331,2018-02-25 12:55:00 UTC,auxHeatOnly,auto,759,760,760,WV,Letart,30,True,False,True,Electric
74452,0f606a12afa80283c537ba9a19e7623dec24c331,2018-02-28 13:45:00 UTC,auxHeatOnly,auto,754,760,760,WV,Letart,30,True,False,True,Electric
74453,0f606a12afa80283c537ba9a19e7623dec24c331,2018-02-28 13:15:00 UTC,auxHeatOnly,auto,755,760,760,WV,Letart,30,True,False,True,Electric
74454,0f606a12afa80283c537ba9a19e7623dec24c331,2018-02-25 13:25:00 UTC,auxHeatOnly,auto,754,760,760,WV,Letart,30,True,False,True,Electric


In [43]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2018 = feb_2018.assign(Year="2018", Month="feb")


In [44]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
feb_2018 = feb_2018.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [45]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
feb_2018_ave = feb_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [46]:
# Export the 2018 February averages. The group keys live in the index, so the
# index is written to the file along with the header row.
feb_2018_ave.to_csv(
    path_or_buf="data/day/WV/feb/feb_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 February Day

In [47]:
# Load the 2019 February "day" CSV for WV into a DataFrame.
feb_2019 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2019-feb-day-WV.csv")

In [48]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
feb_2019 = feb_2019.drop(
    index=feb_2019[
        (feb_2019['TemperatureExpectedCool'] >= 850)
        | (feb_2019['TemperatureExpectedHeat'] >= 850)
        | (feb_2019['TemperatureExpectedCool'] <= 600)
        | (feb_2019['TemperatureExpectedHeat'] <= 600)
    ].index
)

feb_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ee7a5c06322219523adae4b97f4b87c8deecf05e,2019-02-16 16:25:00 UTC,auto,hold,739,782,722,WV,Elkins,5,False,False,False,Gas
1,4c898fa516d2e4b170860b51c41d1b0522646212,2019-02-23 17:55:00 UTC,heat,hold,698,655,655,WV,Charleston,55,True,False,False,Gas
2,db60ca4227cecfc18b9d78165cc67a2fcbe53615,2019-02-01 16:20:00 UTC,heat,hold,696,701,701,WV,Morgantown,20,True,False,False,Gas
3,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2019-02-08 15:20:00 UTC,heat,auto,738,780,780,WV,Huntington,90,True,False,True,Electric
4,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2019-02-02 19:55:00 UTC,heat,auto,771,770,770,WV,Huntington,90,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110765,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2019-02-03 18:55:00 UTC,heat,auto,756,700,760,WV,Mannington,40,False,False,False,Gas
110766,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2019-02-02 12:10:00 UTC,heat,auto,754,700,760,WV,Mannington,40,False,False,False,Gas
110767,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2019-02-02 13:00:00 UTC,heat,auto,758,700,760,WV,Mannington,40,False,False,False,Gas
110768,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2019-02-02 13:40:00 UTC,heat,auto,755,700,760,WV,Mannington,40,False,False,False,Gas


In [49]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2019 = feb_2019.assign(Year="2019", Month="feb")


In [50]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
feb_2019 = feb_2019.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [51]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
feb_2019_ave = feb_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [52]:
# Export the 2019 February averages. The group keys live in the index, so the
# index is written to the file along with the header row.
feb_2019_ave.to_csv(
    path_or_buf="data/day/WV/feb/feb_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 February Day

In [53]:
# Load the 2020 February "day" CSV for WV into a DataFrame.
feb_2020 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2020-feb-day-WV.csv")

In [54]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
feb_2020 = feb_2020.drop(
    index=feb_2020[
        (feb_2020['TemperatureExpectedCool'] >= 850)
        | (feb_2020['TemperatureExpectedHeat'] >= 850)
        | (feb_2020['TemperatureExpectedCool'] <= 600)
        | (feb_2020['TemperatureExpectedHeat'] <= 600)
    ].index
)

feb_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a171c25bbd2535b4f8ae71c49e51da2108b0915c,2020-02-23 18:50:00 UTC,auto,hold,682,723,663,WV,Charleston,0,False,False,True,Electric
1,179e7b68e5ee4f361b7af804ec2f82a0a80e3837,2020-02-13 14:00:00 UTC,heat,auto,720,730,719,WV,Scott Depot,19,True,False,False,Gas
3,2b584ae8a7a7dc1c609a72f83458b092367c2398,2020-02-10 18:30:00 UTC,heat,hold,679,655,655,WV,Morgantown,50,True,False,False,Gas
4,179e7b68e5ee4f361b7af804ec2f82a0a80e3837,2020-02-13 16:40:00 UTC,heat,auto,712,730,719,WV,Scott Depot,19,True,False,False,Gas
5,db60ca4227cecfc18b9d78165cc67a2fcbe53615,2020-02-01 18:25:00 UTC,auto,hold,688,744,694,WV,Morgantown,20,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146315,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2020-02-19 16:30:00 UTC,auxHeatOnly,auto,774,826,760,WV,Huntington,90,True,False,True,Electric
146316,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2020-02-19 19:30:00 UTC,auxHeatOnly,auto,754,826,760,WV,Huntington,90,True,False,True,Electric
146317,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2020-02-19 17:55:00 UTC,auxHeatOnly,auto,757,826,760,WV,Huntington,90,True,False,True,Electric
146318,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2020-02-19 16:55:00 UTC,auxHeatOnly,auto,754,826,760,WV,Huntington,90,True,False,True,Electric


In [55]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2020 = feb_2020.assign(Year="2020", Month="feb")


In [56]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
feb_2020 = feb_2020.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [57]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
feb_2020_ave = feb_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [58]:
# Export the 2020 February averages. The group keys live in the index, so the
# index is written to the file along with the header row.
feb_2020_ave.to_csv(
    path_or_buf="data/day/WV/feb/feb_2020_ave.csv",
    index=True,
    header=True,
)

### 2021 February Day

In [59]:
# Load the 2021 February "day" CSV for WV into a DataFrame.
feb_2021 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2021-feb-day-WV.csv")

In [60]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either TemperatureExpectedCool or TemperatureExpectedHeat is >= 850 or <= 600.
# Rows are dropped by index label, so the surviving rows keep their labels.
feb_2021 = feb_2021.drop(
    index=feb_2021[
        (feb_2021['TemperatureExpectedCool'] >= 850)
        | (feb_2021['TemperatureExpectedHeat'] >= 850)
        | (feb_2021['TemperatureExpectedCool'] <= 600)
        | (feb_2021['TemperatureExpectedHeat'] <= 600)
    ].index
)

feb_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-02-09 19:35:00 UTC,heat,hold,741,746,746,WV,Charles Town,15,True,False,False,Gas
1,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-02-06 19:00:00 UTC,heat,hold,765,746,746,WV,Charles Town,15,True,False,False,Gas
2,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-02-14 19:25:00 UTC,heat,hold,749,746,746,WV,Charles Town,15,True,False,False,Gas
3,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-02-12 11:30:00 UTC,heat,hold,745,746,746,WV,Charles Town,15,True,False,False,Gas
4,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-02-06 16:30:00 UTC,heat,hold,763,746,746,WV,Charles Town,15,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77378,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2021-02-02 15:55:00 UTC,heat,hold,736,720,740,WV,Mannington,40,False,False,False,Gas
77379,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2021-02-03 17:25:00 UTC,heat,hold,741,720,740,WV,Mannington,40,False,False,False,Gas
77380,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2021-02-05 14:15:00 UTC,heat,hold,740,720,740,WV,Mannington,40,False,False,False,Gas
77381,68e0d3728ad81530fc1339c3d4e62b92b1337a3f,2021-02-03 12:50:00 UTC,heat,hold,736,720,740,WV,Mannington,40,False,False,False,Gas


In [61]:
# Tag every row with its year and month (both stored as text) so they can be
# used as grouping keys.
# NOTE(review): the January cells label their month "Jan" (capitalised) while
# this one uses lowercase "feb" — confirm which casing downstream steps expect.
feb_2021 = feb_2021.assign(Year="2021", Month="feb")


In [62]:
# Give the three temperature columns an "_ave" suffix so the aggregated output
# is labelled as averages.
feb_2021 = feb_2021.rename(
    mapper={
        "Temperature_ctrl": "Temperature_ctrl_ave",
        "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
        "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
    },
    axis="columns",
)

In [63]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City group. numeric_only=True is spelled out because the
# frame still holds text columns (date_time, ProvinceState, ...) that cannot be
# averaged; pandas >= 2.0 raises a TypeError on them rather than dropping them.
feb_2021_ave = feb_2021.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

In [64]:
# Export the 2021 February averages. The group keys live in the index, so the
# index is written to the file along with the header row.
feb_2021_ave.to_csv(
    path_or_buf="data/day/WV/feb/feb_2021_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [65]:
# List the CSV files in the February output folder. os.listdir returns entries
# in arbitrary order, so the names are sorted to make the order in which the
# files are combined reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WV/feb/") if f.endswith(".csv"))

In [66]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every listed file, collect the frames, and concatenate them once at the
# end. Concatenating onto a growing DataFrame inside the loop re-copies all
# accumulated rows on each pass and depends on how pandas treats an empty
# frame in concat.
frames = []
for file in files:
    df = pd.read_csv("data/day/WV/feb/" + file)
    frames.append(df)

# pd.concat raises on an empty list, so keep an empty DataFrame when the
# folder holds no CSV files.
WV_feb = pd.concat(frames) if frames else pd.DataFrame()

WV_feb

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,feb,2017,heat,auto,South Charleston,682.633333,680.266667,679.900000,10.0,True,False,True
1,015c5b19f842d6361568129bab20f692ecb5ba98,feb,2017,heat,hold,South Charleston,684.245098,680.000000,680.000000,10.0,True,False,True
2,0294595998430fa9228ff8b203c54f8378f9560c,feb,2017,auto,auto,Huntington,719.593291,727.259958,707.297694,0.0,False,False,False
3,0294595998430fa9228ff8b203c54f8378f9560c,feb,2017,auto,hold,Huntington,714.891026,736.826923,717.057692,0.0,False,False,False
4,0294595998430fa9228ff8b203c54f8378f9560c,feb,2017,cool,auto,Huntington,715.694737,712.757895,702.378947,0.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
89,f3ed599782fcb2f419d0db8910fdfe68556f0e1c,feb,2021,heat,hold,Princeton,644.322334,650.791009,650.570540,70.0,True,False,True
90,f5d7fbc4dcd128066df4635577735b436edde304,feb,2021,heat,hold,Clarksburg,732.166667,740.000000,740.000000,80.0,False,False,False
91,f710bb7d9a4a4033f351d3fe907edc4564fe1892,feb,2021,auto,hold,Morgantown,712.864407,767.000000,717.000000,0.0,False,False,False
92,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,feb,2021,heat,hold,Weirton,667.111958,671.353978,671.339686,50.0,False,False,False


In [67]:
# Write the combined month file (row index omitted).
# Create the output folder first so the export also works on a fresh checkout.
out_path = Path("Scraper_Output/State_Month_Day/WV/WV_feb.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
WV_feb.to_csv(out_path, header=True, index=False)

---

## March

---

## April

---

## May

---

## June

### 2017 June Day

In [68]:
# Load the raw WV "day" readings for June 2017.
jun_2017 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2017-jun-day-WV.csv")

In [69]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jun_2017["TemperatureExpectedCool"]
heat = jun_2017["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2017.drop(index=jun_2017[out_of_range].index, inplace=True)

jun_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f710bb7d9a4a4033f351d3fe907edc4564fe1892,2017-06-04 14:50:00 UTC,auto,hold,692,695,645,WV,Morgantown,0,False,False,False,Gas
1,246860e7d957e2c4ba4215019099373118da02e8,2017-06-18 14:25:00 UTC,cool,hold,730,729,729,WV,Martinsburg,15,False,False,True,Electric
2,f710bb7d9a4a4033f351d3fe907edc4564fe1892,2017-06-10 10:35:00 UTC,auto,hold,691,695,645,WV,Morgantown,0,False,False,False,Gas
3,f710bb7d9a4a4033f351d3fe907edc4564fe1892,2017-06-10 13:00:00 UTC,auto,hold,702,695,645,WV,Morgantown,0,False,False,False,Gas
4,3b074e7845f1ba41db95040d361a0e57ef376561,2017-06-24 17:30:00 UTC,auto,hold,706,705,655,WV,Charleston,25,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47253,aa3477a4b3a775a6e355053997c041d07f4398f2,2017-06-25 10:35:00 UTC,cool,auto,709,710,760,WV,Charles Town,15,False,False,True,Electric
47254,aa3477a4b3a775a6e355053997c041d07f4398f2,2017-06-23 15:30:00 UTC,cool,hold,749,760,760,WV,Charles Town,15,False,False,True,Electric
47255,aa3477a4b3a775a6e355053997c041d07f4398f2,2017-06-29 14:15:00 UTC,cool,auto,735,730,760,WV,Charles Town,15,False,False,True,Electric
47256,aa3477a4b3a775a6e355053997c041d07f4398f2,2017-06-26 12:15:00 UTC,cool,auto,691,730,760,WV,Charles Town,15,False,False,True,Electric


In [70]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jun_2017 = jun_2017.assign(Year="2017", Month="jun")

In [71]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2017 = jun_2017.rename(columns=ave_column_names)

In [72]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jun_2017_ave = jun_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [73]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jun/jun_2017_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jun_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 June Day

In [74]:
# Load the raw WV "day" readings for June 2018.
jun_2018 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2018-jun-day-WV.csv")

In [75]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jun_2018["TemperatureExpectedCool"]
heat = jun_2018["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2018.drop(index=jun_2018[out_of_range].index, inplace=True)

jun_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,246860e7d957e2c4ba4215019099373118da02e8,2018-06-15 14:15:00 UTC,auto,hold,739,736,686,WV,Martinsburg,15,False,False,True,Electric
1,246860e7d957e2c4ba4215019099373118da02e8,2018-06-06 16:00:00 UTC,auto,hold,724,727,677,WV,Martinsburg,15,False,False,True,Electric
2,3b074e7845f1ba41db95040d361a0e57ef376561,2018-06-16 15:15:00 UTC,cool,hold,722,712,712,WV,Charleston,25,False,False,True,Electric
3,246860e7d957e2c4ba4215019099373118da02e8,2018-06-14 19:00:00 UTC,auto,hold,733,737,667,WV,Martinsburg,15,False,False,True,Electric
5,246860e7d957e2c4ba4215019099373118da02e8,2018-06-10 19:30:00 UTC,auto,hold,729,737,667,WV,Martinsburg,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
108361,e3ac2724c88ea899b680fbe05317441945a88774,2018-06-21 09:50:00 UTC,cool,hold,764,760,760,WV,Weirton,50,False,False,False,Gas
108362,e3ac2724c88ea899b680fbe05317441945a88774,2018-06-02 13:55:00 UTC,cool,hold,750,760,760,WV,Weirton,50,False,False,False,Gas
108363,e3ac2724c88ea899b680fbe05317441945a88774,2018-06-19 10:55:00 UTC,cool,hold,764,760,760,WV,Weirton,50,False,False,False,Gas
108364,e3ac2724c88ea899b680fbe05317441945a88774,2018-06-19 10:35:00 UTC,cool,hold,756,760,760,WV,Weirton,50,False,False,False,Gas


In [76]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jun_2018 = jun_2018.assign(Year="2018", Month="jun")

In [77]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2018 = jun_2018.rename(columns=ave_column_names)

In [78]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jun_2018_ave = jun_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [79]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jun/jun_2018_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jun_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 June Day

In [80]:
# Load the raw WV "day" readings for June 2019.
jun_2019 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2019-jun-day-WV.csv")

In [81]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jun_2019["TemperatureExpectedCool"]
heat = jun_2019["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2019.drop(index=jun_2019[out_of_range].index, inplace=True)

jun_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,7700ba5a26dfa2218fea7186125a44b299ed2645,2019-06-13 11:40:00 UTC,cool,hold,700,770,770,WV,Clarksburg,99,False,False,False,Gas
1,5f1e1283b79671ac8a3450f3e26b8fceda2237b0,2019-06-20 12:05:00 UTC,cool,hold,727,721,721,WV,Sutton,30,True,False,True,Electric
2,72795723da86f1de852fd9e9e511c73b95fa7eab,2019-06-23 13:30:00 UTC,auto,hold,703,795,745,WV,Harpers Ferry,0,True,False,True,Electric
3,7700ba5a26dfa2218fea7186125a44b299ed2645,2019-06-07 15:20:00 UTC,cool,hold,723,770,770,WV,Clarksburg,99,False,False,False,Gas
4,7700ba5a26dfa2218fea7186125a44b299ed2645,2019-06-17 17:05:00 UTC,cool,hold,745,770,770,WV,Clarksburg,99,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183465,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,2019-06-21 17:20:00 UTC,cool,hold,734,760,760,WV,Weirton,50,False,False,False,Gas
183466,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,2019-06-21 19:45:00 UTC,cool,hold,742,760,760,WV,Weirton,50,False,False,False,Gas
183467,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,2019-06-22 16:30:00 UTC,cool,hold,730,760,760,WV,Weirton,50,False,False,False,Gas
183468,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,2019-06-21 16:20:00 UTC,cool,hold,732,760,760,WV,Weirton,50,False,False,False,Gas


In [82]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jun_2019 = jun_2019.assign(Year="2019", Month="jun")

In [83]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2019 = jun_2019.rename(columns=ave_column_names)

In [84]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jun_2019_ave = jun_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [85]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jun/jun_2019_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jun_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 June Day

In [86]:
# Load the raw WV "day" readings for June 2020.
jun_2020 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2020-jun-day-WV.csv")

In [87]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jun_2020["TemperatureExpectedCool"]
heat = jun_2020["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2020.drop(index=jun_2020[out_of_range].index, inplace=True)

jun_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,06aa059ee123e439072fb8813fdc5e89c4ab8067,2020-06-26 17:00:00 UTC,cool,hold,693,690,690,WV,Morgantown,60,True,False,False,Gas
1,06aa059ee123e439072fb8813fdc5e89c4ab8067,2020-06-27 16:10:00 UTC,cool,hold,690,690,690,WV,Morgantown,60,True,False,False,Gas
2,0f606a12afa80283c537ba9a19e7623dec24c331,2020-06-03 15:50:00 UTC,auto,auto,741,740,690,WV,Letart,30,True,False,True,Electric
3,0f606a12afa80283c537ba9a19e7623dec24c331,2020-06-03 15:45:00 UTC,auto,auto,742,740,690,WV,Letart,30,True,False,True,Electric
4,0f606a12afa80283c537ba9a19e7623dec24c331,2020-06-03 15:05:00 UTC,auto,auto,738,740,690,WV,Letart,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165099,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2020-06-01 11:40:00 UTC,cool,hold,638,760,760,WV,Grafton,9,False,False,True,Electric
165100,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2020-06-01 11:50:00 UTC,cool,hold,638,760,760,WV,Grafton,9,False,False,True,Electric
165101,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2020-06-01 11:30:00 UTC,cool,hold,638,760,760,WV,Grafton,9,False,False,True,Electric
165102,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2020-06-01 12:40:00 UTC,cool,hold,640,760,760,WV,Grafton,9,False,False,True,Electric


In [88]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jun_2020 = jun_2020.assign(Year="2020", Month="jun")

In [89]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2020 = jun_2020.rename(columns=ave_column_names)

In [90]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jun_2020_ave = jun_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [91]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jun/jun_2020_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jun_2020_ave.to_csv(out_path, header=True, index=True)

### 2021 June Day

In [92]:
# Load the raw WV "day" readings for June 2021.
jun_2021 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2021-jun-day-WV.csv")

In [93]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jun_2021["TemperatureExpectedCool"]
heat = jun_2021["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jun_2021.drop(index=jun_2021[out_of_range].index, inplace=True)

jun_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,2b584ae8a7a7dc1c609a72f83458b092367c2398,2021-06-13 12:45:00 UTC,cool,hold,664,658,658,WV,Morgantown,50,True,False,False,Gas
1,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-06-30 17:25:00 UTC,cool,hold,813,768,746,WV,Charles Town,15,True,False,False,Gas
2,28075a6b2baf76e152caf52cfe385b486759daa0,2021-06-01 18:10:00 UTC,cool,hold,731,780,780,WV,Huntington,9,True,False,True,Electric
3,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-06-07 13:10:00 UTC,cool,hold,749,733,733,WV,Charles Town,15,True,False,False,Gas
4,ab26d4155d39b2b879c9b6316bbb37a5df3fccf7,2021-06-05 19:40:00 UTC,auto,hold,725,695,645,WV,Martinsburg,10,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98674,e3ac2724c88ea899b680fbe05317441945a88774,2021-06-21 12:30:00 UTC,cool,hold,753,760,760,WV,Weirton,50,False,False,False,Gas
98675,e3ac2724c88ea899b680fbe05317441945a88774,2021-06-21 12:20:00 UTC,cool,hold,751,760,760,WV,Weirton,50,False,False,False,Gas
98676,e3ac2724c88ea899b680fbe05317441945a88774,2021-06-25 12:10:00 UTC,cool,hold,740,760,760,WV,Weirton,50,False,False,False,Gas
98677,e3ac2724c88ea899b680fbe05317441945a88774,2021-06-21 12:05:00 UTC,cool,hold,750,760,760,WV,Weirton,50,False,False,False,Gas


In [94]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jun_2021 = jun_2021.assign(Year="2021", Month="jun")

In [95]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jun_2021 = jun_2021.rename(columns=ave_column_names)

In [96]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jun_2021_ave = jun_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [97]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jun/jun_2021_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jun_2021_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this state's folder for the month
2. Concatenate them and export a single combined CSV

In [98]:
# Collect the per-year aggregate CSV file names for this month.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined output reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/WV/jun/") if f.endswith(".csv"))

In [99]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once: growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each iteration, and
# seeding it with an empty DataFrame triggers a FutureWarning on recent pandas.
frames = [pd.read_csv("data/day/WV/jun/" + file) for file in files]

# pd.concat rejects an empty list, so keep the original "empty frame" result
# when the folder holds no CSV files.
WV_jun = pd.concat(frames) if frames else pd.DataFrame()

WV_jun

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,jun,2017,cool,auto,South Charleston,773.790297,804.883817,610.830514,10.0,True,False,True
1,015c5b19f842d6361568129bab20f692ecb5ba98,jun,2017,cool,hold,South Charleston,754.591837,732.816327,732.816327,10.0,True,False,True
2,03929acf744f37ee18e539d3a2a04775de903a79,jun,2017,cool,hold,Beckley,655.092593,662.222222,662.222222,16.0,False,False,False
3,23ea815e8d635bf755b4f45c8594fba04bf97e35,jun,2017,auto,hold,Morgantown,720.463436,720.000000,670.000000,0.0,False,False,False
4,246860e7d957e2c4ba4215019099373118da02e8,jun,2017,auto,hold,Martinsburg,728.372881,728.508475,675.796610,15.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
126,fa5679199538cdd2ee0a15f69119a2a23a46fe31,jun,2021,cool,hold,Mathias,727.700000,720.000000,720.000000,10.0,False,False,False
127,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,jun,2021,cool,hold,Weirton,730.302570,728.516355,728.516355,50.0,False,False,False
128,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,jun,2021,cool,hold,Grafton,741.885314,762.298821,762.298821,9.0,False,False,True
129,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,jun,2021,heat,hold,Grafton,725.367647,684.000000,684.000000,9.0,False,False,True


In [100]:
# Write the combined month file (row index omitted).
# Create the output folder first so the export also works on a fresh checkout.
out_path = Path("Scraper_Output/State_Month_Day/WV/WV_jun.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
WV_jun.to_csv(out_path, header=True, index=False)

---

## July

### 2017 July Day

In [101]:
# Load the raw WV "day" readings for July 2017.
jul_2017 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2017-jul-day-WV.csv")

In [102]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jul_2017["TemperatureExpectedCool"]
heat = jul_2017["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2017.drop(index=jul_2017[out_of_range].index, inplace=True)

jul_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,ffe671086af78d223bc2967b5d8ce02eeb5dbede,2017-07-22 18:55:00 UTC,cool,hold,688,698,696,WV,Huntington,20,True,True,True,Electric
1,b493616349a34d1689fea2a75832e13c15f3b538,2017-07-30 15:20:00 UTC,auto,hold,703,706,656,WV,Charleston,70,False,False,False,Gas
2,2d5a84e2350dbd03dd8468c69aaa48113ea63a63,2017-07-23 16:50:00 UTC,cool,hold,723,731,723,WV,Martinsburg,5,False,False,True,Electric
3,ffe671086af78d223bc2967b5d8ce02eeb5dbede,2017-07-22 17:05:00 UTC,cool,auto,723,712,616,WV,Huntington,20,True,True,True,Electric
4,4dce5928bbfcbe0aa3325f543471a41c701d65b2,2017-07-15 14:10:00 UTC,cool,hold,758,770,770,WV,Morgantown,50,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59804,5b63c7e159b95b51613a8b3c23a6c29d61629a69,2017-07-24 14:25:00 UTC,cool,auto,733,740,710,WV,Huntington,50,False,False,True,Electric
59805,5b63c7e159b95b51613a8b3c23a6c29d61629a69,2017-07-15 19:05:00 UTC,cool,auto,705,700,710,WV,Huntington,50,False,False,True,Electric
59806,5b63c7e159b95b51613a8b3c23a6c29d61629a69,2017-07-15 18:35:00 UTC,cool,auto,713,700,710,WV,Huntington,50,False,False,True,Electric
59807,5b63c7e159b95b51613a8b3c23a6c29d61629a69,2017-07-18 11:40:00 UTC,cool,auto,707,740,710,WV,Huntington,50,False,False,True,Electric


In [103]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jul_2017 = jul_2017.assign(Year="2017", Month="jul")

In [104]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2017 = jul_2017.rename(columns=ave_column_names)

In [105]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jul_2017_ave = jul_2017.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [106]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jul/jul_2017_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jul_2017_ave.to_csv(out_path, header=True, index=True)

### 2018 July Day

In [107]:
# Load the raw WV "day" readings for July 2018.
jul_2018 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2018-jul-day-WV.csv")

In [108]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jul_2018["TemperatureExpectedCool"]
heat = jul_2018["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2018.drop(index=jul_2018[out_of_range].index, inplace=True)

jul_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,246860e7d957e2c4ba4215019099373118da02e8,2018-07-29 08:40:00 UTC,auto,hold,726,727,677,WV,Martinsburg,15,False,False,True,Electric
3,246860e7d957e2c4ba4215019099373118da02e8,2018-07-06 15:55:00 UTC,auto,hold,726,727,677,WV,Martinsburg,15,False,False,True,Electric
5,246860e7d957e2c4ba4215019099373118da02e8,2018-07-29 10:30:00 UTC,auto,hold,719,727,677,WV,Martinsburg,15,False,False,True,Electric
11,246860e7d957e2c4ba4215019099373118da02e8,2018-07-04 13:10:00 UTC,auto,hold,738,739,689,WV,Martinsburg,15,False,False,True,Electric
14,246860e7d957e2c4ba4215019099373118da02e8,2018-07-03 17:25:00 UTC,auto,hold,748,747,687,WV,Martinsburg,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
132651,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2018-07-06 13:50:00 UTC,cool,hold,715,710,710,WV,Pleasant Valley,50,False,False,False,Gas
132652,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2018-07-06 13:25:00 UTC,cool,hold,714,710,710,WV,Pleasant Valley,50,False,False,False,Gas
132653,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2018-07-06 13:30:00 UTC,cool,hold,710,710,710,WV,Pleasant Valley,50,False,False,False,Gas
132654,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2018-07-06 11:35:00 UTC,cool,hold,710,710,710,WV,Pleasant Valley,50,False,False,False,Gas


In [109]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jul_2018 = jul_2018.assign(Year="2018", Month="jul")

In [110]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2018 = jul_2018.rename(columns=ave_column_names)

In [111]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jul_2018_ave = jul_2018.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [112]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jul/jul_2018_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jul_2018_ave.to_csv(out_path, header=True, index=True)

### 2019 July Day

In [113]:
# Load the raw WV "day" readings for July 2019.
jul_2019 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2019-jul-day-WV.csv")

In [114]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jul_2019["TemperatureExpectedCool"]
heat = jul_2019["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2019.drop(index=jul_2019[out_of_range].index, inplace=True)

jul_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b96aae920f5ac753b3e6b08a000584c81f361dc1,2019-07-01 14:20:00 UTC,auto,hold,711,742,672,WV,Inwood,15,True,False,True,Electric
1,e2efad7eb27818bcfc8e036afa7359fc87b73735,2019-07-25 16:20:00 UTC,cool,hold,665,659,659,WV,Morgantown,9,False,False,False,Gas
2,9263ae10b5a817975d8875fc3d87540b8a3b12fb,2019-07-20 15:35:00 UTC,cool,hold,756,735,735,WV,Bridgeport,35,False,False,False,Gas
3,2ed159791a2d463af1b1d453dee321477cf5819f,2019-07-15 14:05:00 UTC,cool,hold,783,780,780,WV,Waverly,0,True,False,False,Gas
4,2ed159791a2d463af1b1d453dee321477cf5819f,2019-07-11 12:25:00 UTC,cool,auto,743,740,770,WV,Waverly,0,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
197084,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2019-07-18 17:00:00 UTC,cool,auto,712,710,710,WV,Pleasant Valley,50,False,False,False,Gas
197085,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2019-07-20 15:00:00 UTC,cool,auto,727,710,710,WV,Pleasant Valley,50,False,False,False,Gas
197086,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2019-07-18 15:30:00 UTC,cool,auto,718,710,710,WV,Pleasant Valley,50,False,False,False,Gas
197087,66a8d24e5995abee37f1f10f8b2e2c06fdbcc977,2019-07-16 12:00:00 UTC,cool,auto,706,710,710,WV,Pleasant Valley,50,False,False,False,Gas


In [115]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jul_2019 = jul_2019.assign(Year="2019", Month="jul")

In [116]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2019 = jul_2019.rename(columns=ave_column_names)

In [117]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jul_2019_ave = jul_2019.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [118]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jul/jul_2019_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jul_2019_ave.to_csv(out_path, header=True, index=True)

### 2020 July Day

In [119]:
# Load the raw WV "day" readings for July 2020.
jul_2020 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2020-jul-day-WV.csv")

In [120]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jul_2020["TemperatureExpectedCool"]
heat = jul_2020["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
jul_2020.drop(index=jul_2020[out_of_range].index, inplace=True)

jul_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,b32d78606517a82a17c14dede48dcef3f731cde4,2020-07-29 19:05:00 UTC,cool,hold,777,770,770,WV,Harpers Ferry,25,True,False,True,Electric
1,0e519efc4973c8757fb15f70f1183ac111109c4c,2020-07-25 16:30:00 UTC,auto,hold,730,705,655,WV,Buckhannon,29,True,False,False,Gas
2,a3ab3b9db52ef8217d835cf6745a1e41f6c41777,2020-07-31 09:35:00 UTC,cool,auto,665,690,655,WV,Elkins,30,True,False,False,Gas
3,a171c25bbd2535b4f8ae71c49e51da2108b0915c,2020-07-30 13:05:00 UTC,auto,hold,706,703,653,WV,Charleston,0,False,False,True,Electric
5,9263ae10b5a817975d8875fc3d87540b8a3b12fb,2020-07-06 10:00:00 UTC,cool,hold,746,745,745,WV,Bridgeport,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181016,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2020-07-17 19:05:00 UTC,cool,hold,795,760,760,WV,Snowshoe,10,True,False,True,Electric
181017,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2020-07-17 18:30:00 UTC,cool,hold,797,760,760,WV,Snowshoe,10,True,False,True,Electric
181018,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2020-07-17 17:40:00 UTC,cool,hold,819,760,760,WV,Snowshoe,10,True,False,True,Electric
181019,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2020-07-17 18:45:00 UTC,cool,hold,797,760,760,WV,Snowshoe,10,True,False,True,Electric


In [121]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jul_2020 = jul_2020.assign(Year="2020", Month="jul")

In [122]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2020 = jul_2020.rename(columns=ave_column_names)

In [123]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jul_2020_ave = jul_2020.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [124]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jul/jul_2020_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jul_2020_ave.to_csv(out_path, header=True, index=True)

### 2021 July Day

In [125]:
# Load the raw WV "day" readings for July 2021.
jul_2021 = pd.read_csv(filepath_or_buffer="../data_large/WV-day/2021-jul-day-WV.csv")

In [126]:
# Remove predetermined outliers before aggregating: a row is dropped when
# either expected-temperature column is >= 850 or <= 600.
# (values look like tenths of a degree -- TODO confirm units)
cool = jul_2021["TemperatureExpectedCool"]
heat = jul_2021["TemperatureExpectedHeat"]
out_of_range = (cool >= 850) | (heat >= 850) | (heat <= 600) | (cool <= 600)
jul_2021.drop(index=jul_2021[out_of_range].index, inplace=True)

jul_2021

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-07-02 18:40:00 UTC,auto,hold,773,768,718,WV,Charles Town,15,True,False,False,Gas
1,5f1e1283b79671ac8a3450f3e26b8fceda2237b0,2021-07-15 17:45:00 UTC,auto,hold,723,721,671,WV,Sutton,30,True,False,True,Electric
2,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2021-07-13 12:50:00 UTC,cool,hold,736,703,653,WV,Charles Town,15,True,False,False,Gas
3,5f1e1283b79671ac8a3450f3e26b8fceda2237b0,2021-07-13 16:30:00 UTC,auto,hold,726,721,671,WV,Sutton,30,True,False,True,Electric
4,5f1e1283b79671ac8a3450f3e26b8fceda2237b0,2021-07-29 18:40:00 UTC,auto,hold,726,721,671,WV,Sutton,30,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92822,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2021-07-29 16:30:00 UTC,cool,hold,758,760,760,WV,Grafton,9,False,False,True,Electric
92823,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2021-07-24 16:40:00 UTC,cool,hold,747,760,760,WV,Grafton,9,False,False,True,Electric
92824,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2021-07-26 15:35:00 UTC,cool,hold,757,760,760,WV,Grafton,9,False,False,True,Electric
92825,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2021-07-26 14:15:00 UTC,cool,hold,752,760,760,WV,Grafton,9,False,False,True,Electric


In [127]:
# Label every row with its year and month (both stored as strings);
# Month and Year are grouping keys in the aggregation further down.
jul_2021 = jul_2021.assign(Year="2021", Month="jul")

In [128]:
# Give the three temperature columns an "_ave" suffix ahead of the aggregation,
# so the averaged output is already labelled as averages.
ave_column_names = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
jul_2021 = jul_2021.rename(columns=ave_column_names)

In [129]:
# Average the numeric and boolean columns for each
# Identifier / Month / Year / HvacMode / CalendarEvent / City combination.
# numeric_only=True is needed on pandas >= 2.0, where mean() raises a TypeError
# on text columns (e.g. date_time) instead of silently dropping them.
jul_2021_ave = jul_2021.groupby(
    ["Identifier", "Month", "Year", "HvacMode", "CalendarEvent", "City"]
).mean(numeric_only=True)

In [130]:
# Export the aggregated frame to CSV. The group keys live in the index, so
# index=True is what writes Identifier/Month/Year/... into the file.
# Create the target folder first so the export also works on a fresh checkout.
out_path = Path("data/day/WV/jul/jul_2021_ave.csv")
out_path.parent.mkdir(parents=True, exist_ok=True)
jul_2021_ave.to_csv(out_path, header=True, index=True)

---

### Combine month CSV Files 
1. Read in the per-year aggregate CSV files from this state's folder for the month
2. Concatenate them and export a single combined CSV

In [131]:
# Collect the per-year aggregate CSV file names for this month.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the combined output reproducible between runs and machines.
files = sorted(f for f in os.listdir("data/day/WV/jul/") if f.endswith(".csv"))

In [132]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every per-year file first and concatenate once: growing a frame with
# pd.concat inside the loop re-copies all earlier rows on each iteration, and
# seeding it with an empty DataFrame triggers a FutureWarning on recent pandas.
frames = [pd.read_csv("data/day/WV/jul/" + file) for file in files]

# pd.concat rejects an empty list, so keep the original "empty frame" result
# when the folder holds no CSV files.
WV_jul = pd.concat(frames) if frames else pd.DataFrame()

WV_jul

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,jul,2017,cool,auto,South Charleston,775.959038,782.171564,651.988721,10.0,True,False,True
1,03929acf744f37ee18e539d3a2a04775de903a79,jul,2017,cool,hold,Beckley,684.388889,690.000000,690.000000,16.0,False,False,False
2,0bba27d3be343a725816c693e1fb53486f65c7b9,jul,2017,auto,hold,Morgantown,714.148148,710.000000,650.018519,45.0,False,False,False
3,1681fb4bfda8b5dc8e0858a217b4edbfb0cb3da2,jul,2017,auto,auto,Charles Town,753.385965,800.000000,610.000000,10.0,False,False,True
4,1681fb4bfda8b5dc8e0858a217b4edbfb0cb3da2,jul,2017,auto,hold,Charles Town,733.205128,730.000000,680.000000,10.0,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,f5d7fbc4dcd128066df4635577735b436edde304,jul,2021,cool,hold,Clarksburg,731.350365,730.094891,730.094891,80.0,False,False,False
96,f710bb7d9a4a4033f351d3fe907edc4564fe1892,jul,2021,auto,hold,Morgantown,719.856655,716.221843,666.221843,0.0,False,False,False
97,fa5679199538cdd2ee0a15f69119a2a23a46fe31,jul,2021,cool,hold,Mathias,702.057377,736.344262,736.344262,10.0,False,False,False
98,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,jul,2021,cool,hold,Weirton,732.991329,731.318642,731.318642,50.0,False,False,False


In [133]:
# Save the combined July frame; index=False leaves the row index out of the file
WV_jul.to_csv(
    "Scraper_Output/State_Month_Day/WV/WV_jul.csv",
    index=False,
    header=True,
)

---

## August

### 2017 August Day

In [134]:
# Load the August 2017 day CSV for WV
aug_2017 = pd.read_csv(
    "../data_large/WV-day/2017-aug-day-WV.csv"
)

# aug_2017

In [135]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = aug_2017['TemperatureExpectedCool']
heat = aug_2017['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
aug_2017.drop(aug_2017[out_of_range].index, inplace = True)

aug_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,94e19c81e3c5f7a7c65a67be100d161ea4f7fa50,2017-08-05 16:05:00 UTC,cool,hold,734,770,770,WV,Charleston,6,True,False,True,Electric
1,94e19c81e3c5f7a7c65a67be100d161ea4f7fa50,2017-08-05 16:40:00 UTC,cool,hold,734,770,770,WV,Charleston,6,True,False,True,Electric
2,94e19c81e3c5f7a7c65a67be100d161ea4f7fa50,2017-08-05 16:45:00 UTC,cool,hold,736,770,770,WV,Charleston,6,True,False,True,Electric
3,b493616349a34d1689fea2a75832e13c15f3b538,2017-08-13 12:55:00 UTC,auto,hold,755,750,669,WV,Charleston,70,False,False,False,Gas
4,94e19c81e3c5f7a7c65a67be100d161ea4f7fa50,2017-08-05 15:15:00 UTC,cool,hold,735,770,770,WV,Charleston,6,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60172,e3ac2724c88ea899b680fbe05317441945a88774,2017-08-05 13:15:00 UTC,cool,hold,715,750,750,WV,Weirton,50,False,False,False,Gas
60173,e3ac2724c88ea899b680fbe05317441945a88774,2017-08-05 11:15:00 UTC,cool,hold,727,750,750,WV,Weirton,50,False,False,False,Gas
60174,e3ac2724c88ea899b680fbe05317441945a88774,2017-08-04 09:05:00 UTC,cool,hold,748,750,750,WV,Weirton,50,False,False,False,Gas
60175,e3ac2724c88ea899b680fbe05317441945a88774,2017-08-17 09:15:00 UTC,cool,auto,747,750,750,WV,Weirton,50,False,False,False,Gas


In [136]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2017"), ("Month", "aug")):
    aug_2017[tag_column] = tag_value

In [137]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2017 = aug_2017.rename(columns=ave_labels)

In [138]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
aug_2017_ave = aug_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2017_ave

In [139]:
# Write the August 2017 averages to disk; index=True writes the group-by keys as columns
aug_2017_ave.to_csv(
    "data/day/WV/aug/aug_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 August Day

In [140]:
# Load the August 2018 day CSV for WV
aug_2018 = pd.read_csv(
    "../data_large/WV-day/2018-aug-day-WV.csv"
)

# aug_2018

In [141]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = aug_2018['TemperatureExpectedCool']
heat = aug_2018['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
aug_2018.drop(aug_2018[out_of_range].index, inplace = True)

aug_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,246860e7d957e2c4ba4215019099373118da02e8,2018-08-01 15:55:00 UTC,auto,hold,743,739,689,WV,Martinsburg,15,False,False,True,Electric
1,246860e7d957e2c4ba4215019099373118da02e8,2018-08-08 15:15:00 UTC,auto,hold,741,739,689,WV,Martinsburg,15,False,False,True,Electric
2,9263ae10b5a817975d8875fc3d87540b8a3b12fb,2018-08-03 13:20:00 UTC,cool,hold,735,780,780,WV,Bridgeport,35,False,False,False,Gas
3,246860e7d957e2c4ba4215019099373118da02e8,2018-08-08 16:35:00 UTC,auto,hold,734,739,689,WV,Martinsburg,15,False,False,True,Electric
4,9263ae10b5a817975d8875fc3d87540b8a3b12fb,2018-08-03 19:50:00 UTC,cool,hold,747,780,780,WV,Bridgeport,35,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143246,63556f5cb7de21663d24daa27209c6919282954e,2018-08-22 13:35:00 UTC,cool,auto,713,710,710,WV,Martinsburg,5,True,False,True,Electric
143247,63556f5cb7de21663d24daa27209c6919282954e,2018-08-02 16:00:00 UTC,cool,auto,708,710,710,WV,Martinsburg,5,True,False,True,Electric
143248,63556f5cb7de21663d24daa27209c6919282954e,2018-08-22 13:50:00 UTC,cool,auto,710,710,710,WV,Martinsburg,5,True,False,True,Electric
143249,63556f5cb7de21663d24daa27209c6919282954e,2018-08-02 13:55:00 UTC,cool,auto,698,710,710,WV,Martinsburg,5,True,False,True,Electric


In [142]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2018"), ("Month", "aug")):
    aug_2018[tag_column] = tag_value

In [143]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2018 = aug_2018.rename(columns=ave_labels)

In [144]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
aug_2018_ave = aug_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2018_ave

In [145]:
# Write the August 2018 averages to disk; index=True writes the group-by keys as columns
aug_2018_ave.to_csv(
    "data/day/WV/aug/aug_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 August Day

In [146]:
# Load the August 2019 day CSV for WV
aug_2019 = pd.read_csv(
    "../data_large/WV-day/2019-aug-day-WV.csv"
)

# aug_2019

In [147]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = aug_2019['TemperatureExpectedCool']
heat = aug_2019['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
aug_2019.drop(aug_2019[out_of_range].index, inplace = True)

aug_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,f4fb6e08d6ada03b266cf5956cc240fe75fa6aa8,2019-08-18 16:00:00 UTC,auto,hold,726,722,672,WV,Scott Depot,0,True,False,True,Electric
1,f4fb6e08d6ada03b266cf5956cc240fe75fa6aa8,2019-08-17 18:45:00 UTC,auto,hold,725,722,672,WV,Scott Depot,0,True,False,True,Electric
2,9263ae10b5a817975d8875fc3d87540b8a3b12fb,2019-08-19 18:00:00 UTC,cool,hold,755,735,735,WV,Bridgeport,35,False,False,False,Gas
3,50d744a2a3e3290d451f1af80688b8e295ac16d4,2019-08-07 17:50:00 UTC,cool,hold,758,755,755,WV,Parkersburg,49,False,False,False,Gas
4,1fd7def9414b33a56954346cad6ad53d4e72558b,2019-08-13 12:55:00 UTC,cool,hold,728,713,707,WV,Charleston,25,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
178040,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2019-08-13 13:05:00 UTC,cool,hold,760,760,760,WV,Grafton,9,False,False,True,Electric
178041,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2019-08-11 13:10:00 UTC,cool,hold,713,760,760,WV,Grafton,9,False,False,True,Electric
178042,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2019-08-11 13:35:00 UTC,cool,hold,714,760,760,WV,Grafton,9,False,False,True,Electric
178043,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,2019-08-12 17:10:00 UTC,cool,hold,742,760,760,WV,Grafton,9,False,False,True,Electric


In [148]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2019"), ("Month", "aug")):
    aug_2019[tag_column] = tag_value

In [149]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2019 = aug_2019.rename(columns=ave_labels)

In [150]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
aug_2019_ave = aug_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2019_ave

In [151]:
# Write the August 2019 averages to disk; index=True writes the group-by keys as columns
aug_2019_ave.to_csv(
    "data/day/WV/aug/aug_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 August Day

In [152]:
# Load the August 2020 day CSV for WV
aug_2020 = pd.read_csv(
    "../data_large/WV-day/2020-aug-day-WV.csv"
)

# aug_2020

In [153]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = aug_2020['TemperatureExpectedCool']
heat = aug_2020['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
aug_2020.drop(aug_2020[out_of_range].index, inplace = True)

aug_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,9821a3beb603b06956ca7a8b27c9b53c5883e15a,2020-08-30 19:15:00 UTC,cool,hold,755,752,752,WV,St. Albans,35,False,False,True,Electric
1,a171c25bbd2535b4f8ae71c49e51da2108b0915c,2020-08-08 17:05:00 UTC,auto,hold,706,703,653,WV,Charleston,0,False,False,True,Electric
2,a3ab3b9db52ef8217d835cf6745a1e41f6c41777,2020-08-04 07:15:00 UTC,cool,hold,658,655,655,WV,Elkins,30,True,False,False,Gas
3,a171c25bbd2535b4f8ae71c49e51da2108b0915c,2020-08-09 13:55:00 UTC,auto,hold,696,693,643,WV,Charleston,0,False,False,True,Electric
4,a3ab3b9db52ef8217d835cf6745a1e41f6c41777,2020-08-04 10:20:00 UTC,cool,hold,656,655,655,WV,Elkins,30,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172176,e3ac2724c88ea899b680fbe05317441945a88774,2020-08-01 15:35:00 UTC,cool,hold,759,760,760,WV,Weirton,50,False,False,False,Gas
172177,e3ac2724c88ea899b680fbe05317441945a88774,2020-08-02 19:00:00 UTC,cool,hold,759,760,760,WV,Weirton,50,False,False,False,Gas
172178,e3ac2724c88ea899b680fbe05317441945a88774,2020-08-12 09:45:00 UTC,cool,hold,758,760,760,WV,Weirton,50,False,False,False,Gas
172179,e3ac2724c88ea899b680fbe05317441945a88774,2020-08-10 11:40:00 UTC,cool,hold,758,760,760,WV,Weirton,50,False,False,False,Gas


In [154]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2020"), ("Month", "aug")):
    aug_2020[tag_column] = tag_value

In [155]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
aug_2020 = aug_2020.rename(columns=ave_labels)

In [156]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
aug_2020_ave = aug_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# aug_2020_ave

In [157]:
# Write the August 2020 averages to disk; index=True writes the group-by keys as columns
aug_2020_ave.to_csv(
    "data/day/WV/aug/aug_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [158]:
# List the per-year CSV exports for August.  os.listdir() returns entries in
# arbitrary order, so sort the names to keep the combined file's row order
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WV/aug/") if f.endswith(".csv"))

# files

In [159]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; growing a DataFrame
# with pd.concat inside a loop re-copies all accumulated rows on each pass.
frames = [pd.read_csv("data/day/WV/aug/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
WV_aug = pd.concat(frames) if frames else pd.DataFrame()

WV_aug

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,aug,2017,cool,auto,South Charleston,759.700877,778.666667,620.000000,10.0,True,False,True
1,03929acf744f37ee18e539d3a2a04775de903a79,aug,2017,cool,auto,Beckley,681.958333,680.000000,640.000000,16.0,False,False,False
2,03929acf744f37ee18e539d3a2a04775de903a79,aug,2017,cool,hold,Beckley,684.310345,700.017241,699.965517,16.0,False,False,False
3,0bba27d3be343a725816c693e1fb53486f65c7b9,aug,2017,auto,auto,Morgantown,712.468750,710.000000,660.000000,45.0,False,False,False
4,0bba27d3be343a725816c693e1fb53486f65c7b9,aug,2017,auto,hold,Morgantown,710.577039,710.051360,660.021148,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,fa5679199538cdd2ee0a15f69119a2a23a46fe31,aug,2020,cool,hold,Mathias,721.765957,729.255319,729.255319,10.0,False,False,False
204,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,aug,2020,cool,auto,Weirton,731.090258,729.656160,729.965616,50.0,False,False,False
205,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,aug,2020,cool,hold,Weirton,731.458333,730.000000,730.000000,50.0,False,False,False
206,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,aug,2020,cool,auto,Grafton,697.233083,696.178303,696.178303,9.0,False,False,True


In [160]:
# Save the combined August frame; index=False leaves the row index out of the file
WV_aug.to_csv(
    "Scraper_Output/State_Month_Day/WV/WV_aug.csv",
    index=False,
    header=True,
)

---

## September

---

## October

---

## November

---

## December

### 2017 December Day

In [161]:
# Load the December 2017 day CSV for WV
dec_2017 = pd.read_csv(
    "../data_large/WV-day/2017-dec-day-WV.csv"
)

# dec_2017

In [162]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = dec_2017['TemperatureExpectedCool']
heat = dec_2017['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
dec_2017.drop(dec_2017[out_of_range].index, inplace = True)

dec_2017

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3a639704b31532e83117b9c9f4c7c3fd0b48e20d,2017-12-28 14:50:00 UTC,heat,hold,670,735,735,WV,Charleston,30,False,False,False,Gas
1,ee7a5c06322219523adae4b97f4b87c8deecf05e,2017-12-27 15:45:00 UTC,auto,auto,700,760,703,WV,Elkins,5,False,False,False,Gas
2,246860e7d957e2c4ba4215019099373118da02e8,2017-12-19 17:05:00 UTC,cool,hold,725,729,729,WV,Martinsburg,15,False,False,True,Electric
3,e9ed8e4d3dfed58bcfc5c134ef335dc1a39bbbc7,2017-12-27 11:45:00 UTC,heat,auto,683,696,688,WV,South Charleston,10,False,False,False,Gas
4,3a639704b31532e83117b9c9f4c7c3fd0b48e20d,2017-12-27 10:15:00 UTC,heat,hold,698,735,735,WV,Charleston,30,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79375,3eddb198e585c599f80a2cbef3e9135760567c15,2017-12-16 11:45:00 UTC,heat,auto,749,750,750,WV,Bunker Hill,10,False,False,True,Electric
79376,3eddb198e585c599f80a2cbef3e9135760567c15,2017-12-15 12:35:00 UTC,heat,auto,747,750,750,WV,Bunker Hill,10,False,False,True,Electric
79377,3eddb198e585c599f80a2cbef3e9135760567c15,2017-12-16 12:30:00 UTC,heat,auto,748,750,750,WV,Bunker Hill,10,False,False,True,Electric
79378,3eddb198e585c599f80a2cbef3e9135760567c15,2017-12-14 19:05:00 UTC,heat,auto,739,750,750,WV,Bunker Hill,10,False,False,True,Electric


In [163]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2017"), ("Month", "dec")):
    dec_2017[tag_column] = tag_value

In [164]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2017 = dec_2017.rename(columns=ave_labels)

In [165]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
dec_2017_ave = dec_2017.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2017_ave

In [166]:
# Write the December 2017 averages to disk; index=True writes the group-by keys as columns
dec_2017_ave.to_csv(
    "data/day/WV/dec/dec_2017_ave.csv",
    index=True,
    header=True,
)

### 2018 December Day

In [167]:
# Load the December 2018 day CSV for WV
dec_2018 = pd.read_csv(
    "../data_large/WV-day/2018-dec-day-WV.csv"
)

# dec_2018

In [168]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = dec_2018['TemperatureExpectedCool']
heat = dec_2018['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
dec_2018.drop(dec_2018[out_of_range].index, inplace = True)

dec_2018

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,246860e7d957e2c4ba4215019099373118da02e8,2018-12-09 11:50:00 UTC,auto,hold,697,747,697,WV,Martinsburg,15,False,False,True,Electric
1,246860e7d957e2c4ba4215019099373118da02e8,2018-12-23 14:10:00 UTC,auto,hold,715,759,709,WV,Martinsburg,15,False,False,True,Electric
2,246860e7d957e2c4ba4215019099373118da02e8,2018-12-24 16:20:00 UTC,auto,hold,696,769,689,WV,Martinsburg,15,False,False,True,Electric
3,c6d4ab35cb2e64792ebcbe2067181056840b499d,2018-12-01 19:05:00 UTC,heat,hold,696,732,684,WV,Weirton,45,False,False,False,Gas
4,246860e7d957e2c4ba4215019099373118da02e8,2018-12-26 15:25:00 UTC,auto,hold,700,747,697,WV,Martinsburg,15,False,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
149233,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-12-01 14:45:00 UTC,heat,hold,715,710,710,WV,Huntington,40,False,False,False,Gas
149234,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-12-07 17:50:00 UTC,heat,hold,708,710,710,WV,Huntington,40,False,False,False,Gas
149235,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-12-21 12:35:00 UTC,heat,hold,707,710,710,WV,Huntington,40,False,False,False,Gas
149236,6b32cedc085398935ffbf4b5490abbbf581d1419,2018-12-12 13:30:00 UTC,heat,hold,706,710,710,WV,Huntington,40,False,False,False,Gas


In [169]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2018"), ("Month", "dec")):
    dec_2018[tag_column] = tag_value

In [170]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2018 = dec_2018.rename(columns=ave_labels)

In [171]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
dec_2018_ave = dec_2018.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2018_ave

In [172]:
# Write the December 2018 averages to disk; index=True writes the group-by keys as columns
dec_2018_ave.to_csv(
    "data/day/WV/dec/dec_2018_ave.csv",
    index=True,
    header=True,
)

### 2019 December Day

In [173]:
# Load the December 2019 day CSV for WV
dec_2019 = pd.read_csv(
    "../data_large/WV-day/2019-dec-day-WV.csv"
)

# dec_2019

In [174]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = dec_2019['TemperatureExpectedCool']
heat = dec_2019['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
dec_2019.drop(dec_2019[out_of_range].index, inplace = True)

dec_2019

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,a171c25bbd2535b4f8ae71c49e51da2108b0915c,2019-12-03 15:35:00 UTC,auto,hold,674,733,683,WV,Charleston,0,False,False,True,Electric
1,36a6eea1de28b712fe8c429a3f35722814041d48,2019-12-21 19:00:00 UTC,heat,hold,684,693,687,WV,Beckley,16,False,False,False,Gas
3,6e2181547d2ec897347a39aa3ea81bc364ffd86e,2019-12-27 14:10:00 UTC,heat,hold,736,735,735,WV,Martinsburg,9,True,False,True,Electric
4,179e7b68e5ee4f361b7af804ec2f82a0a80e3837,2019-12-21 13:10:00 UTC,heat,auto,718,730,719,WV,Scott Depot,19,True,False,False,Gas
5,ebe4d1dc76665deeab680bc515f1cca9649b3c27,2019-12-26 15:50:00 UTC,heat,auto,753,770,770,WV,Huntington,90,True,False,True,Electric
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
182247,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2019-12-27 15:35:00 UTC,cool,hold,746,760,760,WV,Snowshoe,10,True,False,True,Electric
182248,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2019-12-27 19:50:00 UTC,cool,hold,742,760,760,WV,Snowshoe,10,True,False,True,Electric
182249,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2019-12-27 19:30:00 UTC,cool,hold,742,760,760,WV,Snowshoe,10,True,False,True,Electric
182250,ebef48ea1dc3b5809b8b872e507ca1e1eb83f8a7,2019-12-27 11:05:00 UTC,cool,hold,739,760,760,WV,Snowshoe,10,True,False,True,Electric


In [175]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2019"), ("Month", "dec")):
    dec_2019[tag_column] = tag_value

In [176]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2019 = dec_2019.rename(columns=ave_labels)

In [177]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
dec_2019_ave = dec_2019.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2019_ave

In [178]:
# Write the December 2019 averages to disk; index=True writes the group-by keys as columns
dec_2019_ave.to_csv(
    "data/day/WV/dec/dec_2019_ave.csv",
    index=True,
    header=True,
)

### 2020 December Day

In [179]:
# Load the December 2020 day CSV for WV
dec_2020 = pd.read_csv(
    "../data_large/WV-day/2020-dec-day-WV.csv"
)

# dec_2020

In [180]:
# Remove predetermined outliers before aggregating: a row is discarded when
# either setpoint (TemperatureExpectedCool or TemperatureExpectedHeat) is
# >= 850 or <= 600.
cool = dec_2020['TemperatureExpectedCool']
heat = dec_2020['TemperatureExpectedHeat']
out_of_range = (cool >= 850) | (heat >= 850) | (cool <= 600) | (heat <= 600)
dec_2020.drop(dec_2020[out_of_range].index, inplace = True)

dec_2020

Unnamed: 0,Identifier,date_time,HvacMode,CalendarEvent,Temperature_ctrl,TemperatureExpectedCool,TemperatureExpectedHeat,ProvinceState,City,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2020-12-28 11:45:00 UTC,heat,hold,731,686,686,WV,Charles Town,15,True,False,False,Gas
1,3ab0f299b591b7f461a1c30ded0e5181f288f52d,2020-12-09 19:25:00 UTC,heat,hold,712,696,696,WV,Charles Town,15,True,False,False,Gas
2,4d2d2b09b53a479ced061b0225d0e510345371bc,2020-12-23 12:35:00 UTC,heat,hold,667,668,668,WV,Huntington,80,False,False,False,Gas
3,2b584ae8a7a7dc1c609a72f83458b092367c2398,2020-12-21 16:10:00 UTC,heat,hold,701,697,697,WV,Morgantown,50,True,False,False,Gas
4,2b584ae8a7a7dc1c609a72f83458b092367c2398,2020-12-03 18:35:00 UTC,heat,hold,693,697,697,WV,Morgantown,50,True,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146992,8d07d3fa29ef90a0243d6dd6cc3781acfe7f8797,2020-12-26 17:40:00 UTC,heat,hold,701,700,700,WV,Huntington,80,False,False,False,Gas
146993,8d07d3fa29ef90a0243d6dd6cc3781acfe7f8797,2020-12-20 15:55:00 UTC,heat,hold,689,700,700,WV,Huntington,80,False,False,False,Gas
146994,8d07d3fa29ef90a0243d6dd6cc3781acfe7f8797,2020-12-25 19:05:00 UTC,heat,hold,698,700,700,WV,Huntington,80,False,False,False,Gas
146995,8d07d3fa29ef90a0243d6dd6cc3781acfe7f8797,2020-12-16 11:55:00 UTC,heat,auto,696,700,700,WV,Huntington,80,False,False,False,Gas


In [181]:
# Tag every row with its source year and month (both stored as strings)
for tag_column, tag_value in (("Year", "2020"), ("Month", "dec")):
    dec_2020[tag_column] = tag_value

In [182]:
# Label the three temperature columns as averages ahead of the group-by mean
ave_labels = {
    "Temperature_ctrl": "Temperature_ctrl_ave",
    "TemperatureExpectedCool": "TemperatureExpectedCool_ave",
    "TemperatureExpectedHeat": "TemperatureExpectedHeat_ave",
}
dec_2020 = dec_2020.rename(columns=ave_labels)

In [183]:
# Average the numeric columns per Identifier / Month / Year / HvacMode /
# CalendarEvent / City.  numeric_only=True is explicit because the frame still
# carries text columns (date_time, ProvinceState, Auxilliary_Heat_Fuel_Type);
# pandas >= 2.0 raises TypeError on them instead of silently skipping them.
dec_2020_ave = dec_2020.groupby(
    ['Identifier', 'Month', 'Year', 'HvacMode', 'CalendarEvent', 'City']
).mean(numeric_only=True)

# dec_2020_ave

In [184]:
# Write the December 2020 averages to disk; index=True writes the group-by keys as columns
dec_2020_ave.to_csv(
    "data/day/WV/dec/dec_2020_ave.csv",
    index=True,
    header=True,
)

---

### Combine month CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [185]:
# List the per-year CSV exports for December.  os.listdir() returns entries in
# arbitrary order, so sort the names to keep the combined file's row order
# reproducible from run to run.
files = sorted(f for f in os.listdir("data/day/WV/dec/") if f.endswith(".csv"))

# files

In [186]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; growing a DataFrame
# with pd.concat inside a loop re-copies all accumulated rows on each pass.
frames = [pd.read_csv("data/day/WV/dec/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
WV_dec = pd.concat(frames) if frames else pd.DataFrame()

WV_dec

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,dec,2017,heat,auto,South Charleston,637.836187,651.702453,632.767237,10.0,True,False,True
1,01878b6476cf734f4d41081dfd2a989223288eb0,dec,2017,auto,hold,milton,659.925926,704.408163,651.182918,80.0,False,False,False
2,01878b6476cf734f4d41081dfd2a989223288eb0,dec,2017,heat,hold,milton,669.926667,671.533333,671.533333,80.0,False,False,False
3,03929acf744f37ee18e539d3a2a04775de903a79,dec,2017,auto,auto,Beckley,645.800000,695.000000,645.000000,16.0,False,False,False
4,03929acf744f37ee18e539d3a2a04775de903a79,dec,2017,heat,auto,Beckley,639.000000,707.000000,657.000000,16.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
212,fa5679199538cdd2ee0a15f69119a2a23a46fe31,dec,2020,heat,hold,Mathias,693.323770,706.434426,706.434426,10.0,False,False,False
213,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,dec,2020,heat,auto,Weirton,661.813192,667.178248,667.178248,50.0,False,False,False
214,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,dec,2020,heat,hold,Weirton,666.358033,671.682133,671.565097,50.0,False,False,False
215,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,dec,2020,heat,auto,Grafton,656.305882,660.047059,660.047059,9.0,False,False,True


In [187]:
# Save the combined December frame; index=False leaves the row index out of the file
WV_dec.to_csv(
    "Scraper_Output/State_Month_Day/WV/WV_dec.csv",
    index=False,
    header=True,
)

----

----

---

### Combine state CSV Files 
1. Read in files in folders for each state
2. Export as combined CSV

In [188]:
# List the combined per-month CSV files for WV.  os.listdir() returns entries
# in arbitrary order, so sort the names to keep the state-level file's row
# order reproducible from run to run.
files = sorted(f for f in os.listdir("Scraper_Output/State_Month_Day/WV/") if f.endswith(".csv"))

# files

In [189]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Read every file once and concatenate in a single call; growing a DataFrame
# with pd.concat inside a loop re-copies all accumulated rows on each pass.
frames = [pd.read_csv("Scraper_Output/State_Month_Day/WV/" + file) for file in files]

# pd.concat raises ValueError on an empty list, so fall back to an empty frame
# when the folder holds no CSV files.
WV_all = pd.concat(frames) if frames else pd.DataFrame()

WV_all

Unnamed: 0,Identifier,Month,Year,HvacMode,CalendarEvent,City,Temperature_ctrl_ave,TemperatureExpectedCool_ave,TemperatureExpectedHeat_ave,Age_of_Home__years_,allowCompWithAux,Has_Electric,Has_a_Heat_Pump
0,015c5b19f842d6361568129bab20f692ecb5ba98,aug,2017,cool,auto,South Charleston,759.700877,778.666667,620.000000,10.0,True,False,True
1,03929acf744f37ee18e539d3a2a04775de903a79,aug,2017,cool,auto,Beckley,681.958333,680.000000,640.000000,16.0,False,False,False
2,03929acf744f37ee18e539d3a2a04775de903a79,aug,2017,cool,hold,Beckley,684.310345,700.017241,699.965517,16.0,False,False,False
3,0bba27d3be343a725816c693e1fb53486f65c7b9,aug,2017,auto,auto,Morgantown,712.468750,710.000000,660.000000,45.0,False,False,False
4,0bba27d3be343a725816c693e1fb53486f65c7b9,aug,2017,auto,hold,Morgantown,710.577039,710.051360,660.021148,45.0,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
847,fa5679199538cdd2ee0a15f69119a2a23a46fe31,jun,2021,cool,hold,Mathias,727.700000,720.000000,720.000000,10.0,False,False,False
848,fd8ed83e097742d35fbb3379c61cf75319d0dcd3,jun,2021,cool,hold,Weirton,730.302570,728.516355,728.516355,50.0,False,False,False
849,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,jun,2021,cool,hold,Grafton,741.885314,762.298821,762.298821,9.0,False,False,True
850,ff8a4bb610de573c93eeaa00f357ba5d15d31d72,jun,2021,heat,hold,Grafton,725.367647,684.000000,684.000000,9.0,False,False,True


In [190]:
# Save the state-level frame; index=False leaves the row index out of the file
WV_all.to_csv(
    "Scraper_Output/State_Month_Day/WV_all_day.csv",
    index=False,
    header=True,
)

In [191]:
# Data check to make sure the state was selected correctly in the BQ SQL queries:
# print the distinct ProvinceState values found in each monthly frame.

monthly_frames = {
    "jan_2017": jan_2017, "jan_2018": jan_2018, "jan_2019": jan_2019,
    "jan_2020": jan_2020, "jan_2021": jan_2021,
    "feb_2017": feb_2017, "feb_2018": feb_2018, "feb_2019": feb_2019,
    "feb_2020": feb_2020, "feb_2021": feb_2021,
    "jun_2017": jun_2017, "jun_2018": jun_2018, "jun_2019": jun_2019,
    "jun_2020": jun_2020, "jun_2021": jun_2021,
    "jul_2017": jul_2017, "jul_2018": jul_2018, "jul_2019": jul_2019,
    "jul_2020": jul_2020, "jul_2021": jul_2021,
    "aug_2017": aug_2017, "aug_2018": aug_2018, "aug_2019": aug_2019,
    "aug_2020": aug_2020,
    "dec_2017": dec_2017, "dec_2018": dec_2018, "dec_2019": dec_2019,
    "dec_2020": dec_2020,
}

# Dicts keep insertion order, so the lines print in the order listed above
for label, frame in monthly_frames.items():
    print(f"Unique {label}: {frame['ProvinceState'].unique()}")

Unique jan_2017: ['WV']
Unique jan_2018: ['WV']
Unique jan_2019: ['WV']
Unique jan_2020: ['WV']
Unique jan_2021: ['WV']
Unique feb_2017: ['WV']
Unique feb_2018: ['WV']
Unique feb_2019: ['WV']
Unique feb_2020: ['WV']
Unique feb_2021: ['WV']
Unique jun_2017: ['WV']
Unique jun_2018: ['WV']
Unique jun_2019: ['WV']
Unique jun_2020: ['WV']
Unique jun_2021: ['WV']
Unique jul_2017: ['WV']
Unique jul_2018: ['WV']
Unique jul_2019: ['WV']
Unique jul_2020: ['WV']
Unique jul_2021: ['WV']
Unique aug_2017: ['WV']
Unique aug_2018: ['WV']
Unique aug_2019: ['WV']
Unique aug_2020: ['WV']
Unique dec_2017: ['WV']
Unique dec_2018: ['WV']
Unique dec_2019: ['WV']
Unique dec_2020: ['WV']
