# DYD Thermostat Data 

## Preprocess

1. Generated CSV file from queries in BigQueary

2. Data separated into states

3. Aggregated the data in BigQuery by vintage of home (every 5 years) and by month



In [1]:
# Dependencies
import pandas as pd
import os
import numpy as np
from pathlib import Path
from datetime import datetime

---

## Combine state CSV Files 
1. Read in files in folders by year and month
2. Add year and month to each merged file

### 2017 January Day

In [2]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/jan/") if f.endswith(".csv")]

# files

In [3]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
jan_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/jan/" + file)
    jan_2017 = pd.concat([jan_2017, df])
    
jan_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,674.860140,671.552448,665.972028,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas
1,700.400000,697.600000,697.600000,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,hold,Home,False,False,False,Gas
2,689.749196,766.485531,685.254019,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas
3,700.015385,703.780000,703.647692,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
4,644.954545,710.000000,710.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,675.750000,730.000000,680.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,auto,hold,Home,True,False,True,Electric
10,715.203046,724.822335,724.822335,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,auto,Home,False,False,False,Gas
11,722.718750,775.000000,725.000000,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,auto,Home,True,False,True,Electric
12,690.000000,722.000000,672.000000,527196dd67df36085c7eb0f7137ef298e0bb60b1,US,WV,Charles Town,0,auto,auto,Home,False,False,False,Gas


In [4]:
# See all the unique names for states

# jan_2017.ProvinceState.unique()

In [5]:
# Add year and month

jan_2017["Year"] = "2017"
jan_2017["Month"] = "Jan"

jan_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,674.860140,671.552448,665.972028,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas,2017,Jan
1,700.400000,697.600000,697.600000,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,hold,Home,False,False,False,Gas,2017,Jan
2,689.749196,766.485531,685.254019,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas,2017,Jan
3,700.015385,703.780000,703.647692,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,Jan
4,644.954545,710.000000,710.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas,2017,Jan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,675.750000,730.000000,680.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,auto,hold,Home,True,False,True,Electric,2017,Jan
10,715.203046,724.822335,724.822335,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,auto,Home,False,False,False,Gas,2017,Jan
11,722.718750,775.000000,725.000000,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,auto,Home,True,False,True,Electric,2017,Jan
12,690.000000,722.000000,672.000000,527196dd67df36085c7eb0f7137ef298e0bb60b1,US,WV,Charles Town,0,auto,auto,Home,False,False,False,Gas,2017,Jan


### 2017 February Day

In [6]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/feb/") if f.endswith(".csv")]

# files

In [7]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
feb_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/feb/" + file)
    feb_2017 = pd.concat([feb_2017, df])
    
feb_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,694.970588,682.500000,680.735294,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas
1,685.013699,686.835616,686.109589,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
2,593.161905,759.380952,655.857143,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas
3,628.294118,650.000000,620.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas
4,718.041270,720.095238,719.950794,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,719.593291,727.259958,707.297694,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,auto,auto,Home,False,False,False,Gas
8,729.894097,776.666667,726.666667,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,hold,Home,True,False,True,Electric
9,705.145455,775.000000,725.000000,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,auto,Home,True,False,True,Electric
10,741.208333,738.500000,738.500000,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,hold,Home,False,False,False,Gas


In [8]:
# Add year and month

feb_2017["Year"] = "2017"
feb_2017["Month"] = "Feb"

feb_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,694.970588,682.500000,680.735294,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas,2017,Feb
1,685.013699,686.835616,686.109589,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,Feb
2,593.161905,759.380952,655.857143,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas,2017,Feb
3,628.294118,650.000000,620.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas,2017,Feb
4,718.041270,720.095238,719.950794,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,Feb
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,719.593291,727.259958,707.297694,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,auto,auto,Home,False,False,False,Gas,2017,Feb
8,729.894097,776.666667,726.666667,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,hold,Home,True,False,True,Electric,2017,Feb
9,705.145455,775.000000,725.000000,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,auto,Home,True,False,True,Electric,2017,Feb
10,741.208333,738.500000,738.500000,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,hold,Home,False,False,False,Gas,2017,Feb


### 2017 March Day

In [9]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/mar/") if f.endswith(".csv")]

# files

In [10]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
mar_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/mar/" + file)
    mar_2017 = pd.concat([mar_2017, df])
    
mar_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,683.496144,681.197943,679.760925,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
1,686.531250,690.187500,683.406250,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,auto,Home,True,False,False,Gas
2,702.750000,650.000000,646.250000,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,off,hold,Home,True,False,False,Gas
3,595.928571,733.301587,663.563492,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas
4,694.961397,697.871324,694.862132,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,729.905797,778.043478,725.434783,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,hold,Home,True,False,True,Electric
10,724.534577,728.379802,728.230516,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,heat,auto,Home,False,False,False,Gas
11,722.788136,720.338983,726.262712,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,auto,Home,False,False,False,Gas
12,723.314286,724.653061,724.653061,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,hold,Home,False,False,False,Gas


In [11]:
# Add year and month

mar_2017["Year"] = "2017"
mar_2017["Month"] = "Mar"

mar_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,683.496144,681.197943,679.760925,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,Mar
1,686.531250,690.187500,683.406250,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,auto,Home,True,False,False,Gas,2017,Mar
2,702.750000,650.000000,646.250000,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,off,hold,Home,True,False,False,Gas,2017,Mar
3,595.928571,733.301587,663.563492,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas,2017,Mar
4,694.961397,697.871324,694.862132,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,Mar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,729.905797,778.043478,725.434783,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,hold,Home,True,False,True,Electric,2017,Mar
10,724.534577,728.379802,728.230516,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,heat,auto,Home,False,False,False,Gas,2017,Mar
11,722.788136,720.338983,726.262712,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,auto,Home,False,False,False,Gas,2017,Mar
12,723.314286,724.653061,724.653061,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,hold,Home,False,False,False,Gas,2017,Mar


### 2017 April Day

In [12]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/apr/") if f.endswith(".csv")]

# files

In [13]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
apr_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/apr/" + file)
    apr_2017 = pd.concat([apr_2017, df])
    
apr_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,687.312500,664.000000,659.500000,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,auto,Home,True,False,False,Gas
1,681.337531,680.911839,678.256927,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
2,666.000000,752.000000,685.000000,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
3,717.121622,720.608108,719.695946,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas
4,634.017857,650.000000,620.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,720.231092,763.571429,713.571429,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,hold,Home,True,False,True,Electric
14,728.316964,729.084821,709.361607,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,auto,auto,Home,False,False,False,Gas
15,723.423188,726.275362,720.278261,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,auto,Home,False,False,False,Gas
0,740.763033,750.056872,735.658768,a3a3fd48e1b69634ca3859998f7684d1593abcda,US,WY,Cheyenne,0,heat,auto,Home,False,False,False,Gas


In [14]:
# Add year and month

apr_2017["Year"] = "2017"
apr_2017["Month"] = "Apr"

apr_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,687.312500,664.000000,659.500000,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,auto,Home,True,False,False,Gas,2017,Apr
1,681.337531,680.911839,678.256927,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,Apr
2,666.000000,752.000000,685.000000,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,Apr
3,717.121622,720.608108,719.695946,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas,2017,Apr
4,634.017857,650.000000,620.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas,2017,Apr
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13,720.231092,763.571429,713.571429,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,auto,hold,Home,True,False,True,Electric,2017,Apr
14,728.316964,729.084821,709.361607,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,auto,auto,Home,False,False,False,Gas,2017,Apr
15,723.423188,726.275362,720.278261,0294595998430fa9228ff8b203c54f8378f9560c,US,WV,Huntington,0,cool,auto,Home,False,False,False,Gas,2017,Apr
0,740.763033,750.056872,735.658768,a3a3fd48e1b69634ca3859998f7684d1593abcda,US,WY,Cheyenne,0,heat,auto,Home,False,False,False,Gas,2017,Apr


### 2017 May Day

In [15]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/may/") if f.endswith(".csv")]

# files

In [16]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
may_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/may/" + file)
    may_2017 = pd.concat([may_2017, df])
    
may_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,687.872619,671.776190,671.626190,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
1,673.013605,672.510204,662.193878,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
2,680.991497,670.000000,670.000000,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas
3,679.666667,680.000000,680.000000,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,hold,Home,False,False,False,Gas
4,692.717391,699.913043,696.326087,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,712.457031,716.871094,666.871094,c5b31dacb5cc3a1c181dbf7a905df96d0dfdc347,US,WV,Morgantown,0,auto,auto,Home,False,False,False,Gas
7,726.533520,741.502793,677.004190,c5b31dacb5cc3a1c181dbf7a905df96d0dfdc347,US,WV,Morgantown,0,auto,hold,Home,False,False,False,Gas
8,756.384615,740.000000,740.000000,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,cool,hold,Home,True,False,True,Electric
0,682.579139,762.199515,681.561552,9d6a98350302417c9b2f10ae3c235d52a3bf7097,US,WY,Sheridan,0,auto,hold,Home,False,False,False,Gas


In [17]:
# Add year and month

may_2017["Year"] = "2017"
may_2017["Month"] = "May"

may_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,687.872619,671.776190,671.626190,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,May
1,673.013605,672.510204,662.193878,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,May
2,680.991497,670.000000,670.000000,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas,2017,May
3,679.666667,680.000000,680.000000,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,hold,Home,False,False,False,Gas,2017,May
4,692.717391,699.913043,696.326087,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas,2017,May
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6,712.457031,716.871094,666.871094,c5b31dacb5cc3a1c181dbf7a905df96d0dfdc347,US,WV,Morgantown,0,auto,auto,Home,False,False,False,Gas,2017,May
7,726.533520,741.502793,677.004190,c5b31dacb5cc3a1c181dbf7a905df96d0dfdc347,US,WV,Morgantown,0,auto,hold,Home,False,False,False,Gas,2017,May
8,756.384615,740.000000,740.000000,3a38dd3de8441afd6215d6a91bf033c2c646b786,US,WV,Morgantown,0,cool,hold,Home,True,False,True,Electric,2017,May
0,682.579139,762.199515,681.561552,9d6a98350302417c9b2f10ae3c235d52a3bf7097,US,WY,Sheridan,0,auto,hold,Home,False,False,False,Gas,2017,May


### 2017 June Day

In [18]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/jun/") if f.endswith(".csv")]

# files

In [19]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
jun_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/jun/" + file)
    jun_2017 = pd.concat([jun_2017, df])
    
jun_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,691.066038,650.000000,650.000000,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas
1,685.781657,654.857738,654.857738,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
2,668.244094,667.244094,660.929134,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
3,705.428571,736.285714,710.571429,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas
4,695.549020,684.098039,683.823529,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,686.971429,689.714286,629.428571,f710bb7d9a4a4033f351d3fe907edc4564fe1892,US,WV,Morgantown,0,auto,auto,Home,False,False,False,Gas
5,704.833333,680.000000,630.000000,527196dd67df36085c7eb0f7137ef298e0bb60b1,US,WV,Charles Town,0,auto,hold,Home,False,False,False,Gas
6,692.975490,692.941176,630.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,auto,hold,Home,True,False,True,Electric
7,720.463436,720.000000,670.000000,23ea815e8d635bf755b4f45c8594fba04bf97e35,US,WV,Morgantown,0,auto,hold,Home,False,False,False,Gas


In [20]:
# Add year and month

jun_2017["Year"] = "2017"
jun_2017["Month"] = "Jun"

jun_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,691.066038,650.000000,650.000000,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,auto,Home,False,False,False,Gas,2017,Jun
1,685.781657,654.857738,654.857738,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,Jun
2,668.244094,667.244094,660.929134,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,Jun
3,705.428571,736.285714,710.571429,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas,2017,Jun
4,695.549020,684.098039,683.823529,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas,2017,Jun
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,686.971429,689.714286,629.428571,f710bb7d9a4a4033f351d3fe907edc4564fe1892,US,WV,Morgantown,0,auto,auto,Home,False,False,False,Gas,2017,Jun
5,704.833333,680.000000,630.000000,527196dd67df36085c7eb0f7137ef298e0bb60b1,US,WV,Charles Town,0,auto,hold,Home,False,False,False,Gas,2017,Jun
6,692.975490,692.941176,630.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,auto,hold,Home,True,False,True,Electric,2017,Jun
7,720.463436,720.000000,670.000000,23ea815e8d635bf755b4f45c8594fba04bf97e35,US,WV,Morgantown,0,auto,hold,Home,False,False,False,Gas,2017,Jun


### 2017 July Day

In [21]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/jul/") if f.endswith(".csv")]

# files

In [22]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
jul_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/jul/" + file)
    jul_2017 = pd.concat([jul_2017, df])
    
jul_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,675.649123,673.706140,668.429825,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
1,694.370968,659.693548,652.322581,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas
2,679.433362,652.528310,644.569251,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas
3,686.586957,690.652174,677.173913,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas
4,712.683215,650.000000,600.000000,c810efa572eb24b685d4ea14eb98d7f79cac0bd3,US,AK,North Pole,0,heat,hold,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,692.583841,690.759146,690.741616,362159cb260bc35da7592691d97629ce9b8889ae,US,WV,Martinsburg,0,cool,hold,Home,False,False,True,Electric
8,690.450000,690.000000,690.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,cool,hold,Home,True,False,True,Electric
9,686.192771,674.638554,690.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,cool,auto,Home,True,False,True,Electric
0,751.487603,773.363636,698.429752,a3a3fd48e1b69634ca3859998f7684d1593abcda,US,WY,Cheyenne,0,cool,auto,Home,False,False,False,Gas


In [23]:
# Add year and month

jul_2017["Year"] = "2017"
jul_2017["Month"] = "July"

jul_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,675.649123,673.706140,668.429825,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,July
1,694.370968,659.693548,652.322581,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,hold,Home,False,False,False,Gas,2017,July
2,679.433362,652.528310,644.569251,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0,heat,hold,Home,False,False,False,Gas,2017,July
3,686.586957,690.652174,677.173913,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0,heat,auto,Home,False,False,False,Gas,2017,July
4,712.683215,650.000000,600.000000,c810efa572eb24b685d4ea14eb98d7f79cac0bd3,US,AK,North Pole,0,heat,hold,Home,False,False,False,Gas,2017,July
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7,692.583841,690.759146,690.741616,362159cb260bc35da7592691d97629ce9b8889ae,US,WV,Martinsburg,0,cool,hold,Home,False,False,True,Electric,2017,July
8,690.450000,690.000000,690.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,cool,hold,Home,True,False,True,Electric,2017,July
9,686.192771,674.638554,690.000000,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,cool,auto,Home,True,False,True,Electric,2017,July
0,751.487603,773.363636,698.429752,a3a3fd48e1b69634ca3859998f7684d1593abcda,US,WY,Cheyenne,0,cool,auto,Home,False,False,False,Gas,2017,July


### 2017 August Day

In [24]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/aug/") if f.endswith(".csv")]

# files

In [25]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
aug_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/aug/" + file)
    aug_2017 = pd.concat([aug_2017, df])
    
aug_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type
0,721.333333,722.000000,719.333333,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas
1,725.642857,692.035714,668.107143,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,hold,Home,False,False,False,Gas
2,669.857143,684.642857,682.285714,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,auto,Home,True,False,False,Gas
3,666.275641,673.346154,669.134615,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas
4,689.200000,688.500000,670.900000,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,694.679325,692.274262,679.983122,ca71fafde08a023074581a58259413f3618418fb,US,WV,Morgantown,0,cool,auto,Home,False,False,True,Electric
3,690.153662,689.515127,689.515127,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,cool,hold,Home,True,False,True,Electric
4,691.965002,690.441437,670.000000,362159cb260bc35da7592691d97629ce9b8889ae,US,WV,Martinsburg,0,cool,auto,Home,False,False,True,Electric
5,721.794473,720.690846,670.000000,23ea815e8d635bf755b4f45c8594fba04bf97e35,US,WV,Morgantown,0,auto,hold,Home,False,False,False,Gas


In [26]:
# Add year and month

aug_2017["Year"] = "2017"
aug_2017["Month"] = "Aug"

aug_2017

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,721.333333,722.000000,719.333333,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas,2017,Aug
1,725.642857,692.035714,668.107143,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0,heat,hold,Home,False,False,False,Gas,2017,Aug
2,669.857143,684.642857,682.285714,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,auto,Home,True,False,False,Gas,2017,Aug
3,666.275641,673.346154,669.134615,0404ace1bcdf88d917bd860a04cd6c66fb51679f,US,AK,Homer,0,heat,hold,Home,True,False,False,Gas,2017,Aug
4,689.200000,688.500000,670.900000,2a539ed50bd05f9b142b5b5a0c9cce50f5cf0e7a,US,AK,Anchorage,0,heat,auto,Home,False,False,False,Gas,2017,Aug
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2,694.679325,692.274262,679.983122,ca71fafde08a023074581a58259413f3618418fb,US,WV,Morgantown,0,cool,auto,Home,False,False,True,Electric,2017,Aug
3,690.153662,689.515127,689.515127,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0,cool,hold,Home,True,False,True,Electric,2017,Aug
4,691.965002,690.441437,670.000000,362159cb260bc35da7592691d97629ce9b8889ae,US,WV,Martinsburg,0,cool,auto,Home,False,False,True,Electric,2017,Aug
5,721.794473,720.690846,670.000000,23ea815e8d635bf755b4f45c8594fba04bf97e35,US,WV,Morgantown,0,auto,hold,Home,False,False,False,Gas,2017,Aug


### 2017 September Day

In [27]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/sep/") if f.endswith(".csv")]

# files

In [28]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
sep_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/sep/" + file)
    sep_2017 = pd.concat([sep_2017, df])
    
sep_2017

In [29]:
# Add year and month

sep_2017["Year"] = "2017"
sep_2017["Month"] = "Sep"

sep_2017

Unnamed: 0,Year,Month


### 2017 October Day

In [30]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/oct/") if f.endswith(".csv")]

# files

In [31]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
oct_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/oct/" + file)
    oct_2017 = pd.concat([oct_2017, df])
    
oct_2017

In [32]:
# Add year and month

oct_2017["Year"] = "2017"
oct_2017["Month"] = "Oct"

oct_2017

Unnamed: 0,Year,Month


### 2017 November Day

In [33]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/nov/") if f.endswith(".csv")]

# files

In [34]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
nov_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/nov/" + file)
    nov_2017 = pd.concat([nov_2017, df])
    
nov_2017

In [35]:
# Add year and month

nov_2017["Year"] = "2017"
nov_2017["Month"] = "Nov"

nov_2017

Unnamed: 0,Year,Month


### 2017 December Day

In [36]:
# Create variable for files in directory
files = [f for f in os.listdir("data/day/2017/dec/") if f.endswith(".csv")]

# files

In [37]:
# https://stackoverflow.com/questions/63886787/how-to-create-a-dataframe-from-multiple-csv-files
# Initialize and empty dataframe
dec_2017 = pd.DataFrame()

# Iterate through files and contents, then concatenate the data into the dataframe
for file in files:
    df = pd.read_csv("data/day/2017/dec/" + file)
    dec_2017 = pd.concat([dec_2017, df])
    
dec_2017

In [38]:
# Add year and month

dec_2017["Year"] = "2017"
dec_2017["Month"] = "Dec"

dec_2017

Unnamed: 0,Year,Month


### Merge all 2017

In [39]:
All2017Day = jan_2017.append(feb_2017, ignore_index=True)
All2017Day = All2017Day.append(mar_2017, ignore_index=True)
All2017Day = All2017Day.append(apr_2017, ignore_index=True)
All2017Day = All2017Day.append(may_2017, ignore_index=True)
All2017Day = All2017Day.append(jun_2017, ignore_index=True)
All2017Day = All2017Day.append(jul_2017, ignore_index=True)
All2017Day = All2017Day.append(aug_2017, ignore_index=True)
All2017Day = All2017Day.append(sep_2017, ignore_index=True)
All2017Day = All2017Day.append(oct_2017, ignore_index=True)
All2017Day = All2017Day.append(nov_2017, ignore_index=True)
All2017Day = All2017Day.append(dec_2017, ignore_index=True)


In [40]:
All2017Day

Unnamed: 0,AvgTempCtrl,AvgCool,AvgHeat,Identifier,Country,ProvinceState,City,Age_of_Home__years_,HvacMode,CalendarEvent,Climate,allowCompWithAux,Has_Electric,Has_a_Heat_Pump,Auxilliary_Heat_Fuel_Type,Year,Month
0,674.860140,671.552448,665.972028,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0.0,heat,auto,Home,False,False,False,Gas,2017,Jan
1,700.400000,697.600000,697.600000,be4f2a63b7719c2a69aa285e8a38038c29df8fa6,US,AK,Anchorage,0.0,heat,hold,Home,False,False,False,Gas,2017,Jan
2,689.749196,766.485531,685.254019,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0.0,heat,auto,Home,False,False,False,Gas,2017,Jan
3,700.015385,703.780000,703.647692,fda1f67e8423ed49933c45617ee96aef3bae04ba,US,AK,Wasilla,0.0,heat,hold,Home,False,False,False,Gas,2017,Jan
4,644.954545,710.000000,710.000000,364d90d4305e64cf8a54685a3669aca9ba7667ed,US,AK,Palmer,0.0,heat,hold,Home,False,False,False,Gas,2017,Jan
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59286,694.679325,692.274262,679.983122,ca71fafde08a023074581a58259413f3618418fb,US,WV,Morgantown,0.0,cool,auto,Home,False,False,True,Electric,2017,Aug
59287,690.153662,689.515127,689.515127,7249632b05298378ea9944b850b084262a1c3cf3,US,WV,Maidsville,0.0,cool,hold,Home,True,False,True,Electric,2017,Aug
59288,691.965002,690.441437,670.000000,362159cb260bc35da7592691d97629ce9b8889ae,US,WV,Martinsburg,0.0,cool,auto,Home,False,False,True,Electric,2017,Aug
59289,721.794473,720.690846,670.000000,23ea815e8d635bf755b4f45c8594fba04bf97e35,US,WV,Morgantown,0.0,auto,hold,Home,False,False,False,Gas,2017,Aug


In [41]:
All2017Day.to_csv("Scraper_Output/2017-states-0.csv", header=True, index=False)