# Energy Consumption

In [None]:
import requests
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import re

In [None]:
!mkdir data

!curl https://www.eia.gov/state/seds/sep_update/use_all_phy_update.csv -o ./data/consumption_phy.csv
!curl https://www.eia.gov/state/seds/sep_use/total/csv/use_all_btu.csv -o ./data/consumption_btu.csv

## Importing EIA data in physical units

The following imports eia.gov energy consumption data in physical units. This contains data for the following indicators (as well as their units):

* **CLRCP** = Coal consumed by the residential sector - thousand short tons
* **DFRCP** = Distillate fuel oil consumed by the residential sector - thousand barrels
* **ESRCP** = Electricity consumed by (i.e., sold to) the residential sector - million kilowatthours
* **KSRCP** = Kerosene consumed by the residential sector - thousand barrels
* **LGRCP** = LPG consumed by the residential sector - thousand barrels
* **NGRCP** = Natural gas consumed by (delivered to) the residential sector (including supplemental gaseous fuels) - million cubic feet
* **PARCP** = All petroleum products consumed by the residential sector - thousand barrels
* **WDRCP** = Wood consumed by the residential sector - thousand cords

The following indicators are not necessarily residential, but may be interesting to look at.

* **HYTCP** = Hydroelectricity, total net generation - million kilowatthours

In [None]:
# Imports EIA energy consumption data (physical units) and deletes unnecessary columns and rows.
consumption_phy = pd.read_csv('./data/consumption_phy.csv')

# Drop the status flag and every year before 1970.
consumption_phy = consumption_phy.drop(
    columns=['Data_Status'] + [str(year) for year in range(1960, 1970)]
)

# Drop the rows that do not belong to one of the 50 states.
# This used to be done by row position (910:1040, then 5590:5723 after re-indexing).
# With 130 rows per state in a file ordered by state code, those positions are the
# 'DC' and 'US' blocks. Selecting by label keeps the cell correct when the downloaded
# file gains or loses rows.
# NOTE(review): confirm 'DC' and 'US' are the only non-state codes in the current file.
consumption_phy = consumption_phy[~consumption_phy['State'].isin(['DC', 'US'])]

# Melts the data so that the years are in a single column.
consumption_phy = pd.melt(frame=consumption_phy, id_vars=['State', 'MSN'], var_name='Year', value_name='Reading')

In [None]:
# Preview the first rows of the melted physical-units table.
consumption_phy.head()

## Importing EIA data in BTU

The following imports eia.gov energy consumption data in BTU. This contains data for the following indicators:

* **GERCB** = Geothermal energy consumed by the residential sector.
* **LORCB** = The residential sector's share of electrical system energy losses.
* **SFRCB** = Supplemental gaseous fuels consumed by the residential sector.
* **SORCB** = Solar energy consumed by the residential sector.
* **TERCB** = Total energy consumed by the residential sector.
* **TERPB** = Total energy consumption per capita in the residential sector.
* **TNRCB** = Total energy consumed by the residential sector excluding the sector's share of electrical system energy losses.


The following indicators are not necessarily residential, but may be interesting to look at.

* **FFTCB** = Fossil fuels, total consumption.
* **HYTXB** = Hydropower, total end-use consumption.
* **NUETB** = Nuclear energy consumed for electricity generation, total.
* **RETCB** = Renewable energy total consumption.
* **TETCB** = Total energy consumption.
* **WYEGB** = Wind energy consumed for electricity generation by the electric power sector.
* **WYTXB** = Wind energy, total end-use consumption.

In [None]:
# Imports EIA energy consumption data and deletes unnecessary columns and rows.
consumption_btu = pd.read_csv('./data/consumption_btu.csv')
consumption_btu = consumption_btu.drop(['Data_Status', '1960', '1961', '1962', '1963', '1964', '1965', '1966', '1967', '1968', '1969'], axis=1)
consumption_btu = consumption_btu.drop(consumption_btu.index[1337:1528])
consumption_btu = consumption_btu.reset_index()
consumption_btu = consumption_btu.drop(consumption_btu.index[8213:8407])
consumption_btu = consumption_btu.drop('index', axis=1)

# Melts the data so that the years are in a single column.
consumption_btu = pd.melt(frame=consumption_btu, id_vars=['State', 'MSN'], var_name='Year', value_name='Reading')

In [None]:
# Preview the first rows of the melted BTU table.
consumption_btu.head()

## Importing the state populations data

The following loads the state population data, which was imported and cleaned in another notebook.

In [None]:
# state_dict maps full state names (as they appear in us_state_pop) to the two-letter
# state abbreviations. Every entry must read 'Full Name': 'XX'; a reversed entry
# leaves that state's name unabbreviated.
state_dict = {
    'Alaska': 'AK', 'Alabama': 'AL', 'Arkansas': 'AR', 'Arizona': 'AZ', 'California': 'CA', 'Colorado': 'CO',
    'Connecticut': 'CT', 'Delaware': 'DE', 'Florida': 'FL', 'Georgia': 'GA', 'Hawaii': 'HI', 'Iowa': 'IA', 'Idaho': 'ID',
    'Illinois': 'IL', 'Indiana': 'IN', 'Kansas': 'KS', 'Kentucky': 'KY', 'Louisiana': 'LA', 'Massachusetts': 'MA',
    'Maryland': 'MD', 'Maine': 'ME', 'Michigan': 'MI', 'Minnesota': 'MN', 'Missouri': 'MO', 'Mississippi': 'MS',
    'Montana': 'MT', 'North Carolina': 'NC', 'North Dakota': 'ND', 'Nebraska': 'NE', 'New Hampshire': 'NH',
    'New Jersey': 'NJ', 'New Mexico': 'NM', 'Nevada': 'NV', 'New York': 'NY', 'Ohio': 'OH', 'Oklahoma': 'OK',
    'Oregon': 'OR', 'Pennsylvania': 'PA', 'Rhode Island': 'RI', 'South Carolina': 'SC', 'South Dakota': 'SD',
    'Tennessee': 'TN', 'Texas': 'TX', 'Utah': 'UT', 'Virginia': 'VA', 'Vermont': 'VT', 'Washington': 'WA',
    'Wisconsin': 'WI', 'West Virginia': 'WV', 'Wyoming': 'WY',
}

# Load the cleaned population table, swap state names for their abbreviations via
# state_dict, and order the rows by state and then year. The final reset_index()
# turns State and Year back into ordinary columns with a fresh 0..n-1 row index.
us_state_pop = (
    pd.read_csv('./data/US State Populations (1970-2016).csv')
    .drop(columns='Unnamed: 0')
    .replace(to_replace=state_dict)
    .set_index(['State', 'Year'])
    .sort_index()
    .reset_index()
)

In [None]:
# Preview the first rows of the population table after sorting by state and year.
us_state_pop.head()

## Calculating per capita consumption

The following calculates per capita consumption for each indicator by dividing the reading for a given state in a given year by that state's population for that year.

In [None]:
def per_capita_phy(MSN):
    """Return per capita consumption for one physical-units indicator.

    Selects the rows of ``consumption_phy`` whose ``MSN`` column equals ``MSN``,
    orders them by state and year, attaches the ``Population`` column of
    ``us_state_pop`` and divides ``Reading`` by it.

    Parameters
    ----------
    MSN : str
        Indicator code to look up, e.g. ``'CLRCP'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(State, Year)`` with the columns ``Population``, ``MSN``,
        ``Reading`` and ``Per capita consumption``.

    Raises
    ------
    KeyError
        If ``MSN`` does not occur in ``consumption_phy``.
    ValueError
        If the selected rows and ``us_state_pop`` have different lengths, in
        which case the populations cannot be lined up with the readings.
    """
    # A boolean mask always yields a DataFrame, even for a single matching row.
    selected = consumption_phy[consumption_phy['MSN'] == MSN]
    if selected.empty:
        raise KeyError(MSN)

    # Order by state, then year, and fall back to a plain 0..n-1 row index.
    df = selected.set_index(['State', 'Year']).sort_index().reset_index()

    # Population is attached by row label, which lines row i here up with row i of
    # us_state_pop. That only pairs the right state/year when both tables hold the
    # same rows in the same order, so refuse to continue when the row counts differ
    # rather than silently dividing by another state's or year's population.
    if len(df) != len(us_state_pop):
        raise ValueError(
            f"Cannot align populations for {MSN!r}: {len(df)} consumption rows "
            f"but {len(us_state_pop)} population rows."
        )

    df['Population'] = us_state_pop['Population']
    df['Per capita consumption'] = df['Reading'] / df['Population']

    df = df.set_index(['State', 'Year'])
    return df[['Population', 'MSN', 'Reading', 'Per capita consumption']]

# Per capita tables for each physical-units indicator. Each dataframe is named
# after its MSN code.

# CLRCP = coal consumed by the residential sector - thousand short tons
CLRCP = per_capita_phy('CLRCP')

# DFRCP = distillate fuel oil consumed by the residential sector - thousand barrels
DFRCP = per_capita_phy('DFRCP')

# ESRCP = electricity consumed by (i.e. sold to) the residential sector - million kilowatthours
ESRCP = per_capita_phy('ESRCP')

# KSRCP = kerosene consumed by the residential sector - thousand barrels
# (previously this re-assigned ESRCP, so no KSRCP dataframe was ever created)
KSRCP = per_capita_phy('KSRCP')

# LGRCP = LPG consumed by the residential sector - thousand barrels
LGRCP = per_capita_phy('LGRCP')

# NGRCP = natural gas consumed by the residential sector (including supplemental gaseous fuel) - million cubic feet
NGRCP = per_capita_phy('NGRCP')

# PARCP = all petroleum products consumed by the residential sector - thousand barrels
PARCP = per_capita_phy('PARCP')

# WDRCP = wood consumed by the residential sector - thousand cords
WDRCP = per_capita_phy('WDRCP')

# Look at the following only if necessary.

# HYTCP = Hydroelectricity, total net generation - million kilowatthours
HYTCP = per_capita_phy('HYTCP')

You can look up data for each source of energy by loading up the appropriate dataframe (the name is the same as the MSN code).

In [None]:
# Example: preview the first rows of the hydroelectricity (HYTCP) per capita table.
HYTCP.head()

In [None]:
def per_capita_btu(MSN):
    """Return per capita consumption for one BTU indicator.

    Selects the rows of ``consumption_btu`` whose ``MSN`` column equals ``MSN``,
    orders them by state and year, attaches the ``Population`` column of
    ``us_state_pop`` and divides ``Reading`` by it.

    Parameters
    ----------
    MSN : str
        Indicator code to look up, e.g. ``'TERCB'``.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(State, Year)`` with the columns ``Population``, ``MSN``,
        ``Reading`` and ``Per capita consumption``.

    Raises
    ------
    KeyError
        If ``MSN`` does not occur in ``consumption_btu``.
    ValueError
        If the selected rows and ``us_state_pop`` have different lengths, in
        which case the populations cannot be lined up with the readings.
    """
    # A boolean mask always yields a DataFrame, even for a single matching row.
    selected = consumption_btu[consumption_btu['MSN'] == MSN]
    if selected.empty:
        raise KeyError(MSN)

    # Order by state, then year, and fall back to a plain 0..n-1 row index.
    df = selected.set_index(['State', 'Year']).sort_index().reset_index()

    # Population is attached by row label, which lines row i here up with row i of
    # us_state_pop. That only pairs the right state/year when both tables hold the
    # same rows in the same order, so refuse to continue when the row counts differ
    # rather than silently dividing by another state's or year's population.
    if len(df) != len(us_state_pop):
        raise ValueError(
            f"Cannot align populations for {MSN!r}: {len(df)} consumption rows "
            f"but {len(us_state_pop)} population rows."
        )

    df['Population'] = us_state_pop['Population']
    df['Per capita consumption'] = df['Reading'] / df['Population']

    df = df.set_index(['State', 'Year'])
    return df[['Population', 'MSN', 'Reading', 'Per capita consumption']]

# Per capita tables for each BTU indicator, built with per_capita_btu. Each
# dataframe is named after its MSN code. The assignments are independent and run
# top to bottom.

# GERCB = Geothermal energy consumed by the residential sector.

GERCB = per_capita_btu('GERCB')

# LORCB = The residential sector's share of electrical system energy losses.
LORCB = per_capita_btu('LORCB')

# SFRCB = Supplemental gaseous fuels consumed by the residential sector.
SFRCB = per_capita_btu('SFRCB') 

# SORCB = Solar energy consumed by the residential sector.
SORCB = per_capita_btu('SORCB') 

# TERCB = Total energy consumed by the residential sector.
TERCB = per_capita_btu('TERCB') 

# TERPB = Total energy consumption per capita in the residential sector.
# NOTE(review): this indicator is described as already per capita, yet
# per_capita_btu divides 'Reading' by 'Population' again. The resulting
# 'Per capita consumption' column is therefore normalized twice; 'Reading' holds
# the value as loaded. Confirm this is intended.
TERPB = per_capita_btu('TERPB') 

# TNRCB = Total energy consumed by the residential sector excluding the sector's share of
# electrical system energy losses.
TNRCB = per_capita_btu('TNRCB') 

# Look at the following only if necessary (not restricted to the residential sector).

# FFTCB = Fossil fuels, total consumption.
FFTCB = per_capita_btu('FFTCB') 

# HYTXB = Hydropower, total end-use consumption.
HYTXB = per_capita_btu('HYTXB') 

# NUETB = Nuclear energy consumed for electricity generation, total.
NUETB = per_capita_btu('NUETB') 

# RETCB = Renewable energy total consumption.
RETCB = per_capita_btu('RETCB') 

# TETCB = Total energy consumption.
TETCB = per_capita_btu('TETCB') 

# WYEGB = Wind energy consumed for electricity generation by the electric power sector.
WYEGB = per_capita_btu('WYEGB') 

# WYTXB = Wind energy, total end-use consumption.
WYTXB = per_capita_btu('WYTXB') 

You can look up data for each source of energy by loading up the appropriate dataframe (the name is the same as the MSN code).

In [None]:
# Example: preview the first rows of the wind end-use (WYTXB) per capita table.
WYTXB.head()