# US Energy Analysis

## Import libraries

In [1]:
# Import dependencies
import requests
import json
from pprint import pprint
import pandas as pd

from config import api_key

import os

In [5]:
# Load the state lookup table (abbreviation, latitude, longitude, full name)
state_path = os.path.join(
    "static",
    "data",
    "state_coordinates",
    "statelatlong.csv",
)
EIA_state = pd.read_csv(filepath_or_buffer=state_path)
EIA_state.head(n=5)

Unnamed: 0,State,Latitude,Longitude,City
0,US,37.0902,-95.7129,United States
1,AL,32.601011,-86.680736,Alabama
2,AK,61.302501,-158.77502,Alaska
3,AZ,34.168219,-111.930907,Arizona
4,AR,34.751928,-92.131378,Arkansas


## Total energy average price
https://www.eia.gov/opendata/qb.php?category=40244

In [3]:
# Fetch the annual total-energy average price series (SEDS.TETCD.<state>.A)
# for every entry in EIA_state and collect the results into one long table
# with the columns State, Year, Average_Price and Price_Unit.
# NOTE(review): this targets the EIA "series" endpoint; confirm it is still
# served before relying on a fresh run.
price_endpoint = "https://api.eia.gov/series/"
API_KEY = api_key

# Accumulate plain records and build the DataFrame once at the end; filling a
# DataFrame cell by cell and re-concatenating per state is needlessly slow.
price_records = []

for state in EIA_state["State"].values:

    # Series id of the average price for this state
    price_series_id = "SEDS.TETCD." + state + ".A"

    # Let requests build the query string; fail fast on hangs and HTTP errors
    price_response = requests.get(
        price_endpoint,
        params={"api_key": API_KEY, "series_id": price_series_id},
        timeout=30,
    )
    price_response.raise_for_status()
    price_state = price_response.json()

    # Only the first series of the payload is used: each "data" entry is
    # indexed as [year, value] and "units" applies to every row.
    price_series = price_state["series"][0]
    for observation in price_series["data"]:
        price_records.append({
            "State": state,
            "Year": observation[0],
            "Average_Price": observation[1],
            "Price_Unit": price_series["units"],
        })

# One row per (state, year), in request order, with a fresh 0..n-1 index
final_price_state = pd.DataFrame(
    price_records,
    columns=["State", "Year", "Average_Price", "Price_Unit"],
)
    

In [4]:
# Preview the first rows of the collected average-price table
final_price_state.head()

Unnamed: 0,State,Year,Average_Price,Price_Unit
0,US,2017,17.3,Dollars per million Btu
1,US,2016,15.94,Dollars per million Btu
2,US,2015,17.3,Dollars per million Btu
3,US,2014,21.33,Dollars per million Btu
4,US,2013,21.42,Dollars per million Btu


In [5]:
# Export average price data to csv file
price_path = os.path.join("static", "data", "EIA_Project_Data", "energy_average_price.csv")
# to_csv does not create parent folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(price_path), exist_ok=True)
final_price_state.to_csv(price_path, index=False)

## Total Energy production
https://www.eia.gov/opendata/qb.php?category=40207

In [6]:
# Fetch the annual total energy production series (SEDS.TEPRB.<state>.A)
# for every entry in EIA_state and collect the results into one long table
# with the columns State, Year, Total_energy and ENERGY_Unit.
# NOTE(review): this targets the EIA "series" endpoint; confirm it is still
# served before relying on a fresh run.
energy_endpoint = "https://api.eia.gov/series/"
API_KEY = api_key

# Accumulate plain records and build the DataFrame once at the end; filling a
# DataFrame cell by cell and re-concatenating per state is needlessly slow.
energy_records = []

for state in EIA_state["State"].values:

    # Series id of the total energy production for this state
    energy_series_id = "SEDS.TEPRB." + state + ".A"

    # Let requests build the query string; fail fast on hangs and HTTP errors
    energy_response = requests.get(
        energy_endpoint,
        params={"api_key": API_KEY, "series_id": energy_series_id},
        timeout=30,
    )
    energy_response.raise_for_status()
    energy_state = energy_response.json()

    # Only the first series of the payload is used: each "data" entry is
    # indexed as [year, value] and "units" applies to every row.
    energy_series = energy_state["series"][0]
    for observation in energy_series["data"]:
        energy_records.append({
            "State": state,
            "Year": observation[0],
            "Total_energy": observation[1],
            "ENERGY_Unit": energy_series["units"],
        })

# One row per (state, year), in request order, with a fresh 0..n-1 index
final_energy_state = pd.DataFrame(
    energy_records,
    columns=["State", "Year", "Total_energy", "ENERGY_Unit"],
)
    

In [7]:
# Preview the first rows of the collected energy-production table
final_energy_state.head()

Unnamed: 0,State,Year,Total_energy,ENERGY_Unit
0,US,2016,84247690,Billion Btu
1,US,2015,88197690,Billion Btu
2,US,2014,87613590,Billion Btu
3,US,2013,81705691,Billion Btu
4,US,2012,79131447,Billion Btu


In [8]:
# Export energy production data to csv file
energy_path = os.path.join("static", "data", "EIA_Project_Data", "total_energy_production.csv")
# to_csv does not create parent folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(energy_path), exist_ok=True)
final_energy_state.to_csv(energy_path, index=False)

## Renewable Energy production
https://www.eia.gov/opendata/qb.php?category=40398

In [9]:
# Fetch the annual renewable energy production series (SEDS.ROPRB.<state>.A)
# for every entry in EIA_state and collect the results into one long table
# with the columns State, Year, Total_renewable_energy and Renew_Unit.
# NOTE(review): this targets the EIA "series" endpoint; confirm it is still
# served before relying on a fresh run.
renewable_endpoint = "https://api.eia.gov/series/"
API_KEY = api_key

# Accumulate plain records and build the DataFrame once at the end; filling a
# DataFrame cell by cell and re-concatenating per state is needlessly slow.
renewable_records = []

for state in EIA_state["State"].values:

    # Series id of the renewable energy production for this state
    renewable_series_id = "SEDS.ROPRB." + state + ".A"

    # Let requests build the query string; fail fast on hangs and HTTP errors
    renewable_response = requests.get(
        renewable_endpoint,
        params={"api_key": API_KEY, "series_id": renewable_series_id},
        timeout=30,
    )
    renewable_response.raise_for_status()
    renewable_state = renewable_response.json()

    # Only the first series of the payload is used: each "data" entry is
    # indexed as [year, value] and "units" applies to every row.
    renewable_series = renewable_state["series"][0]
    for observation in renewable_series["data"]:
        renewable_records.append({
            "State": state,
            "Year": observation[0],
            "Total_renewable_energy": observation[1],
            "Renew_Unit": renewable_series["units"],
        })

# One row per (state, year), in request order, with a fresh 0..n-1 index
final_renewable_state = pd.DataFrame(
    renewable_records,
    columns=["State", "Year", "Total_renewable_energy", "Renew_Unit"],
)
    

In [10]:
# Preview the first rows of the collected renewable-production table
final_renewable_state.head()

Unnamed: 0,State,Year,Total_renewable_energy,Renew_Unit
0,US,2016,8053314,Billion Btu
1,US,2015,7488974,Billion Btu
2,US,2014,7670927,Billion Btu
3,US,2013,7445307,Billion Btu
4,US,2012,6936925,Billion Btu


In [11]:
# Export renewable energy production data to csv file
renewable_path = os.path.join("static", "data", "EIA_Project_Data", "renewable_energy_production.csv")
# to_csv does not create parent folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(renewable_path), exist_ok=True)
final_renewable_state.to_csv(renewable_path, index=False)

## Resident population:
https://www.eia.gov/opendata/qb.php?category=40367

In [12]:
# Fetch the annual resident population series (SEDS.TPOPP.<state>.A)
# for every entry in EIA_state and collect the results into one long table
# with the columns State, Year, resident_population and Pop_Unit.
# NOTE(review): this targets the EIA "series" endpoint; confirm it is still
# served before relying on a fresh run.
pop_endpoint = "https://api.eia.gov/series/"
API_KEY = api_key

# Accumulate plain records and build the DataFrame once at the end; filling a
# DataFrame cell by cell and re-concatenating per state is needlessly slow.
pop_records = []

for state in EIA_state["State"].values:

    # Series id of the resident population for this state
    pop_series_id = "SEDS.TPOPP." + state + ".A"

    # Let requests build the query string; fail fast on hangs and HTTP errors
    pop_response = requests.get(
        pop_endpoint,
        params={"api_key": API_KEY, "series_id": pop_series_id},
        timeout=30,
    )
    pop_response.raise_for_status()
    pop_state = pop_response.json()

    # Only the first series of the payload is used: each "data" entry is
    # indexed as [year, value] and "units" applies to every row.
    pop_series = pop_state["series"][0]
    for observation in pop_series["data"]:
        pop_records.append({
            "State": state,
            "Year": observation[0],
            "resident_population": observation[1],
            "Pop_Unit": pop_series["units"],
        })

# One row per (state, year), in request order, with a fresh 0..n-1 index
final_pop_state = pd.DataFrame(
    pop_records,
    columns=["State", "Year", "resident_population", "Pop_Unit"],
)
    

In [13]:
# Preview the first rows of the collected resident-population table
final_pop_state.head()

Unnamed: 0,State,Year,resident_population,Pop_Unit
0,US,2017,325147,Thousand
1,US,2016,323071,Thousand
2,US,2015,320743,Thousand
3,US,2014,318386,Thousand
4,US,2013,316058,Thousand


In [14]:
# Export resident population data to csv file
population_path = os.path.join("static", "data", "EIA_Project_Data", "resident_population.csv")
# to_csv does not create parent folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(population_path), exist_ok=True)
final_pop_state.to_csv(population_path, index=False)

## Total CO2 Emission
https://www.eia.gov/opendata/qb.php?category=2251670

In [15]:
# Fetch the annual total CO2 emission series (EMISS.CO2-TOTV-TT-TO-<state>.A)
# for every entry in EIA_state and collect the results into one long table
# with the columns State, Year, Total_co2_emission and CO2_Unit.
# NOTE(review): this targets the EIA "series" endpoint; confirm it is still
# served before relying on a fresh run.
co2_endpoint = "https://api.eia.gov/series/"
API_KEY = api_key

# Accumulate plain records and build the DataFrame once at the end; filling a
# DataFrame cell by cell and re-concatenating per state is needlessly slow.
co2_records = []

for state in EIA_state["State"].values:

    # Series id of the total CO2 emission for this state
    co2_series_id = "EMISS.CO2-TOTV-TT-TO-" + state + ".A"

    # Let requests build the query string; fail fast on hangs and HTTP errors
    co2_response = requests.get(
        co2_endpoint,
        params={"api_key": API_KEY, "series_id": co2_series_id},
        timeout=30,
    )
    co2_response.raise_for_status()
    co2_state = co2_response.json()

    # Only the first series of the payload is used: each "data" entry is
    # indexed as [year, value] and "units" applies to every row.
    co2_series = co2_state["series"][0]
    for observation in co2_series["data"]:
        co2_records.append({
            "State": state,
            "Year": observation[0],
            "Total_co2_emission": observation[1],
            "CO2_Unit": co2_series["units"],
        })

# One row per (state, year), in request order, with a fresh 0..n-1 index
final_co2_state = pd.DataFrame(
    co2_records,
    columns=["State", "Year", "Total_co2_emission", "CO2_Unit"],
)

In [16]:
# Preview the first rows of the collected CO2-emission table
final_co2_state.head()

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit
0,US,2016,5160.99,million metric tons CO2
1,US,2015,5207.39,million metric tons CO2
2,US,2014,5350.27,million metric tons CO2
3,US,2013,5301.45,million metric tons CO2
4,US,2012,5162.56,million metric tons CO2


In [17]:
# Export co2 emission data to csv file
co2_path = os.path.join("static", "data", "EIA_Project_Data", "total_co2_emission.csv")
# to_csv does not create parent folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(co2_path), exist_ok=True)
final_co2_state.to_csv(co2_path, index=False)

## Combine all data into one table for 20 years (1997-2016)

In [18]:
# Reload the five exported tables from their CSV files
# (presumably so the columns are re-parsed with inferred dtypes before the
# numeric Year filter that follows - TODO confirm)
(
    final_co2_state,
    final_price_state,
    final_pop_state,
    final_energy_state,
    final_renewable_state,
) = (
    pd.read_csv(csv_path)
    for csv_path in (co2_path, price_path, population_path, energy_path, renewable_path)
)

In [19]:
# Keep only the 20-year window 1997-2016 (both ends inclusive) in every table
final_co2_state_20 = final_co2_state.loc[final_co2_state["Year"].between(1997, 2016), :]
final_price_state_20 = final_price_state.loc[final_price_state["Year"].between(1997, 2016), :]
final_pop_state_20 = final_pop_state.loc[final_pop_state["Year"].between(1997, 2016), :]
final_energy_state_20 = final_energy_state.loc[final_energy_state["Year"].between(1997, 2016), :]
final_renewable_state_20 = final_renewable_state.loc[final_renewable_state["Year"].between(1997, 2016), :]


In [20]:
# Merge all datasets into one table.
# The join keys are spelled out instead of relying on pd.merge's default of
# joining on every column name the two frames happen to share; an accidental
# extra shared column would otherwise silently change the join.
merge_keys = ["State", "Year"]
df1 = pd.merge(final_co2_state_20, final_price_state_20, on=merge_keys)
df2 = pd.merge(df1, final_pop_state_20, on=merge_keys)
df3 = pd.merge(df2, final_energy_state_20, on=merge_keys)
final_combine_table = pd.merge(df3, final_renewable_state_20, on=merge_keys)
final_combine_table.head()

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit,Average_Price,Price_Unit,resident_population,Pop_Unit,Total_energy,ENERGY_Unit,Total_renewable_energy,Renew_Unit
0,US,2016,5160.99085,million metric tons CO2,15.94,Dollars per million Btu,323071,Thousand,84247690,Billion Btu,8053314,Billion Btu
1,US,2015,5207.386647,million metric tons CO2,17.3,Dollars per million Btu,320743,Thousand,88197690,Billion Btu,7488974,Billion Btu
2,US,2014,5350.26544,million metric tons CO2,21.33,Dollars per million Btu,318386,Thousand,87613590,Billion Btu,7670927,Billion Btu
3,US,2013,5301.451176,million metric tons CO2,21.42,Dollars per million Btu,316058,Thousand,81705691,Billion Btu,7445307,Billion Btu
4,US,2012,5162.563034,million metric tons CO2,21.83,Dollars per million Btu,313874,Thousand,79131447,Billion Btu,6936925,Billion Btu


In [21]:
# Export combined data to csv
final_path = os.path.join("static", "data", "EIA_Project_Data", "final_combine_table.csv")
# to_csv does not create parent folders, so make sure the target directory
# exists (e.g. on a fresh checkout) before writing.
os.makedirs(os.path.dirname(final_path), exist_ok=True)
final_combine_table.to_csv(final_path, index=False)

## Combine state geojson with 2016 data

In [3]:
# Read the combined table back from the CSV at final_path and preview it
final_combine_table = pd.read_csv(filepath_or_buffer=final_path)
final_combine_table.head(n=5)

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit,Average_Price,Price_Unit,resident_population,Pop_Unit,Total_energy,ENERGY_Unit,Total_renewable_energy,Renew_Unit
0,US,2016,5160.99085,million metric tons CO2,15.94,Dollars per million Btu,323071,Thousand,84247690,Billion Btu,8053314,Billion Btu
1,US,2015,5207.386647,million metric tons CO2,17.3,Dollars per million Btu,320743,Thousand,88197690,Billion Btu,7488974,Billion Btu
2,US,2014,5350.26544,million metric tons CO2,21.33,Dollars per million Btu,318386,Thousand,87613590,Billion Btu,7670927,Billion Btu
3,US,2013,5301.451176,million metric tons CO2,21.42,Dollars per million Btu,316058,Thousand,81705691,Billion Btu,7445307,Billion Btu
4,US,2012,5162.563034,million metric tons CO2,21.83,Dollars per million Btu,313874,Thousand,79131447,Billion Btu,6936925,Billion Btu


In [4]:
# Keep only the rows for the year 2016
final_2016 = final_combine_table.loc[final_combine_table["Year"].eq(2016), :]
final_2016.head(n=5)

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit,Average_Price,Price_Unit,resident_population,Pop_Unit,Total_energy,ENERGY_Unit,Total_renewable_energy,Renew_Unit
0,US,2016,5160.99085,million metric tons CO2,15.94,Dollars per million Btu,323071,Thousand,84247690,Billion Btu,8053314,Billion Btu
20,AL,2016,115.088298,million metric tons CO2,14.81,Dollars per million Btu,4865,Thousand,1124482,Billion Btu,235494,Billion Btu
40,AK,2016,34.909446,million metric tons CO2,16.63,Dollars per million Btu,742,Thousand,1433116,Billion Btu,19428,Billion Btu
60,AZ,2016,87.00595,million metric tons CO2,20.65,Dollars per million Btu,6945,Thousand,593132,Billion Btu,130858,Billion Btu
80,AR,2016,62.410797,million metric tons CO2,14.6,Dollars per million Btu,2990,Thousand,1130117,Billion Btu,117959,Billion Btu


In [6]:
# Attach the coordinates and the full state name (the "City" column) to the
# 2016 rows. The key is named explicitly so the join cannot silently change
# if the two frames ever share another column name.
final_2016_state = final_2016.merge(EIA_state, on="State")
final_2016_state.head()

Unnamed: 0,State,Year,Total_co2_emission,CO2_Unit,Average_Price,Price_Unit,resident_population,Pop_Unit,Total_energy,ENERGY_Unit,Total_renewable_energy,Renew_Unit,Latitude,Longitude,City
0,US,2016,5160.99085,million metric tons CO2,15.94,Dollars per million Btu,323071,Thousand,84247690,Billion Btu,8053314,Billion Btu,37.0902,-95.7129,United States
1,AL,2016,115.088298,million metric tons CO2,14.81,Dollars per million Btu,4865,Thousand,1124482,Billion Btu,235494,Billion Btu,32.601011,-86.680736,Alabama
2,AK,2016,34.909446,million metric tons CO2,16.63,Dollars per million Btu,742,Thousand,1433116,Billion Btu,19428,Billion Btu,61.302501,-158.77502,Alaska
3,AZ,2016,87.00595,million metric tons CO2,20.65,Dollars per million Btu,6945,Thousand,593132,Billion Btu,130858,Billion Btu,34.168219,-111.930907,Arizona
4,AR,2016,62.410797,million metric tons CO2,14.6,Dollars per million Btu,2990,Thousand,1130117,Billion Btu,117959,Billion Btu,34.751928,-92.131378,Arkansas


In [7]:
url = "https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json" #state geojson
# Bound the wait and surface HTTP errors here rather than as a confusing
# JSON decoding failure on an error page.
geojson_response = requests.get(url, timeout=30)
geojson_response.raise_for_status()
r = geojson_response.json()

In [8]:
# Copy the 2016 metrics into the properties of each GeoJSON feature.
# Features are matched to table rows by state name rather than by list
# position, so the result stays correct when the GeoJSON and the table are
# ordered differently or when either side has entries the other lacks.
metric_columns = [
    "Total_co2_emission",
    "Average_Price",
    "resident_population",
    "Total_energy",
    "Total_renewable_energy",
]

# Full state name (the "City" column) -> that state's row as a plain dict
metrics_by_name = {
    record["City"]: record
    for record in final_2016_state.to_dict("records")
}

for feature in r["features"]:
    properties = feature["properties"]
    state_metrics = metrics_by_name.get(properties["name"])
    if state_metrics is None:
        # No 2016 row carries this feature's name; leave the feature untouched
        continue
    for column in metric_columns:
        # Store plain Python floats, exactly as the positional version did
        properties[column] = float(state_metrics[column])
            

In [10]:
# Serialise the enriched GeoJSON dictionary and write it to the project data folder
geojsonpath = os.path.join(
    "static",
    "data",
    "EIA_Project_Data",
    "stateinformation.json",
)
with open(geojsonpath, mode="w") as json_file:
    json_file.write(json.dumps(r))