In [166]:
import pandas as pd
import requests
from config import energy_api_key
from pprint import pprint
import numpy as np
import time

In [4]:
fuel_types_url = f"http://api.eia.gov/category/?api_key={energy_api_key}&category_id=3"

In [6]:
response = requests.get(fuel_types_url)

In [15]:
energy_category_objects = response.json()['category']['childcategories']
energy_category_objects

[{'category_id': 4, 'name': 'Coal'},
 {'category_id': 7, 'name': 'Petroleum liquids'},
 {'category_id': 8, 'name': 'Petroleum coke'},
 {'category_id': 9, 'name': 'Natural gas'},
 {'category_id': 10, 'name': 'Other gases'},
 {'category_id': 11, 'name': 'Nuclear'},
 {'category_id': 12, 'name': 'Conventional hydroelectric'},
 {'category_id': 13, 'name': 'Other renewables (total)'},
 {'category_id': 14, 'name': 'Wind'},
 {'category_id': 15, 'name': 'All utility-scale solar'},
 {'category_id': 16, 'name': 'Wood and wood-derived fuels'},
 {'category_id': 17, 'name': 'Geothermal'},
 {'category_id': 18, 'name': 'Other biomass'},
 {'category_id': 19, 'name': 'Hydro-electric pumped storage'},
 {'category_id': 20, 'name': 'Other'},
 {'category_id': 1718400, 'name': 'Utility-scale photovoltaic'},
 {'category_id': 1718401, 'name': 'Utility-scale thermal'},
 {'category_id': 1718408, 'name': 'All solar'},
 {'category_id': 1718409, 'name': 'Small-scale solar photovoltaic'},
 {'category_id': 1736519, '

In [155]:
fuel_types = [ energy_category_object['name']\
                     for energy_category_object \
                     in energy_category_objects
                    ]

In [157]:
def getMonthlyGenerationData(fuel_type):
    category_id = [ energy_category['category_id'] \
                   for energy_category \
                   in energy_category_objects\
                   if energy_category['name'] == fuel_type\
                  ][0]
    wind_data_url = f"http://api.eia.gov/category/?api_key={energy_api_key}&category_id={category_id}"
    generation_data = requests.get(wind_data_url).json()['category']['childseries']
    monthly_generation_data = [region_monthly_data \
                               for region_monthly_data in generation_data \
                               if region_monthly_data['f'] == 'M'\
                              ]
    return monthly_generation_data


In [158]:
all_monthly_generation_data = [getMonthlyGenerationData(fuel_type)\
                              for fuel_type\
                              in fuel_types]
all_monthly_generation_data

[[{'series_id': 'ELEC.GEN.COW-AK-99.M',
   'name': 'Net generation : coal : Alaska : all sectors : monthly',
   'f': 'M',
   'units': 'thousand megawatthours',
   'updated': '01-MAR-19 12.40.06 AM'},
  {'series_id': 'ELEC.GEN.COW-AL-99.M',
   'name': 'Net generation : coal : Alabama : all sectors : monthly',
   'f': 'M',
   'units': 'thousand megawatthours',
   'updated': '01-MAR-19 12.40.06 AM'},
  {'series_id': 'ELEC.GEN.COW-AR-99.M',
   'name': 'Net generation : coal : Arkansas : all sectors : monthly',
   'f': 'M',
   'units': 'thousand megawatthours',
   'updated': '01-MAR-19 12.40.06 AM'},
  {'series_id': 'ELEC.GEN.COW-AZ-99.M',
   'name': 'Net generation : coal : Arizona : all sectors : monthly',
   'f': 'M',
   'units': 'thousand megawatthours',
   'updated': '01-MAR-19 12.40.06 AM'},
  {'series_id': 'ELEC.GEN.COW-CA-99.M',
   'name': 'Net generation : coal : California : all sectors : monthly',
   'f': 'M',
   'units': 'thousand megawatthours',
   'updated': '01-MAR-19 12.40.0

In [151]:
monthly_wind_generation_series = getMonthlyGenerationData("Wind")

In [35]:
def regionFromSeriesName(series_name):
    return series_name.split(' : ')[2]

def getRegionToSeriesID(region_data):
    return {regionFromSeriesName(region_datum['name']): region_datum['series_id']\
     for region_datum in region_data\
    }

In [165]:
fuels_region_to_series_id = [\
 getRegionToSeriesID(fuel_type_monthly_data) \
 for fuel_type_monthly_data \
 in all_monthly_generation_data\
]

In [150]:
getRegionToSeriesID(monthly_wind_generation_series)

{'Alaska': 'ELEC.GEN.WND-AK-99.M',
 'Arizona': 'ELEC.GEN.WND-AZ-99.M',
 'California': 'ELEC.GEN.WND-CA-99.M',
 'Colorado': 'ELEC.GEN.WND-CO-99.M',
 'Connecticut': 'ELEC.GEN.WND-CT-99.M',
 'Delaware': 'ELEC.GEN.WND-DE-99.M',
 'East North Central (total)': 'ELEC.GEN.WND-ENC-99.M',
 'East South Central (total)': 'ELEC.GEN.WND-ESC-99.M',
 'Hawaii': 'ELEC.GEN.WND-HI-99.M',
 'Iowa': 'ELEC.GEN.WND-IA-99.M',
 'Idaho': 'ELEC.GEN.WND-ID-99.M',
 'Illinois': 'ELEC.GEN.WND-IL-99.M',
 'Indiana': 'ELEC.GEN.WND-IN-99.M',
 'Kansas': 'ELEC.GEN.WND-KS-99.M',
 'Massachusetts': 'ELEC.GEN.WND-MA-99.M',
 'Middle Atlantic (total)': 'ELEC.GEN.WND-MAT-99.M',
 'Maryland': 'ELEC.GEN.WND-MD-99.M',
 'Maine': 'ELEC.GEN.WND-ME-99.M',
 'Michigan': 'ELEC.GEN.WND-MI-99.M',
 'Minnesota': 'ELEC.GEN.WND-MN-99.M',
 'Missouri': 'ELEC.GEN.WND-MO-99.M',
 'Montana': 'ELEC.GEN.WND-MT-99.M',
 'Mountain (total)': 'ELEC.GEN.WND-MTN-99.M',
 'North Carolina': 'ELEC.GEN.WND-NC-99.M',
 'North Dakota': 'ELEC.GEN.WND-ND-99.M',
 'Nebraska

In [122]:
# This returns series_data given a series ID
# Series_data is a list of dictionaries, each dict has 2 values, the date and then the energy production
def getSeriesData(series_id):
    url = f"http://api.eia.gov/series/?api_key={energy_api_key}&series_id={series_id}"
    response = requests.get(alaska_wind_series_url)
    return [{"date": date, "energy_production": energy_production} \
            for (date, energy_production) in response.json()['series'][0]['data']\
           ]

In [129]:
# region to series id is a dictionary where keys are regions and values are series IDs
# output a dictionary with region key and series_data key
def getRegionSeriesData(region_to_series_id):
    region_series_data = [ {"region": region, "series_data": getSeriesData(series_id)} \
     for (region, series_id) in region_to_series_id.items()]
    return region_series_data

In [170]:
fuels_region_series_data = []
for fuel_region_to_series_id in fuels_region_to_series_id:
    fuel_region_series_data = getRegionSeriesData(fuel_region_to_series_id)
    fuels_region_series_data.append(fuel_region_series_data)
    first_region_id = next(iter(fuel_region_to_series_id.values()))
    print(f"Finished {first_region_id}")
    time.sleep(5)


# fuels_region_series_data = [getRegionSeriesData(fuel_region_to_series_id)\
#  for fuel_region_to_series_id\
#  in fuels_region_to_series_id\
# ]
fuels_region_series_data[8]

Finished ELEC.GEN.COW-AK-99.M
Finished ELEC.GEN.PEL-AK-99.M
Finished ELEC.GEN.PC-AR-99.M
Finished ELEC.GEN.NG-AK-99.M
Finished ELEC.GEN.OOG-AK-99.M
Finished ELEC.GEN.NUC-AL-99.M
Finished ELEC.GEN.HYC-AK-99.M
Finished ELEC.GEN.AOR-AK-99.M
Finished ELEC.GEN.WND-AK-99.M
Finished ELEC.GEN.SUN-AL-99.M
Finished ELEC.GEN.WWW-AK-99.M
Finished ELEC.GEN.GEO-CA-99.M
Finished ELEC.GEN.WAS-AK-99.M
Finished ELEC.GEN.HPS-AR-99.M
Finished ELEC.GEN.OTH-AK-99.M
Finished ELEC.GEN.SPV-AL-99.M
Finished ELEC.GEN.STH-AZ-99.M
Finished ELEC.GEN.TSN-AK-99.M
Finished ELEC.GEN.DPV-AK-99.M
Finished ELEC.GEN.ALL-AK-99.M


[{'region': 'Alaska',
  'series_data': [{'date': '201812', 'energy_production': 11.84345},
   {'date': '201811', 'energy_production': 11.98104},
   {'date': '201810', 'energy_production': 10.25316},
   {'date': '201809', 'energy_production': 9.39741},
   {'date': '201808', 'energy_production': None},
   {'date': '201807', 'energy_production': None},
   {'date': '201806', 'energy_production': None},
   {'date': '201805', 'energy_production': 12.1464},
   {'date': '201804', 'energy_production': 14.31442},
   {'date': '201803', 'energy_production': 14.75622},
   {'date': '201802', 'energy_production': 15.2678},
   {'date': '201801', 'energy_production': 14.51298},
   {'date': '201712', 'energy_production': 13.49192},
   {'date': '201711', 'energy_production': 13.26932},
   {'date': '201710', 'energy_production': 14.00132},
   {'date': '201709', 'energy_production': 10.11247},
   {'date': '201708', 'energy_production': 7.35166},
   {'date': '201707', 'energy_production': 9.00838},
   {'dat

In [130]:
wind_region_series_data = getRegionSeriesData(region_to_monthly_wind_urls)
wind_region_series_data[0]

{'region': 'Alaska',
 'series_data': [{'date': '201812', 'energy_production': 11.84345},
  {'date': '201811', 'energy_production': 11.98104},
  {'date': '201810', 'energy_production': 10.25316},
  {'date': '201809', 'energy_production': 9.39741},
  {'date': '201808', 'energy_production': None},
  {'date': '201807', 'energy_production': None},
  {'date': '201806', 'energy_production': None},
  {'date': '201805', 'energy_production': 12.1464},
  {'date': '201804', 'energy_production': 14.31442},
  {'date': '201803', 'energy_production': 14.75622},
  {'date': '201802', 'energy_production': 15.2678},
  {'date': '201801', 'energy_production': 14.51298},
  {'date': '201712', 'energy_production': 13.49192},
  {'date': '201711', 'energy_production': 13.26932},
  {'date': '201710', 'energy_production': 14.00132},
  {'date': '201709', 'energy_production': 10.11247},
  {'date': '201708', 'energy_production': 7.35166},
  {'date': '201707', 'energy_production': 9.00838},
  {'date': '201706', 'energ

In [131]:
# Series_data is a list of dictionaries, each dict has 2 values, the date and then the energy production
# Outputs a list of dictionaries with region, date, energy production, & fuel type
def formatedSeriesData(region, fuel_type, series_data):
    return [{"region": region, \
             "date": month_data['date'], \
             "energy_production": month_data['energy_production'], \
             "fuel_type": fuel_type
            } \
            for month_data \
            in series_data
           ]

In [141]:
def formatedRegionSeriesData(region_series_data, fuel_type):
    formatted_region_series_data = [\
             formatedSeriesData(region_series['region'], fuel_type, region_series['series_data']) \
             for region_series in region_series_data\
            ]
    formatted_region_series_data = np.ravel(formatted_region_series_data)
    formatted_region_series_data = [data for data in formatted_region_series_data]
    return formatted_region_series_data
    

In [178]:
formatted_fuels_region_series_data = [formatedRegionSeriesData(fuel_region_series_data, energy_category_objects[index]['name'])\
                                      for (index, fuel_region_series_data)\
                                      in enumerate(fuels_region_series_data)]
formatted_fuels_region_series_data[8][:5]


[{'region': 'Alaska',
  'date': '201812',
  'energy_production': 11.84345,
  'fuel_type': 'Wind'},
 {'region': 'Alaska',
  'date': '201811',
  'energy_production': 11.98104,
  'fuel_type': 'Wind'},
 {'region': 'Alaska',
  'date': '201810',
  'energy_production': 10.25316,
  'fuel_type': 'Wind'},
 {'region': 'Alaska',
  'date': '201809',
  'energy_production': 9.39741,
  'fuel_type': 'Wind'},
 {'region': 'Alaska',
  'date': '201808',
  'energy_production': None,
  'fuel_type': 'Wind'}]

In [142]:
formatted_wind_region_series_data = formatedRegionSeriesData(wind_region_series_data, "wind")
formatted_wind_region_series_data[:5]


[{'region': 'Alaska',
  'date': '201812',
  'energy_production': 11.84345,
  'fuel_type': 'wind'},
 {'region': 'Alaska',
  'date': '201811',
  'energy_production': 11.98104,
  'fuel_type': 'wind'},
 {'region': 'Alaska',
  'date': '201810',
  'energy_production': 10.25316,
  'fuel_type': 'wind'},
 {'region': 'Alaska',
  'date': '201809',
  'energy_production': 9.39741,
  'fuel_type': 'wind'},
 {'region': 'Alaska',
  'date': '201808',
  'energy_production': None,
  'fuel_type': 'wind'}]

In [182]:
formatted_fuels_region_series_data = sum(formatted_fuels_region_series_data, [])

In [186]:
all_fuels_region_production = pd.DataFrame(formatted_fuels_region_series_data)
all_fuels_region_production.tail()

Unnamed: 0,date,energy_production,fuel_type,region
191995,200105,0.069,All fuels,Wyoming
191996,200104,0.071,All fuels,Wyoming
191997,200103,0.081,All fuels,Wyoming
191998,200102,0.074,All fuels,Wyoming
191999,200101,0.087,All fuels,Wyoming


In [187]:
all_fuels_region_production.to_csv('fuel_region_production.csv')
