In [2]:
import os

import pandas as pd
import requests

In [3]:
# Configuration shared by the request helpers below.
#
# EIA Open Data API key. It is read from the EIA_API_KEY environment variable so the key
# never has to be saved inside the notebook; it falls back to '' when the variable is unset.
# Register for an API key here https://www.eia.gov/opendata/register.php
# NOTE: api_url and pd_from_api capture this value as a default argument when they are
# defined, so re-run their cells after changing it.
api = os.environ.get('EIA_API_KEY', '')

# Data features (EIA "data[]" columns) requested for the monthly plant-level pull.
data = ['total-consumption','total-consumption-btu','generation','gross-generation',
        'consumption-for-eg','consumption-for-eg-btu','average-heat-content']


def api_url (data=['total-consumption'], fuel=['WH','NG'], states=[], plant=[],
             frequency='monthly', start=[], end=[], primeMover=['ALL'], api=api):
    """Build a request URL for the EIA v2 ``electricity/facility-fuel/data`` route.

    Parameters
    ----------
    data : list of str
        Values sent as repeated ``data[]=`` parameters.
    fuel, states, plant, primeMover : list
        Values sent as repeated ``facets[fuel2002][]``, ``facets[state][]``,
        ``facets[plantCode][]`` and ``facets[primeMover][]`` parameters.
    frequency : str
        Sent as ``frequency=``.
    start, end : list of str
        Values sent as ``start=`` / ``end=`` parameters; an empty list omits them.
    api : str
        API key appended as ``api_key=``. Defaults to the module-level ``api`` value
        as it was when this function was defined.

    Every list-like argument may also be given as a single scalar (e.g.
    ``start='2019-01'`` or ``plant=123``); it is treated as a one-element list.
    The default lists are only read, never mutated.

    Returns
    -------
    str
        The full URL, including the API key. The copy printed for progress
        feedback has the key redacted so it does not end up in saved notebook output.
    """

    def _values(arg):
        # A bare string would otherwise be iterated character by character and
        # produce one bogus query parameter per character.
        if arg is None:
            return []
        if isinstance(arg, str):
            return [arg]
        try:
            return list(arg)
        except TypeError:  # non-iterable scalar, e.g. an integer plant code
            return [arg]

    # Same parameter order as before: data, fuel, state, primeMover, plantCode,
    # frequency, start, end, api_key.
    parts = [f'data[]={x}' for x in _values(data)]
    parts += [f'facets[fuel2002][]={x}' for x in _values(fuel)]
    parts += [f'facets[state][]={x}' for x in _values(states)]
    parts += [f'facets[primeMover][]={x}' for x in _values(primeMover)]
    parts += [f'facets[plantCode][]={x}' for x in _values(plant)]
    parts.append(f'frequency={frequency}')
    parts += [f'start={x}' for x in _values(start)]
    parts += [f'end={x}' for x in _values(end)]

    base = 'https://api.eia.gov/v2/electricity/facility-fuel/data?'
    query = ''.join(f'{part}&' for part in parts)

    url = f'{base}{query}api_key={api}'
    # Show which request is being made without leaking the key into cell output.
    print(f'{base}{query}api_key=<redacted>')
    return url

In [4]:
def pd_from_api (data=['total-consumption'], fuel=['WH','NG'], states=[], plants=[],
             frequency='monthly', start=[], end=[], primeMover=['ALL'], api=api):
    """Download EIA facility-fuel data and return it as a pandas DataFrame.

    Uses ``api_url`` to build each request. With no ``plants`` a single request is
    made using the ``states`` filter (useful for discovering a state's plant codes);
    otherwise one request is made per plant code and the results are stacked.

    Parameters
    ----------
    data, fuel, states, frequency, start, end, primeMover, api
        Passed through unchanged to ``api_url``.
    plants : list-like of plant codes
        May be a list, tuple, NumPy array or pandas Series; a single scalar code is
        treated as a one-element list. Empty (or None) selects the single-request path.

    Returns
    -------
    pandas.DataFrame
        Rows of ``response['data']`` from the API. When a ``plantCode`` column is
        present it is cast to int so it can be merged with location data on
        ``Plant_Code``. If the API returned no rows the (empty) frame is returned as is.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If a request takes longer than 60 seconds.

    NOTE(review): each request only returns what the API sends in one response; no
    paging is done here — confirm against the EIA row limit for large states.
    """

    def _fetch(**plant_filter):
        # One request -> one DataFrame. Fail loudly on HTTP errors rather than
        # surfacing later as an opaque KeyError on 'response'.
        url = api_url(data=data, fuel=fuel, states=states,
                      frequency=frequency, start=start, end=end,
                      primeMover=primeMover, api=api, **plant_filter)
        r = requests.get(url, timeout=60)
        r.raise_for_status()
        txt = r.json()
        return pd.DataFrame(txt['response']['data'])

    if plants is None:
        plants = []
    elif isinstance(plants, (str, int)):
        plants = [plants]

    # len() works for lists, arrays and Series alike; `plants == []` does not give a
    # plain bool for a NumPy array, and an empty array used to leave `df` undefined.
    if len(plants) == 0:
        df = _fetch()
    else:
        frames = [_fetch(plant=[code]) for code in plants]
        # The previous implementation prepended each new frame, so the last plant
        # requested came first; reverse to keep that row order.
        df = pd.concat(frames[::-1], ignore_index=True, axis=0)

    if 'plantCode' in df.columns:
        df['plantCode'] = df['plantCode'].astype(int) #so can merge with location data Plant_Code

    return df


In [5]:
def add_location(df, output_name, location_file='Power_Plants_in_the_U.S..csv'):
    """Merge plant data with a plant-location table and save the result as CSV.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``plantCode`` column.
    output_name : str or path-like
        Where the merged table is written (CSV, without the index).
    location_file : str or path-like, optional
        CSV holding a ``Plant_Code`` column plus the location columns to attach
        (per the original note, longitude and latitude — not verifiable from this
        notebook). Defaults to 'Power_Plants_in_the_U.S..csv' in the working directory.

    Returns
    -------
    pandas.DataFrame
        Left merge of ``df`` with the location table on ``plantCode`` == ``Plant_Code``;
        rows of ``df`` with no matching location are kept with missing location values.
    """
    location_df = pd.read_csv(location_file)
    combo_df = pd.merge(df, location_df, how='left', left_on='plantCode', right_on='Plant_Code')
    combo_df.to_csv(output_name, index=False)
    # Previously returned the misspelled name `combo_dff`, which raised NameError
    # after the file had already been written.
    return combo_df

In [7]:
def pull_state (state, start=['2019-01-01'],end=['2023-12-31'], primeMover=['ALL']):
    """Download monthly plant-level data for one state and save it with location data.

    Steps:
      1. Query annual data for the state to discover its plant codes.
      2. Query monthly data (the module-level ``data`` features) for each of those
         plants between ``start`` and ``end``.
      3. Merge with location data via ``add_location`` and write
         '<state>_Plant_Location.csv'.

    Fuel types are whatever ``pd_from_api`` uses by default.

    Parameters
    ----------
    state : str
        2-letter state code.
    start, end : list of str
        Date range, each given as a one-element list. Defaults cover 2019-2023.
    primeMover : list of str
        Prime-mover filter applied to both queries. Default is ['ALL'].

    Returns
    -------
    None
        The result is written to disk; a status line is printed.
    """
    plants = pd_from_api(states=[state],frequency='annual', primeMover=primeMover)
    if 'plantCode' not in plants.columns:
        # Nothing came back for this state, so there are no plants to query.
        print(f'{state}: no plant data returned, nothing saved')
        return None

    # Plain Python list: avoids handing a NumPy array to code that compares it to [].
    plant_codes = plants.plantCode.unique().tolist()
    df = pd_from_api(data=data, states=[state],frequency='monthly', plants=plant_codes,
                     start=start, end=end, primeMover=primeMover)

    #add in location data and save as 'State_Plant_location.csv'
    add_location(df, f'{state}_Plant_Location.csv')
    print(f'{state} data downloaded and saved')
    return None

In [1]:
#pull_state('MI')

In [None]:
# Two-letter postal codes of the 48 states to pull, in alphabetical order by code.
continental_states = (
    'AL AR AZ CA CO CT DE FL GA '
    'IA ID IL IN KS KY LA MA MD ME '
    'MI MN MO MS MT NC ND NE NH NJ NM '
    'NV NY OH OK OR PA RI SC SD TN TX '
    'UT VA VT WA WI WV WY'
).split()

In [None]:
# Run to pull data for every state in continental_states. pull_state saves each state as a
# separate CSV named '<STATE>_Plant_Location.csv' (e.g. 'MI_Plant_Location.csv').
# Every iteration makes live requests to the EIA API, one per plant, so this cell is slow.
for state in continental_states:
    pull_state(state)