In [None]:
import pandas as pd
import numpy  as np
from scipy.stats.mstats import winsorize
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [None]:
# Load the hourly dispatch model output for each NERC region, as published at
# https://github.com/tdeetjen/simple_dispatch
# Labels used by the dispatch model are swapped for the labels used in the rest of
# this notebook (applied to every column of each frame).
dispatch_labels = ['gas', 'biomass', 'oil', 'SPP']
notebook_labels = ['ng', 'residfuel', 'residfuel', 'MRO']

dispatch = {}
for nerc in ['FRCC', 'MRO', 'NPCC', 'RFC', 'SERC', 'SPP', 'TRE', 'WECC']:
    dispatch_csv = f'https://raw.githubusercontent.com/tdeetjen/simple_dispatch/master/simple_dispatch_{nerc}_2017_0co2price.csv'
    region_dispatch = pd.read_csv(dispatch_csv)
    dispatch[nerc] = region_dispatch.replace(dispatch_labels, notebook_labels)

In [None]:
# Load the plant-level power plant dataset.
# The FIPS-type code columns are read as strings (presumably to keep leading zeros - confirm),
# and the "Unnamed: 0" column (presumably a saved index - confirm) is discarded.
plants_csv = "../../../Data/pwr/Output/powerplant_emissions2019.csv"
fips_dtypes = {"FIPS": str, "FIPSTATE": str, "STATEFIPS": str}

pwr_plants_raw = pd.read_csv(plants_csv, dtype=fips_dtypes)
pwr_plants_raw = pwr_plants_raw.drop(columns=["Unnamed: 0"])

In [None]:
# Read in price data for different energy products across states, and isolate the prices
# for the products used in power generation.
price_workbook = '../Temp/EIA_price_data/price_data_clean.xlsx'
price_data = pd.read_excel(price_workbook,
                           sheet_name='Data',
                           dtype={'FIPSTATE': str},
                           usecols='A:P'
                           )
# Lookup table describing each price column (joined on 'col_name'; supplies 'sector' and 'fuel')
price_pivot_keys = pd.read_excel(price_workbook,
                                 sheet_name='Pivot_keys',
                                 usecols='A:C'
                                 )

# Reshape from one column per product to one row per (state, product), then attach the
# sector / fuel labels for each product column.
price_data = pd.melt(frame=price_data,
                     id_vars=['STATE', 'FIPSTATE'],
                     value_vars=list(price_pivot_keys.col_name),
                     var_name='col_name',
                     value_name='price'
                     )
price_data = pd.merge(price_data, price_pivot_keys, how='left', on='col_name')

# Take an explicit copy: a column is added below, and writing to a slice of price_data
# would raise a SettingWithCopyWarning.
pwr_fuel_prices = price_data.loc[price_data.sector == 'pwr', ['STATE', 'fuel', 'price']].copy()

# convert to $/MMBtu, from https://www.eia.gov/totalenergy/data/monthly/index.php#appendices
conv_dict = {'coal': 18.915,  # MMBtu / short ton
             'ng': 1033/1000,  # Btu/cf / 1000 = MMBtu/Mcf
             'residfuel': 6287000*10**-6 / 42,  # Btu/barrel * 10^-6 / 42 = MMBtu/gallon
             }

# Vectorised lookup of the heat content for each row's fuel. A fuel without a conversion
# factor is an error (as it was with the row-wise dictionary lookup), so fail loudly
# instead of silently producing NaN prices.
heat_content = pwr_fuel_prices.fuel.map(conv_dict)
if heat_content.isna().any():
    unknown_fuels = sorted(set(pwr_fuel_prices.loc[heat_content.isna(), 'fuel'].astype(str)))
    raise KeyError(f'No heat-content conversion factor for fuel(s): {unknown_fuels}')

pwr_fuel_prices['price_perMMBtu'] = pwr_fuel_prices.price / heat_content

# Derive the heat rate of each power plant in MMBtu/MWh

# Fuel groups that are recoded explicitly; every other group is simply lower-cased.
fuel_group_recode = {'MSW': 'other', 'GEO': 'other', 'GAS': 'ng', 'PET': 'residfuel'}

# rename() returns a new frame, so pwr_plants_raw itself is left untouched.
pes_elec_calc = pwr_plants_raw.rename(columns={'State': 'STATE', 'Aggregated Fuel Group': 'fuel'})
raw_fuel_group = pes_elec_calc['fuel']
# Recode the listed groups and fall back to the lower-cased label for the rest.
# Missing fuel groups stay missing instead of raising inside str.lower().
pes_elec_calc['fuel'] = raw_fuel_group.map(fuel_group_recode).fillna(raw_fuel_group.str.lower())

# Keep only plants with positive generation (the heat rate divides by generation).
# .copy() so the columns added below are written to an independent frame, not a slice.
pes_elec_calc = pes_elec_calc[pes_elec_calc['Generation (kWh)'] > 0].copy()

# MMBtu / kWh * 1000 = MMBtu / MWh
pes_elec_calc['heatrate_MMBtu_perMWh'] = (
    pes_elec_calc['Total Fuel Consumption (MMBtu)'] * 1000 / pes_elec_calc['Generation (kWh)']
)
pes_elec_calc = pes_elec_calc.sort_values(by=['heatrate_MMBtu_perMWh', 'fuel'])

# Merge fuel prices onto PES calculation dataframe.
# Left join: plants whose (STATE, fuel) pair has no price keep a NaN price.
pes_elec_calc = pd.merge(pes_elec_calc,
                         pwr_fuel_prices[['STATE', 'fuel', 'price_perMMBtu']],
                         how='left',
                         on=['STATE', 'fuel'])

# Drop unnecessary columns (again as an explicit copy, because a column is added next)
pes_elec_calc = pes_elec_calc[['STATE', 'Plant Code', 'NERC Region',
                               'fuel', 'Nameplate Capacity (MW)', 'heatrate_MMBtu_perMWh',
                               'price_perMMBtu']].copy()

# Calculate the marginal cost of each power plant as the heat rate * the price of fuel
pes_elec_calc['MC_$_perMWh'] = pes_elec_calc.heatrate_MMBtu_perMWh * pes_elec_calc.price_perMMBtu

# Sort into merit order (cheapest first); sort_values places plants with a NaN marginal cost last.
pes_elec_calc = pes_elec_calc.sort_values(by='MC_$_perMWh')

In [None]:
# Build the supply curves.
# pes_elec_fuel_nerc is a dictionary keyed by NERC region; each value is another dictionary,
# keyed by fuel type, holding a dataframe of the plants of that fuel in that region together
# with their marginal cost and cumulative capacity. These dataframes are the supply curves of
# power plants of a given fuel type in a given NERC region.
supply_curve_fuels = ['coal', 'ng', 'residfuel']

pes_elec_fuel_nerc = {}
for nerc in pes_elec_calc['NERC Region'].unique():

    # Isolate plant data for plants in this NERC region. This does not depend on the fuel,
    # so it is done once per region. .copy() makes the frame independent of pes_elec_calc
    # so that adding 'cum_cap' does not raise a SettingWithCopyWarning.
    region_plants = pes_elec_calc[pes_elec_calc['NERC Region'] == nerc].copy()

    # Cumulative capacity of each plant over ALL plants in the region, in the row order of
    # pes_elec_calc (which is sorted by marginal cost, i.e. merit order).
    region_plants['cum_cap'] = region_plants['Nameplate Capacity (MW)'].cumsum()

    pes_elec_fuel_nerc[nerc] = {}
    for fuel in supply_curve_fuels:

        # Keep only the plants burning the fuel in question
        fuel_plants = region_plants[region_plants.fuel == fuel]

        # Regions without any plant of this fuel get no entry for it
        if len(fuel_plants) == 0:
            continue

        # Drop the plants at or above the 95th percentile of cumulative capacity
        # (the most expensive tail of the curve) to avoid outliers
        cum_cap_cutoff = np.percentile(fuel_plants['cum_cap'], 95)
        pes_elec_fuel_nerc[nerc][fuel] = fuel_plants[fuel_plants['cum_cap'] < cum_cap_cutoff].copy()

In [None]:
# Calculate the gradient of the line of best fit for each of the supply curves, then use it
# to calculate the PES.
# Iterate through NERC region and fuel type
for nerc, region_curves in pes_elec_fuel_nerc.items():
    for fuel in list(region_curves):

        # Work on an independent copy so that adding columns never writes to a slice
        curve = region_curves[fuel].copy()

        # Only complete rows (no NaN in any column) can be used for the regression
        data = curve.dropna()

        if len(data) > 0:

            # Perform linear regression of marginal cost on cumulative capacity
            X = data[['cum_cap']].to_numpy()
            y = data['MC_$_perMWh'].to_numpy()
            reg = LinearRegression().fit(X, y)

            # Populate a new column with the gradient calculated during the regression
            curve['grad'] = reg.coef_[0]
        else:
            curve['grad'] = np.nan

        # Calculate PES as PES = dQ/dMC * MC/Q = MC/cumulative capacity * 1/gradient of supply curve
        curve['pes'] = curve['MC_$_perMWh'] / curve.cum_cap / curve.grad

        # A zero gradient or zero cumulative capacity gives an infinite PES. Treat both
        # +inf and -inf as missing (previously only +inf was caught), then drop every
        # incomplete row so that downstream averages only see finite values.
        region_curves[fuel] = curve.replace([np.inf, -np.inf], np.nan).dropna()

In [None]:
# Create dictionary to store average PES values for each NERC region
avg_pes = {}

# Iterate through NERC regions across dispatch data to calculate the PES in each hour for each region,
# then use these to calculate the average PES for the year for that region.
for nerc in ['MRO', 'NPCC', 'RFC', 'SERC', 'TRE', 'WECC']:
    region_dispatch = dispatch[nerc]

    # Pull each supply curve out as plain arrays once per region, instead of re-indexing
    # the dataframes in every hour of the year.
    curve_arrays = {
        fuel: (curve['cum_cap'].to_numpy(), curve['pes'].to_numpy())
        for fuel, curve in pes_elec_fuel_nerc[nerc].items()
    }

    # Iterate over every hour in the dispatch data, using the marginal generator's fuel type
    # and the amount of demand in MW
    hourly_pes = []
    for marg_gen_fuel, demand in zip(region_dispatch['marg_gen_fuel_type'], region_dispatch['demand']):
        # A marginal fuel without a supply curve in this region raises KeyError
        cum_cap, pes_values = curve_arrays[marg_gen_fuel]

        # Identify the two PES figures for which the cumulative capacity most closely matches
        # the demand, and take the larger of the two.
        # NOTE: this used to pick the second-closest entry (iloc[1]), which is not the larger
        # of the two; max() matches the intent above and also works for a one-plant curve.
        nearest_two = np.argsort(np.abs(cum_cap - demand))[:2]
        hourly_pes.append(pes_values[nearest_two].max())

    # Store the hourly PES values in a new column of the dispatch data (one assignment
    # instead of one .loc write per hour)
    region_dispatch['pes'] = hourly_pes

    # Distribution of the hourly PES values for this region
    ax = region_dispatch['pes'].hist()
    ax.set(title=f'Hourly PES - {nerc}', xlabel='PES', ylabel='Number of hours')
    print(nerc)
    plt.show()

    # Average PES values over the year and store in dictionary
    # (as a plain float so the dictionary serialises cleanly)
    avg_pes[nerc] = float(region_dispatch['pes'].mean())
    print(avg_pes[nerc])


In [None]:
# Persist the regional average PES values as an indented JSON file
import json

avg_pes_path = "../Temp/avg_pes_elec_byNERC.json"
avg_pes_json = json.dumps(avg_pes, indent=4)

with open(avg_pes_path, "w") as json_file:
    json_file.write(avg_pes_json)