In [106]:
import pandas as pd
import numpy as np
import plotly.express as px
from datetime import timedelta

In [11]:
from gridemissions.emissions import BaDataEmissionsCalc
from gridemissions.load import BaData
from gridemissions.eia_api import KEYS

In [None]:
FUELS = {"NG":"natural_gas"}

In [46]:

def consumption_emissions(F, P, ID):
    """
    Form and solve linear system to compute consumption emissions

    Parameters
    ----------
    F: np.array
        emissions
    P: np.array
        production
    ID: np.array
        exchanges

    Notes
    -----
    Create linear system to calculate consumption emissions
    - Create import matrix
    - Create linear system and solve:
    f_i^c*(d_i+sum_j t_{ji}) - sum_j t_{ij}*f_j^c = F_i^p
    where:
        f_i^c: consumption emissions at node i
        d_i: demand at node i
        t: trade matrix - t_{ij} is from node i to j
        F_i^p: emissions produced at node i
    Note: np version must be high enough, otherwise np.linalg.cond fails
    on a matrix with only zeros.
    """

    # Create and solve linear system
    Imp = (-ID).clip(min=0)  # trade matrix reports exports - we want imports
    I_tot = Imp.sum(axis=1)  # sum over columns
    A = np.diag(P + I_tot) - Imp
    b = F

    perturbed = []
    if np.linalg.cond(A) > (1.0 / sys.float_info.epsilon):
        # matrix is ill-conditioned
        for i in range(len(A)):
            if (np.abs(A[:, i]).sum() == 0.0) & (np.abs(A[i, :]).sum() == 0.0):
                A[i, i] = 1.0  # slightly perturb that element
                perturbed += [i]
                # force this to be zero so the linear system makes sense
                b[i] = 0.0

    X = np.linalg.solve(A, b)

    for j in perturbed:
        if X[j] != 0.0:
            print(b[j])
            print(np.abs(A[j, :]).sum())
            print(np.abs(A[:, j]).sum())
            raise ValueError("X[%d] is %.2f instead of 0" % (j, X[j]))

    return X, len(perturbed)

def calculate_consumed(row, regions, fuel):
    """
        Calculate consumed generation by fuel type: 
        g_i^c * (p_i + sum_j(t_ij)) - sum_j(t_ij * g_j^c) = G_i^p

        where 
            g_i^c is the consumed generation rate, in MWh_fuel/MWh_total_demand, for region i 
            p_i is the total production in region i 
            t_ij is the total interchange from i to j 
            G_i^p is the produced MWh of the target fuel in region i 
    """
    # P = total generation in region 
    P = row[[KEYS["E"]["NG"] % r for r in regions]].values
    # ID = total pairwise trade 
    ID = np.zeros((len(regions), len(regions)))
    for i, ri in enumerate(regions):
        for j, rj in enumerate(regions):
            if KEYS["E"]["ID"] % (ri, rj) in row.index:
                ID[i][j] = row[KEYS["E"]["ID"] % (ri, rj)]
    # F = generated of fuel type 
    F = [np.nan for ba in regions]
    for (i,ba) in enumerate(regions): 
        key = KEYS["E"][f"SRC_{fuel}"]%(ba)
        if key in row.index:
            F[i] = row[key]
        else: 
            F[i] = 0  
    X = [np.nan for ba in regions]
    X, _ = consumption_emissions(F, P, ID)
    demand = row[[KEYS["E"]["D"] % r for r in regions]].values
    return demand * X



In [9]:
original = BaData("../data/downloads/eia930/chalendar/EBA_adjusted_elec.csv")

In [40]:
consumed_key = "CON_NG_%s_%s"

In [44]:

consumed = original.df.copy()

In [47]:
for fuel in ["COL", "NG", "NUC", "OIL", "OTH", "SUN", "UNK", "WAT", "WND"]: # todo combine GEO, BIO, UNK into OTH 
    print(fuel, end="...")
    consumed_keys = [consumed_key%(fuel,ba) for ba in original.regions]
    consumed[consumed_keys] = np.nan
    for (i,row) in original.df.iterrows(): 
        consumed.loc[i,consumed_keys] = calculate_consumed(row, original.regions, fuel)


COL...NG...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

NUC...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

OIL...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

OTH...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

SUN...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

UNK...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

WAT...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

WND...


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

In [193]:
consumed[[c for c in consumed.columns if (c.split("_")[0]=="CON") and (c.split("_")[-1]=="CISO")]]

Unnamed: 0,CON_NG_COL_CISO,CON_NG_NG_CISO,CON_NG_NUC_CISO,CON_NG_OIL_CISO,CON_NG_OTH_CISO,CON_NG_SUN_CISO,CON_NG_UNK_CISO,CON_NG_WAT_CISO,CON_NG_WND_CISO
2018-07-01 00:00:00+00:00,454.946700,10221.452687,2776.715343,83.722930,162.402633,8142.426283,0.0,5367.238885,2723.304351
2018-07-01 01:00:00+00:00,603.548551,10477.737186,2891.672919,83.031274,214.814545,6110.521854,0.0,6406.896912,3470.334130
2018-07-01 02:00:00+00:00,803.576549,12049.834920,3025.555960,84.183984,296.701334,2439.326225,0.0,7297.191299,4101.893318
2018-07-01 03:00:00+00:00,905.133618,12714.903698,3195.241025,85.731653,320.239293,439.328422,0.0,7241.896578,4612.567631
2018-07-01 04:00:00+00:00,973.688930,12430.091914,3252.582528,85.631815,397.165604,55.786644,0.0,6876.425634,4775.730935
...,...,...,...,...,...,...,...,...,...
2022-04-19 16:00:00+00:00,294.751811,5818.172371,1322.990319,41.735634,472.355324,10658.274908,0.0,1202.139486,1673.084614
2022-04-19 17:00:00+00:00,271.421457,5364.490966,1275.304421,41.797576,434.797765,10836.538206,0.0,1262.931930,1487.834643
2022-04-19 18:00:00+00:00,243.148703,4924.645908,1201.388242,41.075461,551.890393,11084.520008,0.0,1030.420782,1753.233617
2022-04-19 19:00:00+00:00,215.219962,4688.032469,1123.231517,39.900787,350.498306,10625.352708,0.0,995.945747,2264.190788


In [237]:
ba = "MISO"
fuel = "NG"
px.line(consumed, y=[f"EBA.{ba}-ALL.NG.{fuel}.H", f"CON_NG_{fuel}_{ba}"])

In [239]:
consumed["CON_NG_NG_MISO"].resample("Y").sum()

2018-12-31 00:00:00+00:00    7.598667e+07
2019-12-31 00:00:00+00:00    1.762204e+08
2020-12-31 00:00:00+00:00    1.922218e+08
2021-12-31 00:00:00+00:00    1.675546e+08
2022-12-31 00:00:00+00:00    5.156001e+07
Freq: A-DEC, Name: CON_NG_NG_MISO, dtype: float64

# Calculate rates using Milo's data, then output

In [281]:
#milo_fname = "~/Downloads/generated_eia_923_unadjusted.csv"
#milo_fname = "~/Downloads/generated_eia_923_adjusted_for_chp.csv"
milo_fname = "~/Downloads/generated_eia_923_adjusted_for_chp_and_biomass.csv"

milo_dat = pd.read_csv(milo_fname, parse_dates=["start_date"])

milo_dat["co2_rate"] = milo_dat["co2_mass_lbs"]/milo_dat["net_generation_mwh"]
milo_dat["so2_rate"] = milo_dat["so2_mass_lbs"]/milo_dat["net_generation_mwh"]
milo_dat["nox_rate"] = milo_dat["nox_mass_lbs"]/milo_dat["net_generation_mwh"]
milo_dat["co2e_rate"] = milo_dat["co2e_mass_lbs"]/milo_dat["net_generation_mwh"]


In [282]:
to_out = consumed[[c for c in consumed.columns if c.split("_")[0]=="CON"]]
to_out = to_out.resample("M", label='left').sum()
to_out.index = to_out.index + pd.tseries.frequencies.to_offset("1d")

In [283]:

to_out = to_out.melt(ignore_index=False, value_vars=to_out.columns)
to_out[["drop1", "drop2", "fuel", "ba"]] = to_out["variable"].str.split("_", expand=True)
to_out = to_out.drop(columns=["drop1", "drop2","variable"])
fuel_map = {"COL":"coal", "NG":"natural_gas", "NUC":"nuclear", "OIL":"petroleum", "OTH":"other", "SUN":"solar", "UNK":"unknown", "WAT":"water", "WND":"wind"}
to_out["fuel"] = to_out["fuel"].map(fuel_map)

to_out = to_out.reset_index()
to_out = to_out.rename(columns={"fuel":"fuel_type", "value":"net_generation_mwh", "ba":"balancing_authority", "index":"start_date"})



In [284]:
to_out[(to_out.fuel_type=="natural_gas") & (to_out.balancing_authority=="MISO") ].resample("Y", on="start_date").sum()

Unnamed: 0_level_0,net_generation_mwh
start_date,Unnamed: 1_level_1
2018-12-31 00:00:00+00:00,75986670.0
2019-12-31 00:00:00+00:00,176220400.0
2020-12-31 00:00:00+00:00,192221800.0
2021-12-31 00:00:00+00:00,167554600.0
2022-12-31 00:00:00+00:00,51560010.0


In [285]:
milo_dat.start_date = milo_dat.start_date.dt.tz_localize("UTC")
to_out = to_out.merge(milo_dat[['start_date','fuel_type','balancing_authority', 'co2_rate','so2_rate','nox_rate','co2e_rate']], 
    how='left', on=['start_date','fuel_type','balancing_authority'])

In [286]:
to_out["fuel_mix_framework"] = "consumed"
to_out["co2_mass_lbs"] = to_out["net_generation_mwh"]*to_out["co2_rate"]
to_out["so2_mass_lbs"] = to_out["net_generation_mwh"]*to_out["so2_rate"]
to_out["nox_mass_lbs"] = to_out["net_generation_mwh"]*to_out["nox_rate"]
to_out["co2e_mass_lbs"] = to_out["net_generation_mwh"]*to_out["co2e_rate"]

In [287]:
to_out.to_csv("~/Desktop/"+milo_fname.split("/")[-1].replace("generated","consumed"), 
columns=["start_date","balancing_authority","fuel_type","net_generation_mwh","fuel_mix_framework","co2_mass_lbs","so2_mass_lbs","nox_mass_lbs","co2e_mass_lbs"], index=False)

In [288]:
to_out[(to_out.fuel_type=="natural_gas") & (to_out.balancing_authority=="MISO") ].resample("Y", on="start_date").sum()

Unnamed: 0_level_0,net_generation_mwh,co2_rate,so2_rate,nox_rate,co2e_rate,co2_mass_lbs,so2_mass_lbs,nox_mass_lbs,co2e_mass_lbs
start_date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-12-31 00:00:00+00:00,75986670.0,5545.277393,0.098936,13.83044,5550.85441,70422770000.0,1228099.0,175694500.0,70493640000.0
2019-12-31 00:00:00+00:00,176220400.0,10832.795217,0.280272,27.82337,10843.830204,159399400000.0,3969868.0,409618700.0,159561700000.0
2020-12-31 00:00:00+00:00,192221800.0,11208.856959,0.237181,29.20561,11220.444391,179855100000.0,3772840.0,468898200.0,180041000000.0
2021-12-31 00:00:00+00:00,167554600.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2022-12-31 00:00:00+00:00,51560010.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [289]:
ba = "MISO" 
toplot = to_out.merge(milo_dat[["start_date", "balancing_authority", "fuel_type", "net_generation_mwh", "co2_mass_lbs"]], 
    how='left', on=["start_date", "balancing_authority", "fuel_type"], suffixes=("", "_orig"))
toplot = toplot[toplot.balancing_authority==ba]

In [290]:
px.line(toplot, x="start_date", y=["net_generation_mwh", "net_generation_mwh_orig"], facet_col="fuel_type", facet_col_wrap=2)