# Uncertainty analysis 

* Compare results to results using a flat profile 
* Calculate maximum uncertainty bands: between CEMS hourly data and CEMS + total non-CEMS capacity 

gailin - 7/21/2022

In [121]:
# Calculate min and max possible at each hour 
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os

In [39]:
%reload_ext autoreload
%autoreload 2

# Tell python where to look for modules. 
# Depending on how your jupyter handles working directories, this may not be needed.
import sys
sys.path.append('../../hourly-egrid/')

import src.load_data as load_data
import src.column_checks as column_checks

In [40]:
year = 2020 

In [41]:
ba = "MISO"

# Visualize flat vs. base result

In [42]:
flat = pd.read_csv(f"../data/results/flat/{year}/power_sector_data/hourly/us_units/{ba}.csv", parse_dates=["datetime_utc"])
base = pd.read_csv(f"../data/results/{year}/power_sector_data/hourly/us_units/{ba}.csv", parse_dates=["datetime_utc"])


In [43]:
flat = flat[flat.fuel_category=="total"]
base = base[base.fuel_category=="total"]

In [44]:
both = flat.merge(base, how='left', on='datetime_utc', suffixes=("_flat", "_base"))

In [46]:
both.columns

Index(['fuel_category_flat', 'datetime_local_flat', 'datetime_utc', 'net_generation_mwh_flat', 'fuel_consumed_mmbtu_flat', 'fuel_consumed_for_electricity_mmbtu_flat', 'co2_mass_lb_flat', 'ch4_mass_lb_flat', 'n2o_mass_lb_flat', 'co2e_mass_lb_flat', 'nox_mass_lb_flat', 'so2_mass_lb_flat', 'co2_mass_lb_for_electricity_flat', 'ch4_mass_lb_for_electricity_flat', 'n2o_mass_lb_for_electricity_flat', 'co2e_mass_lb_for_electricity_flat', 'nox_mass_lb_for_electricity_flat', 'so2_mass_lb_for_electricity_flat', 'co2_mass_lb_adjusted_flat', 'ch4_mass_lb_adjusted_flat', 'n2o_mass_lb_adjusted_flat', 'co2e_mass_lb_adjusted_flat', 'nox_mass_lb_adjusted_flat', 'so2_mass_lb_adjusted_flat', 'co2_mass_lb_for_electricity_adjusted_flat', 'ch4_mass_lb_for_electricity_adjusted_flat', 'n2o_mass_lb_for_electricity_adjusted_flat', 'co2e_mass_lb_for_electricity_adjusted_flat', 'nox_mass_lb_for_electricity_adjusted_flat', 'so2_mass_lb_for_electricity_adjusted_flat',
       'generated_co2_rate_lb_per_mwh_for_electri

In [48]:
fig = px.line(both, x="datetime_utc", y=['generated_co2e_rate_lb_per_mwh_for_electricity_adjusted_flat','generated_co2e_rate_lb_per_mwh_for_electricity_adjusted_base'], title=f"{ba}, carbon intensity using flat vs. base generation")
fig.show()
#newnames = {'generated_co2_rate_lb_per_mwh_adjusted': 'Our data', 'generated_co2_rate_lb_per_mwh_for_electricity_adjusted': 'Real-time data'}
#fig.for_each_trace(lambda t: t.update(name = newnames[t.name]))

# Draw min (CEMS-only) and max (CEMS + total non-CEMS capacity)

In [49]:
cems = pd.read_csv("../data/results/2020/plant_data/hourly/us_units/individual_plant_data.csv", parse_dates=["datetime_utc", "report_date"])
plant_meta = pd.read_csv("../data/results/2020/plant_data/plant_static_attributes.csv")

In [50]:
cems = cems.merge(plant_meta, how='left', on='plant_id_eia')

In [51]:
pudl_reader = load_data.initialize_pudl_out(year)

## Calculate maximum and minimum hourly emissions and generation 

### Generation 

* Min generation is CEMS generation 
* Max generation is CEMS generation + (plant capacity for EIA-only plants with positive 923 generation in this month)

### Emissions 

* Min emissions is CEMS emissions alone 
* Max emissions is CEMS generation + sum[(plant capacity for each plant with positive 923 generation in this month)x(monthly rate for that plant)] 

Note that this assumes (and asserts) that there are no EIA-only plants with negative 923 generation. 

In [232]:
caps = pudl_reader.gens_eia860()[["plant_id_eia", "capacity_mw", "summer_capacity_mw", "winter_capacity_mw"]].copy()
caps["max_capacity"] = caps[["capacity_mw", "summer_capacity_mw", "winter_capacity_mw"]].max(axis=1)
caps = caps.groupby("plant_id_eia").sum()["max_capacity"]

In [234]:
monthly_rates = pd.read_csv("../data/results/2020/plant_data/monthly/us_units/plant_data.csv", parse_dates=["report_date"])

In [235]:
monthly_rates = monthly_rates.merge(caps, how='left', left_on='plant_id_eia', right_index=True)
monthly_rates = monthly_rates.merge(plant_meta, how='left', on='plant_id_eia')

In [236]:
# Calculate per-plant, per-month maximum net generation and emissions: 
#   max net gen = capacity
#   max emissions = capacity * monthly rate
pols = ["co2", "ch4", "n2o", "co2e", "nox", "so2"]
for pol in pols:
    monthly_rates[f"max_hourly_{pol}"] = monthly_rates["max_capacity"] * (monthly_rates[f"{pol}_mass_lb_for_electricity_adjusted"]/monthly_rates["net_generation_mwh"])

In [237]:
# Filter for non-CEMS plants
# Filter for plants with non-zero generation in this month: we know that hourly generation shouldn't include the capacity of these plants
# Group by BA and month to get monthly max hourly emissions 
pol_cols = [f"max_hourly_{p}" for p in pols] + ["max_capacity"]
filtered_monthly_rates = monthly_rates[(monthly_rates.data_availability == "eia_only") & (monthly_rates.net_generation_mwh > 0)]
non_cems_maxes = filtered_monthly_rates.groupby(["report_date", "ba_code"])[pol_cols].sum().reset_index()

In [238]:
# Identify plants with negative net generation. 
# These will be included in net generation minimum, emissions maximum
negative_monthly = monthly_rates[(monthly_rates.data_availability == "eia_only") & (monthly_rates.net_generation_mwh < 0)]
assert len(negative_monthly) == 0


# ########### FAKE NEGATIVE PLANT FOR TESTING 
# negative_monthly = monthly_rates[(monthly_rates.plant_id_eia == 3)].copy()
# negative_monthly.loc[:,"net_generation_mwh"] = -300

In [239]:
cols = [f"{p}_mass_lb_for_electricity_adjusted" for p in pols] + ["net_generation_mwh"]
minimum_bound = cems.groupby(["datetime_utc", "ba_code"])[cols].sum().reset_index()

In [240]:
# Add report date back in (dropped during groupby)
report_dates = cems.groupby(["datetime_utc","ba_code"])["report_date"].first().reset_index()
minimum_bound = minimum_bound.merge(report_dates, how='left', on=["datetime_utc","ba_code"])

In [241]:
# Calculate maximum by merging hourly max emissions and generation into minimum, then adding 
maximum_bound = minimum_bound.merge(non_cems_maxes, how='left', on=["report_date", "ba_code"])

In [243]:
for pol in pols:
    maximum_bound[f"{pol}_mass_lb_for_electricity_adjusted"] = maximum_bound[f"{pol}_mass_lb_for_electricity_adjusted"] +  maximum_bound[f"max_hourly_{pol}"]
maximum_bound["net_generation_mwh"] = maximum_bound["net_generation_mwh"] + maximum_bound["max_capacity"]
maximum_bound = maximum_bound[cols + ["datetime_utc", "ba_code"]]

# Plot max and min bounds

In [278]:
ba = "DEAA"

In [279]:
flat = pd.read_csv(f"../data/results/flat/2020/power_sector_data/hourly/us_units/{ba}.csv", parse_dates=["datetime_utc"])
base = pd.read_csv(f"../data/results/2020/power_sector_data/hourly/us_units/{ba}.csv", parse_dates=["datetime_utc"])

In [280]:
# Only take totals 
flat = flat[flat.fuel_category == "total"]
base = base[base.fuel_category == "total"]

In [281]:
this_max = maximum_bound[maximum_bound.ba_code==ba]
this_min = minimum_bound[minimum_bound.ba_code==ba]

fig = go.Figure()
fig.add_trace(go.Scatter(x=this_min.datetime_utc, y=this_min.net_generation_mwh,
    fill=None,
    mode='lines',
    line_color='indigo', showlegend=False
    ))
fig.add_trace(go.Scatter(
    x=this_max.datetime_utc,
    y=this_max.net_generation_mwh,
    fill='tonexty', # fill area between trace0 and trace1
    mode='lines', line_color='indigo', name="Min/max possible"))

fig.add_trace(go.Scatter(
    x=flat.datetime_utc,
    y=flat.net_generation_mwh,
    mode='lines', line_color='brown', name="Flat"))

fig.add_trace(go.Scatter(
    x=base.datetime_utc,
    y=base.net_generation_mwh,
    mode='lines', line_color='blue', name="Best guess"))

fig.update_layout(
    title=f"{ba} total net generation estimates",
    xaxis_title="Hour",
    yaxis_title="Net generation (MWh)",
    legend_title="Estimate type",
)

fig.show()


In [282]:
this_max = maximum_bound[maximum_bound.ba_code==ba]
this_min = minimum_bound[minimum_bound.ba_code==ba]

fig = go.Figure()
fig.add_trace(go.Scatter(x=this_min.datetime_utc, y=this_min.co2_mass_lb_for_electricity_adjusted,
    fill=None,
    mode='lines',
    line_color='indigo', showlegend=False
    ))
fig.add_trace(go.Scatter(
    x=this_max.datetime_utc,
    y=this_max.co2_mass_lb_for_electricity_adjusted,
    fill='tonexty', # fill area between trace0 and trace1
    mode='lines', line_color='indigo', name="Min/max possible"))

fig.add_trace(go.Scatter(
    x=flat.datetime_utc,
    y=flat.co2_mass_lb_for_electricity_adjusted,
    mode='lines', line_color='brown', name="Flat"))

fig.add_trace(go.Scatter(
    x=base.datetime_utc,
    y=base.co2_mass_lb_for_electricity_adjusted,
    mode='lines', line_color='blue', name="Best guess"))

fig.update_layout(
    title=f"{ba} total carbon emissions",
    xaxis_title="Hour",
    yaxis_title="CO2 (lb)",
    legend_title="Estimate type",
)

fig.show()


# Where do our monthly generation / emissions exceed min/max bounds? 

In [250]:
# "net_generation_mwh" or "co2_mass_lb_for_electricity_adjusted"
col_to_check = "net_generation_mwh"

issues = []
for ba in os.listdir("../data/results/2020/power_sector_data/hourly/us_units/"):
    if ".DS_Store" in ba: 
        continue
    our_guess = pd.read_csv(f"../data/results/2020/power_sector_data/hourly/us_units/{ba}", parse_dates=["datetime_utc"])
    our_guess = our_guess[our_guess.fuel_category == "total"]
    ba = ba.replace(".csv", "")
    to_compare = our_guess[["datetime_utc", "co2_mass_lb_for_electricity_adjusted", "net_generation_mwh"]].merge(minimum_bound.loc[minimum_bound.ba_code==ba,["datetime_utc", "co2_mass_lb_for_electricity_adjusted", "net_generation_mwh"]],
        how='left',
        on='datetime_utc',
        suffixes=("","_min")
    )
    to_compare = to_compare.merge(maximum_bound.loc[maximum_bound.ba_code==ba,["datetime_utc", "co2_mass_lb_for_electricity_adjusted", "net_generation_mwh"]],
        how='left',
        on='datetime_utc',
        suffixes=("_best","_max")
    )
    to_compare["ok"] = (to_compare[f"{col_to_check}_min"] > to_compare[f"{col_to_check}_best"]) | (to_compare[f"{col_to_check}_best"] > to_compare[f"{col_to_check}_max"])
    issues.append(to_compare[["datetime_utc", "ok"]].set_index("datetime_utc").squeeze().rename(ba))


In [251]:
all_issues = pd.concat(issues, axis=1)

In [252]:
all_issues.sum(axis=0).sort_values(ascending=False).head(20)

LGEE    5389
SEC     4343
AEC     2002
TAL     1461
OHMS     720
FPL      697
FMPP     215
TVA      171
GVL       57
SRP       34
AVRN      33
AZPS      21
PJM       15
TEC       11
NEVP      10
TIDC       8
EEI        7
WACM       2
PACW       1
AECI       1
dtype: object

In [283]:
monthly_rates[monthly_rates.ba_code=="DEAA"]

Unnamed: 0.1,plant_id_eia,report_date,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb,co2e_mass_lb,nox_mass_lb,so2_mass_lb,co2_mass_lb_for_electricity,ch4_mass_lb_for_electricity,n2o_mass_lb_for_electricity,co2e_mass_lb_for_electricity,nox_mass_lb_for_electricity,so2_mass_lb_for_electricity,co2_mass_lb_adjusted,ch4_mass_lb_adjusted,n2o_mass_lb_adjusted,co2e_mass_lb_adjusted,nox_mass_lb_adjusted,so2_mass_lb_adjusted,co2_mass_lb_for_electricity_adjusted,ch4_mass_lb_for_electricity_adjusted,n2o_mass_lb_for_electricity_adjusted,co2e_mass_lb_for_electricity_adjusted,nox_mass_lb_for_electricity_adjusted,so2_mass_lb_for_electricity_adjusted,max_capacity,Unnamed: 0,plant_primary_fuel,data_availability,ba_code,ba_code_physical,state,distribution_flag,fuel_category,fuel_category_eia930,timezone,max_hourly_co2,max_hourly_ch4,max_hourly_n2o,max_hourly_co2e,max_hourly_nox,max_hourly_so2
45002,55282,2020-01-01,137843.84,1019058.12,1019058.12,121122800.0,2241.93,224.19,121265800.0,8237.06,611.47,121122800.0,2241.93,224.19,121265800.0,8237.06,611.47,121122800.0,2241.93,224.19,121265800.0,8237.06,611.47,121122800.0,2241.93,224.19,121265800.0,8237.06,611.47,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,626510.070414,11.596427,1.159627,627249.921792,42.606356,3.162841
45003,55282,2020-02-01,237234.54,1715569.6,1715569.6,203908800.0,3774.25,377.43,204149600.0,12712.72,1029.41,203908800.0,3774.25,377.43,204149600.0,12712.72,1029.41,203908800.0,3774.25,377.43,204149600.0,12712.72,1029.41,203908800.0,3774.25,377.43,204149600.0,12712.72,1029.41,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,612840.624897,11.343375,1.134352,613564.332796,38.207629,3.093855
45004,55282,2020-03-01,-6248.29,3296.69,3296.69,391786.9,7.25,0.73,392249.6,34.06,1.98,391786.9,7.25,0.73,392249.6,34.06,1.98,391786.9,7.25,0.73,392249.6,34.06,1.98,391786.9,7.25,0.73,392249.6,34.06,1.98,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,-44707.283746,-0.827306,-0.083301,-44760.085291,-3.886628,-0.22594
45005,55282,2020-04-01,58467.57,461438.62,461438.62,54844250.0,1015.16,101.52,54909020.0,5313.03,276.88,54844250.0,1015.16,101.52,54909020.0,5313.03,276.88,54844250.0,1015.16,101.52,54909020.0,5313.03,276.88,54844250.0,1015.16,101.52,54909020.0,5313.03,276.88,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,668814.349454,12.379668,1.238016,669604.176049,64.791309,3.376495
45006,55282,2020-05-01,132974.34,967428.01,967428.01,114985400.0,2128.34,212.83,115121100.0,9094.36,580.48,114985400.0,2128.34,212.83,115121100.0,9094.36,580.48,114985400.0,2128.34,212.83,115121100.0,9094.36,580.48,114985400.0,2128.34,212.83,115121100.0,9094.36,580.48,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,616544.191394,11.412024,1.141181,617272.279079,48.763383,3.112497
45007,55282,2020-06-01,208547.65,1519402.5,1519402.5,180591800.0,3342.69,334.27,180805100.0,12790.48,911.69,180591800.0,3342.69,334.27,180805100.0,12790.48,911.69,180591800.0,3342.69,334.27,180805100.0,12790.48,911.69,180591800.0,3342.69,334.27,180805100.0,12790.48,911.69,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,617422.220467,11.428266,1.14283,618151.342825,43.729154,3.116961
45008,55282,2020-07-01,252541.74,1846785.36,1846785.36,219501300.0,4062.93,406.29,219760500.0,15175.32,1108.13,219501300.0,4062.93,406.29,219760500.0,15175.32,1108.13,219501300.0,4062.93,406.29,219760500.0,15175.32,1108.13,219501300.0,4062.93,406.29,219760500.0,15175.32,1108.13,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,619717.07948,11.470853,1.147077,620448.919473,42.844415,3.128579
45009,55282,2020-08-01,254215.57,1890147.08,1890147.08,224658300.0,4158.32,415.83,224923600.0,13496.8,1134.15,224658300.0,4158.32,415.83,224923600.0,13496.8,1134.15,224658300.0,4158.32,415.83,224923600.0,13496.8,1134.15,224658300.0,4158.32,415.83,224923600.0,13496.8,1134.15,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,630100.61251,11.662866,1.166281,630844.703998,37.854559,3.180958
45010,55282,2020-09-01,190639.54,1421616.81,1421616.81,168971600.0,3127.56,312.76,169171100.0,10937.96,853.02,168971600.0,3127.56,312.76,169171100.0,10937.96,853.02,168971600.0,3127.56,312.76,169171100.0,10937.96,853.02,168971600.0,3127.56,312.76,169171100.0,10937.96,853.02,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,631960.914757,11.697208,1.169736,632707.195862,40.908436,3.190331
45011,55282,2020-10-01,336900.09,2402470.47,2402470.47,285551600.0,5285.44,528.54,285888900.0,16746.1,1441.56,285551600.0,5285.44,528.54,285888900.0,16746.1,1441.56,285551600.0,5285.44,528.54,285888900.0,16746.1,1441.56,285551600.0,5285.44,528.54,285888900.0,16746.1,1441.56,713.0,3843,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix,604328.496372,11.185864,1.118578,605042.15386,35.440683,3.050852
