# Uncertainty analysis 

* Compare the base results to the results generated using a flat profile 
* Calculate maximum uncertainty bands: between CEMS hourly data and CEMS + total non-CEMS capacity 

gailin - 7/14/2022

In [38]:
# Imports used to calculate and plot the min and max possible at each hour
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [39]:
%reload_ext autoreload
%autoreload 2

# Tell python where to look for modules. 
# Depending on how your jupyter handles working directories, this may not be needed.
import sys
sys.path.append('../../hourly-egrid/')

import src.load_data as load_data
import src.column_checks as column_checks

In [40]:
# Data year: selects the results directory to read and is passed to the PUDL reader below.
year = 2020

In [41]:
# Balancing authority code; used as the per-BA results file name and in the plot title.
ba = "MISO"

# Visualize flat vs. base result

In [42]:
# Hourly power-sector results for the selected BA: the flat-profile run and the base run.
flat_path = f"../data/results/flat/{year}/power_sector_data/hourly/us_units/{ba}.csv"
base_path = f"../data/results/{year}/power_sector_data/hourly/us_units/{ba}.csv"

flat = pd.read_csv(flat_path, parse_dates=["datetime_utc"])
base = pd.read_csv(base_path, parse_dates=["datetime_utc"])


In [43]:
# Keep only the rows whose fuel_category is "total"
flat = flat.loc[flat["fuel_category"] == "total"]
base = base.loc[base["fuel_category"] == "total"]

In [44]:
# Line up the two runs hour by hour; columns present in both get a _flat / _base suffix.
both = pd.merge(
    flat,
    base,
    how="left",
    on="datetime_utc",
    suffixes=("_flat", "_base"),
)

In [46]:
# Inspect the merged column names (columns shared by both runs carry a _flat / _base suffix)
both.columns

Index(['fuel_category_flat', 'datetime_local_flat', 'datetime_utc', 'net_generation_mwh_flat', 'fuel_consumed_mmbtu_flat', 'fuel_consumed_for_electricity_mmbtu_flat', 'co2_mass_lb_flat', 'ch4_mass_lb_flat', 'n2o_mass_lb_flat', 'co2e_mass_lb_flat', 'nox_mass_lb_flat', 'so2_mass_lb_flat', 'co2_mass_lb_for_electricity_flat', 'ch4_mass_lb_for_electricity_flat', 'n2o_mass_lb_for_electricity_flat', 'co2e_mass_lb_for_electricity_flat', 'nox_mass_lb_for_electricity_flat', 'so2_mass_lb_for_electricity_flat', 'co2_mass_lb_adjusted_flat', 'ch4_mass_lb_adjusted_flat', 'n2o_mass_lb_adjusted_flat', 'co2e_mass_lb_adjusted_flat', 'nox_mass_lb_adjusted_flat', 'so2_mass_lb_adjusted_flat', 'co2_mass_lb_for_electricity_adjusted_flat', 'ch4_mass_lb_for_electricity_adjusted_flat', 'n2o_mass_lb_for_electricity_adjusted_flat', 'co2e_mass_lb_for_electricity_adjusted_flat', 'nox_mass_lb_for_electricity_adjusted_flat', 'so2_mass_lb_for_electricity_adjusted_flat',
       'generated_co2_rate_lb_per_mwh_for_electri

In [48]:
# Compare the hourly CO2e rate (lb/MWh) from the flat-profile run with the base run.
rate_col = "generated_co2e_rate_lb_per_mwh_for_electricity_adjusted"
legend_names = {f"{rate_col}_flat": "Flat", f"{rate_col}_base": "Best guess"}

fig = px.line(
    both,
    x="datetime_utc",
    y=list(legend_names),
    title=f"{ba}, carbon intensity using flat vs. base generation",
    # With several y columns plotly express names the axes "value" / "variable";
    # relabel them so the figure can be read on its own.
    labels={
        "datetime_utc": "Hour",
        "value": "CO2e rate (lb/MWh)",
        "variable": "Estimate type",
    },
)
# Show short legend entries instead of the long column names. This must run
# before fig.show() to affect the rendered figure.
fig.for_each_trace(lambda t: t.update(name=legend_names[t.name]))
fig.show()

# Draw min (CEMS-only) and max (CEMS + total non-CEMS capacity)

In [49]:
# Hourly CEMS plant data and static plant attributes for the configured `year`
# (previously hard-coded to 2020, which would silently mix years if `year` changed).
cems = pd.read_csv(
    f"../data/results/{year}/plant_data/hourly/us_units/CEMS_plant_data.csv",
    parse_dates=["datetime_utc", "report_date"],
)
plant_meta = pd.read_csv(f"../data/results/{year}/plant_data/plant_static_attributes.csv")

In [50]:
# Attach the static plant attributes to every hourly CEMS row
cems = pd.merge(cems, plant_meta, how="left", on="plant_id_eia")

In [51]:
# Build the PUDL output object for `year` through the project's load_data helper;
# its gens_eia860() table is read below to get generator capacities.
pudl_reader = load_data.initialize_pudl_out(year)

### Calculate maximum and minimum hourly generation and emissions

In [52]:
# Total capacity per plant: sum capacity_mw over every generator row of that plant.
# NOTE(review): assumes gens_eia860() returns one row per generator for `year`;
# if it holds several report dates the sum would double count — confirm.
generator_caps = pudl_reader.gens_eia860().loc[:, ["plant_id_eia", "capacity_mw"]]
caps = generator_caps.groupby("plant_id_eia").sum()

In [79]:
# Monthly plant-level results for the configured `year` (previously hard-coded to 2020).
monthly_rates = pd.read_csv(
    f"../data/results/{year}/plant_data/monthly/us_units/plant_data.csv",
    parse_dates=["report_date"],
)

In [80]:
# Add each plant's total capacity (caps is indexed by plant_id_eia) and its static attributes
monthly_rates = (
    monthly_rates
    .merge(caps, how="left", left_on="plant_id_eia", right_index=True)
    .merge(plant_meta, how="left", on="plant_id_eia")
)

In [81]:
# Per-plant, per-month upper bounds for a single hour:
#   max net generation = capacity
#   max emissions      = capacity * (monthly emissions / monthly net generation)
pols = ["co2", "ch4", "n2o", "co2e", "nox", "so2"]
plant_capacity = monthly_rates["capacity_mw"]
monthly_net_gen = monthly_rates["net_generation_mwh"]
for pol in pols:
    monthly_rate = monthly_rates[f"{pol}_mass_lb_for_electricity_adjusted"].div(monthly_net_gen)
    monthly_rates[f"max_hourly_{pol}"] = plant_capacity.mul(monthly_rate)

In [82]:
# Keep only non-CEMS ("eia_only") plants that generated something in the month:
# plants with no generation that month cannot add capacity to any hour of it.
# Then total the per-plant maxima by month and BA.
pol_cols = [f"max_hourly_{p}" for p in pols]
pol_cols.append("capacity_mw")

is_eia_only = monthly_rates["data_availability"] == "eia_only"
has_generation = monthly_rates["net_generation_mwh"] > 0
filtered_monthly_rates = monthly_rates[is_eia_only & has_generation]

monthly_by_ba = filtered_monthly_rates.groupby(["report_date", "ba_code"])
non_cems_maxes = monthly_by_ba[pol_cols].sum().reset_index()

In [83]:
# Lower bound: the CEMS-reported totals for each hour and BA
mass_cols = [f"{p}_mass_lb" for p in pols]
cols = mass_cols + ["net_generation_mwh"]
cems_by_hour_ba = cems.groupby(["datetime_utc", "ba_code"])
minimum_bound = cems_by_hour_ba[cols].sum().reset_index()

In [84]:
# report_date is not among the summed columns, so recover it per hour and BA
# (first value in each group) and attach it to the lower bound.
hour_ba_keys = ["datetime_utc", "ba_code"]
report_dates = cems.groupby(hour_ba_keys)["report_date"].first().reset_index()
minimum_bound = pd.merge(minimum_bound, report_dates, how="left", on=hour_ba_keys)

In [85]:
# Start the upper bound from the lower bound, bringing in that month's non-CEMS
# maxima for the same BA; the addition itself happens in the next step.
maximum_bound = pd.merge(
    minimum_bound,
    non_cems_maxes,
    how="left",
    on=["report_date", "ba_code"],
)

In [86]:
# Upper bound = CEMS value + the most the non-CEMS plants could add in one hour.
# The left merge leaves NaN where a BA-month has no qualifying non-CEMS plants;
# treat that as "nothing to add" so the upper bound falls back to the CEMS value
# instead of turning into NaN.
for pol in pols:
    maximum_bound[f"{pol}_mass_lb"] = (
        maximum_bound[f"{pol}_mass_lb"] + maximum_bound[f"max_hourly_{pol}"].fillna(0)
    )
# Capacity (MW) sustained for one hour is the maximum extra generation (MWh) in that hour.
maximum_bound["net_generation_mwh"] = (
    maximum_bound["net_generation_mwh"] + maximum_bound["capacity_mw"].fillna(0)
)
# Keep only the bound columns. Take an explicit copy so that adding columns
# later does not raise SettingWithCopyWarning.
maximum_bound = maximum_bound[cols + ["datetime_utc", "ba_code"]].copy()

In [87]:
# Emission rate (lb/MWh) for each pollutant, on both the upper and the lower bound
for bound in (maximum_bound, minimum_bound):
    for pol in pols:
        bound[f"{pol}_rate_lb_per_mwh"] = bound[f"{pol}_mass_lb"] / bound["net_generation_mwh"]

# Plot max and min bounds

In [88]:
# Balancing authority to plot below (replaces the value of `ba` set earlier in the notebook).
ba = "PJM"

In [89]:
# Reload the flat-profile and base hourly results for the BA chosen above, using
# the configured `year` (previously hard-coded to 2020) so all inputs stay in sync.
flat = pd.read_csv(
    f"../data/results/flat/{year}/power_sector_data/hourly/us_units/{ba}.csv",
    parse_dates=["datetime_utc"],
)
base = pd.read_csv(
    f"../data/results/{year}/power_sector_data/hourly/us_units/{ba}.csv",
    parse_dates=["datetime_utc"],
)

In [90]:
# Restrict both runs to the "total" fuel category
flat = flat.loc[flat["fuel_category"].eq("total")]
base = base.loc[base["fuel_category"].eq("total")]

In [91]:
# Hourly net generation for the selected BA: shaded min/max band with the
# flat-profile and base ("best guess") estimates drawn on top.
this_min = minimum_bound.loc[minimum_bound["ba_code"] == ba]
this_max = maximum_bound.loc[maximum_bound["ba_code"] == ba]

band_floor = go.Scatter(
    x=this_min["datetime_utc"],
    y=this_min["net_generation_mwh"],
    fill=None,
    mode='lines',
    line=dict(color='indigo'),
    showlegend=False,
)
band_ceiling = go.Scatter(
    x=this_max["datetime_utc"],
    y=this_max["net_generation_mwh"],
    fill='tonexty',  # shade down to the previous trace (the band floor)
    mode='lines',
    line=dict(color='indigo'),
    name="Min/max possible",
)
flat_line = go.Scatter(
    x=flat["datetime_utc"],
    y=flat["net_generation_mwh"],
    mode='lines',
    line=dict(color='brown'),
    name="Flat",
)
base_line = go.Scatter(
    x=base["datetime_utc"],
    y=base["net_generation_mwh"],
    mode='lines',
    line=dict(color='blue'),
    name="Best guess",
)

# Trace order matters: the ceiling must directly follow the floor for 'tonexty'.
fig = go.Figure(data=[band_floor, band_ceiling, flat_line, base_line])
fig.update_layout(
    title=f"{ba} total net generation estimates",
    xaxis_title="Hour",
    yaxis_title="Net generation (MWh)",
    legend_title="Estimate type",
)

fig.show()


In [92]:
# Hourly CO2 mass for the selected BA: shaded min/max band with the
# flat-profile and base ("best guess") adjusted CO2 estimates drawn on top.
in_ba_min = minimum_bound["ba_code"] == ba
in_ba_max = maximum_bound["ba_code"] == ba
this_min = minimum_bound[in_ba_min]
this_max = maximum_bound[in_ba_max]

fig = go.Figure()
fig.add_traces([
    # Lower edge of the band (kept out of the legend)
    go.Scatter(
        x=this_min["datetime_utc"],
        y=this_min["co2_mass_lb"],
        fill=None,
        mode='lines',
        line=dict(color='indigo'),
        showlegend=False,
    ),
    # Upper edge; 'tonexty' shades the area down to the trace added just before it
    go.Scatter(
        x=this_max["datetime_utc"],
        y=this_max["co2_mass_lb"],
        fill='tonexty',
        mode='lines',
        line=dict(color='indigo'),
        name="Min/max possible",
    ),
    go.Scatter(
        x=flat["datetime_utc"],
        y=flat["co2_mass_lb_adjusted"],
        mode='lines',
        line=dict(color='brown'),
        name="Flat",
    ),
    go.Scatter(
        x=base["datetime_utc"],
        y=base["co2_mass_lb_adjusted"],
        mode='lines',
        line=dict(color='blue'),
        name="Best guess",
    ),
])

fig.update_layout(
    title=f"{ba} total carbon emissions",
    xaxis_title="Hour",
    yaxis_title="CO2 (lb)",
    legend_title="Estimate type",
)

fig.show()
