In [1]:
# import packages
import numpy as np
import pandas as pd
import os
import plotly.express as px

%reload_ext autoreload
%autoreload 2

# Make the open-grid-emissions source directory importable. The path is relative,
# so it is resolved against the kernel's current working directory.
import sys
sys.path.append('../../../open-grid-emissions/src/')

# Import project-local modules (presumably found via the sys.path entry added above).
import load_data
import validate_egrid
# NOTE(review): the star import hides which names this notebook depends on;
# downloads_folder() and results_folder(), called further down, presumably come from here - confirm.
from filepaths import *
from logging_util import get_logger, configure_root_logger
# Configure logging once, then create the logger used by this notebook.
configure_root_logger()
logger = get_logger("egrid_test")

In [11]:
# data year to validate
year = 2021
# Absolute tolerance, in percentage points: BAs whose compared columns are all
# within this distance of zero are hidden from the BA-level comparison table.
pct_diff_threshold = 0.5

# load plant data
oge_plant = validate_egrid.load_oge_plant_data(year)
egrid_plant = validate_egrid.load_egrid_plant_file(year)

# columns compared between OGE and eGRID
columns_to_compare = [
    "net_generation_mwh",
    "fuel_consumed_mmbtu",
    "co2_mass_lb",
]

# load the OGE BA-level data, and aggregate the eGRID plant data up to the BA level
oge_ba = validate_egrid.load_oge_ba_data(year, columns_to_compare)
egrid_ba = validate_egrid.create_egrid_ba_from_plant_data(egrid_plant, columns_to_compare)


In [10]:
# compare ba-level data


def count_plants_with_data(plant_data, name):
    """Count the distinct plants in each BA that report any data.

    A plant is skipped when its net generation, fuel consumed, fuel consumed for
    electricity and CO2 mass columns sum to zero across the row.

    Args:
        plant_data: plant-level frame with "ba_code", "plant_id_eia" and the four
            value columns listed below.
        name: name given to the returned Series.

    Returns:
        Series indexed by ba_code (a missing ba_code is kept as its own group)
        holding the number of unique plant_id_eia values.
    """
    value_columns = [
        "net_generation_mwh",
        "fuel_consumed_mmbtu",
        "fuel_consumed_for_electricity_mmbtu",
        "co2_mass_lb",
    ]
    has_data = plant_data[value_columns].sum(axis=1) != 0
    return (
        plant_data[has_data]
        .groupby("ba_code", dropna=False)["plant_id_eia"]
        .nunique()
        .rename(name)
    )


# divide our calculation by the BA totals from eGRID and express it as a fractional difference
# zeros are replaced with 0.1 on both sides, so that 0 / 0 gives a ratio of 1 (no difference)
# instead of a missing value
ba_comparison = oge_ba.replace(0, 0.1).div(egrid_ba.replace(0, 0.1)).round(3) - 1

# the same ratio for the sum over all BAs, shaped as a single row labelled "Total"
total = pd.DataFrame(oge_ba.sum().div(egrid_ba.sum()).rename("Total")).T - 1

# calculate the number of plants with data in each region, for each data source
oge_plant_count = count_plants_with_data(oge_plant, "num_plants_oge")
egrid_plant_count = count_plants_with_data(egrid_plant, "num_plants_egrid")

# attach both plant counts, then order the BAs by the difference in emission rate
# NOTE(review): generated_co2_rate_lb_per_mwh is not in columns_to_compare, so it is
# presumably added by the BA loading helpers - confirm
ba_comparison = (
    ba_comparison.merge(oge_plant_count, how="left", left_index=True, right_index=True)
    .merge(egrid_plant_count, how="left", left_index=True, right_index=True)
    .sort_index()
    .sort_values(by=["generated_co2_rate_lb_per_mwh"], ascending=True)
)

ba_comparison = pd.concat([ba_comparison, total], axis=0).round(3)

columns_to_check = [
    "net_generation_mwh",
    "fuel_consumed_mmbtu",
    "co2_mass_lb",
    "generated_co2_rate_lb_per_mwh",
]

# convert the fractional differences to percent
ba_comparison[columns_to_check] = ba_comparison[columns_to_check] * 100

# ignore miscellaneous BAs (codes containing "MS"); na=False keeps a row whose
# index label is missing instead of failing when the mask is negated
ba_comparison = ba_comparison[~ba_comparison.index.str.contains("MS", na=False)]

# hide rows where every checked column is within the threshold of zero
ba_comparison = ba_comparison[
    ~np.isclose(ba_comparison[columns_to_check], 0, atol=pct_diff_threshold).all(axis=1)
]

with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(ba_comparison)


Unnamed: 0,net_generation_mwh,fuel_consumed_mmbtu,co2_mass_lb,generated_co2_rate_lb_per_mwh,num_plants_oge,num_plants_egrid
HST,0.7,0.2,-13.0,-13.5,1.0,1.0
PGE,22.5,9.0,12.1,-8.5,73.0,73.0
IID,0.0,0.0,-8.2,-8.2,54.0,54.0
HECO,18.8,18.2,14.3,-3.8,42.0,31.0
CISO,3.9,-0.3,0.7,-3.1,1276.0,1269.0
PACW,0.0,-1.0,-1.9,-1.9,99.0,99.0
FMPP,1.5,0.2,0.1,-1.4,14.0,14.0
PNM,1.3,0.0,0.0,-1.3,75.0,75.0
PACE,0.0,-0.7,-0.4,-0.4,154.0,154.0
SCEG,1.2,1.4,1.5,0.3,63.0,63.0


In [None]:
# compare plant level data

# Run the plant-by-plant comparison against eGRID for all plants. The helper's
# result is unpacked into two names; only the first is displayed by this cell.
# NOTE(review): PLANTS_MISSING_FROM_EGRID is not defined in the visible cells -
# presumably it arrives through `from filepaths import *`; confirm before running.
plant_level_result = validate_egrid.compare_plant_level_results_to_egrid(
    oge_plant, egrid_plant, PLANTS_MISSING_FROM_EGRID
)
comparison_count, compared = plant_level_result
comparison_count

In [13]:
# inspect the OGE plant-level frame (the bare last expression is rendered as the cell output)
oge_plant

Unnamed: 0,plant_id_eia,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb,co2e_mass_lb,nox_mass_lb,so2_mass_lb,co2_mass_lb_for_electricity,ch4_mass_lb_for_electricity,n2o_mass_lb_for_electricity,co2e_mass_lb_for_electricity,nox_mass_lb_for_electricity,so2_mass_lb_for_electricity,co2_mass_lb_adjusted,ch4_mass_lb_adjusted,n2o_mass_lb_adjusted,co2e_mass_lb_adjusted,nox_mass_lb_adjusted,so2_mass_lb_adjusted,co2_mass_lb_for_electricity_adjusted,ch4_mass_lb_for_electricity_adjusted,n2o_mass_lb_for_electricity_adjusted,co2e_mass_lb_for_electricity_adjusted,nox_mass_lb_for_electricity_adjusted,so2_mass_lb_for_electricity_adjusted,plant_primary_fuel,data_availability,ba_code,ba_code_physical,state,distribution_flag,fuel_category,fuel_category_eia930,timezone,shaped_plant_id,plant_id_egrid
0,1,2863.10,36674.00,36674.00,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,DFO,eia_only,AKMS,AKMS,AK,True,petroleum,petroleum,America/Anchorage,990208.0,1
1,2,215445.90,1905189.00,1905189.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,WAT,eia_only,SOCO,SOCO,AL,False,hydro,hydro,America/Chicago,917904.0,2
2,3,11792286.22,97913748.98,97913748.98,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,NG,cems_and_eia,SOCO,SOCO,AL,False,natural_gas,natural_gas,America/Chicago,,3
3,4,392133.90,3467638.80,3467638.80,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,WAT,eia_only,SOCO,SOCO,AL,False,hydro,hydro,America/Chicago,917904.0,4
4,7,-5901.00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,NG,eia_only,SOCO,SOCO,AL,False,natural_gas,natural_gas,America/Chicago,917905.0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11047,65731,2539.90,22463.00,22463.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,SUN,eia_only,MISO,MISO,IL,True,solar,solar,America/Chicago,912909.0,65731
11048,65732,2582.00,22833.00,22833.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,SUN,eia_only,MISO,MISO,IL,True,solar,solar,America/Chicago,912909.0,65732
11049,65740,0.00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,SUN,eia_only,CISO,CISO,CA,True,solar,solar,America/Los_Angeles,903909.0,65740
11050,65767,469.20,5201.50,5201.50,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,DFO,eia_only,AKMS,AKMS,AK,True,petroleum,petroleum,America/Juneau,990208.0,65767


In [32]:
# plant-level values from each source, indexed by plant id and rounded to one decimal
oge_to_compare = oge_plant.set_index("plant_id_eia")[columns_to_compare].round(1)
egrid_to_compare = egrid_plant.set_index("plant_id_eia")[columns_to_compare].round(1)

# fractional difference of OGE relative to eGRID, aligned on plant id
oge_egrid_plant_comparison = (
    ((oge_to_compare - egrid_to_compare) / egrid_to_compare)
    .round(4)
    .add_suffix("_pct_diff")
)

# merge data from each frame, so the underlying values sit next to the differences
oge_egrid_data_combined = oge_to_compare.merge(
    egrid_to_compare,
    how="outer",
    left_index=True,
    right_index=True,
    suffixes=("_oge", "_egrid"),
)
oge_egrid_plant_comparison = oge_egrid_plant_comparison.merge(
    oge_egrid_data_combined,
    how="outer",
    left_index=True,
    right_index=True,
)

# for each column, change missing values to zero if both values are zero (only nan b/c divide by zero)
for col in columns_to_compare:
    # select the affected rows directly with a boolean mask
    both_zero = (oge_egrid_plant_comparison[f"{col}_oge"] == 0) & (
        oge_egrid_plant_comparison[f"{col}_egrid"] == 0
    )
    oge_egrid_plant_comparison.loc[both_zero, f"{col}_pct_diff"] = 0

# add some other data
# NOTE(review): joining the left index against a right-hand column means the result
# is no longer indexed by plant id (the saved output shows 0..N row labels with
# plant_id_eia as a regular column). Any later index-on-index merge keyed by plant id
# would need .set_index("plant_id_eia") first - confirm the intended index.
oge_egrid_plant_comparison = oge_egrid_plant_comparison.merge(
    oge_plant[["plant_id_eia", "ba_code", "state", "data_availability"]],
    how="left",
    left_index=True,
    right_on="plant_id_eia",
)

oge_egrid_plant_comparison

Unnamed: 0,net_generation_mwh_pct_diff,fuel_consumed_mmbtu_pct_diff,co2_mass_lb_pct_diff,net_generation_mwh_oge,fuel_consumed_mmbtu_oge,co2_mass_lb_oge,net_generation_mwh_egrid,fuel_consumed_mmbtu_egrid,co2_mass_lb_egrid,plant_id_eia,ba_code,state,data_availability
0,0.0000,0.0000,-0.0000,2863.1,36674.0,5.796227e+06,2863.0,36674.0,5.796432e+06,1,AKMS,AK,eia_only
1,-0.0000,0.0000,0.0000,215445.9,1905189.0,0.000000e+00,215446.0,1905189.0,0.000000e+00,2,SOCO,AL,eia_only
2,0.0000,0.0000,0.0000,11792286.2,97913749.0,1.515480e+10,11792286.0,97913749.0,1.515480e+10,3,SOCO,AL,cems_and_eia
3,-0.0000,-0.0000,0.0000,392133.9,3467638.8,0.000000e+00,392134.0,3467639.0,0.000000e+00,4,SOCO,AL,eia_only
4,-0.0000,,,-5901.0,0.0,0.000000e+00,-5901.0,,,7,SOCO,AL,eia_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...
11047,-0.0000,0.0000,0.0000,2539.9,22463.0,0.000000e+00,2540.0,22463.0,0.000000e+00,65731,MISO,IL,eia_only
11048,0.0000,0.0000,0.0000,2582.0,22833.0,0.000000e+00,2582.0,22833.0,0.000000e+00,65732,MISO,IL,eia_only
11049,,,,0.0,0.0,0.000000e+00,,,,65740,CISO,CA,eia_only
11050,0.0004,0.0001,0.0001,469.2,5201.5,8.495087e+05,469.0,5201.0,8.494580e+05,65767,AKMS,AK,eia_only


In [31]:
# inspect the OGE plant-level frame again (same expression as an earlier cell in this notebook)
oge_plant

Unnamed: 0,plant_id_eia,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb,co2e_mass_lb,nox_mass_lb,so2_mass_lb,co2_mass_lb_for_electricity,ch4_mass_lb_for_electricity,n2o_mass_lb_for_electricity,co2e_mass_lb_for_electricity,nox_mass_lb_for_electricity,so2_mass_lb_for_electricity,co2_mass_lb_adjusted,ch4_mass_lb_adjusted,n2o_mass_lb_adjusted,co2e_mass_lb_adjusted,nox_mass_lb_adjusted,so2_mass_lb_adjusted,co2_mass_lb_for_electricity_adjusted,ch4_mass_lb_for_electricity_adjusted,n2o_mass_lb_for_electricity_adjusted,co2e_mass_lb_for_electricity_adjusted,nox_mass_lb_for_electricity_adjusted,so2_mass_lb_for_electricity_adjusted,plant_primary_fuel,data_availability,ba_code,ba_code_physical,state,distribution_flag,fuel_category,fuel_category_eia930,timezone,shaped_plant_id,plant_id_egrid
0,1,2863.10,36674.00,36674.00,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,5.796227e+06,234.60,47.20,5.815562e+06,113161.10,10995.80,DFO,eia_only,AKMS,AKMS,AK,True,petroleum,petroleum,America/Anchorage,990208.0,1
1,2,215445.90,1905189.00,1905189.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,WAT,eia_only,SOCO,SOCO,AL,False,hydro,hydro,America/Chicago,917904.0,2
2,3,11792286.22,97913748.98,97913748.98,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,1.515480e+10,1113588.86,156369.64,1.522856e+10,4731550.62,7763341.23,NG,cems_and_eia,SOCO,SOCO,AL,False,natural_gas,natural_gas,America/Chicago,,3
3,4,392133.90,3467638.80,3467638.80,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,WAT,eia_only,SOCO,SOCO,AL,False,hydro,hydro,America/Chicago,917904.0,4
4,7,-5901.00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,NG,eia_only,SOCO,SOCO,AL,False,natural_gas,natural_gas,America/Chicago,917905.0,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11047,65731,2539.90,22463.00,22463.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,SUN,eia_only,MISO,MISO,IL,True,solar,solar,America/Chicago,912909.0,65731
11048,65732,2582.00,22833.00,22833.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,SUN,eia_only,MISO,MISO,IL,True,solar,solar,America/Chicago,912909.0,65732
11049,65740,0.00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,0.000000e+00,0.00,0.00,SUN,eia_only,CISO,CISO,CA,True,solar,solar,America/Los_Angeles,903909.0,65740
11050,65767,469.20,5201.50,5201.50,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,8.495087e+05,34.30,7.10,8.523426e+05,16665.40,1755.60,DFO,eia_only,AKMS,AKMS,AK,True,petroleum,petroleum,America/Juneau,990208.0,65767


In [None]:
# start here, integrating the new oge_egrid_plant_comparison dataframe
# Work-in-progress cell: builds `plant_cause`, a per-plant table for the BAs that
# remain in ba_comparison, enriched with EIA-923, eGRID and OGE source information.
# NOTE(review): the "*_status" and "plant_name_eia" columns selected below do not
# appear among the columns shown for oge_egrid_plant_comparison, so this selection
# presumably raises a KeyError until those columns are added - confirm.

# keep only plants whose BA is still listed in the BA-level comparison
plant_cause = oge_egrid_plant_comparison[oge_egrid_plant_comparison["ba_code"].isin(ba_comparison.index)]
# keep plants where at least one status is NOT one of the three listed values,
# and restrict the frame to the columns of interest
plant_cause = plant_cause.loc[
    (
        ~plant_cause[
            [
                "net_generation_mwh_status",
                "fuel_consumed_mmbtu_status",
                "co2_mass_lb_status",
            ]
        ].isin(["!exact", "+/-1%", "egrid_missing_zero_value_from_oge"])
    ).any(axis=1),
    [
        "plant_name_eia",
        "ba_code",
        "state",
        "net_generation_mwh_status",
        "net_generation_mwh_oge",
        "net_generation_mwh_egrid",
        "fuel_consumed_mmbtu_status",
        "fuel_consumed_mmbtu_oge",
        "fuel_consumed_mmbtu_egrid",
        "co2_mass_lb_status",
        "co2_mass_lb_oge",
        "co2_mass_lb_egrid",
    ],
]

# load the EIA-923 generation and fuel table for the year
eia_gf = load_data.load_pudl_table("generation_fuel_eia923", year=year)
# unique energy source codes per plant, taken only from rows with positive fuel consumption
eia_esc = eia_gf[eia_gf["fuel_consumed_mmbtu"] > 0].groupby("plant_id_eia")[["energy_source_code"]].agg(["unique"]).droplevel(level=1, axis=1).add_suffix("_eia923")
# per-plant totals; note that this overwrites the raw table loaded above
eia_gf = eia_gf.groupby("plant_id_eia")[["net_generation_mwh","fuel_consumed_mmbtu"]].sum().add_suffix("_eia923")


# read the plant id (ORISPL) and fuel code (FUELU1) columns from the "UNT<yy>" sheet
# of the eGRID workbook, then collect the unique fuel codes per plant
egrid_unit_fuel = pd.read_excel(
    downloads_folder(f"egrid/egrid{year}_data.xlsx"),
    sheet_name=f"UNT{str(year)[-2:]}",
    header=1,
    usecols=["ORISPL","FUELU1"],
).rename(columns={"ORISPL":"plant_id_egrid","FUELU1":"energy_source_code_egrid"})
egrid_unit_fuel = egrid_unit_fuel.groupby("plant_id_egrid")[["energy_source_code_egrid"]].agg(["unique"]).droplevel(level=1, axis=1)

# attach the lookups by index
# NOTE(review): these index-on-index merges assume plant_cause is indexed by plant id,
# while the right-hand frames are keyed by plant_id_eia or plant_id_egrid - confirm
# that the index of oge_egrid_plant_comparison matches.
plant_cause = plant_cause.merge(eia_gf, how="left", left_index=True, right_index=True)
plant_cause = plant_cause.merge(eia_esc, how="left", left_index=True, right_index=True)
plant_cause = plant_cause.merge(egrid_unit_fuel, how="left", left_index=True, right_index=True)
plant_cause = plant_cause.merge(egrid_plant[["plant_id_egrid", "fuel_data_source_annual","fuel_data_source_ozone"]].set_index("plant_id_egrid"), how="left", left_index=True, right_index=True)

# add oge data source
# unique data_source values per plant, joined into one comma-separated string
plant_metadata = pd.read_csv(results_folder(f"{year}/plant_data/plant_metadata.csv"))
plant_metadata = plant_metadata.groupby("plant_id_eia")[["data_source"]].agg(["unique"]).droplevel(level=1, axis=1).add_suffix("_oge")
plant_metadata['data_source_oge'] = [','.join(map(str, l)) for l in plant_metadata['data_source_oge']]

plant_cause = plant_cause.merge(plant_metadata, how="left", left_index=True, right_index=True)

# final column order: each metric's OGE / eGRID / EIA-923 values side by side,
# followed by the data-source and energy-source-code columns
plant_cause  = plant_cause[[
        "plant_name_eia",
        "ba_code",
        "state",
        "net_generation_mwh_status",
        "net_generation_mwh_oge",
        "net_generation_mwh_egrid",
        "net_generation_mwh_eia923",
        "fuel_consumed_mmbtu_status",
        "fuel_consumed_mmbtu_oge",
        "fuel_consumed_mmbtu_egrid",
        "fuel_consumed_mmbtu_eia923",
        "data_source_oge",
        "fuel_data_source_annual",
        "fuel_data_source_ozone",
        "co2_mass_lb_status",
        "co2_mass_lb_oge",
        "co2_mass_lb_egrid",
        "energy_source_code_eia923",
        "energy_source_code_egrid"
    ]]
