This notebook is set up to test code as needed.

In [1]:
# import packages
import pandas as pd
import os

%reload_ext autoreload
%autoreload 2

# # Tell python where to look for modules.
import sys
sys.path.append('../../../open-grid-emissions/src/')

import download_data
import load_data
from column_checks import get_dtypes
from filepaths import *
import impute_hourly_profiles
import data_cleaning
import output_data
import emissions
import validation
import gross_to_net_generation
import eia930

# Data year used by the loading and cleaning cells in this notebook.
year = 2020
# Year sub-folder (including the trailing slash) used when building file paths.
path_prefix = "{}/".format(year)

In [None]:
# 3. Clean EIA-923 Generation and Fuel Data at the Monthly Level
####################################################################################
print("3. Cleaning EIA-923 data")
eia923_allocated, primary_fuel_table = data_cleaning.clean_eia923(year, False)
# Attach the primary fuel columns to every generator record. Several allocated
# rows may share one (plant, generator) key, but `validate="m:1"` requires the
# primary fuel table to be unique on that key.
generator_key = ["plant_id_eia", "generator_id"]
eia923_allocated = pd.merge(
    eia923_allocated,
    primary_fuel_table,
    on=generator_key,
    how="left",
    validate="m:1",
)

# 4. Clean Hourly Data from CEMS
####################################################################################
print("4. Cleaning CEMS data")
cems = data_cleaning.clean_cems(year, False, primary_fuel_table)

In [None]:
# Build a plant-unit x month matrix flagging the months in which each CEMS unit
# reported data, then order the rows so that units with the same reporting
# pattern end up next to each other.
report_months = cems[["plant_id_eia", "unitid", "report_date"]].drop_duplicates()
report_months["plant_unit"] = (
    report_months["plant_id_eia"].astype(str)
    + "_"
    + report_months["unitid"].astype(str)
)
report_months["data_reported"] = 1
report_months = report_months.pivot_table(
    index="plant_unit", columns="report_date", values="data_reported"
)
report_months = report_months.sort_values(by=list(report_months.columns))

# Pick the ozone-season columns (May through September) by calendar month rather
# than by hard-coded "2020-..." string labels, so the cell follows whichever year
# was loaded and works whether the column labels are timestamps or date strings.
ozone_season_columns = [
    month
    for month in report_months.columns
    if pd.Timestamp(month).month in range(5, 10)
]
# Both counts are taken before any helper column is added, so they only see the
# monthly flags (1 where reported, NaN otherwise; NaN is skipped by sum).
months_reported = report_months.sum(axis=1)
ozone_months_reported = report_months[ozone_season_columns].sum(axis=1)
report_months["oz_total"] = ozone_months_reported
report_months["total"] = months_reported
report_months["type"] = 0
# type 1: reported in exactly the five ozone-season months and in no other month
report_months.loc[
    (report_months["total"] == 5) & (report_months["oz_total"] == 5), "type"
] = 1
# type 2: reported in all twelve months
report_months.loc[report_months["total"] == 12, "type"] = 2
# The helper columns exist only to drive the final row order.
report_months = report_months.sort_values(by=["type", "total", "oz_total"])
report_months = report_months.drop(columns=["type", "total", "oz_total"])
report_months

In [None]:
import plotly.express as px

In [None]:
# Render the `report_months` matrix as an image (one row per plant-unit, one
# column per report month) at a fixed height of 800.
px.imshow(report_months, height=800)

In [None]:
clean_fuels = ["SUN", "MWH", "WND", "WAT", "WH", "PUR", "NUC"]
# Rows whose own energy source code is NOT in `clean_fuels` even though the
# plant's primary fuel is.
plant_fuel_is_clean = primary_fuel_table["plant_primary_fuel"].isin(clean_fuels)
source_code_is_clean = primary_fuel_table["energy_source_code"].isin(clean_fuels)
primary_fuel_table.loc[
    plant_fuel_is_clean & ~source_code_is_clean,
    ["plant_id_eia", "energy_source_code", "plant_primary_fuel"],
].drop_duplicates()

In [None]:
# Attach each plant's primary fuel to the CEMS records.
# Any existing `plant_primary_fuel` column is dropped first so the cell is safe
# to re-run: without that, a second run would merge the column again and leave
# `plant_primary_fuel_x` / `plant_primary_fuel_y` instead of the expected name.
plant_fuels = primary_fuel_table[["plant_id_eia", "plant_primary_fuel"]].drop_duplicates()
cems = cems.drop(columns="plant_primary_fuel", errors="ignore").merge(
    plant_fuels, how="left", on="plant_id_eia"
)

In [None]:
# Distinct plant/unit pairs in CEMS at plants whose primary fuel is "NUC".
primary_fuel_is_nuc = cems["plant_primary_fuel"] == "NUC"
cems.loc[primary_fuel_is_nuc, ["plant_id_eia", "unitid"]].drop_duplicates()

In [None]:
# Initialize the PUDL output object for the configured `year` instead of a
# hard-coded 2020, so this cell stays in sync with the cells that already use `year`.
pudl_out = load_data.initialize_pudl_out(year)

In [None]:
# Fetch the `gens_eia860` table from the PUDL output object.
# NOTE(review): presumably the EIA-860 generators table; confirm against PUDL.
gens = pudl_out.gens_eia860()

In [None]:
# Display the table loaded by `pudl_out.gens_eia860()`.
gens

In [None]:
resolution = "hourly"

# Read every per-BA results file for the configured `year` and stack them into
# one frame, tagging each row with the BA code taken from the file name.
us_units_dir = results_folder(f"{year}/power_sector_data/{resolution}/us_units")
ba_frames = []
# sorted(): os.listdir returns entries in arbitrary order, so sorting makes the
# row order of `all_data` reproducible. Non-CSV entries (e.g. hidden OS files)
# are skipped so they are not parsed as data.
for ba_file in sorted(os.listdir(us_units_dir)):
    if not ba_file.endswith(".csv"):
        continue
    df = pd.read_csv(results_folder(f"{year}/power_sector_data/{resolution}/us_units/{ba_file}"))
    df["ba_code"] = ba_file.split(".")[0]
    ba_frames.append(df)

# Kept as a separate name from the list above so `all_data` is only ever a DataFrame.
all_data = pd.concat(ba_frames, axis=0)

In [2]:
# Load the hourly profiles CSV for `year` from the outputs folder.
# NOTE(review): `path_prefix` already ends in "/", so this path contains a
# doubled separator; left unchanged here to keep the exact path string.
hourly_profiles_path = f"{outputs_folder()}{path_prefix}/hourly_profiles_{year}.csv"
hourly_profiles = pd.read_csv(hourly_profiles_path)

  hourly_profiles = pd.read_csv(f"{outputs_folder()}{path_prefix}/hourly_profiles_{year}.csv")


In [None]:
resolution = "hourly"
# Load the PJM results file for the configured `year` (previously hard-coded to
# "2020") so this cell follows the same year as the rest of the notebook.
pjm = pd.read_csv(results_folder(f"{year}/power_sector_data/{resolution}/us_units/PJM.csv"))

In [None]:
# Rows of the "total" fuel category, indexed by local datetime, keeping only the
# columns whose name contains "generated_co2_".
is_total_row = pjm["fuel_category"] == "total"
pjm_total = (
    pjm.loc[is_total_row]
    .set_index("datetime_local")
    .filter(like="generated_co2_")
)
pjm_total

In [None]:
import plotly.express as px

In [None]:
# Line chart of the `pjm_total` frame (indexed by `datetime_local`).
px.line(pjm_total)

In [None]:
# Per-fuel rows (everything except "total"), indexed by local datetime, reduced
# to the fuel label and the generated CO2 rate column.
is_fuel_row = pjm["fuel_category"] != "total"
fuel_rate_columns = ["fuel_category", "generated_co2_rate_lb_per_mwh_for_electricity"]
pjm_fuel = pjm.loc[is_fuel_row].set_index("datetime_local")[fuel_rate_columns]
pjm_fuel

In [None]:
# Line chart of `pjm_fuel`, colored by the `fuel_category` column.
px.line(pjm_fuel, color="fuel_category")

In [None]:
# Per-fuel rows (everything except "total"), indexed by local datetime, reduced
# to the fuel label and net generation, then plotted with one color per fuel.
is_fuel_row = pjm["fuel_category"] != "total"
generation_columns = ["fuel_category", "net_generation_mwh"]
pjm_gen = pjm.loc[is_fuel_row].set_index("datetime_local")[generation_columns]
px.line(pjm_gen, color="fuel_category")

In [None]:
# Display the PJM results frame read from PJM.csv.
pjm

In [4]:
import plotly.express as px

In [None]:
# Plot the generated CO2 rate column over `datetime_local`, one colored line per
# `fuel_category` value in `pjm`.
px.line(pjm, y="generated_co2_rate_lb_per_mwh_for_electricity", x="datetime_local", color="fuel_category")

In [None]:
# NOTE(review): no cell above this one assigns `data`; its first visible
# assignment is in the next cell. On a fresh kernel run top-to-bottom this cell
# raises NameError (unless `from filepaths import *` happens to export `data`).
# Move this display below the assignment or delete the cell.
data

In [None]:
ba = "PJM"
fuel_category = "nuclear"
# Hourly profile rows for the chosen BA and fuel category.
matches_ba = hourly_profiles["ba_code"] == ba
matches_fuel = hourly_profiles["fuel_category"] == fuel_category
data = hourly_profiles[matches_ba & matches_fuel]
# Plot every profile column over local time.
profile_columns = [
    "eia930_profile",
    "cems_profile",
    "residual_profile",
    "scaled_residual_profile",
    "shifted_residual_profile",
]
px.line(data, x="datetime_local", y=profile_columns)

In [5]:
ba = "BANC"
fuel_category = "natural_gas"
# Hourly profile rows for the chosen BA and fuel category.
matches_ba = hourly_profiles["ba_code"] == ba
matches_fuel = hourly_profiles["fuel_category"] == fuel_category
data = hourly_profiles[matches_ba & matches_fuel]
# Map the raw profile column names to the labels shown in the legend.
display_names = {
    "eia930_profile": "EIA-930 Net Generation",
    "cems_profile": "CEMS Net Generation",
    "residual_profile": "Residual Net Generation",
}
px.line(
    data.rename(columns=display_names),
    x="datetime_local",
    y=list(display_names.values()),
    width=1000,
    title="Hourly net generation for natural gas generators in BANC",
    labels={"value": "MW", "variable": "Profile"},
)


In [11]:
ba = "BANC"
fuel_category = "natural_gas"
# Hourly profile rows for the chosen BA and fuel category.
matches_ba = hourly_profiles["ba_code"] == ba
matches_fuel = hourly_profiles["fuel_category"] == fuel_category
data = hourly_profiles[matches_ba & matches_fuel]
# Map the raw profile column names to the labels shown in the legend.
display_names = {
    "eia930_profile": "EIA-930 Reported",
    "cems_profile": "CEMS Reported",
    "residual_profile": "Residual",
}
fig = px.line(
    data.rename(columns=display_names),
    x="datetime_local",
    y=list(display_names.values()),
    width=1200,
    height=600,
    color_discrete_sequence=["#636EFA", "#00CC96", "#EF553B"],
    title="Calculating the hourly profile for non-CEMS gas generators in BANC",
    labels={"value": "MW", "variable": "Net Generation"},
)
# Horizontal legend anchored at the top-left, just above the plotting area.
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "top",
        "y": 1.1,
        "xanchor": "left",
        "x": 0,
    }
)
# Force the y-axis to include zero; the returned figure is the cell's output.
fig.update_yaxes(rangemode="tozero")


In [None]:
# Load the gridemissions ("chalendar") EIA-930 file for the configured year.
# The path is resolved relative to the downloads folder.
clean_930_file = downloads_folder("eia930/chalendar/EBA_elec.csv")

# Only `year` is requested from the loader; the result is displayed for inspection.
eia930_data = eia930.load_chalendar_for_pipeline(clean_930_file, year=year)
eia930_data

In [None]:
# Sum EIA-930 net generation over rows whose report date falls in the configured
# `year`. The data above was loaded with `year=year`, so the filter uses the same
# variable instead of a hard-coded 2020.
in_report_year = eia930_data["report_date"].dt.year == year
eia930_data.loc[in_report_year, "net_generation_mwh_930"].sum()

In [None]:
# Load the CO2 data from reference/EBA_co2.csv in the data folder.
ge_co2 = pd.read_csv(data_folder("reference/EBA_co2.csv"))

In [None]:
# Read the manually maintained BA reference table.
ba_ref = pd.read_csv(manual_folder("ba_reference.csv"))
# Collect the codes of every BA whose `us_ba` flag is "No".
not_us_ba = ba_ref["us_ba"] == "No"
foreign_bas = list(ba_ref.loc[not_us_ba, "ba_code"])

In [None]:
# only keep generation-related emissions
ge_co2 = ge_co2.filter(like="_NG")
#remove columns for foreign BAs