# Validate EIA-930 data against net generation outputs

In [30]:
import pandas as pd
import numpy as np
import os

import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [22]:
# Data year to validate; interpolated into the input and output file paths below.
year = 2020

In [23]:
# Load the EIA-930 table (timestamps already adjusted, values not yet cleaned).
# The first CSV column is used as the index and parsed as dates.
raw = pd.read_csv(
    f"../data/outputs/{year}/eia930/eia930_raw.csv",
    index_col=0,
    parse_dates=True,
)

In [14]:
# Column-name template in `raw` for one BA's series (formatted with the BA code).
# NOTE(review): presumably the hourly net-generation ("NG") series - confirm.
GEN_ID = "EBA.{}-ALL.NG.H"
# Directory holding one hourly results CSV per balancing authority (<BA>.csv).
path = f"../data/results/{year}/power_sector_data/hourly/us_units/"

# Per-BA validation metrics, keyed by BA code.
cors = {}          # correlation between our net generation and the EIA-930 series
percent_difs = {}  # median of (930 - ours) / ours; a fraction, not multiplied by 100
annual_gen = {}    # sum of our hourly net generation over the matched hours

# sorted() makes the processing (and progress print) order deterministic.
for ba_f in sorted(os.listdir(path)):
    # Ignore anything that is not a per-BA CSV (hidden files, checkpoint folders, ...).
    if not ba_f.endswith(".csv"):
        continue
    ba = ba_f[: -len(".csv")]
    print(ba, end="...")
    col_name = GEN_ID.format(ba)
    # BAs without a matching EIA-930 column cannot be compared.
    if col_name not in raw.columns:
        continue

    dat = pd.read_csv(os.path.join(path, ba_f), parse_dates=["datetime_utc"])
    # Keep only the all-fuel "total" rows so each hour appears once.
    dat = dat[dat.fuel_category == "total"]
    # Inner join on timestamp: only hours present in both sources are kept.
    dat = dat.merge(raw[col_name], left_on="datetime_utc", right_index=True)

    cors[ba] = dat[["net_generation_mwh", col_name]].corr().to_numpy()[0, 1]

    difs = (dat[col_name] - dat["net_generation_mwh"]) / dat["net_generation_mwh"]
    # Hours with zero net generation give +inf or -inf (depending on the sign of
    # the 930 value); drop both so they cannot skew the median.
    difs = difs.replace([np.inf, -np.inf], np.nan)
    percent_difs[ba] = difs.median()

    annual_gen[ba] = dat["net_generation_mwh"].sum()

OHMS...FMPP...ERCO...TPWR...AMPL...SOCO...IPCO...PJM...WWA...EEI...AZPS...TEC...DOPD...YAD...IID...HGMA...DEAA...CPLW...SPA...GVL...ORMS...FPL...TAL...SEC...JEA...GCPD...TVA...TIDC...HECO...SCEG...INMS...CPLE...SEPA...AKMS...CHPD...PNM...WAUW...WACM...WALC...NWMT...PACE...AVA...SC...NSB...GWA...HIMS...LGEE...TEPC...ISNE...SRP...GRIS...HST...LDWP...RIMS...FPC...PACW...GRIF...PSEI...AECI...CSTO...DUK...AVRN...CEA...MISO...AEC...PSCO...OVEC...SWPP...BANC...NYIS...EPE...NBSO...SCL...BPAT...NEVP...CISO...PGE...

In [5]:
# Combine the per-BA metric dictionaries (built by the comparison loop, which must
# have been run first) into one table with one row per BA.
# Note: the "Difference as percent..." column holds the median fractional
# difference as computed in the loop; it is not multiplied by 100.
out = pd.DataFrame(data={"Difference as percent of hourly-egrid":percent_difs, "Correlation":cors, "Annual BA generation":annual_gen})
# Largest BAs first.
out = out.sort_values("Annual BA generation", ascending=False)

# to_csv does not create missing directories, so make sure the target exists.
out_dir = f"../data/results/{year}/validation_metrics/us_units"
os.makedirs(out_dir, exist_ok=True)
out.to_csv(f"{out_dir}/compare_930_hourlyegrid.csv")

NameError: name 'percent_difs' is not defined

# Visualize BA of interest

In [9]:
# Balancing authority to inspect, and its EIA-930 column name.
ba = "SWPP"
col_name = GEN_ID.format(ba)

# Load the BA's hourly results, keep the "total" fuel rows, and attach the
# EIA-930 series by timestamp.
dat = (
    pd.read_csv(f"{path}{ba}.csv", parse_dates=["datetime_utc"])
    .loc[lambda frame: frame.fuel_category == "total"]
    .merge(raw[col_name], left_on="datetime_utc", right_index=True)
)

# Overlay both generation series on one time axis.
px.line(dat, x="datetime_utc", y=["net_generation_mwh", col_name])

In [23]:
# Residual profiles for the selected year; datetime_utc is parsed as dates on load.
residual_profiles = pd.read_csv(
    f"../data/outputs/{year}/residual_profiles_{year}.csv",
    parse_dates=["datetime_utc"],
)

In [24]:
# Natural-gas residual profile rows for the BA selected above.
toplot = residual_profiles.loc[
    (residual_profiles["ba_code"] == ba)
    & (residual_profiles["fuel_category_eia930"] == "natural_gas")
]
px.line(toplot, x="datetime_utc", y="profile")

# Scrape Singularity emission and rate data 

* Drawbacks:
    * Won't include the timestamp fix
    * Slow the first time (cache locally)

* Benefits:
    * Don't have to redo the emission logic

In [28]:
# Hourly results for the DEAA balancing authority. The path uses `year` rather
# than a hard-coded 2020 so this cell follows the notebook's year setting.
rate = pd.read_csv(f"../data/results/{year}/power_sector_data/hourly/us_units/DEAA.csv", parse_dates=["datetime_utc"])
# Keep only the all-fuel "total" rows.
rate = rate[rate.fuel_category=="total"]

In [31]:
# Dual-axis figure: CO2 emission rate on the primary y-axis, net generation on
# the secondary y-axis, both against time.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=rate.datetime_utc, y=rate.generated_co2_rate_lb_per_mwh_for_electricity, name="Rate"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=rate.datetime_utc, y=rate.net_generation_mwh, name="generation"),
    secondary_y=True,
)

# Label the figure and both axes so it is clear which trace uses which scale.
fig.update_layout(title="Hourly CO2 emission rate vs. net generation")
fig.update_xaxes(title_text="datetime_utc")
fig.update_yaxes(title_text="Generated CO2 rate (lb/MWh)", secondary_y=False)
fig.update_yaxes(title_text="Net generation (MWh)", secondary_y=True)

fig.show()

In [32]:
# Plant-level hourly CEMS results, and the CEMS output table they are compared
# against later. Both paths use `year` rather than a hard-coded 2020 so they stay
# consistent with the rest of the notebook.
cems = pd.read_csv(f"../data/results/{year}/plant_data/hourly/us_units/CEMS_plant_data.csv", parse_dates=["datetime_utc"])
cems_orig = pd.read_csv(f"../data/outputs/{year}/cems_{year}.csv", parse_dates=["datetime_utc"])

In [86]:
# EIA plant ID to inspect. Known examples:
#   51    = a coal plant
#   55282 = spiky NG plant
plant_num = 55282
# Copy the selection so derived columns can be added without touching `cems`.
plant_of_interest = cems.loc[cems["plant_id_eia"] == plant_num].copy()

In [87]:
# Heat rate: fuel consumed for electricity (mmBtu) per MWh of net generation.
plant_of_interest["heat_rate_mmbtu_mwh"] = plant_of_interest[
    "fuel_consumed_for_electricity_mmbtu"
].div(plant_of_interest["net_generation_mwh"])

# Emission rate: CO2 mass for electricity (lb) per mmBtu of fuel consumed for electricity.
plant_of_interest["emission_rate_co2_mmbtu"] = plant_of_interest[
    "co2_mass_lb_for_electricity"
].div(plant_of_interest["fuel_consumed_for_electricity_mmbtu"])

In [88]:
# Time series of the selected plant's CO2 emitted per mmBtu of fuel (column derived above).
px.line(plant_of_interest, x="datetime_utc", y="emission_rate_co2_mmbtu")

In [89]:
# Preview the first rows of the selected plant, including the two derived rate columns.
plant_of_interest.head()

Unnamed: 0,plant_id_eia,datetime_utc,report_date,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb,co2e_mass_lb,...,nox_mass_lb_adjusted,so2_mass_lb_adjusted,co2_mass_lb_for_electricity_adjusted,ch4_mass_lb_for_electricity_adjusted,n2o_mass_lb_for_electricity_adjusted,co2e_mass_lb_for_electricity_adjusted,nox_mass_lb_for_electricity_adjusted,so2_mass_lb_for_electricity_adjusted,heat_rate_mmbtu_mwh,emission_rate_co2_mmbtu
7228250,55282,2020-01-01 07:00:00+00:00,2020-01-01,492.44,3479.2,3479.2,413600.0,7.65,0.77,414088.34,...,24.35,2.09,413600.0,7.65,0.77,414088.34,24.35,2.09,7.065226,118.877903
7228251,55282,2020-01-01 08:00:00+00:00,2020-01-01,493.41,3486.9,3486.9,414600.0,7.67,0.77,415089.42,...,24.41,2.09,414600.0,7.67,0.77,415089.42,24.41,2.09,7.066942,118.902177
7228252,55282,2020-01-01 09:00:00+00:00,2020-01-01,493.41,3484.6,3484.6,414200.0,7.67,0.77,414689.1,...,24.39,2.09,414200.0,7.67,0.77,414689.1,24.39,2.09,7.062281,118.865867
7228253,55282,2020-01-01 10:00:00+00:00,2020-01-01,494.37,3488.1,3488.1,414600.0,7.67,0.77,415089.59,...,24.42,2.09,414600.0,7.67,0.77,415089.59,24.42,2.09,7.055647,118.861271
7228254,55282,2020-01-01 11:00:00+00:00,2020-01-01,493.41,3486.8,3486.8,414400.0,7.67,0.77,414889.41,...,24.41,2.09,414400.0,7.67,0.77,414889.41,24.41,2.09,7.06674,118.848228


In [90]:
# Rows of the CEMS output table (`cems_orig`) for the same plant.
plant_of_interest_orig = cems_orig.loc[cems_orig["plant_id_eia"] == plant_num]

In [91]:
# Dual-axis figure for the selected plant: CO2 mass from the plant-level results
# on the primary y-axis, gross generation from the CEMS output table on the
# secondary y-axis.
# NOTE(review): `cems_orig` carries a subplant_id column, so a plant may have
# several rows per timestamp in the gross-generation trace - confirm whether it
# should be summed per hour before plotting.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add traces
fig.add_trace(
    go.Scatter(x=plant_of_interest.datetime_utc, y=plant_of_interest.co2_mass_lb, name="CO2 emissions"),
    secondary_y=False,
)

fig.add_trace(
    go.Scatter(x=plant_of_interest_orig.datetime_utc, y=plant_of_interest_orig.gross_generation_mwh, name="gross generation"),
    secondary_y=True,
)

fig.update_layout(
    title=f"Plant {plant_num} generation vs. emissions"
)
# Label both y-axes so it is clear which trace uses which scale.
fig.update_xaxes(title_text="datetime_utc")
fig.update_yaxes(title_text="CO2 mass (lb)", secondary_y=False)
fig.update_yaxes(title_text="Gross generation (MWh)", secondary_y=True)

fig.show()

In [53]:
# Static per-plant attributes; the path uses `year` rather than a hard-coded 2020
# so it matches the year used elsewhere in the notebook.
plant_meta = pd.read_csv(f"../data/results/{year}/plant_data/plant_static_attributes.csv")

In [59]:
# Static attributes of the plant under inspection. Uses `plant_num` instead of a
# hard-coded ID so the lookup follows whichever plant was selected above.
plant_meta[plant_meta.plant_id_eia==plant_num]

Unnamed: 0.1,Unnamed: 0,plant_id_eia,plant_primary_fuel,data_availability,ba_code,ba_code_physical,state,distribution_flag,fuel_category,fuel_category_eia930,timezone
3843,3843,55282,NG,cems_and_eia,DEAA,SRP,AZ,False,natural_gas,natural_gas,America/Phoenix


In [78]:
# Hours with positive gross generation of at most 2 MWh.
cems_orig.loc[
    (cems_orig["gross_generation_mwh"] > 0)
    & (cems_orig["gross_generation_mwh"] <= 2)
]

Unnamed: 0,plant_id_eia,subplant_id,datetime_utc,report_date,gross_generation_mwh,steam_load_1000_lb,fuel_consumed_mmbtu,co2_mass_lb,ch4_mass_lb,n2o_mass_lb,...,n2o_mass_lb_for_electricity,n2o_mass_lb_for_electricity_adjusted,nox_mass_lb_for_electricity,nox_mass_lb_for_electricity_adjusted,so2_mass_lb_for_electricity,so2_mass_lb_for_electricity_adjusted,co2e_mass_lb,co2e_mass_lb_adjusted,co2e_mass_lb_for_electricity,co2e_mass_lb_for_electricity_adjusted
631,3,0.0,2020-01-27 13:00:00+00:00,2020-01-01,2.00,0.0,224.800003,26800.000000,0.494560,0.049456,...,0.049456,0.049456,8.801000,8.801000,0.13500,0.13500,26831.552928,26831.552928,26831.552928,26831.552928
2071,3,1.0,2020-01-27 13:00:00+00:00,2020-01-01,0.50,0.0,17.231251,2050.000000,0.037909,0.003791,...,0.003791,0.003791,0.674750,0.674750,0.01025,0.01025,2052.418578,2052.418578,2052.418578,2052.418578
8581,3,3.0,2020-03-24 18:00:00+00:00,2020-03-01,1.00,0.0,417.799988,85800.000000,10.131650,1.474834,...,1.474834,1.474834,48.000000,48.000000,16.60000,16.60000,86583.976609,86583.976609,86583.976609,86583.976609
9982,3,3.0,2020-05-22 03:00:00+00:00,2020-05-01,1.00,0.0,401.899994,82400.000000,9.746075,1.418707,...,1.418707,1.418707,38.599998,38.599998,16.00000,16.00000,83154.141225,83154.141225,83154.141225,83154.141225
19901,3,4.0,2020-07-08 10:00:00+00:00,2020-07-01,0.50,0.0,89.250000,10600.000000,0.196350,0.019635,...,0.019635,0.019635,10.900000,10.900000,0.05350,0.05350,10612.527130,10612.527130,10612.527130,10612.527130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21580076,61242,1.0,2020-08-12 21:00:00+00:00,2020-08-01,0.50,0.0,1.249000,148.000000,0.002748,0.000275,...,0.000275,0.000275,0.162000,0.162000,0.00070,0.00070,148.175310,148.175310,148.175310,148.175310
21580081,61242,1.0,2020-08-13 02:00:00+00:00,2020-08-01,0.84,0.0,2.023200,239.039993,0.004451,0.000445,...,0.000445,0.000445,0.185760,0.185760,0.00120,0.00120,239.323970,239.323970,239.323970,239.323970
21580194,61242,1.0,2020-08-17 19:00:00+00:00,2020-08-01,1.30,0.0,2.720900,324.480011,0.005986,0.000599,...,0.000599,0.000599,0.275470,0.275470,0.00169,0.00169,324.861917,324.861917,324.861917,324.861917
21580225,61242,1.0,2020-08-19 02:00:00+00:00,2020-08-01,1.82,0.0,3.096080,368.420013,0.006811,0.000681,...,0.000681,0.000681,0.365040,0.365040,0.00182,0.00182,368.854579,368.854579,368.854579,368.854579
