In [1]:
# Tell python where to look for modules. 
# Depending on how your jupyter handles working directories, this may not be needed.
import sys
sys.path.append('../../hourly-egrid/')

# import the necessary packages
%reload_ext autoreload
%autoreload 2

# import packages
import os
import requests
import tarfile
import sqlalchemy as sa
from pathlib import Path
import pandas as pd
import plotly.express as px
import numpy as np

import src.data_cleaning as data_cleaning
import src.load_data as load_data

  from pandas import Int64Index as NumericIndex


In [None]:
# add code to download eia data

In [2]:
year = 2020

In [6]:
# load plant level data from egrid
egrid_plant = pd.read_excel(f'../data/egrid/egrid{year}_data.xlsx', 
                            sheet_name=f'PLNT{str(year)[-2:]}', 
                            header=1, 
                            usecols=['BACODE','PSTATABB', 'ORISPL', 'PNAME','PLGENATN', 'PLGENATR', 'PLHTIANT','UNCO2','PLCO2AN'])
# calculate total net generation
egrid_plant['net_generation_mwh'] = egrid_plant['PLGENATN'] + egrid_plant['PLGENATR']
egrid_plant = egrid_plant.drop(columns=['PLGENATN', 'PLGENATR'])
# rename the columns
egrid_plant = egrid_plant.rename(columns={'BACODE':'ba_code',
                                          'PSTATABB':'state',
                                          'ORISPL':'plant_id_eia',
                                          'PNAME':'plant_name',
                                          'PLHTIANT':'heat_input_mmbtu',
                                          'UNCO2':'co2_mass_tons_unadjusted',
                                          'PLCO2AN':'co2_mass_tons'})
# reorder the columns                                         
egrid_plant = egrid_plant[['ba_code', 'state', 'plant_id_eia', 'plant_name', 'net_generation_mwh', 'heat_input_mmbtu', 'co2_mass_tons', 'co2_mass_tons_unadjusted']]

In [4]:
egrid_plant

Unnamed: 0,ba_code,state,plant_id_eia,plant_name,net_generation_mwh,heat_input_mmbtu,co2_mass_tons,co2_mass_tons_unadjusted
0,,AK,60814,7-Mile Ridge Wind Project,,,,
1,,AK,54452,Agrium Kenai Nitrogen Operations,,,,
2,,AK,57053,Alakanuk,0.000,,,
3,,AK,58982,Allison Creek Hydro,20808.000,1.825500e+05,,
4,,AK,60243,Ambler,1212.000,1.155500e+04,943.616,943.616
...,...,...,...,...,...,...,...,...
12663,WACM,WY,57967,Western Sugar Coop - Torrington,,,,
12664,WACM,WY,55479,Wygen I,699013.998,8.761069e+06,918861.688,918861.688
12665,WACM,WY,56319,Wygen II,716788.996,8.916959e+06,935205.875,935205.875
12666,WACM,WY,56596,Wygen III,843013.000,9.668216e+06,1014000.875,1014000.875


In [3]:
eia_emissions = pd.read_excel(f'../data/eia/emissions{year}.xlsx', 
                                sheet_name='CO2', 
                                header=1, 
                                skipfooter=2,
                                usecols=['Plant Code','Plant Name','State', 'Generation (kWh)','Total Fuel Consumption (MMBtu)', 'Tons of CO2 Emissions','Balancing Authority Code'])

eia_emissions = eia_emissions.rename(columns={'Plant Code':'plant_id_eia',
                                              'Plant Name':'plant_name',
                                              'State':'state', 
                                              'Generation (kWh)':'net_generation_mwh',
                                              'Total Fuel Consumption (MMBtu)':'heat_input_mmbtu', 
                                              'Tons of CO2 Emissions':'co2_mass_tons_unadjusted',
                                              'Balancing Authority Code':'ba_code'})

eia_emissions['net_generation_mwh'] = eia_emissions['net_generation_mwh'] / 1000

eia_emissions = eia_emissions.groupby(['ba_code','state','plant_id_eia','plant_name'], dropna=False).sum().reset_index()

eia_emissions

Unnamed: 0,ba_code,state,plant_id_eia,plant_name,net_generation_mwh,heat_input_mmbtu,co2_mass_tons_unadjusted
0,AEC,AL,56,Lowman Energy Center,317291.000,4163218,4.268872e+05
1,AEC,AL,533,McWilliams,4158113.000,30970570,1.806303e+06
2,AEC,AL,7063,McIntosh (AL),369974.000,4200371,2.449897e+05
3,AECI,AR,55340,Dell Power Station,2774720.000,20367675,1.187908e+06
4,AECI,IA,1127,Bloomfield,56.000,645,5.271279e+01
...,...,...,...,...,...,...,...
3165,,HI,56055,Hana Substation,124.000,1395,1.140067e+02
3166,,HI,56258,Kapaia Power Station,16868.596,151313,1.236609e+04
3167,,HI,56329,Campbell Industrial Park,83712.000,1410821,1.152997e+05
3168,,HI,59035,"Biomass to Energy Facility, Kauai",140.140,1860,1.520090e+02


In [7]:
plants_not_in_eia = list(set(egrid_plant['plant_id_eia'].unique()) - set(eia_emissions['plant_id_eia'].unique()))
plants_not_in_egrid = list(set(eia_emissions['plant_id_eia'].unique()) - set(egrid_plant['plant_id_eia'].unique()))

In [42]:
egrid_plant[(egrid_plant['plant_id_eia'].isin(plants_not_in_eia)) & (egrid_plant['co2_mass_tons_unadjusted'] > 0)]

Unnamed: 0,ba_code,state,plant_id_eia,plant_name,net_generation_mwh,heat_input_mmbtu,co2_mass_tons,co2_mass_tons_unadjusted
177,TVA,AL,58061,Decatur-Morgan Co LFG Recovery Project,12862.00,137357.000,0.000,8722.140
223,TVA,AL,59710,Sand Valley Power Station,24443.00,360361.008,0.001,22882.849
235,SOCO,AL,57467,Westervelt Moundville Cogen,32244.00,185500.529,0.004,135928.422
303,MISO,AR,56079,Riceland Foods Cogeneration Plant,12874.00,78686.454,0.001,187364.062
311,MISO,AR,56523,Two Pine Landfill Gas Recovery,28810.00,368965.992,0.000,23429.262
...,...,...,...,...,...,...,...,...
12503,MISO,WI,56525,Timberline Trail Gas Recovery,19434.00,244885.000,0.000,15550.145
12512,MISO,WI,58747,Viresco Turtle Lake,9209.00,46572.190,0.000,7621.118
12530,MISO,WI,50936,Winnebago County Landfill Gas,17073.00,114884.018,0.000,12690.751
12577,PJM,WV,63246,New River Clean Energy,6457.00,29111.000,0.000,1848.542


In [10]:
eia_emissions[(eia_emissions['plant_id_eia'].isin(plants_not_in_egrid))]

Unnamed: 0,ba_code,state,plant_id_eia,plant_name,net_generation_mwh,heat_input_mmbtu,co2_mass_tons_unadjusted
232,CISO,CA,55874,Panoche Peaker,3826.0,58814,3430.222
299,CISO,CA,57901,El Segundo Energy Center LLC,395756.0,3558364,207535.3
328,CISO,CA,59002,Carlsbad Energy Center,388042.0,4424921,258075.7
402,CPLE,NC,7538,Wayne County Energy Complex,175398.0,2067503,122120.4
418,CPLE,NC,58697,L V Sutton Combined Cycle,4078353.0,29520555,1721822.0
474,DUK,NC,58215,Lee Combined Cycle Plant,5654012.0,42170811,2459538.0
545,ERCO,TX,7512,Arthur Von Rosenberg,3187965.0,22400959,1306496.0
614,ERCO,TX,55545,Hidalgo Energy Center,2529351.0,19000365,1108163.0
770,ISNE,CT,57068,GenConn Middletown LLC,693.0,43009,3339.597
1191,MISO,IN,10397,ArcelorMittal Indiana Harbor West,90853.631,3701661,215892.8


In [19]:
percent_diff = eia_emissions[['plant_id_eia','co2_mass_tons_unadjusted']].merge(egrid_plant[['plant_id_eia','co2_mass_tons_unadjusted']], how='outer', on='plant_id_eia', suffixes=('_eia','_egrid')).fillna(0)
percent_diff['percent_diff'] = (percent_diff['co2_mass_tons_unadjusted_eia'] - percent_diff['co2_mass_tons_unadjusted_egrid']) / percent_diff['co2_mass_tons_unadjusted_egrid']
percent_diff = percent_diff.replace(np.inf,1)
percent_diff.loc[percent_diff['percent_diff'] > 1, 'percent_diff'] = 1
percent_diff

Unnamed: 0,plant_id_eia,co2_mass_tons_unadjusted_eia,co2_mass_tons_unadjusted_egrid,percent_diff
0,56,4.268872e+05,414894.312,0.028906
1,533,1.806303e+06,1776162.656,0.016970
2,7063,2.449897e+05,237737.302,0.030506
3,55340,1.187908e+06,1275819.438,-0.068906
4,1127,5.271279e+01,38.753,0.360225
...,...,...,...,...
12687,62516,0.000000e+00,0.000,
12688,57327,0.000000e+00,0.000,
12689,56093,0.000000e+00,0.000,
12690,63972,0.000000e+00,0.000,


In [32]:
px.histogram(percent_diff, x='percent_diff', nbins=40, range_x=[-1,1], width=800, title='Number of plants by percent difference between EIA and eGRID').update_xaxes(dtick=0.1, tick0=0.05).add_vline(x=0)

In [34]:
eia_emissions.groupby('ba_code').sum()['co2_mass_tons_unadjusted']

ba_code
AEC     2.478180e+06
AECI    1.786504e+07
AVA     1.151475e+06
AVRN    1.183976e+06
AZPS    1.469719e+07
BANC    2.703297e+06
BPAT    1.096445e+07
CISO    3.974728e+07
CPLE    1.599850e+07
CSTO    1.657885e+06
DEAA    9.098123e+05
DUK     2.924707e+07
EEI     4.513089e+06
EPE     2.866484e+06
ERCO    1.829199e+08
FMPP    9.040570e+06
FPC     2.194937e+07
FPL     4.157300e+07
GRIF    1.003217e+06
GVL     1.061437e+06
HST     4.421561e+02
IID     5.975766e+05
IPCO    1.414436e+06
ISNE    2.749965e+07
JEA     7.404762e+06
LDWP    1.122695e+07
LGEE    3.083305e+07
MISO    3.695482e+08
NBSO    1.244135e+05
NEVP    1.436806e+07
NSB     5.884219e+00
NWMT    1.111678e+07
NYIS    3.263924e+07
OVEC    1.023809e+07
PACE    4.728161e+07
PACW    1.051130e+06
PGE     5.307620e+06
PJM     3.087753e+08
PNM     7.802419e+06
PSCO    1.520822e+07
PSEI    3.126455e+06
SC      1.213508e+07
SCEG    1.018206e+07
SCL     9.078981e+04
SEC     8.700669e+06
SOCO    1.044267e+08
SPA     1.818873e+06
SRP  

In [38]:
percent_diff_ba = eia_emissions.groupby('ba_code', dropna=False).sum()['co2_mass_tons_unadjusted'].reset_index().merge(egrid_plant.groupby('ba_code', dropna=False).sum()['co2_mass_tons_unadjusted'].reset_index(), how='outer', on='ba_code', suffixes=('_eia','_egrid'))
percent_diff_ba = percent_diff_ba.set_index('ba_code')
percent_diff_ba = percent_diff_ba.fillna(0)
percent_diff_ba['percent_diff'] = (percent_diff_ba['co2_mass_tons_unadjusted_eia'] - percent_diff_ba['co2_mass_tons_unadjusted_egrid']) / percent_diff_ba['co2_mass_tons_unadjusted_egrid']
percent_diff_ba = percent_diff_ba.replace(np.inf,1)
percent_diff_ba.loc[percent_diff_ba['percent_diff'] > 1, 'percent_diff'] = 1
percent_diff_ba

Unnamed: 0_level_0,co2_mass_tons_unadjusted_eia,co2_mass_tons_unadjusted_egrid,percent_diff
ba_code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AEC,2.478180e+06,2.459531e+06,0.007582
AECI,1.786504e+07,1.817683e+07,-0.017153
AVA,1.151475e+06,3.418281e+06,-0.663142
AVRN,1.183976e+06,1.247514e+06,-0.050932
AZPS,1.469719e+07,1.484023e+07,-0.009639
...,...,...,...
HGMA,0.000000e+00,0.000000e+00,
SEPA,0.000000e+00,4.411831e+04,-1.000000
WAUW,0.000000e+00,0.000000e+00,
WWA,0.000000e+00,0.000000e+00,


In [39]:
px.histogram(percent_diff_ba, x='percent_diff', nbins=40, range_x=[-1,1], width=800, title='Number of BAs by percent difference between EIA and eGRID').update_xaxes(dtick=0.1, tick0=0.05).add_vline(x=0)