In [None]:
# Tell python where to look for modules. 
# Depending on how your jupyter handles working directories, this may not be needed.
import sys
sys.path.append('../../hourly-egrid/')

# automatically reload edited local modules (e.g. src.*) before each cell runs
%reload_ext autoreload
%autoreload 2

# import packages
import os
import requests
import tarfile
import sqlalchemy as sa
from pathlib import Path
import pandas as pd
import plotly.express as px
import numpy as np

import src.data_cleaning as data_cleaning
import src.load_data as load_data

In [None]:
# TODO: add code to download the EIA data

In [None]:
# data year to compare; it is interpolated into the eGRID and EIA file names
# (and the eGRID sheet name) when the workbooks are loaded
year = 2020

In [None]:
# Load the plant-level sheet of the eGRID workbook for the selected year.
# header=1 takes the column names from the second row of the sheet, and the
# sheet name is built from the last two digits of the year.
egrid_plant = (
    pd.read_excel(
        f'../data/egrid/egrid{year}_data.xlsx',
        sheet_name=f'PLNT{str(year)[-2:]}',
        header=1,
        usecols=['BACODE','PSTATABB', 'ORISPL', 'PNAME','PLGENATN', 'PLGENATR', 'PLHTIANT','UNCO2','PLCO2AN'],
    )
    # total net generation is the sum of the two eGRID generation columns;
    # a missing value in either one propagates as NaN
    .assign(net_generation_mwh=lambda plants: plants['PLGENATN'] + plants['PLGENATR'])
    # the two component columns are no longer needed once the total exists
    .drop(columns=['PLGENATN', 'PLGENATR'])
    # switch from eGRID field codes to the descriptive names used in this notebook
    .rename(
        columns={
            'BACODE': 'ba_code',
            'PSTATABB': 'state',
            'ORISPL': 'plant_id_eia',
            'PNAME': 'plant_name',
            'PLHTIANT': 'heat_input_mmbtu',
            'UNCO2': 'co2_mass_tons_unadjusted',
            'PLCO2AN': 'co2_mass_tons',
        }
    )
)
# put the columns in a fixed order: identifiers first, then the measurements
egrid_plant = egrid_plant[
    [
        'ba_code',
        'state',
        'plant_id_eia',
        'plant_name',
        'net_generation_mwh',
        'heat_input_mmbtu',
        'co2_mass_tons',
        'co2_mass_tons_unadjusted',
    ]
]

In [None]:
# display the cleaned eGRID plant-level table
egrid_plant

In [None]:
# Load the CO2 sheet of the EIA emissions workbook for the selected year,
# keeping only the columns needed for the comparison with eGRID.
# header=1 takes the column names from the second row; skipfooter=2 drops the
# last two rows of the sheet.
eia_emissions = (
    pd.read_excel(
        f'../data/eia/emissions{year}.xlsx',
        sheet_name='CO2',
        header=1,
        skipfooter=2,
        usecols=['Plant Code','Plant Name','State', 'Generation (kWh)','Total Fuel Consumption (MMBtu)', 'Tons of CO2 Emissions','Balancing Authority Code'],
    )
    # use the same column names as the eGRID table so the two can be compared
    .rename(
        columns={
            'Plant Code': 'plant_id_eia',
            'Plant Name': 'plant_name',
            'State': 'state',
            'Generation (kWh)': 'net_generation_mwh',
            'Total Fuel Consumption (MMBtu)': 'heat_input_mmbtu',
            'Tons of CO2 Emissions': 'co2_mass_tons_unadjusted',
            'Balancing Authority Code': 'ba_code',
        }
    )
    # the generation column is read in kWh; dividing by 1000 converts it to MWh
    .assign(net_generation_mwh=lambda emissions: emissions['net_generation_mwh'] / 1000)
    # sum the remaining columns within each (BA, state, plant ID, plant name)
    # combination; dropna=False keeps rows whose key columns are missing
    .groupby(['ba_code','state','plant_id_eia','plant_name'], dropna=False)
    .sum()
    .reset_index()
)

eia_emissions

In [None]:
# distinct plant IDs present in each source
egrid_plant_ids = set(egrid_plant['plant_id_eia'].unique())
eia_plant_ids = set(eia_emissions['plant_id_eia'].unique())

# plant IDs that appear in one source but not the other
plants_not_in_eia = list(egrid_plant_ids - eia_plant_ids)
plants_not_in_egrid = list(eia_plant_ids - egrid_plant_ids)

In [None]:
# eGRID plants that are absent from the EIA data and report positive unadjusted CO2
egrid_plant.loc[
    egrid_plant['plant_id_eia'].isin(plants_not_in_eia)
    & egrid_plant['co2_mass_tons_unadjusted'].gt(0)
]

In [None]:
# EIA plants that are absent from the eGRID data
eia_emissions.loc[eia_emissions['plant_id_eia'].isin(plants_not_in_egrid)]

In [None]:
# Plant-level comparison of unadjusted CO2 between EIA and eGRID.
# The outer merge keeps plants found in only one source; their missing value
# is filled with 0 so that a difference can still be computed.
percent_diff = (
    pd.merge(
        eia_emissions[['plant_id_eia','co2_mass_tons_unadjusted']],
        egrid_plant[['plant_id_eia','co2_mass_tons_unadjusted']],
        how='outer',
        on='plant_id_eia',
        suffixes=('_eia','_egrid'),
    )
    .fillna(0)
    # fractional difference relative to eGRID: (EIA - eGRID) / eGRID
    .assign(
        percent_diff=lambda both: (
            both['co2_mass_tons_unadjusted_eia'] - both['co2_mass_tons_unadjusted_egrid']
        ) / both['co2_mass_tons_unadjusted_egrid']
    )
    # a zero eGRID value with a positive EIA value divides to +inf; show it as +1
    .replace(np.inf, 1)
)
# cap the difference at +1 (i.e. +100%); rows where both values are 0 stay NaN
percent_diff['percent_diff'] = percent_diff['percent_diff'].clip(upper=1)
percent_diff

In [None]:
# histogram of the plant-level differences, with a vertical reference line at zero
(
    px.histogram(
        percent_diff,
        x='percent_diff',
        nbins=40,
        range_x=[-1,1],
        width=800,
        title='Number of plants by percent difference between EIA and eGRID',
    )
    .update_xaxes(dtick=0.1, tick0=0.05)
    .add_vline(x=0)
)

In [None]:
# Total unadjusted CO2 reported by EIA for each balancing authority.
# The emissions column is selected *before* aggregating so that only it is
# summed; summing the whole frame would also aggregate the plant ID, state and
# plant name columns, and the handling of non-numeric columns in a grouped sum
# differs between pandas versions.
# dropna=False keeps plants with a missing BA code as their own group, matching
# how the BA-level comparison in this notebook groups the same data.
eia_emissions.groupby('ba_code', dropna=False)['co2_mass_tons_unadjusted'].sum()

In [None]:
# BA-level comparison of unadjusted CO2 between EIA and eGRID.
# For each source, the emissions column is selected before summing so that only
# it is aggregated (the frames also carry plant IDs and text columns, which a
# whole-frame grouped sum would try to aggregate as well).
# dropna=False keeps plants with a missing BA code as their own group, and the
# outer merge keeps BAs that appear in only one source.
percent_diff_ba = (
    eia_emissions.groupby('ba_code', dropna=False)['co2_mass_tons_unadjusted']
    .sum()
    .reset_index()
    .merge(
        egrid_plant.groupby('ba_code', dropna=False)['co2_mass_tons_unadjusted']
        .sum()
        .reset_index(),
        how='outer',
        on='ba_code',
        suffixes=('_eia','_egrid'),
    )
    # index by BA first so that the fill below only touches the emissions totals
    .set_index('ba_code')
    # a BA missing from one source is treated as reporting 0 there
    .fillna(0)
)
# Fractional difference relative to eGRID: (EIA - eGRID) / eGRID.
# clip(upper=1) caps the value at +1 (+100%), which also maps the +inf produced
# by a zero eGRID total to +1. BAs where both totals are 0 divide to NaN and are
# left as NaN.
percent_diff_ba['percent_diff'] = (
    (percent_diff_ba['co2_mass_tons_unadjusted_eia'] - percent_diff_ba['co2_mass_tons_unadjusted_egrid'])
    / percent_diff_ba['co2_mass_tons_unadjusted_egrid']
).clip(upper=1)
percent_diff_ba

In [None]:
# histogram of the BA-level differences, with a vertical reference line at zero
(
    px.histogram(
        percent_diff_ba,
        x='percent_diff',
        nbins=40,
        range_x=[-1,1],
        width=800,
        title='Number of BAs by percent difference between EIA and eGRID',
    )
    .update_xaxes(dtick=0.1, tick0=0.05)
    .add_vline(x=0)
)