In [6]:
import pandas as pd
import numpy as np
from decimal import Decimal
from pandas import ExcelWriter
from pandas import ExcelFile
import datetime
import matplotlib.pyplot as plt
import matplotlib.style as style

Annual production data is from United States Energy Information Administration (EIA.gov) form 923 (Annual production data)
download at:
https://www.eia.gov/electricity/data/eia923/

In [7]:
production_2019 = pd.read_excel("EIA923_Schedules_2_3_4_5_M_12_2019_21FEB2020.xlsx",skiprows=5)
production_2018 = pd.read_excel("EIA923_Schedules_2_3_4_5_M_12_2018_Final_Revision.xlsx",skiprows=5)
production_2017 = pd.read_excel("EIA923_Schedules_2_3_4_5_M_12_2017_Final_Revision.xlsx",skiprows=5)
production_2016 = pd.read_excel("EIA923_Schedules_2_3_4_5_M_12_2016_Final_Revision.xlsx",skiprows=5)
production_2012 = pd.read_excel("EIA923_Schedules_2_3_4_5_M_12_2012_Final_Revision.xlsx",skiprows=5)

In [8]:
production_2019.shape, production_2018.shape, production_2017.shape, production_2016.shape, production_2012.shape


((6384, 97), (13962, 97), (13379, 97), (12865, 97), (11004, 97))

In [9]:
prod_columns =['Plant Id','Plant Name', 'Operator Name','Plant State','Reported\nPrime Mover',\
       'Reported\nFuel Type Code', 'AER\nFuel Type Code','Netgen\nJanuary', 'Netgen\nFebruary', 'Netgen\nMarch', \
        'Netgen\nApril','Netgen\nMay', 'Netgen\nJune', 'Netgen\nJuly', 'Netgen\nAugust','Netgen\nSeptember', \
        'Netgen\nOctober', 'Netgen\nNovember','Netgen\nDecember','Net Generation\n(Megawatthours)','YEAR']

In [28]:
prod_2019_sum = production_2019[prod_columns].copy()
prod_2018_sum = production_2018[prod_columns].copy()
prod_2017_sum = production_2017[prod_columns].copy()
prod_2016_sum = production_2016[prod_columns].copy()
prod_2012_sum = production_2012[prod_columns].copy()

Let's filter the data down just to nuclear power sources.

In [29]:
nuc_2019_prod = prod_2019_sum[prod_2019_sum['Reported\nFuel Type Code'] == 'NUC']
nuc_2018_prod = prod_2018_sum[prod_2018_sum['Reported\nFuel Type Code'] == 'NUC']
nuc_2017_prod = prod_2017_sum[prod_2017_sum['Reported\nFuel Type Code'] == 'NUC']
nuc_2016_prod = prod_2016_sum[prod_2016_sum['Reported\nFuel Type Code'] == 'NUC']
nuc_2012_prod = prod_2012_sum[prod_2012_sum['Reported\nFuel Type Code'] == 'NUC']

In [30]:
prod_total_19 = nuc_2019_prod['Net Generation\n(Megawatthours)'].sum()
prod_total_18 = nuc_2018_prod['Net Generation\n(Megawatthours)'].sum()
prod_total_17 = nuc_2017_prod['Net Generation\n(Megawatthours)'].sum()
prod_total_16 = nuc_2016_prod['Net Generation\n(Megawatthours)'].sum()
prod_total_12 = nuc_2012_prod['Net Generation\n(Megawatthours)'].sum()

from decimal import Decimal
"2019: {:.2E} MW-hr".format(Decimal(str(prod_total_19))), \
"2018: {:.2E} MW-hr".format(Decimal(str(prod_total_18))), \
"2017: {:.2E} MW-hr".format(Decimal(str(prod_total_17))), \
"2016: {:.2E} MW-hr".format(Decimal(str(prod_total_16))), \
"2012: {:.2E} MW-hr".format(Decimal(str(prod_total_12))), \

('2019: 8.09E+8 MW-hr',
 '2018: 8.07E+8 MW-hr',
 '2017: 8.05E+8 MW-hr',
 '2016: 8.06E+8 MW-hr',
 '2012: 7.69E+8 MW-hr')

Electricity generation from nuclear has been very consistent over recent years.  Actually increasing slightly since 2012.

In [47]:
nuc_2019_prod.shape

(98, 21)

In [48]:
nuc_2012_prod.shape

(104, 21)

In [46]:
Plant_list_2019 = nuc_2019_prod['Plant Name'].values.tolist()
Plant_list_2012 = nuc_2012_prod['Plant Name'].values.tolist()

In [50]:
set(Plant_list_2019).symmetric_difference(set(Plant_list_2012))

{'Crystal River',
 'Fort Calhoun',
 'Kewaunee',
 'Oyster Creek',
 'San Onofre Nuclear Generating Station',
 'Vermont Yankee'}

As shown above, six plants have shutdown since 2012.  Two more plants actually shutdown during calendar 2019, Pilgrim and Three Mile Island (TMI).  But the overall electricity production from nuclear has still managed to increase during this period.  How was the industry able to increase output with less operating plants?

Let's look at plant nameplate capacity data available from the Energy Information Administation
https://www.eia.gov/electricity/data/eia860/ for several of these years.

In [56]:
nameplate_2018 = pd.read_excel("3_1_Generator_Y2018.xlsx", skiprows=1)
nameplate_2017 = pd.read_excel("3_1_Generator_Y2017.xlsx", skiprows=1)
nameplate_2012 = pd.read_excel("GeneratorY2012.xlsx", skiprows=1)

In [91]:
nuc_MW_18 = nameplate_2018[nameplate_2018['Energy Source 1'] == 'NUC'].copy()
nuc_MW_17 = nameplate_2017[nameplate_2017['Energy Source 1'] == 'NUC'].copy()
nuc_MW_12 = nameplate_2012[nameplate_2012['Energy Source 1'] == 'NUC'].copy()

In [93]:
name_total_18 = nuc_MW_18['Nameplate Capacity (MW)'].sum()
name_total_17 = nuc_MW_17['Nameplate Capacity (MW)'].sum()
name_total_12 = nuc_MW_12['Nameplate Capacity (MW)'].sum()

from decimal import Decimal
"2018: {:.3E} MW".format(Decimal(str(name_total_18))), \
"2017: {:.3E} MW".format(Decimal(str(name_total_17))), \
"2012: {:.3E} MW".format(Decimal(str(name_total_12))), \

('2018: 1.043E+5 MW', '2017: 1.048E+5 MW', '2012: 1.079E+5 MW')

Based on this data, the overall Nameplate capacity (power producing capability) decreased, but not very much considering six plants permanently shut down.  Let's investigate further to make sure everything is correct.

In [72]:
Nameplate_dict_18 = nuc_MW_18.groupby('Plant Name')['Nameplate Capacity (MW)'].sum().to_dict()
Nameplate_dict_12 = nuc_MW_12.groupby('Plant Name')['Nameplate Capacity (MW)'].sum().to_dict()

In [77]:
uprates_dict = {}
diff_list = []
for key in Nameplate_dict_12:
    if key in Nameplate_dict_18:
        diff = Nameplate_dict_18[key] - Nameplate_dict_12[key]
        if diff != 0:
            uprates_dict[key] = diff
    else:
        diff_list.append(key)
        
for key in Nameplate_dict_18:
    if key not in Nameplate_dict_12:
        diff_list.append(key)

for key in uprates_dict:
    print(key, uprates_dict[key])
    
diff_list

Watts Bar Nuclear Plant 1269.9
Three Mile Island 5.2
Turkey Point -117.2
Palo Verde 0.300000000001
Peach Bottom 557.0
James A Fitzpatrick 1.3


['PPL Susquehanna',
 'Cooper',
 'San Onofre Nuclear Generating Station',
 'Vermont Yankee',
 'Oyster Creek',
 'Brunswick',
 'Crystal River',
 'Kewaunee',
 'Monticello',
 'Fort Calhoun',
 'TalenEnergy Susquehanna',
 'Cooper Nuclear Station',
 'Brunswick Nuclear',
 'Monticello Nuclear Facility']

There appear to be a couple of explanations for the small decrease.  Notice Watts Bar nameplate increased by 1269.9 MW.  This is because a second unit came online at Watts Bar in 2016 (https://en.wikipedia.org/wiki/Watts_Bar_Nuclear_Plant).  This was the first new nuclear plant in over 20 years.  Also notice power increases at several plants due to power uprates.  Information on power uprates is available at https://www.nrc.gov/reactors/operating/licensing/power-uprates/status-power-apps.html

We see the shutdown plants when we look at differences between the nameplate ratings for 2012 and 2018, and also that several plants reported under different names for each year, e.g. 'TalenEnergy Susquehanna' vs. 'PPL Susquehanna'.  Let's fix these name discrepancies and make sure we didn't miss anything.

In [78]:
name_fix = dict.fromkeys(['Monticello Nuclear Facility','Monticello'], 'Monticello Nuclear Facility')

name_fix.update(dict.fromkeys(['Brunswick','Brunswick Nuclear'], 'Brunswick Nuclear'))
name_fix.update(dict.fromkeys(['Cooper','Cooper Nuclear Station'], 'Cooper Nuclear Station'))
name_fix.update(dict.fromkeys(['TalenEnergy Susquehanna','PPL Susquehanna'], 'Susquehanna'))
name_fix

{'Brunswick': 'Brunswick Nuclear',
 'Brunswick Nuclear': 'Brunswick Nuclear',
 'Cooper': 'Cooper Nuclear Station',
 'Cooper Nuclear Station': 'Cooper Nuclear Station',
 'Monticello': 'Monticello Nuclear Facility',
 'Monticello Nuclear Facility': 'Monticello Nuclear Facility',
 'PPL Susquehanna': 'Susquehanna',
 'TalenEnergy Susquehanna': 'Susquehanna'}

In [83]:
nuc_MW_12['Plant Name'].replace(to_replace = name_fix, inplace=True)
nuc_MW_18['Plant Name'].replace(to_replace = name_fix, inplace=True)

In [84]:
Nameplate_dict_18 = nuc_MW_18.groupby('Plant Name')['Nameplate Capacity (MW)'].sum().to_dict()
Nameplate_dict_12 = nuc_MW_12.groupby('Plant Name')['Nameplate Capacity (MW)'].sum().to_dict()

In [88]:
len(Nameplate_dict_12), len(Nameplate_dict_18)

(66, 60)

In [85]:
uprates_dict = {}
diff_list = []
for key in Nameplate_dict_12:
    if key in Nameplate_dict_18:
        diff = Nameplate_dict_18[key] - Nameplate_dict_12[key]
        if diff != 0:
            uprates_dict[key] = diff
    else:
        diff_list.append(key)
        
for key in Nameplate_dict_18:
    if key not in Nameplate_dict_12:
        diff_list.append(key)

for key in uprates_dict:
    print(key, uprates_dict[key])
    
diff_list

Peach Bottom 557.0
Three Mile Island 5.2
Turkey Point -117.2
Palo Verde 0.300000000001
Watts Bar Nuclear Plant 1269.9
Susquehanna -64.0
James A Fitzpatrick 1.3


['San Onofre Nuclear Generating Station',
 'Vermont Yankee',
 'Oyster Creek',
 'Crystal River',
 'Kewaunee',
 'Fort Calhoun']

Cleaning up the name discrepancies clarifies the differences.  We have a list of plant uprates and a list of shutdown plants.  Everything makes sense.

In [94]:
shutdown_nameplate = 0
for i in diff_list:
    shutdown_nameplate += Nameplate_dict_12[i]
    print(i, Nameplate_dict_12[i])
    
shutdown_nameplate

San Onofre Nuclear Generating Station 2254.0
Vermont Yankee 563.4
Oyster Creek 550.0
Crystal River 890.4
Kewaunee 560.1
Fort Calhoun 502.0


5319.9000000000005

This list above shows the nameplate capacity of the shutdown plants.  A total loss of 5319 MW of generating capacity, but this was partially offest by a new unit at Watts Bar and a major uprate at Peach Bottom.

Finally, let's look at capacity factor, this the ratio of actual power a plant produces compared to it's nameplate.  A capacity factor of 1.0 would indicate a plant was able to operate 24 hrs a day, 365 days per year with no downtime.

In [96]:
prod_total_18/(24*365*name_total_18)

0.8835979135398567

In [97]:
prod_total_17/(24*365*name_total_17)

0.8768691979544897

In [98]:
prod_total_12/(24*365*name_total_12)

0.8136479034241941

So the loss of operating capacity was offset by improved capacity factors.  There has been almost a 10% improvement since 2012. It should be noted that 2012 total capacity (name_total_12) includes Crystal River, however Crystal River did not operate at all between 2010 and 2013 (it was officially inactivated in 2013), so the capacity factor reported for 2012 is slightly lower than reality (it isn't objectively fair to count an effecitvely inactivated plant agains the industry metric for plant performance).  Likewise the capacity factor calculated for 2018 was slightly higher than reality, because Oyster Creek (which shutdown in September 2018) did not report its nameplate capacity in EIA-860.  The 2017 capaciy factor is accurate as there were no inactivations in 2017.  Let's clean up these minor errors and get more accurate values for capacity factor.


In [105]:
check18 = nuc_MW_18['Plant Name'].tolist()
'Oyster Creek' in check18

False

In [106]:
check12 = nuc_MW_12['Plant Name'].tolist()
'Crystal River' in check12

True

In [109]:
prod_total_18/(24*365*(name_total_18+ 550)), prod_total_17/(24*365*name_total_17), prod_total_12/(24*365*(name_total_12-890.4))

(0.8789616043890736, 0.8768691979544897, 0.8204156852364124)

These are the corrected values.  Still an impressive improvement in capacity factor between 2012 and 2018.