# Create time series of nuclear capacity installations and retirements

## import libraries

In [1]:
import pandas as pd
import numpy as np
import os

## read in data

## eia 860 data (annual)

### currently operating generators in 2018

In [2]:
# Load the 'Operable' sheet of the 2018 EIA-860 generator workbook.
# One leading row and one trailing row of the sheet are skipped, nine columns
# are picked by position, and they are given short snake_case names.
eia_op = pd.read_excel(
    "data/3_1_Generator_Y2018.xlsx",
    sheet_name="Operable",
    skiprows=1,
    skipfooter=1,
    usecols=[2, 3, 6, 7, 15, 25, 26, 27, 28],
    names=[
        "plant_code",
        "plant_name",
        "gen_id",
        "technology",
        "capacity",
        "op_month",
        "op_year",
        "ret_month",
        "ret_year",
    ],
)

In [3]:
# only keep nuclear plants; take an explicit copy so that later in-place
# edits of this frame (e.g. clearing a retirement date) act on an independent
# object instead of a slice of the full generator table, which would raise
# pandas' SettingWithCopyWarning
eia_op = eia_op[eia_op.technology == 'Nuclear'].copy()

In [4]:
# preview the first five operating nuclear generators
eia_op.head(n=5)

Unnamed: 0,plant_code,plant_name,gen_id,technology,capacity,op_month,op_year,ret_month,ret_year
81,46,Browns Ferry,1,Nuclear,1152.0,8,1974,,
82,46,Browns Ferry,2,Nuclear,1152.0,3,1975,,
83,46,Browns Ferry,3,Nuclear,1190.0,3,1977,,
388,204,Clinton Power Station,1,Nuclear,1138.3,11,1987,,
389,210,Wolf Creek Generating Station,1,Nuclear,1267.7,9,1985,,


This isn't reflected in the 2018 dataset, but in the summer of 2019 the state of Ohio passed legislation that allowed its nuclear plants to remain open. Thus, the Davis Besse plant will not have to retire after all, so its scheduled retirement date is removed below:

In [5]:
# show the Davis Besse record, including its scheduled retirement date
eia_op.loc[eia_op["plant_name"] == "Davis Besse"]

Unnamed: 0,plant_code,plant_name,gen_id,technology,capacity,op_month,op_year,ret_month,ret_year
6728,6149,Davis Besse,1,Nuclear,925.2,11,1977,5,2020


In [6]:
# Blank out Davis Besse's retirement month and year. The single-space string
# is the same placeholder that the later `ret_year == " "` filter treats as
# "no retirement scheduled".
eia_op.loc[eia_op["plant_name"] == "Davis Besse", ["ret_month", "ret_year"]] = " "

### generators already retired by 2018

In [7]:
# Load the 'Retired and Canceled' sheet of the same 2018 generator workbook.
# Same layout handling as the operable sheet, plus a status column.
eia_ret = pd.read_excel(
    "data/3_1_Generator_Y2018.xlsx",
    sheet_name="Retired and Canceled",
    skiprows=1,
    skipfooter=1,
    usecols=[2, 3, 6, 7, 15, 23, 25, 26, 27, 28],
    names=[
        "plant_code",
        "plant_name",
        "gen_id",
        "technology",
        "capacity",
        "status",
        "op_month",
        "op_year",
        "ret_month",
        "ret_year",
    ],
)

In [8]:
# restrict the retired/canceled table to nuclear generators
eia_ret = eia_ret.loc[eia_ret["technology"] == "Nuclear"]

In [9]:
# keep only rows whose status is "RE"
# NOTE(review): presumably "RE" marks retired (as opposed to canceled) units;
# confirm against the EIA-860 layout documentation
eia_ret = eia_ret.loc[eia_ret["status"] == "RE"]

In [10]:
# display the nuclear generators that remain after the technology and status filters
eia_ret

Unnamed: 0,plant_code,plant_name,gen_id,technology,capacity,status,op_month,op_year,ret_month,ret_year
165,360,San Onofre Nuclear Generating Station,2,Nuclear,1127.0,RE,8,1983,6,2013
166,360,San Onofre Nuclear Generating Station,3,Nuclear,1127.0,RE,4,1984,6,2013
307,628,Crystal River,3,Nuclear,890.4,RE,3,1977,2,2013
1322,2289,Fort Calhoun,1,Nuclear,502.0,RE,9,1973,10,2016
1368,2388,Oyster Creek,1,Nuclear,550.0,RE,12,1969,9,2018


### generators proposed for installation (as of the 2018 data)

In [11]:
# Load the 'Proposed' sheet of the 2018 generator workbook. Proposed units
# have no retirement columns, so only the operation month/year are read.
eia_add = pd.read_excel(
    "data/3_1_Generator_Y2018.xlsx",
    sheet_name="Proposed",
    skiprows=1,
    skipfooter=1,
    usecols=[2, 3, 6, 7, 15, 19, 22, 23],
    names=[
        "plant_code",
        "plant_name",
        "gen_id",
        "technology",
        "capacity",
        "status",
        "op_month",
        "op_year",
    ],
)

In [12]:
# restrict the proposed-generator table to nuclear units
eia_add = eia_add.loc[eia_add["technology"] == "Nuclear"]

In [13]:
# keep proposed units whose status code is one of the accepted values
# NOTE(review): these look like EIA-860 proposed-generator status codes;
# confirm their meanings against the EIA-860 layout documentation
eia_add = eia_add.loc[
    eia_add["status"].isin(["TS", "P", "L", "T", "U", "V"])
]

In [14]:
# display the proposed nuclear generators that remain after the technology and status filters
eia_add

Unnamed: 0,plant_code,plant_name,gen_id,technology,capacity,status,op_month,op_year
24,649,Vogtle,3,Nuclear,1100.0,U,11,2021
25,649,Vogtle,4,Nuclear,1100.0,U,11,2022
484,61075,UAMPS Carbon Free Power Plant,NPM1,Nuclear,50.0,P,5,2026
485,61075,UAMPS Carbon Free Power Plant,NPM10,Nuclear,50.0,P,2,2027
486,61075,UAMPS Carbon Free Power Plant,NPM11,Nuclear,50.0,P,3,2027
487,61075,UAMPS Carbon Free Power Plant,NPM12,Nuclear,50.0,P,4,2027
488,61075,UAMPS Carbon Free Power Plant,NPM2,Nuclear,50.0,P,6,2026
489,61075,UAMPS Carbon Free Power Plant,NPM3,Nuclear,50.0,P,7,2026
490,61075,UAMPS Carbon Free Power Plant,NPM4,Nuclear,50.0,P,9,2026
491,61075,UAMPS Carbon Free Power Plant,NPM5,Nuclear,50.0,P,9,2026


## create operating dataframe and retired (or soon to be retired) dataframe

In [15]:
# Generators with no scheduled retirement: operating units whose retirement
# year is the blank placeholder " ", stacked with every proposed unit.
df_op = pd.concat(
    [
        eia_op.loc[
            eia_op["ret_year"] == " ",
            ["plant_code", "plant_name", "gen_id", "capacity", "op_year"],
        ],
        eia_add.loc[
            :,
            ["plant_code", "plant_name", "gen_id", "capacity", "op_year"],
        ],
    ]
)

In [16]:
# Generators with a retirement year: operating units whose retirement year is
# anything other than the blank placeholder " ", stacked with the units that
# have already retired.
df_ret = pd.concat(
    [
        eia_op.loc[
            eia_op["ret_year"] != " ",
            ["plant_code", "plant_name", "gen_id", "capacity", "op_year", "ret_year"],
        ],
        eia_ret.loc[
            :,
            ["plant_code", "plant_name", "gen_id", "capacity", "op_year", "ret_year"],
        ],
    ]
)

## create time series data for each nuclear plant

In [17]:
# earliest first-operation year among the non-retiring generators; the
# hard-coded year range used for the per-plant series starts one year before it
min(df_op['op_year'])

1969

In [18]:
# latest (planned) first-operation year among the non-retiring generators; the
# hard-coded year range used for the per-plant series runs one year past it
max(df_op['op_year'])

2027

In [19]:
# Build one yearly capacity series per plant that has generators without a
# scheduled retirement. Each series covers every year of the grid and carries
# the capacity added that year plus its running total.
# NOTE(review): the left merge emits one row per matching generator, so a
# plant with several generators starting in the same year gets several rows
# for that year, each holding a partial running total.
un_op = df_op.plant_code.unique()
l_op = []
year_grid = list(range(1969 - 1, 2028 + 1))  # 1968 through 2028 inclusive
for code in un_op:
    # capacity additions of this plant, keyed by the year they come online
    additions = df_op.loc[
        df_op["plant_code"] == code,
        ["plant_code", "plant_name", "capacity", "op_year"],
    ].rename(columns={"op_year": "year"})

    # the earliest row supplies the plant_code / plant_name labels for the grid
    earliest = additions.sort_values(by="year").drop_duplicates(subset="plant_code")

    series = pd.DataFrame(
        data={
            "year": year_grid,
            "plant_code": np.repeat(earliest["plant_code"], len(year_grid)),
            "plant_name": np.repeat(earliest["plant_name"], len(year_grid)),
        }
    )

    # years without an addition get capacity 0 after the fill
    series = series.merge(
        additions, on=["year", "plant_code", "plant_name"], how="left"
    ).fillna(0)

    series["op_capacity"] = series["capacity"].cumsum()
    series["net_capacity"] = series["op_capacity"]
    # rows with a negative capacity change report that change as net capacity
    series.loc[series["capacity"] < 0, "net_capacity"] = series["capacity"]

    l_op.append(series)
    del series, additions, earliest

In [20]:
# stack the per-plant series row-wise (each plant keeps its own row index)
seq_op = pd.concat(l_op, axis=0)

In [21]:
# Build one yearly capacity series per plant that has at least one generator
# with a retirement year. Start-ups count as positive and retirements as
# negative capacity changes; from the first retirement year onward,
# net_capacity is the running sum of the changes counted from that year.
un_ret = df_ret.plant_code.unique()
l_ret = []
year_grid = list(range(1969 - 1, 2028 + 1))  # 1968 through 2028 inclusive
for code in un_ret:
    events = df_ret.loc[
        df_ret["plant_code"] == code,
        ["plant_code", "plant_name", "capacity", "op_year", "ret_year"],
    ]
    # long format: one row per generator event, with the event year in `year`
    events = events.melt(
        id_vars=["plant_code", "plant_name", "capacity"],
        value_vars=["op_year", "ret_year"],
        var_name="type",
        value_name="year",
    )
    # a retirement removes capacity, so flip its sign
    events.loc[events["type"] == "ret_year", "capacity"] = -events["capacity"]
    events = events[["plant_code", "plant_name", "capacity", "year"]]
    # cast the merge keys to integers so they match the integer year grid
    events = events.astype({"plant_code": "int64", "year": "int64"})

    # the earliest row supplies the plant_code / plant_name labels for the grid
    earliest = events.sort_values(by="year").drop_duplicates(subset="plant_code")

    series = pd.DataFrame(
        data={
            "year": year_grid,
            "plant_code": np.repeat(earliest["plant_code"], len(year_grid)),
            "plant_name": np.repeat(earliest["plant_name"], len(year_grid)),
        }
    )
    series = series.merge(
        events, on=["year", "plant_code", "plant_name"], how="left"
    ).fillna(0)
    series["op_capacity"] = series["capacity"].cumsum()
    series["net_capacity"] = series["op_capacity"]
    series.loc[series["capacity"] < 0, "net_capacity"] = series["capacity"]

    # split the series at the first year in which capacity is removed
    first_ret_year = min(series[series["capacity"] < 0]["year"])
    op = series.loc[series["year"] < first_ret_year]
    ret = series.loc[series["year"] >= first_ret_year][
        ["year", "plant_code", "plant_name", "capacity", "op_capacity"]
    ]
    ret["net_capacity"] = ret["capacity"].cumsum()

    l_ret.append(pd.concat([op, ret]))
    del series, events, earliest

In [22]:
# stack the per-plant retirement series row-wise
seq_ret = pd.concat(l_ret, axis=0)

In [23]:
# combine the non-retiring and the retiring plant series into one table
df_all = pd.concat([seq_op, seq_ret], axis=0)

In [24]:
# preview the first five rows of the combined series
df_all.head(n=5)

Unnamed: 0,year,plant_code,plant_name,capacity,op_capacity,net_capacity
0,1968,46,Browns Ferry,0.0,0.0,0.0
1,1969,46,Browns Ferry,0.0,0.0,0.0
2,1970,46,Browns Ferry,0.0,0.0,0.0
3,1971,46,Browns Ferry,0.0,0.0,0.0
4,1972,46,Browns Ferry,0.0,0.0,0.0


## read in plant location data

In [25]:
# Load plant locations from the 'Plant' sheet of the 2018 plant workbook:
# plant code, city, state and coordinates, picked by column position.
plants = pd.read_excel(
    "data/2___Plant_Y2018.xlsx",
    sheet_name="Plant",
    skiprows=1,
    skipfooter=1,
    usecols=[2, 5, 6, 9, 10],
    names=["plant_code", "city", "state", "latitude", "longitude"],
)

In [26]:
# attach location columns to every row; this is an inner join, so rows whose
# plant_code has no match in `plants` are dropped
df_all = df_all.merge(plants, on="plant_code", how="inner")

In [27]:
# preview the first five rows now that location columns are attached
df_all.head(n=5)

Unnamed: 0,year,plant_code,plant_name,capacity,op_capacity,net_capacity,city,state,latitude,longitude
0,1968,46,Browns Ferry,0.0,0.0,0.0,Decatur,AL,34.7042,-87.1189
1,1969,46,Browns Ferry,0.0,0.0,0.0,Decatur,AL,34.7042,-87.1189
2,1970,46,Browns Ferry,0.0,0.0,0.0,Decatur,AL,34.7042,-87.1189
3,1971,46,Browns Ferry,0.0,0.0,0.0,Decatur,AL,34.7042,-87.1189
4,1972,46,Browns Ferry,0.0,0.0,0.0,Decatur,AL,34.7042,-87.1189


## create ranking for power plants

### get final year only

In [28]:
# snapshot of every series row for the year 2027
df_2027 = df_all.loc[df_all["year"] == 2027]

In [29]:
# (rows, columns) of the 2027 snapshot; the row count is what the hard-coded
# range(1, 69) used for the `order` column has to match
df_2027.shape

(68, 10)

In [30]:
# order the 2027 snapshot from smallest to largest cumulative capacity
df_2027 = df_2027.sort_values(by="op_capacity", ascending=True)

In [31]:
# Rank the rows 1..N in their current (sorted) order. N comes from the frame
# itself, so the assignment keeps working if the number of rows changes.
# NOTE(review): `order` is added to df_2027 only; df_all is not updated here.
df_2027['order'] = range(1, len(df_2027) + 1)

## export to csv

In [32]:
# write the combined series, including the row index, to the working directory
df_all.to_csv(path_or_buf="nuclear_capacity.csv", index=True)