In [1]:
import os
import pandas as pd
import numpy as np
import re
from ppmusa import load_eia_data

### Load EIA Data

In [2]:
eia_data_operable, eia_storage, eia_loc, eia_plants_locs = load_eia_data(base_dir = '../..')

In [3]:
eia_plants_locs['NERC Region'].value_counts()

NERC Region
WECC    5825
SERC    5314
MRO     4020
RFC     3960
NPCC    3672
TRE     1778
Name: count, dtype: int64

### Load ADS Data

In [4]:
#Load ADS Data
ADS_PATH = os.path.join('../../data/WECC_ADS/downloads/2032/Public Data')
ads_thermal= pd.read_csv(ADS_PATH + '/Thermal_General_Info.csv',skiprows=1, )#encoding='unicode_escape')
ads_thermal = ads_thermal[['GeneratorName', ' Turbine Type', 'Model Type', 'MustRun',
       'MinimumDownTime(hr)', 'MinimumUpTime(hr)', 'MaxUpTime(hr)',
       'Generic Ramp Rate Name', 'RampUp Rate(MW/minute)',
       'RampDn Rate(MW/minute)', 'Startup Cost Fixed($)', 'StartFuel(MMBTu)', 'Startup Time',
       'VOM Cost']]

# Combine ADS IOC data with the ADS General Thermal Plant Data
ads_ioc= pd.read_csv(ADS_PATH + '/Thermal_IOCurve_Info.csv',skiprows=1, ).rename(columns={'Generator Name':'GeneratorName'})
ads_ioc = ads_ioc[['GeneratorName', 'IOMaxCap(MW)','IOMinCap(MW)', 'MinInput(MMBTu)', 'IncCap2(MW)', 'IncHR2(MMBTu/MWh)',
       'IncCap3(MW)', 'IncHR3(MMBTu/MWh)', 'IncCap4(MW)', 'IncHR4(MMBTu/MWh)',
       'IncCap5(MW)', 'IncHR5(MMBTu/MWh)', 'IncCap6(MW)', 'IncHR6(MMBTu/MWh)',
       'IncCap7(MW)', 'IncHR7(MMBTu/MWh)']]

print('Size of thermal generator data: ', ads_thermal.shape)
print('Size of IOC generator data: ', ads_ioc.shape)
ads_thermal = pd.merge(ads_thermal, ads_ioc, on='GeneratorName', how='left')

Size of thermal generator data:  (1508, 14)
Size of IOC generator data:  (1506, 16)


In [5]:
#Replacing heat rates with 0 with NaN
ads_thermal['IncHR2(MMBTu/MWh)'] = ads_thermal['IncHR2(MMBTu/MWh)'].replace(0, np.nan)

In [6]:
#loading ads to match ads_name with generator key in order to link with ads thermal file
ads= pd.read_csv(ADS_PATH + '/GeneratorList.csv',skiprows=2, encoding='unicode_escape')
ads = ads[ads['State'].isin(['NM', 'AZ', 'CA', 'WA', 'OR', 'ID', 'WY', 'MT', 'UT', 'SD', 'CO', 'NV', 'NE', '0', 'TX'])]
ads['Long Name'] = ads['Long Name'].astype(str)
ads['Name'] = ads['Name'].str.replace(" ", "")
ads['Name'] = ads['Name'].apply(lambda x: re.sub(r'[^a-zA-Z0-9]', '', x).lower())
ads['Long Name'] = ads['Long Name'].str.replace(" ", "")
ads['Long Name'] = ads['Long Name'].apply(lambda x: re.sub(r'[^a-zA-Z0-9]', '', x).lower())
ads['SubType'] = ads['SubType'].apply(lambda x: re.sub(r'[^a-zA-Z0-9]', '', x).lower())
ads.rename({'Name': 'ads_name', 'Long Name': 'ads_long_name',
             'SubType': 'subtype','Commission Date':'commission_date',
             'Retirement Date':'retirement_date','Area Name':'balancing_area'},
               axis=1, inplace=True)
ads.rename(str.lower, axis='columns', inplace=True)
ads['long id'] = ads['long id'].astype(str)
ads_name_key_dict = dict(zip(ads['ads_name'], ads['generatorkey']))
ads

Unnamed: 0,generatorkey,ads_name,bus id,bus name,bus kv,unit id,generator typeid,subtype,long id,ads_long_name,...,state,county,city,zipcode,fueltype,technology,btm,internalid,economicpmin,economicpmax
0,40,hatchsolarec,11404,HAT_DIST_PV,0.69,'PV,4,solarpvtracking,NM-57591-1,hatchsolarenergycenterillc1,...,NM,,,,Solar,Tracking,#FALSE#,,0,5
1,45,copper1,11051,COPPER_G,13.80,'1,1,ctnatgasindustrial,TX-9-1,copper1,...,TX,,,,NG,Industrial,#FALSE#,,10,68
2,54,montana4g4,11233,MPS4,13.80,'1,1,ctnatgasaero,TX-58562-GT-4,montana4g4,...,TX,,,,NG,Areo,#FALSE#,,50,97
3,55,montana3g3,11232,MPS3,13.80,'1,1,ctnatgasaero,TX-58562-GT-3,montana3g3,...,TX,,,,NG,Areo,#FALSE#,,50,97
4,56,montana2g2,11228,MPS2,13.80,'1,1,ctnatgasaero,TX-58562-GT-2,montana2g2,...,TX,,,,NG,Areo,#FALSE#,,50,97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5030,20640,spindlehill1,70593,SPNDLE1,18.00,'G1,1,ctnatgasindustrial,CO-56445-GEN1,spindlehillenergycentergen1,...,CO,Weld,Fort Lupton,,NG,Industrial,#FALSE#,,81,160
5031,20641,spindlehill2,70594,SPNDLE2,18.00,'G2,1,ctnatgasindustrial,CO-56445-GEN2,spindlehillenergycentergen2,...,CO,Weld,Fort Lupton,,NG,Industrial,#FALSE#,,80,160
5032,20642,opalsprings,45368,OPAL,69.00,'N,2,hydro,OR-54251-1,deschutesvalleywaterdistrictopalsprings1hy593,...,OR,Jeffereson,,,Water,,#FALSE#,,0,5
5033,20643,dciblackwater,13402,BLACKWTR,345.00,'DC,4,dcintertie,NM--,blackwaterdcintertiefictionalresource,...,NM,,,,,,#FALSE#,,0,200


In [7]:
ads_thermal['generator_name_alt'] = ads_thermal['GeneratorName'].str.replace(" ", "").str.lower().str.replace('_',"")
ads_thermal['generator_key'] = ads_thermal['generator_name_alt'].map(ads_name_key_dict).fillna(9999).astype(int)
ads_thermal.columns

Index(['GeneratorName', ' Turbine Type', 'Model Type', 'MustRun',
       'MinimumDownTime(hr)', 'MinimumUpTime(hr)', 'MaxUpTime(hr)',
       'Generic Ramp Rate Name', 'RampUp Rate(MW/minute)',
       'RampDn Rate(MW/minute)', 'Startup Cost Fixed($)', 'StartFuel(MMBTu)',
       'Startup Time', 'VOM Cost', 'IOMaxCap(MW)', 'IOMinCap(MW)',
       'MinInput(MMBTu)', 'IncCap2(MW)', 'IncHR2(MMBTu/MWh)', 'IncCap3(MW)',
       'IncHR3(MMBTu/MWh)', 'IncCap4(MW)', 'IncHR4(MMBTu/MWh)', 'IncCap5(MW)',
       'IncHR5(MMBTu/MWh)', 'IncCap6(MW)', 'IncHR6(MMBTu/MWh)', 'IncCap7(MW)',
       'IncHR7(MMBTu/MWh)', 'generator_name_alt', 'generator_key'],
      dtype='object')

### Matching EIA and ADS Data

In [8]:
#load mapping file to match the ads thermal to the eia_plants_locs file
eia_ads_mapping = pd.read_csv('../../repo_data/eia_mappings/eia_ads_generator_mapping_updated.csv')
# thermal_ads_eia = pd.merge(ads_thermal, eia_ads_mapping, left_on= 'generator_key', right_on= 'generatorkey', how='inner')
thermal_ads_eia = pd.merge(ads_thermal, eia_ads_mapping, left_on= 'generator_name_alt', right_on= 'ads_name', how='inner')
thermal_ads_eia.drop(columns=['lat','lon'],inplace=True)

In [9]:
#Merge Plant locs file with ADS thermal data
eia_plants_locs_thermal = eia_plants_locs[eia_plants_locs.tech_type.str.contains('Gas_SC|Gas_CC|Oil|Waste|Geothermal|Nuclear|Coal|Biomass|Other')]
eia_plants_locs_non_thermal = eia_plants_locs[~eia_plants_locs.tech_type.str.contains('Gas_SC|Gas_CC|Oil|Waste|Geothermal|Nuclear|Coal|Biomass|Other')]
eia_plants_locs_non_thermal.to_csv('eia_plants_locs_non_thermal.csv',index=False)
eia_thermal_plant_ramps = pd.merge(eia_plants_locs_thermal, thermal_ads_eia.rename(columns={'generator_id_ads':'generator_id'}), on= ['plant_id_eia','generator_id'], how='left')
eia_thermal_plant_ramps.sort_values(by=['capacity_mw'],ascending=False, inplace=True)
eia_thermal_plant_ramps.to_csv('eia_thermal_plant_ramps.csv',index=False)

In [10]:
thermal_characteristics_grouped = thermal_ads_eia.groupby('plant_id_eia').agg({'GeneratorName':'last', 
       'MinimumDownTime(hr)':'mean', 'MinimumUpTime(hr)':'mean', 'MaxUpTime(hr)':'mean', 'RampUp Rate(MW/minute)': 'mean',
       'RampDn Rate(MW/minute)':'mean', 'Startup Cost Fixed($)':'mean', 'StartFuel(MMBTu)':'mean',
       'Startup Time':'mean', 'VOM Cost':'mean', 'generator_key':'last', 'ads_name':'last', 'bus id':'last', 'bus name':'last',
       'devstatus':'last', 'balancing_area':'last', 'region name':'last',
       'IOMaxCap(MW)':'mean','IOMinCap(MW)':'mean', 'MinInput(MMBTu)':'mean', 'IncCap2(MW)':'mean', 'IncHR2(MMBTu/MWh)':'mean',
       'IncCap3(MW)':'mean', 'IncHR3(MMBTu/MWh)':'mean' }).reset_index()
thermal_characteristics_grouped['plant_id_eia'] = thermal_characteristics_grouped['plant_id_eia'].astype(int)
thermal_characteristics_grouped

Unnamed: 0,plant_id_eia,GeneratorName,MinimumDownTime(hr),MinimumUpTime(hr),MaxUpTime(hr),RampUp Rate(MW/minute),RampDn Rate(MW/minute),Startup Cost Fixed($),StartFuel(MMBTu),Startup Time,...,devstatus,balancing_area,region name,IOMaxCap(MW),IOMinCap(MW),MinInput(MMBTu),IncCap2(MW),IncHR2(MMBTu/MWh),IncCap3(MW),IncHR3(MMBTu/MWh)
0,99,Frederickson1,1.00,1.00,-1.0,80.010,80.010,2663.270000,402.717,1.0,...,Existing,PSEI,NW_PSEI,67.000000,57.785000,705.395000,9.215000,7.901000,0.000000,0.000000
1,114,Douglas 1,8.00,12.00,-1.0,5.000,5.000,1351.991000,142.720,1.0,...,Existing,AZPS,SW_AZPS,16.000000,8.000000,151.466000,8.000000,8.727000,0.000000,0.000000
2,115,CYMRIC_115_GN1_35032_RT,4.00,2.00,-1.0,2.080,2.080,1429.063000,0.000,1.0,...,Future-Conceptual,CIPV,CA_CISO,20.800000,0.000000,0.000000,20.800000,11.000000,0.000000,0.000000
3,116,Ocotillo GT7,1.00,1.00,-1.0,83.250,83.250,2771.118714,419.025,1.0,...,Future-Planned,AZPS,SW_AZPS,94.014286,34.321429,418.943643,19.299857,8.971857,36.850143,9.644286
4,117,West Phoenix CC5b,4.75,1.75,-1.0,18.078,18.078,9232.848375,30.332,1.0,...,Existing,AZPS,SW_AZPS,107.762500,64.377500,608.712937,29.393250,8.852875,10.137625,2.647500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
410,61503,Ces Dairy Biogas,1.00,1.00,-1.0,9.900,9.900,329.538500,49.830,1.0,...,Existing,CIPV,CA_CISO,1.000000,1.000000,66.241000,0.000000,,0.000000,0.000000
411,61754,SunSelect 11GT4,6.00,2.00,-1.0,9.420,9.420,12221.130000,37.680,1.0,...,Existing,CISC,CA_CISO,4.000000,4.000000,18.000000,0.000000,,0.000000,0.000000
412,61846,Tulare BioMAT Fuel Cell,1.00,1.00,-1.0,4.000,4.000,26.476490,0.470,1.0,...,Existing,CISC,CA_CISO,2.800000,1.400000,54.921070,1.400000,24.157000,0.000000,0.000000
413,62115,Alamitos CC1b,7.00,2.00,-1.0,17.600,17.600,13979.040000,38.520,1.0,...,Existing,CISC,CA_CISO,-1.000000,-1.000000,428.364600,0.000000,,0.000000,0.000000


In [11]:
unmatched = eia_thermal_plant_ramps[eia_thermal_plant_ramps.generator_key.isna()]
unmatched = unmatched.dropna(axis=1, how='all')
rematched = pd.merge(unmatched.reset_index(), thermal_characteristics_grouped, on='plant_id_eia', how='inner')
print('Number of unmatched plants: {}'.format(len(unmatched)))
print('Number of rematched plants: {}'.format(len(rematched)))
print('Number of plants in original dataset: {}'.format(len(eia_thermal_plant_ramps)))
rematched.rename(columns={'index':'index_old'}, inplace=True)
rematched

Number of unmatched plants: 12814
Number of rematched plants: 544
Number of plants in original dataset: 13480


Unnamed: 0,index_old,plant_name_eia,generator_id,capacity_mw,summer_capacity_mw,winter_capacity_mw,p_nom_min,Status,operating_year,tech_type,...,devstatus,balancing_area,region name,IOMaxCap(MW),IOMinCap(MW),MinInput(MMBTu),IncCap2(MW),IncHR2(MMBTu/MWh),IncCap3(MW),IncHR3(MMBTu/MWh)
0,3910,Intermountain Power Project,2,820.0,900.0,900.0,270.0,OP,1987,Coal,...,Existing,LDWP,CA_LDWP,398.0,200.000,1513.905,90.0000,5.402,108.0000,5.850
1,3909,Intermountain Power Project,1,820.0,900.0,900.0,270.0,OP,1986,Coal,...,Existing,LDWP,CA_LDWP,398.0,200.000,1513.905,90.0000,5.402,108.0000,5.850
2,3742,Laramie River Station,1,621.0,560.0,560.0,200.0,OP,1981,Coal,...,Existing,WACM,RM_WACM,570.0,207.677,2651.361,72.4645,10.029,72.4645,10.334
3,9606,Dry Fork Station,01,483.7,390.0,405.0,260.0,OP,2011,Coal,...,Existing,WACM,RM_WACM,405.0,260.000,2811.032,29.0000,10.234,43.5000,10.400
4,360,Comanche (CO),2,396.0,335.0,335.0,170.0,OP,1975,Coal,...,Existing,PSCO,RM_PSCO,766.0,466.261,4548.511,59.9480,7.967,59.9480,8.472
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
539,10580,Sunnyvale City of WPCP,S-14,0.8,0.8,0.8,0.6,OP,1997,Gas_SC,...,Existing,CIPB,CA_CISO,0.3,0.300,66.241,0.0000,,0.0000,0.000
540,10581,Sunnyvale City of WPCP,S-15,0.8,0.8,0.8,0.6,OP,1997,Gas_SC,...,Existing,CIPB,CA_CISO,0.3,0.300,66.241,0.0000,,0.0000,0.000
541,7480,Monterey One Water,EG1,0.6,0.6,0.6,0.4,OP,1988,Waste,...,Existing,CIPB,CA_CISO,2.0,2.000,97.333,0.0000,,0.0000,0.000
542,7482,Monterey One Water,EG3,0.6,0.6,0.6,0.4,OP,1988,Waste,...,Existing,CIPB,CA_CISO,2.0,2.000,97.333,0.0000,,0.0000,0.000


In [12]:
# adding more ramp rates
combined = eia_thermal_plant_ramps.copy()
combined.drop(combined.loc[rematched.index_old].index, inplace= True)
combined = pd.concat([combined, rematched.drop(columns='index_old')]).sort_values(by='capacity_mw', ascending=False)

In [13]:
all_plants_operable = pd.concat([combined, eia_plants_locs_non_thermal])
all_plants_operable.drop(columns = ['Model Type', 'Generic Ramp Rate Name'],inplace=True)
all_plants_operable.drop_duplicates(subset=['plant_name_eia', 'generator_id'],inplace=True)
all_plants_operable.to_csv('../../repo_data/eia_plants_usa.csv',index=False)
all_plants_operable

Unnamed: 0,plant_name_eia,generator_id,capacity_mw,summer_capacity_mw,winter_capacity_mw,p_nom_min,Status,operating_year,tech_type,fuel_type,...,generator_key,generatorkey,ads_name,long id,bus id,bus name,devstatus,balancing_area,region name,plant_id_ads
2924,Peach Bottom,2,1499.4,1264.7,1345.1,115.8,OP,1974,Nuclear,Nuclear,...,,,,,,,,,,
3603,Grand Gulf,1,1440.0,1400.5,1448.1,300.0,OP,1985,Nuclear,Nuclear,...,,,,,,,,,,
3489,Palo Verde,3,1403.2,1312.0,1334.0,110.0,OP,1988,Nuclear,Nuclear,...,13306.0,13306.0,paloverde3,AZ-6008-3,14933.0,PALOVRD3,Existing,TH_PV,SW_TH_PV,6008
3488,Palo Verde,2,1403.2,1314.0,1336.0,110.0,OP,1986,Nuclear,Nuclear,...,13305.0,13305.0,paloverde2,AZ-6008-2,14932.0,PALOVRD2,Existing,TH_PV,SW_TH_PV,6008
3487,Palo Verde,1,1403.2,1311.0,1333.0,110.0,OP,1986,Nuclear,Nuclear,...,13304.0,13304.0,paloverde1,AZ-6008-1,14931.0,PALOVRD1,Existing,TH_PV,SW_TH_PV,6008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25300,Clay County Electrical Cooperative,SOL,1.0,1.0,1.0,0.0,OP,2020,Solar,Solar,...,,,,,,,,,,
25301,Craighead Electric Solar One,SOL,1.0,1.0,1.0,0.0,OP,2018,Solar,Solar,...,,,,,,,,,,
25302,DeGraff Butler Electric,SOL,1.0,1.0,1.0,0.0,OP,2021,Solar,Solar,...,,,,,,,,,,
25303,Farmers Electric Cooperative,SOL,1.0,1.0,1.0,0.0,OP,2019,Solar,Solar,...,,,,,,,,,,


In [14]:
all_plants_operable.energy_capacity_mwh.sum()

22526.1

In [15]:
eia_plants_grouped  = all_plants_operable.groupby('tech_type').agg({'capacity_mw':'sum'})
print(eia_plants_grouped.sum())
eia_plants_grouped

capacity_mw    1253435.4
dtype: float64


Unnamed: 0_level_0,capacity_mw
tech_type,Unnamed: 1_level_1
Battery,9003.1
Biomass,8996.0
Coal,207065.9
Gas_CC,320432.4
Gas_SC,248006.3
Geothermal,3965.2
Hydro,101967.9
Nuclear,99435.0
Oil,31240.3
Other,3638.5


In [22]:
be_plants = pd.read_csv('../data/breakthrough_network/base_grid/plant.csv')
be_plants = be_plants[be_plants.interconnect == "Western"]
plants = be_plants.groupby('type').agg({'Pmax':'sum'})
print(plants.sum())
plants

FileNotFoundError: [Errno 2] No such file or directory: '../data/breakthrough_network/base_grid/plants.csv'