In [206]:
import numpy as np
import pandas as pd



def print_prime_mover_description(string, prime_mover_codes):
    description = prime_mover_codes.loc[prime_mover_codes['Prime Mover Codes'] == string, 'Description'].values
    if len(description) > 0:
        print(description[0])
    else:
        print("No description found for the given string.")

def print_fuel_codes_description(string, fuel_codes):
    description = fuel_codes.loc[fuel_codes['Energy Source Code'] == string, 'Description'].values
    if len(description) > 0:
        print(description[0])
    else:
        print("No description found for the given string.")

def get_fuel_mover(data, fuel, mover):
    return data[(data['Energy_Source_1 (Fuel)'] == fuel) & (data['PRIMEMOVER'] == mover)]



## Data processing

In [207]:
# Get the sheet names
excel_file = pd.ExcelFile('ferc_data\\20120724-4012_Generator_Data_Summer.xlsx')
sheet_names = excel_file.sheet_names

# Create a dictionary to store the separated dataframes
separated_dataframes = {}

# Iterate over each sheet and store the data in the dictionary
for sheet_name in sheet_names:
    separated_dataframes[sheet_name] = excel_file.parse(sheet_name)

# Process for Cleaning Generator Characteristics
gen_characteristics = separated_dataframes['Generator Characteristics']
gen_characteristics.columns = separated_dataframes['Generator Characteristics'].iloc[0]
gen_characteristics.drop(0, inplace=True)
gen_characteristics.reset_index(drop=True, inplace=True)
# gen_characteristics.drop_duplicates(gen_characteristics.columns[duplicate_index], axis=1)
gen_characteristics= gen_characteristics.loc[:,~gen_characteristics.columns.duplicated()].copy()

# Process for Cleaning Generator costs
gen_costs = separated_dataframes['Generator Offer Curve'].iloc[:,24:28]
gen_costs.columns = gen_costs.iloc[1]
gen_costs.drop([0,1], inplace=True)
gen_costs.reset_index(drop=True, inplace=True)
gen_costs.rename(columns={'1': 'index'}, inplace=True)
gen_costs.rename(columns={np.nan: 'Generic Name'}, inplace=True)

In [208]:
# the important paramaters are:
raw_key_params = ['NAMEPLATE (MWs)','RAMP UP (MW/min)', 'RAMP DOWN (MW/min)', 'Economic Minimum (MW)', 'MIN_DOWN_TIME (hr)', 'MIN_RUN_TIME (hr)']

Note that ramping is in MW/min and needs to be % of MW capacity /hr

In [209]:
ramp_params = ['RAMP UP (MW/min)', 'RAMP DOWN (MW/min)']
capacity_param = ['NAMEPLATE (MWs)']

# Divide each row of ramp_params by NAMEPLATE (MWs) and multiply by 60 to convert to per hour
percent_ramp =  gen_characteristics[ramp_params].div(gen_characteristics['NAMEPLATE (MWs)'], axis=0) * 60
gen_characteristics[ramp_params] = percent_ramp

# change the name of the columns to reflect the change in units for ramping
gen_characteristics.rename(columns={'RAMP UP (MW/min)': 'PERC RAMP UP', 'RAMP DOWN (MW/min)': 'PERC RAMP DOWN'}, inplace=True)

# update key param names
cleaned_key_params = ['NAMEPLATE (MWs)','PERC RAMP UP', 'PERC RAMP DOWN', 'Economic Minimum (MW)', 'MIN_DOWN_TIME (hr)', 'MIN_RUN_TIME (hr)']

## Processing Requiring Inputs

In [210]:
# selected fuel types and prime movers
BIT_ST_coal = get_fuel_mover(gen_characteristics, 'BIT', 'ST')
DFO_GT_diesel = get_fuel_mover(gen_characteristics, 'DFO', 'GT')
NG_GT_gas = get_fuel_mover(gen_characteristics, 'NG', 'GT')
NG_CT_gas = get_fuel_mover(gen_characteristics, 'NG', 'CT')
NG_IT_gas = get_fuel_mover(gen_characteristics, 'NG', 'IT')

In [211]:
gen_characteristics['NAMEPLATE (MWs)']

0        660
1        660
2        660
3       18.6
4       12.5
        ... 
1006     198
1007    10.8
1008    10.8
1009    29.4
1010    37.8
Name: NAMEPLATE (MWs), Length: 1011, dtype: object

Do this in generic scalable code 

In [212]:
def get_cleaned_cost_df(gen_costs_df, spec_gen_df):
    #
    costs = gen_costs_df[gen_costs_df['Generic Name'].isin(spec_gen_df['Generic Name'])]
    costs.drop('Generic Name', axis=1, inplace=True)
    # get the nameplate capacity of the generators
    nameplate_cap = spec_gen_df['NAMEPLATE (MWs)'].values
    # normalize the BIT_ST_coal_costs by the nameplate capacity
    costs_normalized = costs.div(nameplate_cap, axis=0)
    # change the titles of the columns to reflect the normalization
    costs_normalized.columns = ['No Load Cost ($/MW)', 'Cold Start Cost ($/MW)', 'Hot Start Cost ($/MW)']

    return costs_normalized

In [213]:
# emulate reading in the data from the excel file
fuel_id = 'DFO'
mover_id = 'GT'

In [214]:
specified_gen_df = gen_characteristics[(gen_characteristics['Energy_Source_1 (Fuel)'] == fuel_id) 
                        & (gen_characteristics['PRIMEMOVER'] == mover_id)]
                        
cleaned_cost_df = get_cleaned_cost_df(gen_costs, specified_gen_df)

### Decision processes for choosing costs and parameters
# costs
mean_costs = cleaned_cost_df.mean()
Start_Cost_per_MW = mean_costs.mean()

# parameters 
Ramp_Up_Percentage = specified_gen_df['PERC RAMP UP'].mean()
Ramp_Dn_Percentage =  specified_gen_df['PERC RAMP DOWN'].mean()
Min_Power =  specified_gen_df['Economic Minimum (MW)'].mean()

Down_Time =  specified_gen_df['MIN_DOWN_TIME (hr)'].min()
Up_Time =  specified_gen_df['MIN_RUN_TIME (hr)'].min()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  costs.drop('Generic Name', axis=1, inplace=True)


In [215]:
mean_costs

No Load Cost ($/MW)        4.367296
Cold Start Cost ($/MW)    39.863212
Hot Start Cost ($/MW)     30.226053
dtype: object