# Sensitivity CSV creator 

## This script is to make the csvs for OPGEE to run to complete sensitivity analysis

By taking in a schema that specifies options for variables as well as ranges of values, this script creates a csv with the right format for OPGEEv4 input that has the baseline field (average observed values) and many fictional fields. These fictional fields just have one variable edited at a time from the baseline to show the first order importance of individual variables to the model's calculation.

In [1]:
import pandas as pd
import numpy as np

import ast                                      # for converting string of options list to actual list

### Loading schema and preparing the output table

In [3]:
# load OPGEE sensitivity information table

df = pd.read_csv("./data/OPGEE_py_inputs_alt.csv")  # reading in the generated input data for sensitivity analysis 
df.index = df['Name']                               # set the dataframe's index to the variable name for ease
df = df[~np.isnan(df['mapped'])]                    # temporary measure for reducing input options
df = df[1:]

In [4]:
# Prepare outputs

baseline = df['Mean']                           # create a default set of inputs to alter

out = pd.DataFrame(                             # create a new dataframe that will be the OPGEE input with many fields
    index=df['Name'],
    columns = ['python_name','Type']
)  

# setup of the output
out['python_name'] = df['Name']                 # OPGEE input variable names 
out['Type'] = df['Type']                        # adds the input type
out['Field 1'] = baseline                       # adds the baseline set of inputs for comparison

### Creating and adding fields to the dataframe

In [5]:
j=2                                                                             # for naming the field, because the first field is the baseline

# creates a future row that will be added to help define which fields are relevant to which variables
asset = ['name','str','baseline']                                       



# here is where we actually create the table

for name in out.index:

    i=0                                                                         # when giving asset names based on the field, start at zero

    # for handling categorical variables
    if df.at[name,'Type']=='str' and '[' in df.at[name,'options']:              
        
        df.at[name,'options'] = ast.literal_eval(df.at[name,'options'])         # converts the string of options to a list of strings

        for option in df.at[name,'options']:                                    # adds a field for each option in options where that value is changed
            
            asset.append(f'{name}_{i}')
            out = pd.concat((out, baseline.rename(f'Field {j}')),axis=1)        # copies baseline
            out.at[name,f'Field {j}'] = option                                  # changes value for this variable to the next option in options

            # iterating for naming convention
            i=i+1                                                               
            j=j+1

    # for handling variables of type int
    elif df.at[name,'Type']=="int":

        vals = np.arange(                                                       # create the values to alter the baseline from the schema
            int(df.at[name,'Min']),
            int(df.at[name,'Max'])+int(df.at[name,'step']),
            int(df.at[name,'step'])
        )

        for val in vals:                                                        # for each of these values, add the new field and alter the baseline

            asset.append(f'{name}_{i}')            
            out = pd.concat((out, baseline.rename(f'Field {j}')),axis=1)
            out.at[name,f'Field {j}'] = val

            i=i+1
            j=j+1
    
    # for handling float variables
    else:

        vals = np.arange(                                                       # create the values to alter the baseline from the schema
            float(df.at[name,'Min']),
            float(df.at[name,'Max'])+float(df.at[name,'step']),
            float(df.at[name,'step'])
        )

        for val in vals:                                                        # for each of these values, add the new field and alter the baseline

            asset.append(f'{name}_{i}')            
            out = pd.concat((out, baseline.rename(f'Field {j}')),axis=1)
            out.at[name,f'Field {j}'] = val
            
            i=i+1
            j=j+1

### Post-processing to fix some errors and formatting

In [6]:
out.loc['country'] = 'RMI'                                                      # turn the country for all fields to RMI because they are made up
out.loc['asset'] = asset                                                        # insert the asset list as a new row that explains the variable of interest

# Get the last two rows
last_two_rows = out.iloc[-2:]

# Get the remaining rows
remaining_rows = out.iloc[:-2]

# Concatenate the last two rows with the remaining rows
out = pd.concat([last_two_rows, remaining_rows])

# make the python name consistent for clarity
out.at['country','python_name'] = 'country'
out.at['country','Type'] = 'str'

In [7]:
# Iterate through rows (excluding the first row which contains data types) to coerce to correct types
for index, row in out.iterrows():

    target_type = row['Type']

    for col in out.columns[2:]:

        out.at[index, col] = pd.to_numeric(row[col], errors='coerce') if target_type == 'int' or target_type == 'float' else row[col]

# print(out)

In [8]:
out.to_csv("../OPGEEv4/opgee/etc/sensitivity/sensitivity.csv",index=False)      # write out the csv

OSError: Cannot save file into a non-existent directory: '..\OPGEEv4\opgee\etc\sensitivity'