In [1]:
import pickle
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold
import pandas as pd
import numpy as np

In [2]:
import os
# Switch the kernel's working directory to the data folder so that later cells
# can open files by bare filename. The path is relative, so it is resolved
# against whatever the current working directory is when this cell runs.
new_path = '../data_folder'
os.chdir(new_path)

## Import Data

In [3]:
# Load the Sobol probability-filter dataset from the current working directory.
sobolPF = pd.read_csv('sobol_probability_filter.csv')
# Last expression of the cell: displays the column labels of the loaded frame.
sobolPF.columns

Index(['mu1_div_mu3', 'mu2_div_mu3', 'std1', 'std2', 'std3', 'comp1', 'comp2',
       'num_particles', 'safety_factor', 'fba_isna_prob', 'ls_isna_prob'],
      dtype='object')

In [4]:
# Parse data for target "fba_isna_prob".
# Target: selected with a list of labels so it is a single-column DataFrame
# (the same result as taking the Series and calling .to_frame()).
fba_isna_prob = sobolPF[['fba_isna_prob']]

# Features: every column except the two "*_isna_prob" columns.
sobolPF_fba_isna_prob = sobolPF.drop(columns=['ls_isna_prob', 'fba_isna_prob'])

## Define a function to compute cross-validated MAE scores

In [5]:
def rfr_mae(X_array, y_array, n_splits=5, random_state=13):
    """Return the mean cross-validated MAE of a random-forest regressor.

    The samples are split with a shuffled ``KFold``. For every fold a fresh
    ``RandomForestRegressor`` is fitted on the training part and scored with
    ``mean_absolute_error`` on the held-out part; the per-fold scores are then
    averaged.

    Parameters
    ----------
    X_array : array-like
        Feature matrix, one row per sample. Converted with ``np.asarray`` so
        that pandas objects are indexed by position like plain arrays.
    y_array : array-like
        Target values aligned with the rows of ``X_array``. Also converted
        with ``np.asarray``.
    n_splits : int, default 5
        Number of cross-validation folds.
    random_state : int, default 13
        Seed used for both the fold shuffling and the forest.

    Returns
    -------
    float
        Arithmetic mean of the per-fold mean absolute errors.
    """
    # Fold indices are positional; coercing to ndarray makes `arr[indices]`
    # select rows even when a DataFrame/Series is passed in.
    X_array = np.asarray(X_array)
    y_array = np.asarray(y_array)

    kf = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
    mae_scores = []
    for train_index, test_index in kf.split(X_array):
        X_train, X_test = X_array[train_index], X_array[test_index]
        y_train, y_test = y_array[train_index], y_array[test_index]
        # A new model per fold so no state carries over between folds.
        rfr_model = RandomForestRegressor(random_state=random_state)
        rfr_model.fit(X_train, y_train)
        mae_scores.append(mean_absolute_error(y_test, rfr_model.predict(X_test)))

    # Average once, after all folds have been scored.
    return sum(mae_scores) / len(mae_scores)

## Train/Val on Sobol_Probability_Filter for "fba_isna_prob"

In [31]:
# Convert the features and the target to NumPy arrays for rfr_mae, which
# indexes its inputs with the positional fold indices.
X_array_fba_isna_prob = sobolPF_fba_isna_prob.to_numpy()
# ravel() flattens the converted target to a 1-D array.
y_array_fba_isna_prob = fba_isna_prob.to_numpy().ravel()


print("Average MAE for fba_isna_prob",rfr_mae(X_array_fba_isna_prob, y_array_fba_isna_prob))


# TODO: model persistence is not wired up. NOTE(review): no `model` variable is
# defined in the visible cells and rfr_mae returns only the averaged MAE, so a
# fitted model has to be kept somewhere before the snippet below can be enabled.
# Save the model
# with open('path/to/save/model.pkl', 'wb') as f:
#     pickle.dump(model, f)

# # Load the model (only unpickle files you trust: pickle.load can run arbitrary code)
# with open('path/to/save/model.pkl', 'rb') as f:
#     loaded_model = pickle.load(f)



Average MAE for fba_isna_prob 0.04876547325285035


## Train/Val on Sobol_Probability_Filter for "ls_isna_prob"

In [33]:
# Target column for this run.
ls_isna_prob = sobolPF['ls_isna_prob']
# Features: the frame without the two "*_isna_prob" columns.
sobolPF_ls_isna_prob = sobolPF.drop(columns=['ls_isna_prob', 'fba_isna_prob'])

# NumPy views of the data; a Series already converts to a 1-D array.
X_array_ls = sobolPF_ls_isna_prob.to_numpy()
y_array_ls = ls_isna_prob.to_numpy()

print("Average MAE for ls_isna_prob", rfr_mae(X_array_ls, y_array_ls))


Average MAE for ls_isna_prob 0.09105841918934479


## Import sobol_regression Data

In [44]:
# Read sobol_regression.csv and discard every row that contains a NaN.
sobolREG = pd.read_csv('sobol_regression.csv').dropna()

# Feature columns used as model inputs.
# NOTE(review): the "*_rank" columns look like they may be derived from the
# targets selected below; confirm they do not leak target information.
sobol_reg = sobolREG[[
    'mu1_div_mu3', 'mu2_div_mu3',
    'std1', 'std2', 'std3',
    'comp1', 'comp2',
    'num_particles', 'safety_factor',
    'fba_rank', 'ls_rank', 'fba_time_s_rank', 'ls_time_s_rank',
]]

# Targets, each kept as its own single-column DataFrame.
fba, ls, fba_time_s, ls_time_s = (
    sobolREG[[target]] for target in ('fba', 'ls', 'fba_time_s', 'ls_time_s')
)

## target = fba 

In [49]:
# Feature matrix and the single-column 'fba' target flattened to 1-D.
X_array_fba = sobol_reg.to_numpy()
y_array_fba = fba.to_numpy().reshape(-1)

print("Average MAE for fba", rfr_mae(X_array_fba, y_array_fba))

Average MAE for fba 0.0031074618510043336


## target = ls 

In [51]:
# Feature matrix and the single-column 'ls' target flattened to 1-D.
X_array_ls_reg = sobol_reg.to_numpy()
y_array_ls_reg = ls.to_numpy().reshape(-1)

print("Average MAE for ls", rfr_mae(X_array_ls_reg, y_array_ls_reg))

Average MAE for ls 0.006576529961592949
