In [2]:
import os
import warnings

import numpy as np
import pandas as pd
import shap
import sklearn
warnings.filterwarnings("ignore")

In [None]:
#LR
from sklearn.linear_model import LogisticRegression
def lr(X, y, index, seed=2023):
    """Fit a logistic regression and return mean |SHAP| feature importances.

    The model is fitted on all of ``X``/``y``. SHAP values are then computed
    only for the rows ``X.iloc[index]``, with the full ``X`` handed to
    ``shap.Explainer`` as its masker/background data.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix (must support ``.iloc``).
    y : array-like
        Labels passed to ``LogisticRegression.fit``.
    index : list of int
        Positional row indices of ``X`` whose predictions are explained.
    seed : int, optional
        Seed forwarded to ``shap.Explainer`` so that repeated runs yield the
        same SHAP estimates. Defaults to 2023.

    Returns
    -------
    One value per feature: the mean absolute SHAP value over the explained
    rows, taken for output column 0 of ``predict_proba``.
    """
    # NOTE(review): LogisticRegression runs with its default settings and
    # warnings are silenced at the top of the file, so a non-converged fit
    # would go unnoticed -- confirm the solver converges on this data.
    logit_model = LogisticRegression()
    logit_model.fit(X, y)

    # The explainer only sees a prediction function, so shap uses a
    # model-agnostic algorithm; seeding it keeps the estimates reproducible.
    explainer = shap.Explainer(logit_model.predict_proba, X, seed=seed)

    rows_to_explain = X.iloc[index]
    shap_values = explainer(rows_to_explain)

    # Average |SHAP| over rows, then keep the first model output.
    importance = shap_values.abs.mean(axis=0).values[:, 0]
    return importance


In [None]:
#CatB
from catboost import CatBoostClassifier
def catb(X, y, index):
    """Fit a CatBoost classifier and return mean |SHAP| feature importances.

    The classifier is trained on all of ``X``/``y`` with a fixed
    ``random_state``; a ``shap.TreeExplainer`` then explains only the rows
    ``X.iloc[index]``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix (must support ``.iloc``).
    y : array-like
        Labels passed to ``CatBoostClassifier.fit``.
    index : list of int
        Positional row indices of ``X`` whose predictions are explained.

    Returns
    -------
    One value per feature: the mean absolute SHAP value over the explained
    rows.
    """
    booster = CatBoostClassifier(random_state=2023, verbose=False)
    booster.fit(X, y)

    tree_explainer = shap.TreeExplainer(booster)
    rows_to_explain = X.iloc[index]
    explanation = tree_explainer(rows_to_explain)

    # Collapse the row axis after taking absolute values.
    mean_abs = explanation.abs.mean(axis=0)
    return mean_abs.values

In [None]:
#RF
from sklearn.ensemble import RandomForestClassifier
def rf(X, y, index):
    """Fit a random forest and return mean |SHAP| feature importances.

    The forest is trained on all of ``X``/``y`` with a fixed
    ``random_state``; a ``shap.TreeExplainer`` then explains only the rows
    ``X.iloc[index]``.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix (must support ``.iloc``).
    y : array-like
        Labels passed to ``RandomForestClassifier.fit``.
    index : list of int
        Positional row indices of ``X`` whose predictions are explained.

    Returns
    -------
    One value per feature: the mean absolute SHAP value over the explained
    rows, taken for output column 0.
    """
    forest = RandomForestClassifier(random_state=2023)
    forest.fit(X, y)

    tree_explainer = shap.TreeExplainer(forest)
    rows_to_explain = X.iloc[index]
    explanation = tree_explainer(rows_to_explain)

    # Collapse the row axis after taking absolute values, then keep the
    # first model output.
    mean_abs = explanation.abs.mean(axis=0)
    return mean_abs.values[:, 0]

In [None]:
#NN
from sklearn.neural_network import MLPClassifier
def nn(X, y, index, seed=0):
    """Fit a small MLP classifier and return mean |SHAP| feature importances.

    The network is fitted on all of ``X``/``y``. SHAP values are then
    computed only for the rows ``X.iloc[index]``, with the full ``X`` handed
    to ``shap.Explainer`` as its masker/background data.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix (must support ``.iloc``).
    y : array-like
        Labels passed to ``MLPClassifier.fit``.
    index : list of int
        Positional row indices of ``X`` whose predictions are explained.
    seed : int, optional
        Seed used both as the network's ``random_state`` and as the
        ``shap.Explainer`` seed, so repeated runs give the same result.
        Defaults to 0.

    Returns
    -------
    One value per feature: the mean absolute SHAP value over the explained
    rows, taken for output column 0 of ``predict_proba``.
    """
    mlp = MLPClassifier(
        solver="lbfgs",
        alpha=1e-1,
        hidden_layer_sizes=(5, 2),
        random_state=seed,
    )
    mlp.fit(X, y)

    # The explainer only sees a prediction function, so shap uses a
    # model-agnostic algorithm; seeding it keeps the estimates reproducible.
    explainer = shap.Explainer(mlp.predict_proba, X, seed=seed)

    rows_to_explain = X.iloc[index]
    shap_values = explainer(rows_to_explain)

    # Average |SHAP| over rows, then keep the first model output.
    importance = shap_values.abs.mean(axis=0).values[:, 0]
    return importance


In [None]:
# Outcome table; its 'icudead' column is the label vector handed to every model.
outcome = pd.read_csv("mimic_outcome.csv")
y = outcome['icudead']

# Feature matrix of the original (non-knockoff) data.
X = pd.read_csv("mimic_original.csv")

# X and y are fitted together, so they must describe the same rows.
assert len(X) == len(y), "mimic_original.csv and mimic_outcome.csv differ in row count"

# Index.csv has no header row; its first column lists the integer row
# positions (used with .iloc) of the samples that SHAP values are computed for.
index = pd.read_csv("Index.csv", header=None)
index = index[0].astype(int).tolist()

In [None]:
#Cal FI
# Builds one importance table per model: row 0 holds the importances computed
# on the original data, rows 1..M those computed on each knockoff dataset
# (files mimic_knockoff_k1.csv ... mimic_knockoff_k{M}.csv).
M=5


def _new_importance_table(first_row, n_rows):
    """Return an n_rows x len(first_row) zero table whose row 0 is first_row.

    The width comes from the importance vector itself, so the table fits
    whatever number of features the data has.
    """
    table = pd.DataFrame(np.zeros((n_rows, len(first_row))))
    table.iloc[0] = first_row
    return table


lr_q1 = lr(X,y,index)
lr_im = _new_importance_table(lr_q1, M + 1)

catb_q1 = catb(X,y,index)
catb_im = _new_importance_table(catb_q1, M + 1)

rf_q1 = rf(X,y,index)
rf_im = _new_importance_table(rf_q1, M + 1)

nn_q1 = nn(X,y,index)
nn_im = _new_importance_table(nn_q1, M + 1)

print("Original, Done!")

# Pair each model function with the table it fills so the knockoff loop
# handles all four models uniformly.
model_tables = ((lr, lr_im), (catb, catb_im), (rf, rf_im), (nn, nn_im))

for i in range(M):

    file = f'mimic_knockoff_k{i + 1}.csv'

    data = pd.read_csv(file)

    for model_fn, table in model_tables:
        table.iloc[i + 1] = model_fn(data, y, index)

    print(f"Index: {i + 1}, Done!")


# os.path.join drops an empty basepath, so the default writes into the
# current working directory rather than the filesystem root ("/lr_fi_icu.csv").
basepath = ""
file_lr = os.path.join(basepath, "lr_fi_icu.csv")
file_catb = os.path.join(basepath, "catb_fi_icu.csv")
file_rf = os.path.join(basepath, "rf_fi_icu.csv")
file_nn = os.path.join(basepath, "nn_fi_icu.csv")

lr_im.to_csv(file_lr)
catb_im.to_csv(file_catb)
rf_im.to_csv(file_rf)
nn_im.to_csv(file_nn)
