In [1]:
import os

import numpy as np
import pandas as pd
import shap
import sklearn
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [2]:
def cal_abs_shap(X, y, index, model_name):
    """Fit one classifier on (X, y) and return the mean absolute SHAP value per feature.

    The model is trained on all rows of ``X``; SHAP values are then computed
    only for the rows selected positionally by ``index`` and averaged (in
    absolute value) over those rows.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table used for fitting; must support ``X.iloc[index]``.
    y : array-like
        Target labels passed unchanged to ``model.fit``.
    index : list of int
        Positional row indices of ``X`` on which SHAP values are evaluated.
    model_name : str
        One of ``"catb"``, ``"ligb"``, ``"xgb"``, ``"gbdt"``, ``"rf"``.

    Returns
    -------
    numpy.ndarray
        One importance score per feature. When the averaged SHAP values carry
        a trailing per-output axis (as they do for the random forest), only
        the first output's column is returned.

    Raises
    ------
    ValueError
        If ``model_name`` is not one of the supported keys.
    """
    # Lazy factories: nothing is constructed until the name has been validated,
    # and only the requested library is actually touched.
    model_factories = {
        "catb": lambda: CatBoostClassifier(random_state=2025, verbose=False),
        "ligb": lambda: LGBMClassifier(random_state=2025),
        "xgb": lambda: XGBClassifier(random_state=2025, eval_metric='logloss'),
        "gbdt": lambda: GradientBoostingClassifier(random_state=2025),
        "rf": lambda: RandomForestClassifier(random_state=2025),
    }
    if model_name not in model_factories:
        raise ValueError(f"Unrecognized model name: {model_name}. Please check your input.")

    model = model_factories[model_name]()
    model.fit(X, y)

    # "gbdt" keeps going through the generic shap.Explainer entry point;
    # every other model uses TreeExplainer directly.
    if model_name == "gbdt":
        explainer = shap.Explainer(model)
    else:
        explainer = shap.TreeExplainer(model)

    X_background = X.iloc[index]
    shap_values = explainer(X_background)
    importance = shap_values.abs.mean(axis=0).values

    # Some model/SHAP combinations yield one column per output (the random
    # forest always did here). Keep the first output's column so the result is
    # always one value per feature, whichever model produced it.
    if np.ndim(importance) == 2:
        importance = importance[:, 0]
    return importance

In [5]:
def calculate_fi(X, y, index, models, M=5, kopath="", outpath=""):
    """Compute SHAP feature importances for the original and M perturbed tables.

    For every model name, row 0 of the result holds the importances obtained
    from ``X`` and rows 1..M hold those obtained from ``X_k1.csv`` ..
    ``X_kM.csv`` read from ``kopath`` (presumably knockoff copies of ``X`` --
    confirm against the data pipeline). Each model's table is written to
    ``<outpath>/<model_name>_fi.csv`` without the index column.

    Parameters
    ----------
    X : pandas.DataFrame
        Original feature table.
    y : pandas.DataFrame or pandas.Series
        Labels; flattened to a 1-D array before fitting.
    index : pandas.DataFrame
        Table whose column ``0`` holds the positional row indices on which
        SHAP values are evaluated.
    models : iterable of str
        Model names understood by ``cal_abs_shap``.
    M : int, default 5
        Number of perturbed tables to process.
    kopath : str, default ""
        Directory containing ``X_k<i>.csv``. An empty string means the
        current working directory.
    outpath : str, default ""
        Directory receiving the ``*_fi.csv`` files; created if it does not
        exist. An empty string means the current working directory.

    Raises
    ------
    ValueError
        If a perturbed table does not have the same number of columns as ``X``.
    """
    ncol = X.shape[1]
    y = y.to_numpy().ravel()
    index = index[0].astype(int).tolist()

    # Create the output directory before any fitting so a missing folder is
    # not discovered only after the expensive work is done.
    if outpath:
        os.makedirs(outpath, exist_ok=True)

    for model_name in models:
        im = pd.DataFrame(np.zeros((M + 1, ncol)))
        im.iloc[0] = cal_abs_shap(X, y, index, model_name=model_name)

        print(f"{model_name}: Original, Done!")

        for i in range(M):
            # os.path.join keeps an empty kopath relative to the working
            # directory instead of turning it into "/X_k1.csv" at the root.
            file = os.path.join(kopath, f"X_k{i + 1}.csv")
            data = pd.read_csv(file)
            if data.shape[1] != ncol:
                # Fail before fitting rather than on the row assignment below.
                raise ValueError(
                    f"{file} has {data.shape[1]} columns, expected {ncol} to match X."
                )
            im.iloc[i + 1] = cal_abs_shap(data, y, index, model_name=model_name)
            print(f"{model_name}: Index: {i + 1}, Done!")

        file_path = os.path.join(outpath, f"{model_name}_fi.csv")
        im.to_csv(file_path, index=False)
        print(f"{model_name}: Write FI, Done!")
    

In [None]:
# Load the feature table, the labels, and the header-less table whose first
# column lists the row positions used for SHAP evaluation.
X = pd.read_csv('/Data/X.csv')
y = pd.read_csv('/Data/y.csv')
index = pd.read_csv('/Data/Index.csv', header=None)

# Run every supported model over the original table and five perturbed tables.
# NOTE(review): kopath/outpath are empty strings here -- presumably placeholders
# to be replaced with the real input/output directories; confirm before running.
model_names = ['catb', 'ligb', 'xgb', 'gbdt', 'rf']
calculate_fi(
    X,
    y,
    index,
    models=model_names,
    M=5,
    kopath="",
    outpath="",
)