In [5]:
import pandas as pd
import pickle
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, RobustScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.metrics import make_scorer, accuracy_score, f1_score, precision_score, recall_score
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.inspection import permutation_importance
from xgboost import XGBClassifier

In [16]:
# Load the four pickled result sets (immune / all features, 90-day / 30-day).
# NOTE(review): pickle.load can execute arbitrary code while deserialising, so
# these files must only ever come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`."""
    with open(path, 'rb') as f:
        return pickle.load(f)


model_results_immune_90 = _load_pickle('model_results_immune_90.pkl')
model_results_immune_30 = _load_pickle('model_results_immune_30.pkl')
model_results_all_90 = _load_pickle('model_results_all_90.pkl')
model_results_all_30 = _load_pickle('model_results_all_30.pkl')

**90-day mortality; immune features**

In [7]:
# Show the stored metrics of every model in the 90-day / immune-feature run.
# NOTE(review): the recorded rel_impr_f1 values are on the order of 1e11, which
# looks like a division by a near-zero baseline upstream — confirm before
# quoting those numbers.
for model_name, model_result in model_results_immune_90.items():
    print(model_name, model_result['metrics'], sep='\n')

XGB
{'accuracy': 0.5728643216080402, 'f1': 0.41379310344827586, 'precision': 0.273972602739726, 'recall': 0.8450704225352113, 'rel_impr_accuracy': -0.30275229357798156, 'rel_impr_f1': 413793103448.2759}
KNN
{'accuracy': 0.7437185929648241, 'f1': 0.23880597014925373, 'precision': 0.25396825396825395, 'recall': 0.22535211267605634, 'rel_impr_accuracy': -0.09480122324159015, 'rel_impr_f1': 238805970149.25372}
SVC Linear
{'accuracy': 0.7864321608040201, 'f1': 0.41379310344827586, 'precision': 0.40540540540540543, 'recall': 0.4225352112676056, 'rel_impr_accuracy': -0.04281345565749227, 'rel_impr_f1': 413793103448.2759}
SVC RBF
{'accuracy': 0.7160804020100503, 'f1': 0.45410628019323673, 'precision': 0.34558823529411764, 'recall': 0.6619718309859155, 'rel_impr_accuracy': -0.12844036697247696, 'rel_impr_f1': 454106280193.23676}
SVC Poly
{'accuracy': 0.6633165829145728, 'f1': 0.45528455284552843, 'precision': 0.32, 'recall': 0.7887323943661971, 'rel_impr_accuracy': -0.19266055045871555, 'rel_im

In [10]:
# SVC Polynomial had the best f1 Score
my_model = model_results_immune_90['SVC Poly']
best_model, X_test, y_test = (
    my_model['best_model'],
    my_model['X_test_raw'],
    my_model['y_test'],
)

# Permutation importance: shuffle one raw input column at a time (10 repeats,
# fixed seed) and record how much the model's score drops.
# NOTE(review): no `scoring` is passed, so sklearn falls back to the
# estimator's own score — presumably accuracy here, although the model was
# picked by F1. Confirm this is intended.
results = permutation_importance(best_model, X_test, y_test, n_repeats=10, random_state=42)
print(len(results.importances_mean))

# One row per raw feature, most important first.
importances = pd.DataFrame(
    dict(
        feature=X_test.columns,
        importance_mean=results.importances_mean,
        importance_std=results.importances_std,
    )
)
importances = importances.sort_values(by='importance_mean', ascending=False)

print(importances)

4
                     feature  importance_mean  importance_std
2  Absolute Lymphocyte Count         0.112563        0.021841
1    Absolute Monocyte Count         0.038693        0.022951
3  Absolute Neutrophil Count         0.035176        0.009059
0                       SIRI        -0.002261        0.008291


In [None]:
# XGB had the best recall
my_model = model_results_immune_90['XGB']
best_model = my_model['best_model']

# Fitted classifier (pipeline step 'model') and its built-in feature importances.
xgb_clf = best_model.named_steps['model']
xgb_importances = xgb_clf.feature_importances_

# Column names as emitted by the 'preprocessor' step.
# Assumes the 'model' step is fed directly by 'preprocessor', so names and
# importances line up one-to-one — TODO confirm against the pipeline definition.
transformer = best_model.named_steps['preprocessor']
feature_names = transformer.get_feature_names_out()

# pandas is already imported as `pd` in the notebook's import cell, so the
# redundant mid-notebook import that used to sit here has been dropped.
# Pair each transformed feature with its importance, most important first.
feat_imp_df = (
    pd.DataFrame({
        'feature': feature_names,
        'importance': xgb_importances
    })
    .sort_values(by='importance', ascending=False)
)

print(feat_imp_df.head())

                          feature  importance
2  num__Absolute Lymphocyte Count    0.318746
0                       num__SIRI    0.284477
3  num__Absolute Neutrophil Count    0.217734
1    num__Absolute Monocyte Count    0.179043


**90-day mortality; all features**

In [27]:
# Show the stored metrics of every model in the 90-day / all-feature run.
for model_name, model_result in model_results_all_90.items():
    print(model_name, model_result['metrics'], sep='\n')

XGB
{'accuracy': 0.8090452261306532, 'f1': 0.5529411764705883, 'precision': 0.47474747474747475, 'recall': 0.6619718309859155, 'rel_impr_accuracy': -0.01529051987767584, 'rel_impr_f1': 552941176470.5883}
KNN
{'accuracy': 0.8190954773869347, 'f1': 0.3333333333333333, 'precision': 0.4864864864864865, 'recall': 0.2535211267605634, 'rel_impr_accuracy': -0.003058103975535114, 'rel_impr_f1': 333333333333.3333}
SVC Linear
{'accuracy': 0.7763819095477387, 'f1': 0.5658536585365853, 'precision': 0.43283582089552236, 'recall': 0.8169014084507042, 'rel_impr_accuracy': -0.055045871559632996, 'rel_impr_f1': 565853658536.5853}
SVC RBF
{'accuracy': 0.7964824120603015, 'f1': 0.5621621621621622, 'precision': 0.45614035087719296, 'recall': 0.7323943661971831, 'rel_impr_accuracy': -0.03058103975535168, 'rel_impr_f1': 562162162162.1622}
SVC Poly
{'accuracy': 0.7713567839195979, 'f1': 0.5560975609756098, 'precision': 0.4253731343283582, 'recall': 0.8028169014084507, 'rel_impr_accuracy': -0.06116207951070336

In [31]:
# SVC Linear had the best f1 score and recall
my_model = model_results_all_90['SVC Linear']
best_model = my_model['best_model']

# The preprocessor step supplies the post-transformation column names ...
transformer = best_model.named_steps['preprocessor']
feature_names = transformer.get_feature_names_out()

# ... and the fitted model step supplies the linear weights and the intercept.
svc = best_model.named_steps['model']
coefs = svc.coef_.flatten()  # flattened to 1-D so it can be a DataFrame column
intercept = svc.intercept_[0]  # not used further in this cell

# One row per transformed feature, largest coefficient first.
coef_df = pd.DataFrame(dict(feature=feature_names, coefficient=coefs))
coef_df = coef_df.sort_values('coefficient', ascending=False)

# NOTE: because the sort is descending, head(20) lists only the most positive
# weights; strongly negative coefficients end up at the tail of coef_df.
print(coef_df.head(20))

                                               feature  coefficient
17                                        num__Lactate     0.327665
20                                            num__RDW     0.265036
1                                             num__los     0.252692
7                                       num__Anion Gap     0.245467
23                                  num__Urea Nitrogen     0.241819
29                        cat__admission_type_EW EMER.     0.208007
15                          num__Immature Granulocytes     0.164048
0                                      num__anchor_age     0.153284
377  cat__last_careunit_Medical/Surgical Intensive ...     0.149699
365  cat__first_careunit_Medical/Surgical Intensive...     0.149699
13                                              num__I     0.145596
328                        cat__marital_status_missing     0.144567
303                            cat__insurance_Medicare     0.120562
115                      cat__admit_provider_id_

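**30-day mortality; immune features**

*TODO: no results are shown for this section yet — `model_results_immune_30` is loaded above but not analysed in this part of the notebook.*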

**30-day mortality; all features**

In [32]:
# Show the stored metrics of every model in the 30-day / all-feature run.
for model_name, model_result in model_results_all_30.items():
    print(model_name, model_result['metrics'], sep='\n')

XGB
{'accuracy': 0.8109243697478992, 'f1': 0.5054945054945055, 'precision': 0.4423076923076923, 'recall': 0.5897435897435898, 'rel_impr_accuracy': -0.030150753768844244, 'rel_impr_f1': 505494505494.5055}
KNN
{'accuracy': 0.8550420168067226, 'f1': 0.4566929133858268, 'precision': 0.5918367346938775, 'recall': 0.3717948717948718, 'rel_impr_accuracy': 0.022613065326633083, 'rel_impr_f1': 456692913385.8268}
SVC Linear
{'accuracy': 0.7521008403361344, 'f1': 0.49572649572649574, 'precision': 0.3717948717948718, 'recall': 0.7435897435897436, 'rel_impr_accuracy': -0.1005025125628141, 'rel_impr_f1': 495726495726.4957}
SVC RBF
{'accuracy': 0.7941176470588235, 'f1': 0.5196078431372549, 'precision': 0.42063492063492064, 'recall': 0.6794871794871795, 'rel_impr_accuracy': -0.05025125628140712, 'rel_impr_f1': 519607843137.25494}
SVC Poly
{'accuracy': 0.7668067226890757, 'f1': 0.5194805194805194, 'precision': 0.39215686274509803, 'recall': 0.7692307692307693, 'rel_impr_accuracy': -0.08291457286432158,

In [33]:
# SVC RBF had best f1 score
my_model = model_results_all_30['SVC RBF']
best_model, X_test, y_test = (
    my_model['best_model'],
    my_model['X_test_raw'],
    my_model['y_test'],
)

# Permutation importance on the raw test columns (10 shuffles per feature,
# fixed seed so the numbers are repeatable).
# NOTE(review): no `scoring` is passed, so the estimator's own score is used —
# presumably accuracy, although the model was picked by F1. In the recorded
# output many means are smaller than their std, so read the ranking with care.
results = permutation_importance(best_model, X_test, y_test, n_repeats=10, random_state=42)
print(len(results.importances_mean))

# One row per raw feature, most important first.
importances = pd.DataFrame(
    dict(
        feature=X_test.columns,
        importance_mean=results.importances_mean,
        importance_std=results.importances_std,
    )
)
importances = importances.sort_values(by='importance_mean', ascending=False)

print(importances)

35
                      feature  importance_mean  importance_std
27                    Lactate     2.310924e-02        0.003255
17                  Anion Gap     1.155462e-02        0.006593
11                        los     1.155462e-02        0.005734
26                          L     4.411765e-03        0.003037
29             Platelet Count     4.201681e-03        0.005146
31            Red Blood Cells     3.151261e-03        0.004904
25      Immature Granulocytes     2.941176e-03        0.005077
30                        RDW     2.731092e-03        0.010464
15    Absolute Monocyte Count     2.521008e-03        0.003851
22                 Hemoglobin     2.310924e-03        0.005750
20                 Creatinine     1.470588e-03        0.005482
18                Base Excess     1.260504e-03        0.005163
28                        PTT     1.050420e-03        0.002857
24                    INR(PT)     8.403361e-04        0.002521
23                          I    -1.110223e-17      

In [34]:
# SVC Poly had best recall
my_model = model_results_all_30['SVC Poly']
best_model, X_test, y_test = (
    my_model['best_model'],
    my_model['X_test_raw'],
    my_model['y_test'],
)

# Permutation importance on the raw test columns (10 shuffles per feature,
# fixed seed so the numbers are repeatable).
# NOTE(review): no `scoring` is passed, so the estimator's own score is used —
# presumably accuracy, although this model was picked for its recall. Confirm
# this is the intended metric.
results = permutation_importance(best_model, X_test, y_test, n_repeats=10, random_state=42)
print(len(results.importances_mean))

# One row per raw feature, most important first.
importances = pd.DataFrame(
    dict(
        feature=X_test.columns,
        importance_mean=results.importances_mean,
        importance_std=results.importances_std,
    )
)
importances = importances.sort_values(by='importance_mean', ascending=False)

print(importances)

35
                      feature  importance_mean  importance_std
27                    Lactate     3.907563e-02        0.007759
17                  Anion Gap     1.281513e-02        0.007121
5              marital_status     1.260504e-02        0.003515
33              Urea Nitrogen     9.033613e-03        0.007863
19                Bicarbonate     8.193277e-03        0.003693
26                          L     7.563025e-03        0.004525
20                 Creatinine     7.352941e-03        0.005167
29             Platelet Count     6.932773e-03        0.004886
22                 Hemoglobin     6.932773e-03        0.005064
2          admission_location     3.991597e-03        0.003571
12    Absolute Basophil Count     2.731092e-03        0.001345
31            Red Blood Cells     2.310924e-03        0.002731
3                   insurance     1.050420e-03        0.004530
16  Absolute Neutrophil Count     8.403361e-04        0.001394
13  Absolute Eosinophil Count     4.440892e-17      