In [1]:
import pandas as pd
import pickle
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder, RobustScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import make_pipeline
from sklearn.metrics import make_scorer, accuracy_score, f1_score, precision_score, recall_score
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.inspection import permutation_importance
from xgboost import XGBClassifier

In [2]:
# Unpickling results file
with open('model_results_immune_90.pkl', 'rb') as f:
    model_results_immune_90 = pickle.load(f)

configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

    https://xgboost.readthedocs.io/en/stable/tutorials/saving_model.html

for more details about differences between saving model and serializing.



In [3]:
# Printing results
for model in model_results_immune_90.keys():
    print(model)
    print(model_results_immune_90[model]['metrics'])

XGB
{'accuracy': 0.7035175879396985, 'f1': 0.44339622641509435, 'precision': 0.3333333333333333, 'recall': 0.6619718309859155, 'rel_impr_accuracy': -0.14373088685015278, 'rel_impr_f1': 443396226415.09436}
KNN
{'accuracy': 0.7587939698492462, 'f1': 0.29411764705882354, 'precision': 0.3076923076923077, 'recall': 0.28169014084507044, 'rel_impr_accuracy': -0.0764525993883792, 'rel_impr_f1': 294117647058.82355}
SVC Linear
{'accuracy': 0.6582914572864321, 'f1': 0.48091603053435117, 'precision': 0.3298429319371728, 'recall': 0.8873239436619719, 'rel_impr_accuracy': -0.19877675840978593, 'rel_impr_f1': 480916030534.3512}
SVC RBF
{'accuracy': 0.6834170854271356, 'f1': 0.475, 'precision': 0.33727810650887574, 'recall': 0.8028169014084507, 'rel_impr_accuracy': -0.16819571865443425, 'rel_impr_f1': 475000000000.0}
SVC Poly
{'accuracy': 0.6608040201005025, 'f1': 0.4866920152091255, 'precision': 0.3333333333333333, 'recall': 0.9014084507042254, 'rel_impr_accuracy': -0.19571865443425068, 'rel_impr_f1'

In [None]:
# SVC Polynomial had the best recall
my_model = model_results_immune_90['SVC Poly']
best_model= my_model['best_model']
X_test = my_model['X_test_raw']
y_test = my_model['y_test']

# Compute permutation importance
results = permutation_importance(best_model, X_test, y_test, n_repeats=10, random_state=42)
print(len(results.importances_mean))

# Combine feature names and importance
importances = pd.DataFrame({
    'feature': X_test.columns,
    'importance_mean': results.importances_mean,
    'importance_std': results.importances_std
}).sort_values(by='importance_mean', ascending=False)

print(importances)

364
14
                      feature  importance_mean  importance_std
3                   insurance         0.004774        0.003067
1           admit_provider_id         0.003769        0.001685
2          admission_location         0.003015        0.001508
10                       SIRI         0.001759        0.001151
6                        race         0.001256        0.001256
11    Absolute Monocyte Count         0.001256        0.001685
5              marital_status         0.001005        0.001231
13  Absolute Neutrophil Count         0.001005        0.001667
0              admission_type         0.000754        0.001151
7                      gender         0.000503        0.001005
4                    language         0.000251        0.000754
12  Absolute Lymphocyte Count         0.000000        0.000000
8              first_careunit        -0.028392        0.009538
9               last_careunit        -0.028392        0.009538
