# Apply OASIS+ model to a test set

- This notebook and associated OASIS+ model files allow the application of OASIS+ to an external test set.
- The test data should be provided as a comma-separated (CSV) file, and the columns holding the 10 clinical variables used by OASIS+ should be named as in oasis_variables_dict.
- If the true in-hospital mortality labels are included, the script will report performance using five performance metrics (accuracy, sensitivity, specificity, Matthews correlation coefficient, and AUC).
- This code will run properly in an environment that has the libraries in requirements.txt installed.
- If you use this code, please cite [1] in the README.md file.
- If you have questions or suggestions, please email yelmanzalawi@geisinger.edu

In [None]:
# Copyright (c)  2020, Integrative Informatics Research Lab, Geisinger Clinic
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#     * Redistributions of source code must retain the above copyright
#       notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above copyright
#       notice, this list of conditions and the following disclaimer in the
#       documentation and/or other materials provided with the distribution.
#     * Neither the name of the <organization> nor the
#       names of its contributors may be used to endorse or promote products
#       derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL Yasser El-Manzalawy BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

In [None]:
%pip install numpy
%pip install joblib
%pip install pandas
%pip install scikit-learn

import numpy as np
from joblib import load
import pandas as pd

from sklearn.metrics import roc_curve, matthews_corrcoef, accuracy_score, roc_auc_score, confusion_matrix

In [None]:
# Helper functions

def impute_data(in_file):
    """
    Load test data and impute missing values.

    Missing values in each of the 10 OASIS clinical variables are replaced with the
    variable-specific default listed in oasis_variables_dict. Missing values in any
    other column (e.g., the label column) are replaced with 0.

    :param in_file: csv file including test data (anything accepted by pandas.read_csv). The 10 clinical variables used for computing OASIS score should be named as shown in the oasis_variables_dict.
    :return: imputed data as a DataFrame object
    :raises KeyError: if one or more of the OASIS variables is not a column of the file.
    """
    oasis_variables_dict = {
     'oasis_age': 21.5,  # normal range is [18, 24]
     'oasis_gcs': 15,   # normal range is 15
     'oasis_heartrate': 60.5, # normal range is [33,88]
     'oasis_meanbp': 102.34,  # normal range is [61.33,143.44]
     'oasis_resprate': 17.5,  # normal range is [13,22]
     'oasis_temp': 36.64,  # normal range is [36.40, 36.88]
     'oasis_urineoutput': 4720,  # normal range is [2544, 6896]
     'oasis_mechvent': 0,   # 'No' corresponds to zero subscore
     'oasis_electivesurgery': 1,  # 'Yes' corresponds to zero subscore
     'oasis_preiculos': 14.5    # normal range is [4.95, 24]
    }
    df = pd.read_csv(in_file)

    # Fail early with a message that names every missing column instead of only the first one.
    missing = [name for name in oasis_variables_dict if name not in df.columns]
    if missing:
        raise KeyError('Missing OASIS variable column(s): ' + ', '.join(missing))

    for key, value in oasis_variables_dict.items():
        # The fill value must be given through the `nan` keyword: the second positional
        # argument of np.nan_to_num is `copy`, so passing the value positionally
        # silently replaced every NaN with 0 instead of the intended default.
        df[key] = np.nan_to_num(df[key].values, nan=value)

    # Any remaining missing value (columns other than the OASIS variables) becomes 0.
    df = df.fillna(0)
    return df
    
def get_labels(probs, cutoff=0.5):
    """
    Binarize predicted probabilities using the specified threshold.

    A probability strictly below the cutoff maps to 0; everything else maps to 1.

    :param probs: Predicted probabilities (list, NumPy array, or pandas Series).
    :param cutoff: Threshold for converting predicted probabilities into binary predictions.
    :return: Binary labels as a float NumPy array of 0.0/1.0 values.
    """
    # np.asarray makes the comparison positional, so a pandas Series whose index
    # does not start at 0 is handled as well (element-wise probs[i] lookups were
    # label-based and could raise KeyError).
    scores = np.asarray(probs)
    # Keep the "< cutoff -> 0, otherwise 1" rule in a single vectorized step.
    return np.where(scores < cutoff, 0.0, 1.0)

def evaluate(Y_true, Y_pred, cutoff=0.5):
    """
    Given true labels and predicted probabilities, return Accuracy, Sensitivity, Specificity, Matthews Correlation Coefficient, and AUC scores.

    :param Y_true: True binary labels (1 is treated as the positive class).
    :param Y_pred: Predicted probabilities of the positive class.
    :param cutoff: Threshold used to binarize Y_pred for the label-based metrics (AUC uses the raw probabilities).
    :return: NumPy array [accuracy, sensitivity, specificity, MCC, AUC]. Sensitivity (specificity) is NaN when there are no positive (negative) samples.
    """
    Y_score = get_labels(Y_pred, cutoff)
    mcc = matthews_corrcoef(Y_true, Y_score)
    acc = accuracy_score(Y_true, Y_score)
    auc = roc_auc_score(Y_true, Y_pred)

    # With labels=[1, 0] the positive class comes first, so the matrix is
    # laid out as [[TP, FN], [FP, TN]].
    cm = confusion_matrix(Y_true, Y_score, labels=[1, 0])
    (tp, fn), (fp, tn) = cm

    ap = tp + fn  # all actual positives
    an = tn + fp  # all actual negatives

    # compute Sn and Sp; make the undefined case explicit instead of relying on
    # a 0/0 division that emits a RuntimeWarning.
    sn = tp / ap if ap > 0 else np.nan
    sp = tn / an if an > 0 else np.nan

    return np.array([acc, sn, sp, mcc, auc])

## Parameters

In [None]:
# User-specific parameters: edit the values in this cell to match your data.

# Test data: input CSV, optional label column, and decision threshold.
test_file = './data/meta_severity_clean_test.csv'  # Path to the test CSV file (e.g., ./data/meta_severity_clean_test.csv)
test_lbl = 'in_hospital_mortality'   # Name of the column holding the true labels; set to None if labels are not available
threshold = 0.10     # Threshold for binarizing OASIS+ scores; scores >= threshold are labeled 1 (default is 0.10)

# Output CSV file where the OASIS+ predicted scores and binary labels are saved.
out_file = './data/oasis_preds.csv'   # Path to the output file (e.g., ./data/oasis_preds.csv)

### You should not need to change the code after this line

In [None]:
# OASIS+ parameters: serialized artifacts that are loaded with joblib in the next cell.
model_file = './deploy/oasis_xgb200.joblib'
filter_file = './deploy/oasis_filter.joblib'

# Columns selected from the test data to build the feature matrix.
# The order of this list is the column order of the matrix passed to the
# filter and the model, so it should not be rearranged.
oasis_variables = [
 'oasis_age',
 'oasis_gcs',
 'oasis_heartrate',
 'oasis_meanbp',
 'oasis_resprate',
 'oasis_temp',
 'oasis_urineoutput',
 'oasis_mechvent',
 'oasis_electivesurgery', 
 'oasis_preiculos'
]

In [None]:
# Restore the fitted OASIS+ classifier and its preprocessing filter from disk.
# NOTE: joblib.load unpickles the file contents, so only load artifacts that
# come from a trusted source.
oasis_model = load(model_file)
oasis_filter = load(filter_file)

# Read the test set, impute missing values, and extract the feature matrix.
test_df = impute_data(test_file)
X_test = test_df[oasis_variables].values
if test_lbl is not None:
    # True labels are optional; they are only needed for the performance report.
    y_test = test_df[test_lbl]

# Apply the same preprocessing filter that was saved with the model.
X_transformed = oasis_filter.transform(X_test)

# Probability of the positive class, then a 0/1 label using the chosen threshold.
preds = oasis_model.predict_proba(X_transformed)[:, 1]
pred_lbls = (preds >= threshold).astype(int)

# Write scores and binary predictions side by side to the output file.
out_df = pd.DataFrame({'OASIS+': preds, 'in-hospital_mortality': pred_lbls})
out_df.to_csv(out_file, index=False)
print(f'{out_file} saved!')

./data/oasis_preds.csv saved!


In [None]:
# Print the five performance metrics, but only when true labels were supplied.

if test_lbl is not None:
    metric_names = ('ACC', 'Sn', 'Sp', 'MCC', 'AUC')
    print(' '.join(metric_names))
    scores = evaluate(y_test, preds, cutoff=threshold)
    print(scores)

ACC Sn Sp MCC AUC
[0.73384905 0.77733598 0.72873832 0.33159336 0.82771577]
