In [18]:
import random
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

In [19]:
# Load the preprocessed tabular dataset. Later cells read its 'Outcome' column
# as the label and select test rows from it by position.
data_df = pd.read_csv('preprocessed_data.csv')

In [20]:
# Load the signal array used as the LSTM input. It is later indexed with the
# same test_indices as data_df and treated as a 3-D array.
# NOTE(review): presumably row i of X_signal belongs to row i of data_df — confirm.
X_signal = np.load('X_signal.npy')

In [21]:
# Draw a reproducible 10% sample of row positions to serve as the test set.
# NOTE(review): presumably the same seed and fraction were used when the saved
# models were trained, so these rows were held out — confirm, otherwise the
# evaluation below is measured on training data.
random.seed(41)
dataset_length = len(data_df)
test_size = int(0.1 * dataset_length)  # int() truncates the fractional part
test_indices = random.sample(range(dataset_length), test_size)  # sampled without replacement
test_indices

[390,
 340,
 236,
 170,
 394,
 591,
 290,
 566,
 283,
 392,
 590,
 9,
 255,
 18,
 449,
 158,
 153,
 326,
 171,
 263,
 597,
 60,
 122,
 601,
 33,
 442,
 288,
 221,
 74,
 368,
 486,
 128,
 568,
 126,
 101,
 147,
 182,
 54,
 272,
 325,
 230,
 26,
 186,
 445,
 58,
 308,
 409,
 149,
 109,
 95,
 416,
 22,
 505,
 404,
 235,
 87,
 138,
 79,
 469,
 411]

In [22]:
# Ground-truth labels shared by both models' evaluation.
# Rows are picked by position (.iloc) so they line up with the features, which
# are also selected positionally (data_df.iloc[test_indices] and
# X_signal[test_indices]); a label-based lookup would only agree when
# data_df has the default 0..n-1 index.
# NOTE(review): the encoder is fitted on the test labels only, so the integer
# codes follow the sorted order of the classes present in this subset —
# confirm this matches the encoding used at training time.
y_test = LabelEncoder().fit_transform(data_df['Outcome'].iloc[test_indices])
y_test.shape

(60,)

## For XGBoost

In [23]:
# Positional selection of the test rows. .copy() makes this an independent
# frame, so the cleaning steps that follow can modify it without pandas
# raising SettingWithCopyWarning and without any risk of touching data_df.
sub_data_df = data_df.iloc[test_indices].copy()

In [None]:
from sklearn.impute import KNNImputer

# Remove columns that are not model inputs. Assigning the result (rather than
# dropping in place) yields a fresh frame, so the column writes below never act
# on a view of data_df.
sub_data_df = sub_data_df.drop(columns=['Patient', 'ROSC', 'CPC'])

# Fill missing values in categorical columns with the most frequent value.
# The result is assigned back to the column: calling fillna(inplace=True) on
# sub_data_df[col] operates on a temporary object and does not reliably update
# the frame (it is a no-op under pandas Copy-on-Write).
categorical_columns = ['Sex', 'OHCA', 'VFib']
for col in categorical_columns:
    most_frequent = sub_data_df[col].mode().iloc[0]
    sub_data_df[col] = sub_data_df[col].fillna(most_frequent)

# Impute the numeric columns one at a time, as the original cell did.
# NOTE(review): each call sees a single column, so there are no other features
# to measure neighbour distance on; this presumably degenerates to filling with
# the column mean — confirm whether joint imputation was intended.
# NOTE(review): the mode and the imputer are fitted on the test rows themselves;
# confirm this matches how the training data was prepared.
imputer = KNNImputer(n_neighbors=5)
for numeric_col in ['TTM', 'Age']:
    # fit_transform returns an (n_rows, 1) array; flatten it to a 1-D column.
    sub_data_df[numeric_col] = imputer.fit_transform(sub_data_df[[numeric_col]]).ravel()

In [25]:
# Build the tabular feature matrix: everything except the label, with
# categorical columns one-hot encoded as 0/1 integers (first level dropped).
# NOTE(review): the dummies are derived from the test rows alone; presumably the
# resulting columns match those the model was trained on — confirm.
X_test_1 = pd.get_dummies(
    sub_data_df.drop(columns=['Outcome']),
    drop_first=True,
    dtype=int,
)

In [26]:
# Sanity check: (rows, feature columns) of the matrix passed to the XGBoost model.
X_test_1.shape

(60, 905)

## For LSTM

In [27]:
# Select the test recordings, then produce an array whose last two axis sizes
# are swapped relative to X_signal.
# NOTE(review): reshape() changes the axis sizes but does not move the data the
# way a transpose would (np.transpose(X_sub_signal, (0, 2, 1))). If a transpose
# was intended, the values within each sample are reordered here. Do not change
# this in isolation — presumably the saved model was trained on input prepared
# the same way; confirm against the training code.
X_sub_signal = X_signal[test_indices]
X_test_2 = X_sub_signal.reshape(X_sub_signal.shape[0], X_sub_signal.shape[2], X_sub_signal.shape[1])

In [28]:
# Sanity check: shape of the array passed to the LSTM model.
X_test_2.shape

(60, 30000, 18)

## Combine predictions from both models

In [29]:
# Load the previously saved XGBoost model.
# NOTE: joblib files are pickle-based and can execute arbitrary code when
# loaded — only load model files from a trusted source.
import joblib
xgboost_model = joblib.load('best_xgb_model.joblib')

In [30]:
# Take column 1 of predict_proba's output as the XGBoost score for each test row.
# NOTE(review): presumably column 1 is the class encoded as 1 in y_test, and
# X_test_1's columns match the training features — confirm.
y_proba_1 = xgboost_model.predict_proba(X_test_1)[:,1]
y_proba_1.shape

(60,)

In [31]:
# Load the previously saved Keras model from its .h5 file.
import tensorflow as tf
custom_lstm_model = tf.keras.models.load_model('custom_lstm_model.h5')

In [32]:
# LSTM prediction for each test sample; squeeze() removes size-1 axes so the
# result can be averaged element-wise with y_proba_1.
# NOTE(review): with a single test sample squeeze() would also remove the batch
# axis and return a 0-d array.
y_proba_2 = custom_lstm_model.predict(X_test_2).squeeze()
y_proba_2.shape



(60,)

In [33]:
# Soft-voting ensemble: unweighted mean of the two models' scores.
average_pred_probs = (y_proba_1 + y_proba_2) / 2
# Keep the real averaged probabilities in y_proba. Rounding here turned them
# into hard 0/1 labels under a "proba" name, which would silently break any
# probability-based metric; the > 0.5 threshold applied in the next step gives
# exactly the same class labels from the unrounded values.
y_proba = average_pred_probs

In [34]:
from sklearn.metrics import classification_report

# Turn the ensemble scores into class labels: 1 when strictly above 0.5, else 0.
y_pred = np.greater(y_proba, 0.5).astype(int)
# Per-class precision / recall / F1 against the shared ground-truth labels.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.91      0.84      0.87        25
           1       0.89      0.94      0.92        35

    accuracy                           0.90        60
   macro avg       0.90      0.89      0.90        60
weighted avg       0.90      0.90      0.90        60

