In [2]:
import os
import pickle
import sys

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix, roc_auc_score, average_precision_score, f1_score, accuracy_score, roc_curve
from sklearn.metrics import precision_recall_curve
from scipy import stats

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.models import load_model

from hnet import AppendNet

def load_pretrained_model(pre_trained_loc="./PCLR.h5") :
    pre_trained_model = load_model(pre_trained_loc)
    
    return pre_trained_model

def do_bootstrap(pred_vals, trues, threshold=0.5, n=1000):
    auc_list = []
    apr_list = []
    acc_list = []
    f1_list = []
    
    preds = np.array(pred_vals > threshold).astype(int)
    
    rng = np.random.RandomState(seed=1)
    for _ in range(n):
        idxs = rng.choice(len(trues), size=len(trues), replace=True)
        pred_arr= preds[idxs]
        true_arr = trues[idxs]
        pred_val_arr = pred_vals[idxs]

        auc = roc_auc_score(true_arr, pred_arr)
        apr = average_precision_score(true_arr, pred_arr)
        acc = accuracy_score(true_arr, pred_val_arr)
        f1 = f1_score(true_arr, pred_val_arr)

        auc_list.append(auc)
        apr_list.append(apr)
        acc_list.append(acc)
        f1_list.append(f1)

    return np.array(auc_list), np.array(apr_list), np.array(acc_list), np.array(f1_list)

def confidence_interval(values, alpha=0.95):
    lower = np.percentile(values, (1-alpha)/2 * 100)
    upper = np.percentile(values, (alpha + (1-alpha)/2) * 100)
    return lower, upper

In [18]:
def get_ecg(df):
    ecgs = []
    for idx in df.index:
        row = df.loc[idx]
        qid = row['QuantaID']
        doc = row['Date_of_Cath']
        fname = f'/storage/shared/apollo/same-day/{qid}_{doc}.csv'
        x = pd.read_csv(fname).values[...,1:].astype(np.float32)
        x /= 1000
        x = x[:4096, :].T
        ecgs.append(x)
        
    ecgs = np.array(ecgs)
    return np.transpose(ecgs, (0,2,1))

def get_data(batch_size=64):
    df_tab = pd.read_csv(os.path.join('/storage/shared/apollo/same-day/tabular_data.csv'))
    train_ids = np.load("./stores/train_ids.npy")
    val_ids = np.load("./stores/val_ids.npy")
    test_ids = np.load("./stores/test_ids.npy")

    train_ids = train_ids[len(train_ids) // 2 :]
    val_ids = val_ids[len(val_ids) // 2 :]
    test_ids = test_ids[len(test_ids) // 2 :]

    train_df = df_tab[df_tab["QuantaID"].isin(train_ids)]
    val_df = df_tab[df_tab["QuantaID"].isin(val_ids)]
    test_df = df_tab[df_tab["QuantaID"].isin(test_ids)]
    print(len(train_df), len(val_df), len(test_df))

    X_train = get_ecg(train_df)
    X_val = get_ecg(val_df)
    X_test = get_ecg(test_df)

    y_train = (train_df["PCWP_mean"].values >= 18)
    y_val = (val_df["PCWP_mean"].values >= 18)
    y_test = (test_df["PCWP_mean"].values >= 18)

    return X_train, y_train, X_val, y_val, X_test, y_test

In [19]:
os.environ["CUDA_VISIBLE_DEVICES"]="3"

pre_trained_model = load_pretrained_model(pre_trained_loc='./PCLR.h5')
latent = tf.keras.Model(pre_trained_model.inputs, pre_trained_model.get_layer('embed').output)
full_model = AppendNet(latent, new_layers = [128, 1], classification=True)

optimizer = tf.keras.optimizers.Adam() # can modify LR, of course
# loss_fn = tf.keras.losses.BinaryCrossentropy()
loss_fn = rmse_loss

X_train, y_train, X_val, y_val, X_test, y_test = get_data()

2442 893 923


In [20]:
epochs = 50
full_model.compile(optimizer, loss_fn)
full_model.fit(X_train, y_train, epochs=epochs)

Epoch 1/50


TypeError: in user code:

    File "/storage/araghu/.conda/envs/hfnet/lib/python3.9/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/tmp/ipykernel_3922324/167188699.py", line 42, in rmse_loss  *
        return tf.sqrt(tf.reduce_mean(tf.square(y_true - y_pred)))

    TypeError: Value passed to parameter 'x' has DataType bool not in list of allowed values: bfloat16, float16, float32, float64, uint8, int8, uint16, int16, int32, int64, complex64, complex128, uint32, uint64


In [6]:
y_pred = full_model.predict(X_test)



In [31]:
full_model.save('./PCLR_finetuned_50epc.pb', save_format='tf')



INFO:tensorflow:Assets written to: ./PCLR_finetuned_50epc.pb/assets


INFO:tensorflow:Assets written to: ./PCLR_finetuned_50epc.pb/assets


In [None]:
# Load saved tf model
loaded_model = tf.keras.models.load_model('./PCLR_finetuned.pb')

# Calculate Classification Performance

In [17]:
auc, apr, acc, f1 = do_bootstrap(y_pred, y_test)
print(confidence_interval(auc), confidence_interval(apr), confidence_interval(acc), confidence_interval(f1))

ValueError: multi_class must be in ('ovo', 'ovr')