## Threshold Analysis

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm
from pathlib import Path
from scipy import stats
from sklearn.metrics import roc_curve, auc 

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Find the accuracy on only the subset of samples which the clinicians did their manual analysis on

def get_clinician_patients(clinician_file="/deep/group/ed-monitor/experiments-v8/prna-128-15/fold_8/outfiles/test.csv",
                           matched_file="/deep/u/tomjin/aihc-win21-ed-monitor/scripts/v8/cohort_matched_output_v8.csv"):
    """Return the set of CSNs for the patients listed in the clinician file.

    The clinician file's ``patient_id`` column is joined to the matched
    cohort file's ``CaseID`` column and the corresponding ``CSN`` values are
    collected.

    Args:
        clinician_file: CSV with a ``patient_id`` column.
        matched_file: CSV with at least ``CSN`` and ``CaseID`` columns.

    Returns:
        set: CSN values of the clinician patients that have a match. Patients
        without a match are omitted (a left join would add NaN to the set and
        turn every CSN into a float).
    """
    df_clinician = pd.read_csv(clinician_file)
    df_matched = pd.read_csv(matched_file, delimiter=",")[["CSN", "CaseID"]]
    # Inner join keeps only matched rows, so the CSN column keeps its original dtype.
    df_joined = pd.merge(df_clinician, df_matched, how="inner", left_on="patient_id", right_on="CaseID")
    return set(df_joined["CSN"].dropna().tolist())

### Youden's Index (ECG Only)

#### 1WPP

In [None]:

# Load the 1wpp waveform-only validation predictions and join the validation cohort table on patient_id.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-1wpp/II/final-transformer-64/waveform-only/val.csv")
df_cohort = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/consolidated.filtered.val.txt", sep="\t")
df = pd.merge(df, df_cohort, on="patient_id")
# Sum of the `outcome` column (a positive count if it is coded 0/1 — TODO confirm).
print(f"Pos = {sum(df['outcome'].tolist())}")
df.head(1)


In [5]:
# Find average threshold on validation set
# (currently a single Youden's-J threshold on the full validation set; the
# resampling variant is commented out below, so the "average" is over one value)

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-1wpp/II/final-transformer-64/waveform-only/val.csv")
df_cohort = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/consolidated.filtered.val.txt", sep="\t")
df = pd.merge(df, df_cohort, on="patient_id")
# Per-class subsets; only used by the commented-out resampling loop below.
df_neg = df[df["outcome"] == 0]
df_pos = df[df["outcome"] == 1]

thresholds = []
fpr, tpr, ths = roc_curve(df["outcome"], df["preds"])
# Youden's J statistic = sensitivity + specificity - 1 = TPR - FPR
J = tpr - fpr
ix = np.argmax(J)
best_thresh = ths[ix]
thresholds.append(best_thresh.item())

# thresholds = []
# for i in tqdm(range(10000)):
#     df_neg_s = df_neg.sample(n=150, random_state=i)
#     df_pos_s = df_pos.sample(n=150, random_state=i)

#     df = pd.concat([df_neg_s, df_pos_s])

#     testy = df["actual"]
#     yhat = df["preds"]

#     # calculate roc curves
#     fpr, tpr, ths = roc_curve(testy, yhat)

#     J = tpr - fpr
#     ix = np.argmax(J)
#     best_thresh = ths[ix]
#     thresholds.append(best_thresh.item())

print('Best Threshold=%f' % (np.mean(thresholds)))


Best Threshold=0.089680


In [6]:
# Use the mean of the collected thresholds as the operating threshold for the cells below.
best_thresh = np.mean(thresholds)
print(best_thresh)

0.0896803587675094


In [17]:
# Pin the 1WPP validation threshold computed above. The previous literal here
# (0.096377) was the 10WPP threshold, which on a top-to-bottom run would be
# applied to the 1WPP test predictions.
best_thresh = 0.0896803587675094

In [7]:
# Apply threshold to test (all patients)

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-1wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Scores at or above `best_thresh` (taken from kernel state) count as positive predictions.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])
print(f"Average accuracies = {accuracy}")


Average accuracies = 0.6563421828908554


In [13]:
# Display the threshold currently held in the kernel before applying it to the clinician subset.
best_thresh

0.0896803587675094

In [14]:
# Apply threshold to test (filtered by clinician patients)

clinician_pts = get_clinician_patients(clinician_file="/deep/group/ed-monitor/experiments-v8/prna-128-15/fold_8/outfiles/test.csv")
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-1wpp/II/final-transformer-64/waveform-only/test.csv")
# Keep rows whose patient_id is in the clinician set.
# NOTE(review): the set holds CSN values, so this assumes patient_id is a CSN in this file — confirm.
# NOTE(review): df["patient_id"].isin(clinician_pts) would avoid the row-wise apply.
df = df[df.apply(lambda x: x['patient_id'] in clinician_pts, axis=1)]
print(df.shape)
testy = df["actual"]
yhat = df["preds"]

# preds_t = (df["preds"] >= best_thresh).to_numpy()
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])
print(f"Average accuracies = {accuracy}")


(200, 3)
Average accuracies = 0.675


#### 10WPP

In [None]:

# Load the 10wpp waveform-only validation predictions and join the validation cohort table on patient_id.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/val.csv")
df_cohort = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/consolidated.filtered.val.txt", sep="\t")
df = pd.merge(df, df_cohort, on="patient_id")
# Sum of the `outcome` column (a positive count if it is coded 0/1 — TODO confirm).
print(f"Pos = {sum(df['outcome'].tolist())}")
df.head(1)


In [16]:
# Find average threshold on validation set
# (mean Youden's-J threshold over 10000 class-balanced resamples of 150 + 150 rows)

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/val.csv")
df_cohort = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/consolidated.filtered.val.txt", sep="\t")
df = pd.merge(df, df_cohort, on="patient_id")
df_neg = df[df["outcome"] == 0]
df_pos = df[df["outcome"] == 1]

thresholds = []
for i in tqdm(range(10000)):
    # Seeded by the iteration number, so each resample is reproducible.
    df_neg_s = df_neg.sample(n=150, random_state=i)
    df_pos_s = df_pos.sample(n=150, random_state=i)

    # NOTE: rebinds `df`; after the loop it holds the last 300-row resample, not the full validation set.
    df = pd.concat([df_neg_s, df_pos_s])

    # Classes were split on `outcome` above but the ROC labels come from `actual`.
    testy = df["actual"]
    yhat = df["preds"]

    # calculate roc curves
    fpr, tpr, ths = roc_curve(testy, yhat)

    # Youden's J statistic = TPR - FPR
    J = tpr - fpr
    ix = np.argmax(J)
    best_thresh = ths[ix]
    thresholds.append(best_thresh.item())

print('Best Threshold=%f' % (np.mean(thresholds)))


100%|██████████| 10000/10000 [03:01<00:00, 54.96it/s]

Best Threshold=0.096377





In [30]:
# Use the mean of the per-resample thresholds as the operating threshold.
best_thresh = np.mean(thresholds)
print(best_thresh)

0.09560824593156574


In [17]:
# Pin the threshold to the value printed by the resampling cell above (Best Threshold=0.096377).
best_thresh = 0.096377

In [20]:
# Apply threshold to test (all patients)

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Scores at or above `best_thresh` (taken from kernel state) count as positive predictions.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])
print(f"Average accuracies = {accuracy}")


Average accuracies = 0.6998525073746312


In [21]:
# Apply threshold to test (filtered by clinician patients)

clinician_pts = get_clinician_patients(clinician_file="/deep/group/ed-monitor/experiments-v8/prna-128-15/fold_8/outfiles/test.csv")
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
# Keep rows whose patient_id is in the clinician set.
# NOTE(review): the set holds CSN values, so this assumes patient_id is a CSN in this file — confirm.
df = df[df.apply(lambda x: x['patient_id'] in clinician_pts, axis=1)]
print(df.shape)
testy = df["actual"]
yhat = df["preds"]

preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])
print(f"Average accuracies = {accuracy}")


(200, 3)
Average accuracies = 0.675


In [None]:
# Join the test cohort table onto whatever `df` is currently in the kernel.
# NOTE: `df` is overwritten, so re-running this cell merges onto the already-merged frame.
df_cohort = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/consolidated.filtered.test.txt", sep="\t")
df = pd.merge(df, df_cohort, on="patient_id")
df.head(2)

In [None]:
# Load the summary.csv stored alongside the 10wpp lead-II waveforms and preview the first 13 rows.
df_offsets = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/waveforms/15sec-500hz-1norm-10wpp/II/summary.csv")
df_offsets.head(13)

In [3]:
"""
Custom post-processing of the 12-lead ECG waveforms
"""

from biosppy.signals.tools import filter_signal
import numpy as np


def normalize(seq, smooth=1e-8):
    """
    Min-max rescale `seq` to the range [-1, 1].

    `smooth` is added to the (max - min) denominator so that a constant
    sequence does not divide by zero; such a sequence maps to all -1.
    """
    lowest = np.min(seq)
    span = np.max(seq) - lowest + smooth
    shifted = 2 * (seq - lowest)
    return shifted / span - 1


def apply_filter(signal, filter_bandwidth=[3, 45], fs=500):
    """
    Band-pass filter `signal` with a FIR filter (best effort).

    Args:
        signal: Waveform samples passed to `filter_signal`.
        filter_bandwidth: [low, high] cut-off frequencies of the pass band.
        fs: Sampling rate of `signal`; the filter order is int(0.3 * fs).

    Returns:
        The filtered signal, or the unmodified input if filtering fails.
        A failure is reported with a RuntimeWarning instead of being
        swallowed silently.
    """
    import warnings

    # Calculate filter order
    order = int(0.3 * fs)
    # Filter signal
    try:
        filtered, _, _ = filter_signal(signal=signal, ftype='FIR', band='bandpass',
                                       order=order, frequency=filter_bandwidth,
                                       sampling_rate=fs)
    except Exception as exc:  # keep best-effort behaviour, but let KeyboardInterrupt/SystemExit through
        warnings.warn(f"apply_filter: filtering failed ({exc!r}); returning the unfiltered signal",
                      RuntimeWarning)
        return signal

    return filtered


### Metrics Functions

In [7]:
def calculate_metrics(tp, fp, fn, tn):
    """Compute sensitivity, specificity, PPV and NPV with their standard errors.

    Each metric is a proportion p = k / n, and its standard error is
    sqrt(p * (1 - p) / n).

    Args:
        tp, fp, fn, tn: Confusion-matrix counts (ints or scaled floats).

    Returns:
        Tuple ``(sen, sen_se, spe, spe_se, ppv, ppv_se, npv, npv_se)``.
        A metric whose denominator is zero is returned as ``nan`` with a
        ``nan`` standard error, rather than raising ZeroDivisionError for
        Python ints or emitting numpy warnings.
    """
    def _proportion_with_se(successes, total):
        # Proportion and its standard error; undefined when there are no cases.
        if total == 0:
            return float("nan"), float("nan")
        p = successes / total
        return p, np.sqrt((p * (1 - p)) / total)

    sen, sen_se = _proportion_with_se(tp, tp + fn)  # among actual positives
    spe, spe_se = _proportion_with_se(tn, fp + tn)  # among actual negatives
    ppv, ppv_se = _proportion_with_se(tp, tp + fp)  # among predicted positives
    npv, npv_se = _proportion_with_se(tn, fn + tn)  # among predicted negatives

    return sen, sen_se, spe, spe_se, ppv, ppv_se, npv, npv_se



In [4]:
# Sanity check on a hand-made confusion matrix: tp=20, fp=180, fn=10, tn=1820.
calculate_metrics(20, 180, 10, 1820)

(0.6666666666666666,
 0.08606629658238704,
 0.91,
 0.0063992187023104614,
 0.1,
 0.021213203435596427,
 0.994535519125683,
 0.0017232927439847778)

In [5]:
# Same sanity check again (tp=20, fp=180, fn=10, tn=1820); this cell duplicates the previous one.
calculate_metrics(20, 180, 10, 1820)

(0.6666666666666666,
 0.08606629658238704,
 0.91,
 0.0063992187023104614,
 0.1,
 0.021213203435596427,
 0.994535519125683,
 0.0017232927439847778)

In [8]:
def get_confusion_matrix(preds, actual, thrs):
    """Count confusion-matrix cells for scores binarized at a threshold.

    Args:
        preds: Predicted scores (array-like).
        actual: True labels (array-like); 1 is positive, 0 is negative.
        thrs: Decision threshold; a score >= thrs is a positive prediction.

    Returns:
        Tuple ``(tp, fp, fn, tn)`` of counts. Empty input yields all zeros.
    """
    preds = np.asarray(preds)
    actual = np.asarray(actual)

    predicted_pos = preds >= thrs
    actual_pos = actual == 1
    actual_neg = actual == 0

    # np.sum counts in C; the builtin sum iterated the arrays element by element.
    tp = np.sum(predicted_pos & actual_pos)
    fp = np.sum(predicted_pos & actual_neg)
    fn = np.sum(~predicted_pos & actual_pos)
    tn = np.sum(~predicted_pos & actual_neg)

    return tp, fp, fn, tn

In [9]:
def get_bootstrap_metrics(orig_preds, orig_actual, thrs, bootstrap_samples=10000, negative_class_ratio_increase=None, seed=None):
    """Bootstrap sensitivity, specificity, PPV and NPV at a fixed threshold.

    Each iteration resamples the (prediction, label) pairs with replacement,
    binarizes the scores at `thrs` and evaluates `calculate_metrics` on the
    resulting confusion counts.

    Args:
        orig_preds: Predicted scores (array-like or pandas Series).
        orig_actual: True labels (array-like or pandas Series); 1 is positive.
        thrs: Decision threshold; a score >= thrs is a positive prediction.
        bootstrap_samples: Number of bootstrap resamples.
        negative_class_ratio_increase: If given, tn and fp are multiplied by
            this factor before the metrics are computed.
        seed: Optional seed for reproducible resampling. When None the global
            numpy random state is used, as before.

    Returns:
        Four lists with one entry per resample:
        ``(sensitivities, specificities, ppvs, npvs)``.
    """
    # Convert once to positional numpy arrays. Indexing a pandas Series with
    # `series[i]` is label-based, so it fails or picks the wrong rows when the
    # index is not 0..n-1 (e.g. after filtering), and it is much slower.
    all_preds = np.asarray(orig_preds)
    all_actual = np.asarray(orig_actual)
    n_rows = len(all_preds)
    rng = np.random if seed is None else np.random.RandomState(seed)

    bootstrap_sensitivities = []
    bootstrap_specificities = []
    bootstrap_ppvs = []
    bootstrap_npvs = []
    for _ in tqdm(range(bootstrap_samples)):
        bootstrap_indices = rng.choice(n_rows, size=n_rows, replace=True)
        preds = all_preds[bootstrap_indices]
        actual = all_actual[bootstrap_indices]

        predicted_pos = preds >= thrs
        actual_pos = actual == 1
        actual_neg = actual == 0

        tp = np.sum(predicted_pos & actual_pos)
        fp = np.sum(predicted_pos & actual_neg)
        fn = np.sum(~predicted_pos & actual_pos)
        tn = np.sum(~predicted_pos & actual_neg)

        if negative_class_ratio_increase is not None:
            # Increase all "negative" actual class samples by the provided ratios
            tn = tn * negative_class_ratio_increase
            fp = fp * negative_class_ratio_increase

        # calculate_metrics also returns standard errors; they are not needed here.
        sen, _, spe, _, ppv, _, npv, _ = calculate_metrics(tp, fp, fn, tn)

        bootstrap_sensitivities.append(sen)
        bootstrap_specificities.append(spe)
        bootstrap_ppvs.append(ppv)
        bootstrap_npvs.append(npv)
    return bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs


In [10]:
def get_analytical_metrics(orig_preds, orig_actual, thrs, negative_class_ratio_increase=None):
    """Compute the four metrics and their analytical standard errors at a threshold.

    Args:
        orig_preds: Predicted scores (array-like).
        orig_actual: True labels (array-like); 1 is positive, 0 is negative.
        thrs: Threshold or collection of thresholds; their mean is used, and
            a score >= that mean is a positive prediction.
        negative_class_ratio_increase: If given, tn and fp are multiplied by
            this factor before the metrics are computed.

    Returns:
        Tuple ``(sen, sen_se, spe, spe_se, ppv, ppv_se, npv, npv_se)`` as
        returned by `calculate_metrics`.
    """
    preds = np.asarray(orig_preds)
    actual = np.asarray(orig_actual)

    predicted_pos = preds >= np.mean(thrs)
    actual_pos = actual == 1
    actual_neg = actual == 0

    # np.sum counts in C; the builtin sum iterated the arrays element by element.
    tp = np.sum(predicted_pos & actual_pos)
    fp = np.sum(predicted_pos & actual_neg)
    fn = np.sum(~predicted_pos & actual_pos)
    tn = np.sum(~predicted_pos & actual_neg)

    if negative_class_ratio_increase is not None:
        # Increase all "negative" actual class samples by the provided ratios
        tn = tn * negative_class_ratio_increase
        fp = fp * negative_class_ratio_increase

    return calculate_metrics(tp, fp, fn, tn)


In [11]:
def get_basic_metrics(orig_preds, orig_actual, thrs, negative_class_ratio_increase=None):
    """Return confusion-matrix counts at a threshold, optionally scaling the negatives.

    Args:
        orig_preds: Predicted scores (array-like).
        orig_actual: True labels (array-like); 1 is positive, 0 is negative.
        thrs: Threshold or collection of thresholds; their mean is used, and
            a score >= that mean is a positive prediction.
        negative_class_ratio_increase: If given, tn and fp are multiplied by
            this factor.

    Returns:
        Tuple ``(tp, fp, fn, tn)``. Empty input yields all zeros.
    """
    preds = np.asarray(orig_preds)
    actual = np.asarray(orig_actual)

    predicted_pos = preds >= np.mean(thrs)
    actual_pos = actual == 1
    actual_neg = actual == 0

    # np.sum counts in C; the builtin sum iterated the arrays element by element.
    tp = np.sum(predicted_pos & actual_pos)
    fp = np.sum(predicted_pos & actual_neg)
    fn = np.sum(~predicted_pos & actual_pos)
    tn = np.sum(~predicted_pos & actual_neg)

    if negative_class_ratio_increase is not None:
        # Increase all "negative" actual class samples by the provided ratios
        tn = tn * negative_class_ratio_increase
        fp = fp * negative_class_ratio_increase

    return tp, fp, fn, tn

## Transformer (Baseline)

### 85% Sensitivity

In [5]:
# Pick the validation-set threshold for a target sensitivity of 0.85

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.85
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.85 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.85:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.85 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.3713050675330295

In [12]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the baseline test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics, used below for the percentile intervals.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)

# Keep the baseline PPV/NPV bootstrap samples under their own names.
baseline_bootstrap_ppvs = bootstrap_ppvs
baseline_bootstrap_npvs = bootstrap_npvs

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [03:02<00:00, 54.70it/s]

Sensitivity: 0.8533 [0.7947, 0.9071]
Specificity: 0.4187 [0.3907, 0.4466]
PPV: 0.1544 [0.1302, 0.1795]
NPV: 0.9583 [0.9403, 0.9744]





In [12]:
# Confusion-matrix counts on the baseline test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=128, fp=701, fn=22, tn=505


### 95% Sensitivity

In [38]:
# Pick the validation-set threshold for a target sensitivity of 0.95

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.95
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.95 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.95:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.95 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.2953649118943529

In [39]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the baseline test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics; the PPV/NPV samples are also kept under baseline_95_* names.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)
baseline_95_bootstrap_ppvs = bootstrap_ppvs
baseline_95_bootstrap_npvs = bootstrap_npvs

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [03:01<00:00, 55.06it/s]

Sensitivity: 0.9133 [0.8657, 0.9552]
Specificity: 0.2148 [0.1923, 0.2384]
PPV: 0.1264 [0.1072, 0.1464]
NPV: 0.9522 [0.9254, 0.9761]





In [15]:
# Confusion-matrix counts on the baseline test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=137, fp=947, fn=13, tn=259


### 99% Sensitivity

In [16]:
# Pick the validation-set threshold for a target sensitivity of 0.99

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.99
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.99 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.99:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.99 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.2495518257156058

In [17]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the baseline test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics, used below for the percentile intervals.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [02:58<00:00, 56.01it/s]

Sensitivity: 0.98 [0.9551, 1.0]
Specificity: 0.1186 [0.1003, 0.137]
PPV: 0.1215 [0.1031, 0.1406]
NPV: 0.9795 [0.9539, 1.0]





In [18]:
# Confusion-matrix counts on the baseline test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/baseline/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=147, fp=1063, fn=3, tn=143


## Waveform Only

### 85% Sensitivity

In [10]:
# Pick the validation-set threshold for a target sensitivity of 0.85

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.85
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.85 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.85:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.85 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.0449579134583473

In [12]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the 10wpp waveform-only test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics, used below for the percentile intervals.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [08:40<00:00, 19.23it/s]

Sensitivity: 0.8667 [0.811, 0.9184]
Specificity: 0.4204 [0.3924, 0.4481]
PPV: 0.1568 [0.1326, 0.1819]
NPV: 0.962 [0.9451, 0.9771]





In [13]:
# Confusion-matrix counts on the waveform-only test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=130, fp=699, fn=20, tn=507


### 95% Sensitivity

In [14]:
# Pick the validation-set threshold for a target sensitivity of 0.95

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.95
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.95 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.95:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.95 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.0300113204866647

In [15]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the 10wpp waveform-only test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics, used below for the percentile intervals.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [08:43<00:00, 19.09it/s]

Sensitivity: 0.94 [0.8993, 0.9748]
Specificity: 0.2413 [0.217, 0.2656]
PPV: 0.1335 [0.1125, 0.1546]
NPV: 0.97 [0.9492, 0.9873]





In [16]:
# Confusion-matrix counts on the waveform-only test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=141, fp=915, fn=9, tn=291


### 99% Sensitivity

In [17]:
# Pick the validation-set threshold for a target sensitivity of 0.99

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.99
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.99 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.99:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.99 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.0217214114964008

In [18]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the 10wpp waveform-only test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics, used below for the percentile intervals.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [08:43<00:00, 19.09it/s]

Sensitivity: 0.9933 [0.9781, 1.0]
Specificity: 0.0879 [0.0722, 0.1042]
PPV: 0.1193 [0.1015, 0.1373]
NPV: 0.9907 [0.9688, 1.0]





In [19]:
# Confusion-matrix counts on the waveform-only test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-only/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=149, fp=1100, fn=1, tn=106


## Waveform + Age + Sex + Vital Signs

### 85% Sensitivity

In [3]:
# Pick the validation-set threshold for a target sensitivity of 0.85

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.85
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.85 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.85:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.85 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.0434097461402416

## Waveform + Age + Sex + Vital Signs + Risk

### 85% Sensitivity

In [13]:
# Pick the validation-set threshold for a target sensitivity of 0.85

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.85
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.85 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.85:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.85 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.0445840992033481

In [14]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the (lower, upper) percentile interval of `samples`, clipped to [0, 1].

    Despite the name nothing is printed. For the default alpha=0.95 the
    2.5th and 97.5th percentiles are returned.
    """
    tail = (1.0 - alpha) / 2.0
    lower_pct, upper_pct = tail * 100, (alpha + tail) * 100
    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Load the waveform-demo-vital-risk test-set predictions.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/test.csv")
testy = df["actual"]
yhat = df["preds"]

# Binarize at `best_thresh` (taken from kernel state); neither value below is used again in this cell.
preds_t = (df["preds"] >= best_thresh).to_numpy()
accuracy = sum(preds_t == df["actual"]) / len(df["actual"])

# Bootstrap distributions of the four metrics, used below for the percentile intervals.
bootstrap_sensitivities, bootstrap_specificities, bootstrap_ppvs, bootstrap_npvs = get_bootstrap_metrics(df["preds"], df["actual"], best_thresh)

# Keep this model's PPV/NPV bootstrap samples under their own names.
best_bootstrap_ppvs = bootstrap_ppvs
best_bootstrap_npvs = bootstrap_npvs

# Point estimates on the full test set.
# NOTE: the `*_ci` names receive the standard errors returned by calculate_metrics, not intervals.
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# Report each point estimate with its bootstrap percentile interval.
lower, upper = print_ci(bootstrap_sensitivities)
print(f"Sensitivity: {round(sen, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_specificities)
print(f"Specificity: {round(spe, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_ppvs)
print(f"PPV: {round(ppv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")

lower, upper = print_ci(bootstrap_npvs)
print(f"NPV: {round(npv, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [02:59<00:00, 55.86it/s]

Sensitivity: 0.8667 [0.8105, 0.9184]
Specificity: 0.4469 [0.4189, 0.4752]
PPV: 0.1631 [0.1378, 0.1891]
NPV: 0.9642 [0.9479, 0.9785]





In [11]:
# Confusion-matrix counts on the waveform-demo-vital-risk test set at the `best_thresh` currently held in the kernel.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/test.csv")
testy = df["actual"]
yhat = df["preds"]
tp, fp, fn, tn = get_confusion_matrix(df["preds"], df["actual"], best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=130, fp=667, fn=20, tn=539


### 95% Sensitivity

In [32]:
# Pick the validation-set threshold for a target sensitivity of 0.95

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)
# Find threshold at sensitivity=0.95
index = -1
for k in range(len(tpr)):
    if tpr[k] > 0.95 and index == -1:
        # First point above the target: step back to the last point below it
        index = k - 1
    if tpr[k] == 0.95:
        # An exact hit on the target takes precedence
        index = k
if index == -1:
    # Fail explicitly (this branch used to reference an undefined `index_alt`);
    # continuing with -1 would silently select the last threshold.
    raise ValueError("No ROC operating point reaches sensitivity 0.95 on the validation set")

best_thresh = thresholds[index]
best_thresh

0.0305267199873924

In [33]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Return the central percentile interval of ``samples`` as ``(lower, upper)``.

    Despite the name, nothing is printed; the caller formats the result.

    Args:
        samples: Sequence of numeric values passed to ``np.percentile``.
        alpha: Coverage of the interval. The default 0.95 selects the 2.5th
            and 97.5th percentiles.

    Returns:
        Tuple ``(lower, upper)``. The lower bound is floored at 0.0 and the
        upper bound is capped at 1.0.
    """
    tail_mass = (1.0 - alpha) / 2.0
    lower_pct = tail_mass * 100
    upper_pct = (alpha + tail_mass) * 100

    lower = max(0.0, np.percentile(samples, lower_pct))
    upper = min(1.0, np.percentile(samples, upper_pct))
    return lower, upper

# Evaluate the test split at best_thresh (read from the kernel, not set in this cell).
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/test.csv")
testy, yhat = df["actual"], df["preds"]

# Thresholded predictions and plain accuracy; computed but not reported below.
preds_t = (yhat >= best_thresh).to_numpy()
accuracy = sum(preds_t == testy) / len(testy)

# Bootstrap samples of each metric at this threshold.
(
    bootstrap_sensitivities,
    bootstrap_specificities,
    bootstrap_ppvs,
    bootstrap_npvs,
) = get_bootstrap_metrics(yhat, testy, best_thresh)

# Stash the PPV/NPV samples under dedicated names for the CI-difference cells,
# because the generic bootstrap_* names get reassigned by later cells.
best_95_bootstrap_ppvs, best_95_bootstrap_npvs = bootstrap_ppvs, bootstrap_npvs

# Point estimates from the confusion matrix; the *_ci outputs are not used here.
tp, fp, fn, tn = get_confusion_matrix(yhat, testy, best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# One line per metric: point estimate followed by the bootstrap percentile interval.
for metric_label, point_estimate, metric_samples in (
    ("Sensitivity", sen, bootstrap_sensitivities),
    ("Specificity", spe, bootstrap_specificities),
    ("PPV", ppv, bootstrap_ppvs),
    ("NPV", npv, bootstrap_npvs),
):
    lower, upper = print_ci(metric_samples)
    print(f"{metric_label}: {round(point_estimate, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [03:02<00:00, 54.86it/s]

Sensitivity: 0.9467 [0.9085, 0.9793]
Specificity: 0.2968 [0.2708, 0.3227]
PPV: 0.1434 [0.1218, 0.1656]
NPV: 0.9781 [0.9621, 0.9917]





In [14]:
# Raw confusion-matrix counts on the test split.
# best_thresh is not set here: it is whatever value the kernel currently holds.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/test.csv")
testy, yhat = df["actual"], df["preds"]

tp, fp, fn, tn = get_confusion_matrix(yhat, testy, best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=142, fp=848, fn=8, tn=358


### 99% Sensitivity

In [15]:
# Pick the decision threshold on the validation set that targets 99% sensitivity.
# (A single ROC operating point is selected; nothing is averaged.)

df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/val.csv")
testy = df["actual"]
yhat = df["preds"]

# calculate roc curves
fpr, tpr, thresholds = roc_curve(testy, yhat)

# Find threshold at sensitivity=0.99.
# tpr is non-decreasing along the ROC curve, so:
#   * if some point sits exactly on the target, use the last such point;
#   * otherwise use the point just before the first one that exceeds the target
#     (i.e. the closest operating point from below).
target_sensitivity = 0.99
exact_hits = np.flatnonzero(tpr == target_sensitivity)
first_above = np.flatnonzero(tpr > target_sensitivity)
if exact_hits.size:
    index = int(exact_hits[-1])
elif first_above.size:
    # Clamp at 0 so a curve whose very first point already exceeds the target
    # does not wrap around to thresholds[-1].
    index = max(int(first_above[0]) - 1, 0)
else:
    # The old fallback read an undefined name (index_alt) and died with a
    # NameError; fail with an explicit, descriptive error instead.
    raise ValueError(
        f"ROC curve never reaches a sensitivity of {target_sensitivity}; "
        "check that the validation labels contain positive cases"
    )

best_thresh = thresholds[index]
best_thresh

0.023553904145956

In [16]:
# Apply threshold to test

def print_ci(samples, alpha=0.95):
    """Compute a central percentile interval over ``samples``.

    Nothing is printed here, whatever the name suggests; the bounds are
    returned for the caller to format.

    Args:
        samples: Sequence of numeric values handed to ``np.percentile``.
        alpha: Fraction of the distribution the interval should cover
            (0.95 -> 2.5th and 97.5th percentiles).

    Returns:
        ``(lower, upper)`` where ``lower`` is never below 0.0 and ``upper``
        is never above 1.0.
    """
    half_tail = (1.0 - alpha) / 2.0
    pct_low = half_tail * 100
    pct_high = (alpha + half_tail) * 100

    lower = max(0.0, np.percentile(samples, pct_low))
    upper = min(1.0, np.percentile(samples, pct_high))
    return lower, upper

# Evaluate the test split at best_thresh (read from the kernel, not set in this cell).
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/test.csv")
testy, yhat = df["actual"], df["preds"]

# Thresholded predictions and plain accuracy; computed but not reported below.
preds_t = (yhat >= best_thresh).to_numpy()
accuracy = sum(preds_t == testy) / len(testy)

# Bootstrap samples of each metric at this threshold.
(
    bootstrap_sensitivities,
    bootstrap_specificities,
    bootstrap_ppvs,
    bootstrap_npvs,
) = get_bootstrap_metrics(yhat, testy, best_thresh)

# Point estimates from the confusion matrix; the *_ci outputs are not used here.
tp, fp, fn, tn = get_confusion_matrix(yhat, testy, best_thresh)
sen, sen_ci, spe, spe_ci, ppv, ppv_ci, npv, npv_ci = calculate_metrics(tp, fp, fn, tn)

# One line per metric: point estimate followed by the bootstrap percentile interval.
for metric_label, point_estimate, metric_samples in (
    ("Sensitivity", sen, bootstrap_sensitivities),
    ("Specificity", spe, bootstrap_specificities),
    ("PPV", ppv, bootstrap_ppvs),
    ("NPV", npv, bootstrap_npvs),
):
    lower, upper = print_ci(metric_samples)
    print(f"{metric_label}: {round(point_estimate, 4)} [{round(lower, 4)}, {round(upper, 4)}]")


100%|██████████| 10000/10000 [08:16<00:00, 20.13it/s]


Sensitivity: 0.9667 [0.9346, 0.9931]
Specificity: 0.2032 [0.1807, 0.2263]
PPV: 0.1311 [0.1116, 0.1515]
NPV: 0.98 [0.9607, 0.9959]


In [17]:
# Raw confusion-matrix counts on the test split.
# best_thresh is not set here: it is whatever value the kernel currently holds.
df = pd.read_csv(f"/deep/group/ed-monitor/patient_data_v9/predictions/15sec-500hz-1norm-10wpp/II/final-transformer-64/waveform-demo-vital-risk/test.csv")
testy, yhat = df["actual"], df["preds"]

tp, fp, fn, tn = get_confusion_matrix(yhat, testy, best_thresh)
print(f"tp={tp}, fp={fp}, fn={fn}, tn={tn}")

tp=145, fp=961, fn=5, tn=245


## 95% CI Diff

In [29]:
def get_ci_diff(left_aucs, right_aucs, alpha=0.95, n_bootstrap=10000):
    """Print the bootstrap percentile interval of ``left_aucs - right_aucs``.

    The two inputs are subtracted element-wise and the central ``alpha``
    interval of the differences is printed, rounded to 3 decimals. Nothing is
    returned, so a notebook cell ending in this call shows only the printed
    line.

    Args:
        left_aucs: Sequence of bootstrap metric values (minuend).
        right_aucs: Sequence of bootstrap metric values (subtrahend); must be
            compatible with ``left_aucs`` under ``np.subtract``.
        alpha: Coverage of the interval (0.95 -> 2.5th/97.5th percentiles).
        n_bootstrap: Expected number of differences. Defaults to 10000, the
            value that was previously hard-coded. Pass ``None`` to skip the
            check.

    Raises:
        AssertionError: If ``n_bootstrap`` is given and the number of
            differences does not match it.
    """
    diffs = np.subtract(left_aucs, right_aucs)
    if n_bootstrap is not None:
        assert len(diffs) == n_bootstrap, (
            f"expected {n_bootstrap} bootstrap differences, got {len(diffs)}"
        )

    tail_mass = (1.0 - alpha) / 2.0
    lower = np.percentile(diffs, tail_mass * 100)
    # Only the upper bound is capped (at 1.0); the lower bound is used as computed.
    upper = min(1.0, np.percentile(diffs, (alpha + tail_mass) * 100))

    # Cast to built-in floats so the list prints as plain numbers on every
    # NumPy version (NumPy 2 shows scalars inside a list as np.float64(...)).
    auc_ci = [float(round(lower, 3)), float(round(upper, 3))]

    # Report the coverage actually requested instead of a hard-coded "95%".
    print(f"[Bootstrap] {alpha * 100:g}% CI={auc_ci}")
    

### 85% Sensitivity

In [30]:
# Best - Baseline PPV
# Prints the 95% percentile interval of the element-wise difference between the
# two bootstrap PPV samples. baseline_bootstrap_ppvs is not created in this
# cell; it must already exist in the kernel.
get_ci_diff(best_bootstrap_ppvs, baseline_bootstrap_ppvs)


[Bootstrap] 95% CI=[-0.027, 0.044]


In [31]:
# Best - Baseline NPV
# Prints the 95% percentile interval of the element-wise difference between the
# two bootstrap NPV samples. baseline_bootstrap_npvs is not created in this
# cell; it must already exist in the kernel.
get_ci_diff(best_bootstrap_npvs, baseline_bootstrap_npvs)


[Bootstrap] 95% CI=[-0.017, 0.029]


### 95% Sensitivity

In [36]:
# Mean of the bootstrap PPV samples saved as best_95_bootstrap_ppvs.
np.mean(best_95_bootstrap_ppvs)

0.14333644738068838

In [37]:
# Mean of the baseline bootstrap PPV samples.
# NOTE(review): this cell sits in the 95%-sensitivity section, yet it reads
# baseline_bootstrap_ppvs rather than baseline_95_bootstrap_ppvs (the variable
# the CI cell below compares against) - confirm which one was intended.
np.mean(baseline_bootstrap_ppvs)

0.15438706697994817

In [40]:
# Best - Baseline PPV
# Prints the 95% percentile interval of the element-wise difference between the
# two bootstrap PPV samples. baseline_95_bootstrap_ppvs is not created in this
# cell; it must already exist in the kernel.
get_ci_diff(best_95_bootstrap_ppvs, baseline_95_bootstrap_ppvs)


[Bootstrap] 95% CI=[-0.013, 0.047]


In [41]:
# Best - Baseline NPV
# Prints the 95% percentile interval of the element-wise difference between the
# two bootstrap NPV samples. baseline_95_bootstrap_npvs is not created in this
# cell; it must already exist in the kernel.
get_ci_diff(best_95_bootstrap_npvs, baseline_95_bootstrap_npvs)


[Bootstrap] 95% CI=[-0.003, 0.056]
