In [None]:
import numpy as np
import pandas as pd
import math
import h5py
import json
from matplotlib import pyplot as plt
from scipy import signal #for downsampling
import samplerate

import pytorch_lightning as pl
import os 
from torch import nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchmetrics import Metric
from torch.utils.data import Dataset,DataLoader
from torchmetrics import R2Score, AUROC, F1Score
import torch
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from sklearn.preprocessing import StandardScaler
from pytorch_lightning.callbacks import LearningRateMonitor
from torch.optim.lr_scheduler import OneCycleLR

# import concordance index
from lifelines.utils import concordance_index

import wandb
from pytorch_lightning.loggers import WandbLogger
# Log in to Weights & Biases. NOTE(review): no other cell visible in this
# part of the notebook uses wandb - confirm the login is still needed here.
wandb.login()

# Labels

In [None]:
# Load and display the summary CSV of the "15min.60min.60sec" dataset.
# NOTE(review): the file name suggests a 60-minute follow-up window - confirm.
summary_file_60 = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.csv"
dfy_60 = pd.read_csv(summary_file_60)
dfy_60

In [None]:
# Load and display the summary CSV of the "15min.90min.60sec" dataset.
# NOTE(review): the file name suggests a 90-minute follow-up window - confirm.
summary_file_90 = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.90min.60sec.csv"
dfy_90 = pd.read_csv(summary_file_90)
dfy_90

In [None]:
# Load and display the summary CSV of the "15min.120min.60sec" dataset.
# NOTE(review): the file name suggests a 120-minute follow-up window - confirm.
summary_file_120 = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.120min.60sec.csv"
dfy_120 = pd.read_csv(summary_file_120)
dfy_120

## Run the following 3 Chunks to Generate Labels

In [None]:
# Choose the dataset to label by leaving exactly one h5py_file / summary_file
# pair uncommented.
# h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.h5"
# summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.csv"
h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.90min.60sec.h5"
summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.90min.60sec.csv"
# h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.120min.60sec.h5"
# summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.120min.60sec.csv"

# Open the HDF5 file once and close it when done (the original opened four
# handles and never closed any). `[()]` reads each dataset into an in-memory
# array, so the values remain usable after the file is closed.
with h5py.File(h5py_file, "r") as h5_in:
    numerics_after = h5_in["numerics_after"]
    dfy_hr = numerics_after["HR"]["vals"][()]
    dfy_dbp = numerics_after["NBPd"]["vals"][()]
    dfy_sbp = numerics_after["NBPs"]["vals"][()]
    dfy_spO2 = numerics_after["SpO2"]["vals"][()]

In [None]:
# Inspect the shape of the HR values read from the HDF5 file.
dfy_hr.shape

In [None]:
# Read both waveform datasets through a single handle that is closed afterwards
# (the original leaked one open file per line). `[()]` copies the data into
# memory, so the arrays stay valid after the file is closed.
with h5py.File(h5py_file, "r") as h5_in:
    waveforms = h5_in["waveforms"]
    dfx_pleth = waveforms["Pleth"]["waveforms"][()]
    dfx_ecg = waveforms["II"]["waveforms"][()]

In [None]:
# Print the shapes of the Pleth and lead-II waveform arrays for comparison.
print(dfx_pleth.shape)
print(dfx_ecg.shape)

In [None]:
# Re-read the post-window numerics through one handle that is closed afterwards
# (the original opened the file four times and never closed it).
with h5py.File(h5py_file, "r") as h5_in:
    numerics_after = h5_in["numerics_after"]
    dfy_hr = numerics_after["HR"]["vals"][()]
    dfy_dbp = numerics_after["NBPd"]["vals"][()]
    dfy_sbp = numerics_after["NBPs"]["vals"][()]
    dfy_spO2 = numerics_after["SpO2"]["vals"][()]

# Mean arterial pressure estimated from systolic and diastolic pressure.
dfy_map = (dfy_sbp + 2 * dfy_dbp) / 3

# One label per row: the NaN-ignoring maximum HR and the NaN-ignoring minimum
# of each pressure / SpO2 series, kept as column vectors of shape (n_rows, 1).
# Rows that are entirely NaN yield NaN (numpy emits a RuntimeWarning for them).
dfy_hr_labels = np.nanmax(dfy_hr, axis = 1).reshape(dfy_hr.shape[0], 1)
dfy_sbp_labels = np.nanmin(dfy_sbp, axis = 1).reshape(dfy_sbp.shape[0], 1)
dfy_dbp_labels = np.nanmin(dfy_dbp, axis = 1).reshape(dfy_dbp.shape[0], 1)
dfy_map_labels = np.nanmin(dfy_map, axis = 1).reshape(dfy_map.shape[0], 1)
dfy_spO2_labels = np.nanmin(dfy_spO2, axis = 1).reshape(dfy_spO2.shape[0], 1)

# Row positions whose label is exactly 0.
idx_zero_hr = np.where(dfy_hr_labels == 0)[0]
idx_zero_sbp = np.where(dfy_sbp_labels == 0)[0]
idx_zero_dbp = np.where(dfy_dbp_labels == 0)[0]
idx_zero_spO2 = np.where(dfy_spO2_labels == 0)[0]

# Row positions whose label is NaN.
idx_nan_hr = np.where(np.isnan(dfy_hr_labels))[0]
idx_nan_sbp = np.where(np.isnan(dfy_sbp_labels))[0]
idx_nan_dbp = np.where(np.isnan(dfy_dbp_labels))[0]
idx_nan_spO2 = np.where(np.isnan(dfy_spO2_labels))[0]

In [None]:
# Assemble one row of labels per summary row (the "CSN" column holds the
# summary's patient_id) and write the table to the working directory.
# NOTE(review): the output name is hard-coded to "90min"; if a different
# h5py_file / summary_file pair is selected, this name must be edited to match.
dfy = pd.read_csv(summary_file)
labels_df = pd.DataFrame({"CSN": dfy.patient_id, "HR":dfy_hr_labels.reshape(dfy_hr_labels.shape[0]), "SBP":dfy_sbp_labels.reshape(dfy_sbp_labels.shape[0]), 
                          "DBP":dfy_dbp_labels.reshape(dfy_dbp_labels.shape[0]), "MAP":dfy_map_labels.reshape(dfy_map_labels.shape[0]), "SPO2":dfy_spO2_labels.reshape(dfy_spO2_labels.shape[0])})
labels_df.to_csv('final_90min_labels_053022.csv', index=False)

In [None]:
# Report how many missing values each column of the label table contains.
for column in list(labels_df.columns):
    print(f"there are {labels_df[column].isna().sum()} na's in {column}")

## End of Labels

# Splits

In [None]:
def split(h5py_file, summary_file, label_file, time = "60", split_type = "all"):
    """
    Write a JSON file of train/val/test patient ids for rows with usable labels.

    A row is kept only if both waveform quality flags (``II_quality`` and
    ``Pleth_quality``) equal 1 and none of the label columns required by
    ``split_type`` is 0 or NaN. Kept rows are ordered by ``alignment_time``;
    the first 87.5% form the train+val pool and the remainder is the test set.
    The pool is then divided by patient id with ``GroupShuffleSplit``
    (``test_size=0.125 / 0.875``, ``random_state=7``).

    The rows of ``summary_file``, ``label_file`` and the HDF5 waveforms are
    matched by position, so all three must list the same rows in the same order.

    The train/val division is grouped by patient id, but the chronological test
    cut is not. Any id that appears in more than one split is therefore
    reported (not removed) in the "intersections of patient ids" line.

    Parameters
    ----------
    h5py_file : str
        HDF5 file containing ``waveforms/Pleth/waveforms`` and
        ``waveforms/II/waveforms``.
    summary_file : str
        CSV with ``II_quality``, ``Pleth_quality``, ``alignment_time`` and
        ``patient_id`` columns.
    label_file : str
        CSV with the label columns (``HR``, ``SPO2``, ``MAP``).
    time : str
        Suffix used in the output file name. The default is "60", so a call
        without ``time`` writes ``..._60.json``.
    split_type : str
        "all" (HR, SPO2 and MAP must all be valid), "HR", "MAP" or "SPO2".

    Returns
    -------
    None
        The ids are written to
        ``final_ptid_splits_noNaN_<split_type>_<time>.json`` in the working
        directory under the keys ``train_ids``, ``val_ids`` and ``test_ids``.

    Raises
    ------
    ValueError
        If ``split_type`` is not supported, or if the label file and the
        summary file do not have the same number of rows.
    """
    label_columns_by_type = {
        "all": ["HR", "SPO2", "MAP"],
        "HR": ["HR"],
        "MAP": ["MAP"],
        "SPO2": ["SPO2"],
    }
    if split_type not in label_columns_by_type:
        raise ValueError(
            "split_type must be one of {}, got {!r}".format(sorted(label_columns_by_type), split_type)
        )
    label_columns = label_columns_by_type[split_type]

    labels = pd.read_csv(label_file)

    # Read both waveform datasets through one handle and close it afterwards;
    # `[()]` copies the data into memory, so it remains valid after closing.
    with h5py.File(h5py_file, "r") as h5_in:
        waveforms = h5_in["waveforms"]
        dfx_pleth = waveforms["Pleth"]["waveforms"][()]
        dfx_ecg = waveforms["II"]["waveforms"][()]
    # Channels are stacked on axis 1: (rows, 2, ...) with Pleth first, II second.
    dfx = np.stack((dfx_pleth, dfx_ecg), axis=1)
    dfy = pd.read_csv(summary_file)

    if labels.shape[0] != dfy.shape[0]:
        # Rows are matched by position, so a length mismatch means the labels
        # would be attached to the wrong rows.
        raise ValueError(
            "label_file has {} rows but summary_file has {}".format(labels.shape[0], dfy.shape[0])
        )

    indices = dfy.index[(dfy["II_quality"] == 1) & (dfy['Pleth_quality'] == 1)].tolist()
    print("number of patients dropped bc of waveform quality: {}".format(dfy.shape[0] - len(indices)))
    print("Old Shape = {}".format(dfy.shape))
    print("Old data shape = {}".format(dfx.shape))

    # Row positions where any required label is exactly 0 or missing.
    required = labels[label_columns]
    invalid_rows = ((required == 0) | required.isna()).any(axis=1).to_numpy()
    idx_drop = np.flatnonzero(invalid_rows)

    # Sorted so the row order (and therefore the chronological cut below) is
    # reproducible; plain set iteration order is not guaranteed.
    final_ind = sorted(set(indices) - set(idx_drop.tolist()))
    dfy_labels = np.concatenate([labels[col].iloc[final_ind].to_numpy() for col in label_columns])
    dfx = dfx[final_ind]
    dfy = dfy.iloc[final_ind]

    print("Number in Set = {}".format(dfx.shape[0]))
    print("New Data Shape = {}".format(dfx.shape))

    # Sanity check: both counts should be 0 after the filtering above.
    idx_zero = np.where(dfy_labels == 0)[0]
    idx_nan = np.where(np.isnan(dfy_labels))[0]

    print("num 0 to drop now = {}".format(len(idx_zero)))
    print("num NaN to drop now = {}".format(len(idx_nan)))

    # Chronological cut: earliest 87.5% of rows -> train+val, latest -> test.
    # A stable sort keeps rows with equal alignment_time in a fixed order.
    dfy_all = dfy.sort_values(by=['alignment_time'], kind="mergesort")
    train_len = int(np.ceil(dfy_all.shape[0] * 0.875))
    dfy_trainval = dfy_all.iloc[:train_len]
    ytest_all = dfy_all.iloc[train_len:]

    # Grouped shuffle split so that no patient id is in both train and val.
    # Only the number of rows and the groups influence the result, so the
    # summary rows themselves are passed as X.
    splitter_train = GroupShuffleSplit(test_size= 0.125 / 0.875, n_splits=1, random_state = 7)
    train_inds, val_inds = next(splitter_train.split(dfy_trainval, groups=dfy_trainval['patient_id']))

    ytrain_all = dfy_trainval.iloc[train_inds]
    yval_all = dfy_trainval.iloc[val_inds]

    pt_ids_train = set(ytrain_all['patient_id'])
    pt_ids_val = set(yval_all['patient_id'])
    pt_ids_test = set(ytest_all['patient_id'])

    # Report ids shared by ANY two splits. The original intersected all three
    # sets at once, which misses an id shared by only two of them.
    shared_ids = (
        (pt_ids_train & pt_ids_val)
        | (pt_ids_train & pt_ids_test)
        | (pt_ids_val & pt_ids_test)
    )
    print('intersections of patient ids = {}'.format(shared_ids))

    d = {'train_ids':list(pt_ids_train), 'val_ids':list(pt_ids_val), 'test_ids':list(pt_ids_test)}
    with open("final_ptid_splits_noNaN_" + split_type + "_" + time + ".json", "w") as f:
        json.dump(d, f)

In [None]:
#SPLITS
h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.h5"
summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.csv"
label_file = "final_60min_labels_052722.csv"

In [None]:
# HR-only split. With the default time="60" the output file is
# final_ptid_splits_noNaN_HR_60.json.
split(h5py_file, summary_file, label_file, split_type="HR")

In [None]:
# MAP-only split. With the default time="60" the output file is
# final_ptid_splits_noNaN_MAP_60.json.
split(h5py_file, summary_file, label_file, split_type="MAP")

In [None]:
# SpO2-only split. With the default time="60" the output file is
# final_ptid_splits_noNaN_SPO2_60.json.
split(h5py_file, summary_file, label_file, split_type="SPO2")

In [None]:
# Split requiring valid HR, SpO2 and MAP labels. With the default time="60"
# the output file is final_ptid_splits_noNaN_all_60.json.
split(h5py_file, summary_file, label_file, split_type="all")

In [None]:
#SPLITS 90 min
h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.90min.60sec.h5"
summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.90min.60sec.csv"
label_file = "final_90min_labels_053022.csv"

split(h5py_file, summary_file, label_file, time = "90min", split_type="all")

In [None]:
#SPLITS 120 min
h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.120min.60sec.h5"
summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.120min.60sec.csv"
label_file = "final_120min_labels_053022.csv"

split(h5py_file, summary_file, label_file, time = "120min", split_type="all")

### Double check that patients we dropped actually should have been dropped

In [None]:
# dropped patients 
h5py_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.h5"
summary_file = "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.csv"
label_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_60min_labels_052722.csv"
split_file = "final_ptid_splits_noNaN_all.json"

all_pts = list(np.array(pd.read_csv(summary_file).patient_id))

with open('/deep/group/ed-monitor-self-supervised/test_models_v1/' + split_file) as json_file:
    splits = json.load(json_file) 

    
in_nan_all = list(np.concatenate((np.array(splits['train_ids']), 
                            np.array(splits['val_ids']), 
                            np.array(splits['test_ids']))))

dropped = np.array(list(set(all_pts) - set(in_nan_all)))

In [None]:
# Display the ids that are in the summary CSV but in none of the splits.
dropped

In [None]:
# Rebind the id list as a numpy array for the np.isin lookup in a later cell.
all_pts = np.array(all_pts)

In [None]:
# Display the array of all patient ids from the summary CSV.
all_pts

In [None]:
# Row positions in the summary CSV whose patient id is among the dropped ids.
indicies_to_examine = np.where(np.isin(all_pts, dropped))[0]

In [None]:
# Display the row positions of the dropped patients.
indicies_to_examine

In [None]:
# Show every dropped id without numpy's display truncation.
# NOTE(review): this prints patient identifiers in full - clear the output
# before sharing the notebook.
list(dropped)

In [None]:
# Show every patient id without numpy's display truncation.
# NOTE(review): this prints patient identifiers in full - clear the output
# before sharing the notebook.
list(all_pts)

In [None]:
# Number of summary rows that belong to dropped patients.
indicies_to_examine.shape

In [None]:
# Number of distinct dropped patient ids.
dropped.shape

In [None]:
def examine_one(indicies_to_examine, labels_file, summary_file, h5py_file=None):
    """
    Print the labels and waveform quality flags of one randomly chosen row.

    Parameters
    ----------
    indicies_to_examine : numpy.ndarray
        Candidate row positions; one of them is drawn with ``np.random.randint``.
    labels_file : str
        CSV whose row at the drawn position is printed in full.
    summary_file : str
        CSV providing ``II_quality`` and ``Pleth_quality`` for the same position.
    h5py_file : optional
        Accepted but not used.
    """
    label_table = pd.read_csv(labels_file)
    summary_table = pd.read_csv(summary_file)

    # Draw one candidate uniformly and resolve it to a row position.
    pick = np.random.randint(indicies_to_examine.shape[0], size=1)[0]
    row_position = indicies_to_examine[pick]

    print(label_table.iloc[row_position])
    summary_row = summary_table.iloc[row_position]
    print(f"II_qual = {summary_row.II_quality}\nPleth_qual = {summary_row.Pleth_quality}")

In [None]:
# Spot-check one randomly chosen dropped row against the 60-minute label and
# summary files; re-run the cell to draw another row.
examine_one(indicies_to_examine, 
            "/deep/group/ed-monitor-self-supervised/test_models_v1/final_60min_labels_052722.csv", 
            "/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.csv")

## Final Task-Specific Splits (remove all patients who initially present as abnormal)

In [None]:
def remove_abnormals(split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, split_type, time="60min"):
    """
    Restrict an existing train/val/test split to ids listed in the "normal" files.

    Each "normal" CSV supplies a set of ids from its ``patient_id`` column. The
    ids of every split are intersected with the set selected by ``split_type``
    and the result is written to
    ``final_ptid_splits_noabnormalities_task_<split_type>_<time>.json`` in the
    working directory.

    Parameters
    ----------
    split_file : str
        JSON file with ``train_ids``, ``val_ids`` and ``test_ids`` lists.
    normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file : str
        CSV files with a ``patient_id`` column. All three are read and their
        sizes printed regardless of ``split_type``.
    split_type : str
        "all" keeps ids present in all three files; "tachycardia" uses the HR
        file, "hypotension" the MAP file and "hypoxia" the SpO2 file.
    time : str
        Suffix used in the output file name.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If ``split_type`` is not one of the four supported values (the original
        silently wrote nothing in that case).
    """
    supported = ('all', 'tachycardia', 'hypotension', 'hypoxia')
    if split_type not in supported:
        raise ValueError("split_type must be one of {}, got {!r}".format(supported, split_type))

    hr_csns = set(pd.read_csv(normal_hr_csn_file)['patient_id'])
    map_csns = set(pd.read_csv(normal_map_csn_file)['patient_id'])
    spo2_csns = set(pd.read_csv(normal_spo2_csn_file)['patient_id'])

    print("length of hr_csns = {}".format(len(hr_csns)))
    print("length of map_csns = {}".format(len(map_csns)))
    print("length of spo2_csns = {}".format(len(spo2_csns)))

    with open(split_file) as json_file:
        splits = json.load(json_file)

    # Pick the set of allowed ids and the banner text for the requested task.
    if split_type == 'all':
        allowed_csns = hr_csns & spo2_csns & map_csns
        print("length of final_csns = {}".format(len(allowed_csns)))
        banner = "no abnormalities"
    elif split_type == 'tachycardia':
        allowed_csns, banner = hr_csns, "no tachy"
    elif split_type == 'hypotension':
        allowed_csns, banner = map_csns, "no hypotension"
    else:  # 'hypoxia'
        allowed_csns, banner = spo2_csns, "no hypoxia"

    # Sorted so the written JSON is identical from run to run.
    train_ids = sorted(set(splits['train_ids']) & allowed_csns)
    val_ids = sorted(set(splits['val_ids']) & allowed_csns)
    test_ids = sorted(set(splits['test_ids']) & allowed_csns)

    print("\n-----{}-----".format(banner))
    print("length of original ids = {}".format(len(splits['train_ids']) + len(splits['val_ids']) + len(splits['test_ids'])))
    print("length of train_ids = {}".format(len(train_ids)))
    print("length of val_ids = {}".format(len(val_ids)))
    print("length of test_ids = {}".format(len(test_ids)))
    print("total ids = {}".format(len(train_ids) + len(val_ids) + len(test_ids)))

    d = {'train_ids':train_ids, 'val_ids':val_ids, 'test_ids':test_ids}
    out_name = "final_ptid_splits_noabnormalities_task_" + split_type + "_" + time + ".json"
    with open(out_name, "w") as f:
        json.dump(d, f)

In [None]:
# CSVs whose patient_id column lists the ids to keep for each vital sign.
# NOTE(review): the "initial_*_normal" names suggest these are patients whose
# initial HR / MAP / SpO2 was normal - confirm.
normal_hr_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_hr_normal.csv'
normal_map_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_map_normal.csv'
normal_spo2_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_spo2_normal.csv'

# Previously generated splits to be filtered.
# NOTE(review): these names carry no time suffix, whereas split() in this
# notebook writes "..._<time>.json" - confirm these are the intended files.
all_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_all.json"
hr_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_HR.json"
map_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_MAP.json"
spo2_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_SPO2.json"

In [None]:
# Keep only ids present in all three "normal" files; with the default
# time="60min" this writes final_ptid_splits_noabnormalities_task_all_60min.json.
remove_abnormals(all_split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, "all")

In [None]:
# Filter the HR split by the HR "normal" file; writes
# final_ptid_splits_noabnormalities_task_tachycardia_60min.json.
remove_abnormals(hr_split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, "tachycardia")

In [None]:
# Filter the MAP split by the MAP "normal" file; writes
# final_ptid_splits_noabnormalities_task_hypotension_60min.json.
remove_abnormals(map_split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, "hypotension")

In [None]:
# Filter the SpO2 split by the SpO2 "normal" file; writes
# final_ptid_splits_noabnormalities_task_hypoxia_60min.json.
remove_abnormals(spo2_split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, "hypoxia")

In [None]:
# Filter the 90-minute "all" split; writes
# final_ptid_splits_noabnormalities_task_all_90min.json.
# NOTE(review): the "normal" files below come from the 15min.60min.60sec
# dataset while the split is the 90-minute one - confirm this is intentional.
normal_hr_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_hr_normal.csv'
normal_map_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_map_normal.csv'
normal_spo2_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_spo2_normal.csv'
# The three task-specific split paths are reassigned here but not used by the call below.
hr_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_HR.json"
map_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_MAP.json"
spo2_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_SPO2.json"
all_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_all_90min.json"
remove_abnormals(all_split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, "all", "90min")

In [None]:
# Filter the 120-minute "all" split; writes
# final_ptid_splits_noabnormalities_task_all_120min.json.
# NOTE(review): the "normal" files below come from the 15min.60min.60sec
# dataset while the split is the 120-minute one - confirm this is intentional.
normal_hr_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_hr_normal.csv'
normal_map_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_map_normal.csv'
normal_spo2_csn_file = '/deep/group/ed-monitor-self-supervised/v4/downstream.15min.60min.60sec.initial_spo2_normal.csv'
# The three task-specific split paths are reassigned here but not used by the call below.
hr_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_HR.json"
map_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_MAP.json"
spo2_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_SPO2.json"
all_split_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noNaN_all_120min.json"
remove_abnormals(all_split_file, normal_hr_csn_file, normal_map_csn_file, normal_spo2_csn_file, "all", "120min")

## Check Number of Events

In [None]:
def num_events(split_file, label_file, split_type):
    """
    Print how many label rows in each split cross the event thresholds.

    Label rows are assigned to train / val / test by matching the ``CSN``
    column against the id lists in ``split_file``. The thresholds are
    HR > 110 (tachycardia), MAP < 65 (hypotension) and SPO2 < 90 (hypoxia).

    Parameters
    ----------
    split_file : str
        JSON file with ``train_ids``, ``val_ids`` and ``test_ids`` lists.
    label_file : str
        CSV with ``CSN``, ``HR``, ``MAP`` and ``SPO2`` columns.
    split_type : str
        'tachycardia', 'hypotension' or 'hypoxia' prints that task only;
        'all' prints all three. Any other value prints nothing.
    """
    with open(split_file) as handle:
        id_splits = json.load(handle)

    train_ids, val_ids, test_ids = (
        np.array(id_splits[key]) for key in ('train_ids', 'val_ids', 'test_ids')
    )

    label_table = pd.read_csv(label_file)
    csn_values = np.array(label_table.CSN)

    # Select the label rows whose CSN belongs to each split.
    train_rows, val_rows, test_rows = (
        label_table.iloc[np.flatnonzero(np.isin(csn_values, ids))]
        for ids in (train_ids, val_ids, test_ids)
    )

    def _tally(is_event):
        # Number of event rows in (train, val, test), in that order.
        return tuple(np.sum(is_event(rows)) for rows in (train_rows, val_rows, test_rows))

    every_task = split_type == 'all'

    if every_task or split_type == 'tachycardia':
        n_train, n_val, n_test = _tally(lambda rows: rows.HR > 110)
        print(f"Train Tachycardic Pts = {n_train}\nVal Tachycardic Pts = {n_val}\nTest Tachycardic Pts = {n_test}")

    if every_task or split_type == 'hypotension':
        n_train, n_val, n_test = _tally(lambda rows: rows.MAP < 65)
        print(f"Train Hypotensive Pts = {n_train}\nVal Hypotensive Pts = {n_val}\nTest Hypotensive Pts = {n_test}")

    if every_task or split_type == 'hypoxia':
        n_train, n_val, n_test = _tally(lambda rows: rows.SPO2 < 90)
        print(f"Train Hypoxic Pts = {n_train}\nVal Hypoxic Pts = {n_val}\nTest Hypoxic Pts = {n_test}")



In [None]:
# Label files (current and older version) and the task-specific splits used by
# the event-count cells.
# NOTE(review): label_file is the 60-minute label CSV. The split names here
# have no time suffix, whereas remove_abnormals() in this notebook writes
# "..._<time>.json" - confirm these are the intended files.
label_file = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_60min_labels_052722.csv"
label_file_old = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_60min_labels.csv"
tachy_split = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_tachycardia.json"
hypotension_split = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_hypotension.json"
hypoxia_split = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_hypoxia.json"
all_split = "/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_all.json"

In [None]:
# Count events with the labels generated for the same 120-minute window as the
# split. The original passed `label_file`, which is the 60-minute label CSV, so
# the reported numbers were 60-minute event counts. The path below is the same
# relative name the 120-minute split cell reads.
# NOTE(review): confirm the file is reachable from the working directory.
label_file_120 = "final_120min_labels_053022.csv"
print("----Final Counts (120 min)----")
num_events("/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_all_120min.json", label_file_120, "all")

In [None]:
# Count events with the labels generated for the same 90-minute window as the
# split. The original passed `label_file`, which is the 60-minute label CSV, so
# the reported numbers were 60-minute event counts. The path below is the same
# relative name the label-generation and 90-minute split cells use.
# NOTE(review): confirm the file is reachable from the working directory.
label_file_90 = "final_90min_labels_053022.csv"
print("----Final Counts (90 min)----")
num_events("/deep/group/ed-monitor-self-supervised/test_models_v1/final_ptid_splits_noabnormalities_task_all_90min.json", label_file_90, "all")

In [None]:
# Event counts for all three tasks on the "all" split with the 60-minute labels.
print("----Final Counts----")
num_events(all_split, label_file, "all")

In [None]:
# Tachycardia (HR > 110) counts per split for the tachycardia-specific split.
print("----Final Counts----")
num_events(tachy_split, label_file, "tachycardia")

In [None]:
# Same count using the older label file and a split file read from the working
# directory, for comparison with the final counts.
print("----Old Counts----")
num_events("data_v4_ptid_splits_noabnormalities_task_tachycardia.json", label_file_old, "tachycardia")

In [None]:
# Hypotension (MAP < 65) counts per split for the hypotension-specific split.
print("----Final Counts----")
num_events(hypotension_split, label_file, "hypotension")

In [None]:
# Same count using the older label file and a split file read from the working
# directory, for comparison with the final counts.
print("----Old Counts----")
num_events("data_v4_ptid_splits_noabnormalities_task_hypotension.json", label_file_old, "hypotension")

In [None]:
# Hypoxia (SPO2 < 90) counts per split for the hypoxia-specific split.
print("----Final Counts----")
num_events(hypoxia_split, label_file, "hypoxia")

In [None]:
# Same count using the older label file and a split file read from the working
# directory, for comparison with the final counts.
print("----Old Counts----")
num_events("data_v4_ptid_splits_noabnormalities_task_hypoxia.json", label_file_old, "hypoxia")