In [1]:
import numpy as np
import pandas as pd
from setup_data import DataSetup

from utils.config import gtn_param

from train_gtn import GatedTransformerNetwork, load_model, initialize_experiment
from utils.loader import make_loader

from torch.utils.data import DataLoader, ConcatDataset

import torch
import torch.nn as nn
import torch.nn.functional as F

import os
import tqdm

In [2]:
# data_file = "final_dataset.pickle"
# training_examples, lengths_list, is_sepsis, writer, destination_path = initialize_experiment(data_file)
# train_loader, test_loader = make_loader(training_examples, lengths_list, is_sepsis, 2048, mode='window')

- **1. Load all the data atonce without windowing. (Cuz we need to predict before hours of sepsis occured for each patient).**

In [3]:
def platelets_sofa(platelets):
    s_score = 0
    if platelets > 150:
        s_score += 0
    elif platelets >= 101 and platelets <= 150:
        s_score += 1
    elif platelets >= 51 and platelets <= 100:
        s_score += 2
    elif platelets >= 21 and platelets <= 50:
        s_score += 3
    elif platelets <= 20:
        s_score += 4

    return s_score


def total_bilirubin_sofa(bilirubin):
    s_score = 0
    if bilirubin < 1.2:
        s_score += 0
    elif bilirubin >= 1.2 and bilirubin <= 1.9:
        s_score += 1
    elif bilirubin >= 2.0 and bilirubin <= 5.9:
        s_score += 2
    elif bilirubin >= 6 and bilirubin <= 11.9:
        s_score += 3
    elif bilirubin >= 12.0:
        s_score += 4

    return s_score


def map_sofa(map):
    s_score = 0
    if map >= 70:
        s_score += 0
    elif map < 70:
        s_score += 1

    return s_score


def sofa_score(row):
    platelets_score = row['Platelets_SOFA']
    bilirubin_score = row['Bilirubin_total_SOFA']
    map_sofa = row['MAP_SOFA']

    return platelets_score + bilirubin_score + map_sofa


def detect_sofa_change(data, time_window=24):
    data['SOFA_score_diff'] = data['SOFA_score'].diff(periods=time_window)
    data['SOFA_deterioration'] = (data['SOFA_score_diff'] >= 2).astype(int)
    data['SOFA_score_diff'] = data['SOFA_score_diff'].fillna(value=0)
    # data['SOFA_score_diff'].fillna(value=0, inplace=True)
    return data


def respiratory_rate_qsofa(respiratory_rate):
    q_score = 0
    if respiratory_rate >= 22.0:
        q_score += 1

    return q_score


def sbp_qsofa(sbp):
    q_score = 0
    if sbp < 100.0:
        q_score += 1

    return q_score


def qsofa_score(row):
    resp_score = row['ResP_qSOFA']
    sbp_score = row['SBP_qSOFA']

    return sbp_score + resp_score


def q_sofa_indicator(row):
    resp = row['ResP_qSOFA']
    sbp = row['SBP_qSOFA']
    q_score = 0
    if resp > 0 and sbp > 0:
        q_score += 1
    return q_score


def sofa_indicator(row):
    # 2+ points indicates organ dysfunction
    platelets = row['Platelets_SOFA']
    bilirubin_total = row['Bilirubin_total_SOFA']
    map = row['MAP_SOFA']

    total_points = platelets + bilirubin_total + map

    q_score = 0
    if total_points > 2:
        q_score += 1
    return q_score


def detect_qsofa_change(data, time_window=24):
    data['qSOFA_score_diff'] = data['qSOFA_score'].diff(periods=time_window)
    data['qSOFA_deterioration'] = (data['qSOFA_score_diff'] >= 2).astype(int)

    data['qSOFA_score_diff'] = data['qSOFA_score_diff'].fillna(value=0)
    # data['qSOFA_score_diff'].fillna(value=0, inplace=True)

    return data


def mortality_sofa(row):
    # 2+ points indicates organ dysfunction
    platelets = row['Platelets_SOFA']
    bilirubin_total = row['Bilirubin_total_SOFA']
    map = row['MAP_SOFA']

    total_points = platelets + bilirubin_total + map

    mortality_rate = 0
    if total_points > 1 and total_points <= 9:
        mortality_rate += 0.30
    elif total_points >= 10 and total_points < 14:
        mortality_rate += 0.50
    elif total_points >= 14:
        mortality_rate += 0.95

    return mortality_rate


def temp_sirs(temp):
    sirs_score = 0
    if temp < 36 or temp >= 38:
        sirs_score += 1

    return sirs_score


def heart_rate_sirs(heart_rate):
    sirs_score = 0
    if heart_rate > 90:
        sirs_score += 1

    return sirs_score


def resp_sirs(resp):
    sirs_score = 0
    if resp > 20:
        sirs_score += 1

    return sirs_score


def paco2_sirs(paco2):
    sirs_score = 0
    if paco2 < 32:
        sirs_score += 1

    return sirs_score


def wbc_sirs(wbc):
    sirs_score = 0
    if wbc * 1000 < 4000 or wbc * 1000 > 12000:
        sirs_score += 1
    return sirs_score


def t_suspicion(patient_data):
    """
    Since we don't have information about IV antibiotics and blood cultures,
    we are is considering that patient have infection if any 2 SIRS criteria are met
    """
    patient_data['infection_proxy'] = (patient_data[['Temp_sirs', 'HR_sirs', 'Resp_sirs']].eq(1).sum(axis=1) >= 2).astype(int)

    # t_suspicion is the first hour of (ICULOS) where infection proxy is positive at time t
    patient_data['t_suspicion'] = patient_data.groupby(['PatientID'])['ICULOS'].transform(
        lambda x: x[patient_data['infection_proxy'] == 1].min() if (patient_data['infection_proxy'] == 1).any() else 0)

    return patient_data


def t_sofa(data):
    """
    Two-point deterioration in SOFA score at time t but within a 24-hour period.
    """
    data['t_sofa'] = data['SOFA_score_diff'].where((abs(data['SOFA_score_diff']) >= 2) & (data['ICULOS'] <= 24),
                                                   other=0)
    return data


def t_sepsis(row):
    if pd.isna(row['t_suspicion']) or row['t_suspicion'] == 0 or row['t_sofa'] == 0:
        return 0
    if row['t_suspicion'] - 24 <= row['t_sofa'] <= row['t_suspicion'] + 12:
        return min(row['t_suspicion'], row['t_sofa'])

In [4]:
def load_sepsis_model():
    config = gtn_param
    d_input, d_channel, d_output = 336, 63, 2  # (time_steps (window_size), channels, num_classes)
    model = GatedTransformerNetwork(d_model=config['d_model'], d_input=d_input, d_channel=d_channel,
                                    d_output=d_output, d_hidden=config['d_hidden'], q=config['q'],
                                    v=config['v'], h=config['h'], N=config['N'], dropout=config['dropout'],
                                    pe=config['pe'], mask=config['mask'], device='cuda').to('cuda')

    return model

def load_challenge_data(file):
    with open(file, 'r') as f:
        header = f.readline().strip()
        column_names = header.split('|')
        data = np.loadtxt(f, delimiter='|')

    # Ignore SepsisLabel column if present.
    if column_names[-1] == 'SepsisLabel':
        column_names = column_names[:-1]
        data = data[:, :-1]

    return data

def get_sepsis_score(data, model):

    columns = ['HR', 'O2Sat', 'Temp', 'SBP', 'MAP', 'DBP', 'Resp',
       'EtCO2', 'BaseExcess', 'HCO3', 'FiO2', 'pH', 'PaCO2', 'SaO2', 'AST',
       'BUN', 'Alkalinephos', 'Calcium', 'Chloride', 'Creatinine',
       'Bilirubin_direct', 'Glucose', 'Lactate', 'Magnesium', 'Phosphate',
       'Potassium', 'Bilirubin_total', 'TroponinI', 'Hct', 'Hgb', 'PTT', 'WBC',
       'Fibrinogen', 'Platelets', 'Age', 'Gender', 'Unit1', 'Unit2',
       'HospAdmTime', 'ICULOS']

    # Reformatting data into DataFrame to add features
    data_df = pd.DataFrame(data, columns=columns)
    patient_data = data_df.fillna(0)
    

    patient_data['MAP_SOFA'] = patient_data['MAP'].apply(map_sofa)
    patient_data['Bilirubin_total_SOFA'] = patient_data['Bilirubin_total'].apply(total_bilirubin_sofa)
    patient_data['Platelets_SOFA'] = patient_data['Platelets'].apply(platelets_sofa)
    patient_data['SOFA_score'] = patient_data.apply(sofa_score, axis=1)
    patient_data = detect_sofa_change(patient_data)

    patient_data['ResP_qSOFA'] = patient_data['Resp'].apply(respiratory_rate_qsofa)
    patient_data['SBP_qSOFA'] = patient_data['SBP'].apply(sbp_qsofa)
    patient_data['qSOFA_score'] = patient_data.apply(qsofa_score, axis=1)
    patient_data = detect_qsofa_change(patient_data)

    patient_data['qSOFA_indicator'] = patient_data.apply(q_sofa_indicator, axis=1)  # Sepsis detected
    patient_data['SOFA_indicator'] = patient_data.apply(sofa_indicator, axis=1)  # Organ Dysfunction occurred
    patient_data['Mortality_sofa'] = patient_data.apply(mortality_sofa, axis=1)  # Morality rate

    patient_data['Temp_sirs'] = patient_data['Temp'].apply(temp_sirs)
    patient_data['HR_sirs'] = patient_data['HR'].apply(heart_rate_sirs)
    patient_data['Resp_sirs'] = patient_data['Resp'].apply(resp_sirs)
    patient_data['paco2_sirs'] = patient_data['PaCO2'].apply(resp_sirs)
    patient_data['wbc_sirs'] = patient_data['WBC'].apply(wbc_sirs)

    patient_data = t_suspicion(patient_data)
    patient_data = t_sofa(patient_data)
    patient_data['t_sepsis'] = patient_data.apply(t_sepsis, axis=1)

    patient_data = torch.tensor(patient_data.values).unsqueeze(0)
    print(patient_data.shape)

    model.eval()
    model.to('cuda')
    predictions = []
    probas = []

    with torch.no_grad():
        patient_data = patient_data.to('cuda').float()
        outputs, _, _, _, _, _, _ = model(patient_data, stage='test')
    
        _, predicted = torch.max(outputs, 1)
        probabilities = F.softmax(outputs, dim=1)

        predictions.extend(predicted.cpu().numpy())
        probas.extend(probabilities.cpu().numpy())

    return patient_data

In [5]:
def evaluate():
    input_directory = "/localscratch/neeresh/data/physionet2019/physionet.org/files/challenge-2019/1.0.0/training/training_setA/"
    # input_directory = "/localscratch/neeresh/data/physionet2019/physionet.org/files/challenge-2019/1.0.0/training/training_setB/"
    output_directory = "./data/test_output/"

    # Find files.
    files = []
    for f in os.listdir(input_directory):
        if os.path.isfile(os.path.join(input_directory, f)) and not f.lower().startswith('.') and f.lower().endswith('psv'):
            files.append(f)
    
    # files.sort()
    if not os.path.isdir(output_directory):
        os.mkdir(output_directory)
    
    # Load model.
    print('Loading sepsis model...')
    model = load_sepsis_model()

    # Iterate over files.
    print('Predicting sepsis labels...')
    num_files = len(files)
    for i, f in enumerate(files):
        print('    {}/{}...'.format(i+1, num_files))

        # Load data.
        input_file = os.path.join(input_directory, f)
        data = load_challenge_data(input_file)

        # Make predictions.
        num_rows = len(data)  # Number of patient recordings
        scores = np.zeros(num_rows)
        labels = np.zeros(num_rows)
        
        for t in range(num_rows):
            current_data = data[:t+1]
            data_df = get_sepsis_score(current_data, model)
            # current_score, current_label = get_sepsis_score(current_data, model)
            # scores[t] = current_score
            # labels[t] = current_labels

            break
        
        break
    
    return model, data, current_data, data_df

model, data, current_data, data_df = evaluate()

Loading sepsis model...
Predicting sepsis labels...
    1/20336...
torch.Size([1, 1, 63])


RuntimeError: The expanded size of the tensor (1) must match the existing size (336) at non-singleton dimension 0.  Target sizes: [1, 256].  Tensor sizes: [336, 256]

In [5]:
data_file = "final_dataset.pickle"
training_examples, lengths_list, is_sepsis, writer, destination_path = initialize_experiment(data_file)
train_loader, test_loader = make_loader(training_examples, lengths_list, is_sepsis, 128, mode='padding')

Datafile used: final_dataset.pickle
Total number of patients: 40336
Min recordings: 8 & Max recordings: 336
Distribution of the SepsisLabel: 
0    37404
1     2932
Name: count, dtype: int64


Padding...: 100%|██████████| 32268/32268 [00:25<00:00, 1257.11it/s]
Padding...: 100%|██████████| 8067/8067 [00:05<00:00, 1378.93it/s]


In [6]:
for inputs, targets in train_loader:
    break

RuntimeError: Caught RuntimeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)  # type: ignore[possibly-undefined]
           ^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
           ^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 316, in default_collate
    return collate(batch, collate_fn_map=default_collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 173, in collate
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 173, in <listcomp>
    return [collate(samples, collate_fn_map=collate_fn_map) for samples in transposed]  # Backwards compatibility.
            ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 141, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 222, in collate_numpy_array_fn
    return collate([torch.as_tensor(b) for b in batch], collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 141, in collate
    return collate_fn_map[elem_type](batch, collate_fn_map=collate_fn_map)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/localscratch/neeresh/envs/timeseries/lib/python3.11/site-packages/torch/utils/data/_utils/collate.py", line 212, in collate_tensor_fn
    out = elem.new(storage).resize_(len(batch), *list(elem.size()))
          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
RuntimeError: Trying to resize storage that is not resizable


In [None]:
sample = pd.read_csv('/localscratch/neeresh/data/physionet2019/physionet.org/files/challenge-2019/1.0.0/training/training_setA/p000001.psv', sep='|')
print(sample.shape)

sample.head()

(54, 41)


Unnamed: 0,HR,O2Sat,Temp,SBP,MAP,DBP,Resp,EtCO2,BaseExcess,HCO3,...,WBC,Fibrinogen,Platelets,Age,Gender,Unit1,Unit2,HospAdmTime,ICULOS,SepsisLabel
0,,,,,,,,,,,...,,,,83.14,0,,,-0.03,1,0
1,97.0,95.0,,98.0,75.33,,19.0,,,,...,,,,83.14,0,,,-0.03,2,0
2,89.0,99.0,,122.0,86.0,,22.0,,,,...,,,,83.14,0,,,-0.03,3,0
3,90.0,95.0,,,,,30.0,,24.0,,...,,,,83.14,0,,,-0.03,4,0
4,103.0,88.5,,122.0,91.33,,24.5,,,,...,,,,83.14,0,,,-0.03,5,0


In [None]:
pad = (366 - len(sample))
print(pad)
np.pad(sample.values, ((0, 0), (pad, 0)), mode='constant').shape

312


(54, 353)

In [None]:
# def get_sepsis_score(loader, model):
#     model.eval()
#     model.to('cuda')
#     predictions = []
#     probas = []

#     with torch.no_grad():
#         for inputs, targets in tqdm.tqdm(loader, desc="Generating Predictions", total=len(loader)):
#             inputs = inputs.to('cuda').float()
#             targets = targets.to('cuda')
#             outputs, _, _, _, _, _, _ = model(inputs, stage='test')
        
#             _, predicted = torch.max(outputs, 1)
#             probabilities = F.softmax(outputs, dim=1)

#             predictions.extend(predicted.cpu().numpy())
#             probas.extend(probabilities.cpu().numpy())

#             break

#     return predictions, probas

In [None]:
# model = load_sepsis_model()
# labels, scores = get_sepsis_score(test_loader, model)