In [1]:
import numpy as np
import pandas as pd

from utils.evaluate_helper_methods import *
from utils.path_utils import project_root

import tqdm

- **Labels Directory: one file per patient, containing that patient's true labels.**<br>
- **Predictions Directory: one file per patient, containing the score and predicted label for each time step in the format (PredictedProbability|PredictedLabel).**<br>
- **These can be created using the driver.py file.**<br>
- **TODO: Finish it first!**

- **When applying the windowing method, make sure of the following:**<br>
- **1. After the model is trained with the windowing technique, it accepts: (6, 63, 2) -> (window_size, features, num_classes).**
- **2. Since the predictions are based on t, t+1, t+2, ..., t+n, fix the shape of the sample for <= t+6 (by padding or otherwise).**
- **3. However, if the window size is larger, we need to pad for more iterations, and thus we cannot get a good score.**

In [3]:
# Model dimensions: sequence length (time steps), features per time step, and
# number of output classes. These are passed to load_sepsis_model in evaluate().
d_input, d_channel, d_output = 336, 63, 2

def get_sepsis_score(data, model):
    """Predict the sepsis label for one patient's record observed so far.

    The raw measurements are expanded with SOFA / qSOFA / SIRS derived
    features, zero-padded (or truncated) to ``d_input`` time steps, and passed
    through ``model`` as a single-sample batch.

    Parameters
    ----------
    data : array-like
        Rows are time steps; the 40 columns must follow the order of
        ``columns`` below.
    model : torch.nn.Module
        Model called as ``model(x, stage='test')``; the first of its seven
        return values is used as the class logits.

    Returns
    -------
    predictions : list
        One-element list holding the predicted class index.
    probas : list
        One-element list holding the softmax probability of the predicted
        class.
    patient_tensor : torch.Tensor
        The float32 model input of shape (1, d_input, num_features), on
        ``device``.
    """
    columns = ['HR', 'O2Sat', 'Temp', 'SBP', 'MAP', 'DBP', 'Resp',
       'EtCO2', 'BaseExcess', 'HCO3', 'FiO2', 'pH', 'PaCO2', 'SaO2', 'AST',
       'BUN', 'Alkalinephos', 'Calcium', 'Chloride', 'Creatinine',
       'Bilirubin_direct', 'Glucose', 'Lactate', 'Magnesium', 'Phosphate',
       'Potassium', 'Bilirubin_total', 'TroponinI', 'Hct', 'Hgb', 'PTT', 'WBC',
       'Fibrinogen', 'Platelets', 'Age', 'Gender', 'Unit1', 'Unit2',
       'HospAdmTime', 'ICULOS']

    # Reformat the data into a DataFrame so derived features can be added.
    patient_data = pd.DataFrame(data, columns=columns)
    patient_data = patient_data.fillna(0)

    # SOFA-derived features.
    patient_data['MAP_SOFA'] = patient_data['MAP'].apply(map_sofa)
    patient_data['Bilirubin_total_SOFA'] = patient_data['Bilirubin_total'].apply(total_bilirubin_sofa)
    patient_data['Platelets_SOFA'] = patient_data['Platelets'].apply(platelets_sofa)
    patient_data['SOFA_score'] = patient_data.apply(sofa_score, axis=1)
    patient_data = detect_sofa_change(patient_data)

    # qSOFA-derived features.
    patient_data['ResP_qSOFA'] = patient_data['Resp'].apply(respiratory_rate_qsofa)
    patient_data['SBP_qSOFA'] = patient_data['SBP'].apply(sbp_qsofa)
    patient_data['qSOFA_score'] = patient_data.apply(qsofa_score, axis=1)
    patient_data = detect_qsofa_change(patient_data)

    patient_data['qSOFA_indicator'] = patient_data.apply(q_sofa_indicator, axis=1)  # Sepsis detected
    patient_data['SOFA_indicator'] = patient_data.apply(sofa_indicator, axis=1)  # Organ dysfunction occurred
    patient_data['Mortality_sofa'] = patient_data.apply(mortality_sofa, axis=1)  # Mortality rate

    # SIRS-derived features.
    patient_data['Temp_sirs'] = patient_data['Temp'].apply(temp_sirs)
    patient_data['HR_sirs'] = patient_data['HR'].apply(heart_rate_sirs)
    patient_data['Resp_sirs'] = patient_data['Resp'].apply(resp_sirs)
    # NOTE(review): PaCO2 is scored with resp_sirs, the same helper used for
    # Resp. Confirm this is intended rather than a PaCO2-specific helper.
    patient_data['paco2_sirs'] = patient_data['PaCO2'].apply(resp_sirs)
    patient_data['wbc_sirs'] = patient_data['WBC'].apply(wbc_sirs)

    patient_data = t_suspicion(patient_data)
    patient_data = t_sofa(patient_data)
    patient_data['t_sepsis'] = patient_data.apply(t_sepsis, axis=1)

    # Convert to a plain float32 array *before* padding/truncating. The result
    # is then an ndarray on every path; previously records with >= max_rows
    # rows stayed a DataFrame, which torch.tensor() cannot consume.
    features = patient_data.to_numpy(dtype=np.float32)

    # Pad the remaining rows with zeros so every sample has the sequence
    # length the model was built for: (d_input, num_features).
    max_rows = d_input
    num_rows, num_features = features.shape
    if num_rows < max_rows:
        padding = np.zeros((max_rows - num_rows, num_features), dtype=np.float32)
        features = np.vstack((features, padding))
    elif num_rows > max_rows:
        # Keeps the earliest max_rows time steps; later rows are dropped.
        features = features[:max_rows]

    # Add the batch dimension: (1, max_rows, num_features).
    patient_tensor = torch.tensor(features).unsqueeze(0)

    model.eval()
    model.to(device)
    predictions = []
    probas = []

    with torch.no_grad():
        patient_tensor = patient_tensor.to(torch.float32).to(device)
        outputs, _, _, _, _, _, _ = model(patient_tensor, stage='test')

        _, predicted = torch.max(outputs, 1)
        probabilities = F.softmax(outputs, dim=1)

        predicted_class = predicted.detach().cpu().numpy()[0]

        predictions.append(predicted_class)
        # NOTE(review): this is the probability of the *predicted* class, not
        # necessarily of class 1. Confirm the downstream scorer expects this.
        probas.append(probabilities.detach().cpu().numpy()[0][predicted_class])

    return predictions, probas, patient_tensor

In [4]:
def evaluate(input_directory=None, output_directory="./predictions"):
    """Run the model over every patient file and save per-time-step predictions.

    For each ``.psv`` file in ``input_directory`` the record is replayed one
    time step at a time: at step ``t`` only rows ``0..t`` are passed to
    ``get_sepsis_score``. The resulting scores and labels are written to a
    file of the same name in ``output_directory``.

    Parameters
    ----------
    input_directory : str, optional
        Directory containing the patient ``.psv`` files. Defaults to the
        training_setA location used when this notebook was authored; pass the
        training_setB directory (or any other) to evaluate a different set.
    output_directory : str, optional
        Directory the prediction files are written to; created if missing.

    Returns
    -------
    tuple
        ``(model, data, current_data, data_df)`` where the last three are the
        values from the final file / time step processed, or ``None`` when
        nothing was processed.
    """
    if input_directory is None:
        # NOTE(review): machine-specific absolute path kept only as the
        # backward-compatible default; prefer passing input_directory.
        input_directory = "/localscratch/neeresh/data/physionet2019/physionet.org/files/challenge-2019/1.0.0/training/training_setA/"

    # Find the patient files: regular, non-hidden files ending in 'psv'.
    files = [
        f for f in os.listdir(input_directory)
        if os.path.isfile(os.path.join(input_directory, f))
        and not f.lower().startswith('.')
        and f.lower().endswith('psv')
    ]

    os.makedirs(output_directory, exist_ok=True)

    # Load the sepsis model with the notebook-level dimensions.
    model = load_sepsis_model(d_input=d_input, d_channel=d_channel, d_output=d_output)

    # Pre-bind the values returned for inspection so the return statement is
    # valid even when no file (or only empty files) is processed.
    data = current_data = data_df = None

    print('Predicting sepsis labels...')
    for f in tqdm.tqdm(files, desc="Remaining Files: ", total=len(files)):
        # Load data.
        input_file = os.path.join(input_directory, f)
        data = load_challenge_data(input_file)

        # Make predictions, one per recorded time step.
        num_rows = len(data)
        scores = np.zeros(num_rows)
        labels = np.zeros(num_rows)

        for t in range(num_rows):
            # Only the rows observed up to and including time t are visible.
            current_data = data[:t+1]
            current_labels, current_score, data_df = get_sepsis_score(current_data, model)
            scores[t] = current_score[0]
            labels[t] = current_labels[0]

        output_file = os.path.join(output_directory, f)
        save_challenge_predictions(output_file, scores, labels)

    return model, data, current_data, data_df

model, data, current_data, data_df = evaluate()

Loading GTN model...
Model is set to eval() mode...
Model is on the deivce: cuda
Predicting sepsis labels...


Remaining Files:   0%|          | 35/20336 [00:35<5:39:49,  1.00s/it] 


KeyboardInterrupt: 

In [None]:
from utils.evaluate_sepsis_score import evaluate_sepsis_score

# The label and prediction directories must contain the same number of files.
evaluate_sepsis_score(
    label_directory='./labels/',
    prediction_directory='./predictions/',
)

- **1. Load data from training_setA/training_setB only.**
- **2. Divide it into train and test.**
- **3. Train the model and evaluate it on test.**
- **4. Use the evaluate method to get the predictions.**
- **5. Run get_true_labels for the training_setA files.**
- **6. Run evaluate_sepsis_score.py using: `from utils.evaluate_sepsis_score import evaluate_sepsis_score`**

In [3]:
import pandas as pd 
import numpy as np

from utils.evaluate_sepsis_score import evaluate_sepsis_score
import os

In [7]:
# Score the saved predictions against the true labels and report each metric.
metrics = evaluate_sepsis_score(
    label_directory='./labels/',
    prediction_directory='./predictions_weight_ls/',
)
auroc, auprc, accuracy, f_measure, normalized_observed_utility = metrics

metric_report = (
    ("Model's ability to distinguish between positive and negative classes (AUROC)", auroc),
    ("Model's precision-recall trade-off (AUPRC)", auprc),
    ("Model's overall accuracy", accuracy),
    ("Model's balance between precision and recall (F-measure)", f_measure),
    ("Normalized utility score", normalized_observed_utility),
)
for description, value in metric_report:
    print(f"{description}: {value}")



Model's ability to distinguish between positive and negative classes (AUROC): 0.7421306751717418
Model's precision-recall trade-off (AUPRC): 0.0809671261481644
Model's overall accuracy: 0.15723062710781244
Model's balance between precision and recall (F-measure): 0.04804590176119706
Normalized utility score: -0.1468316803384114


In [8]:
# Display the headline metrics as a tuple (the normalized utility score is not included here).
auroc, auprc, accuracy, f_measure

(0.7421306751717418,
 0.0809671261481644,
 0.15723062710781244,
 0.04804590176119706)

In [1]:
import pandas as pd
import os
from utils.path_utils import project_root

def initialize_experiment(data_file=None):
    """Load the processed training examples and their per-patient metadata.

    Parameters
    ----------
    data_file : str, optional
        File name of the pickled dataset inside ``<project_root>/data/processed``
        (e.g. ``"training_ffill_bfill_zeros.pickle"``). Defaults to
        ``"final_dataset.pickle"``. Previously this argument was silently
        overwritten and had no effect.

    Returns
    -------
    training_examples
        Object unpickled from ``data_file``; laid out as
        [[patient1], [patient2], ..., [patientN]].
    lengths_list : list of int
        One integer per line of ``lengths.txt``.
    is_sepsis : list of int
        One integer per line of ``is_sepsis.txt``.
    """
    if data_file is None:
        data_file = "final_dataset.pickle"

    print(f"Dataset used: {data_file}")

    processed_dir = os.path.join(project_root(), 'data', 'processed')

    # [[patient1], [patient2], [patient3], ..., [patientN]]
    # NOTE: unpickling executes arbitrary code; only load trusted files.
    training_examples = pd.read_pickle(os.path.join(processed_dir, data_file))

    with open(os.path.join(processed_dir, 'lengths.txt')) as f:
        lengths_list = [int(length) for length in f.read().splitlines()]
    with open(os.path.join(processed_dir, 'is_sepsis.txt')) as f:
        is_sepsis = [int(is_sep) for is_sep in f.read().splitlines()]

    return training_examples, lengths_list, is_sepsis

training_examples, lengths_list, is_sepsis = initialize_experiment()

Dataset used: final_dataset.pickle


In [2]:
from utils.loader import make_loader
# Build the train/test loaders in 'padding' mode, along with the indices of each split.
# NOTE(review): the positional 128 is presumably the batch size — confirm against make_loader.
train_loader, test_loader, train_indicies, test_indicies = make_loader(training_examples, lengths_list, is_sepsis, 128, mode='padding')

Padding...: 100%|██████████| 16268/16268 [00:27<00:00, 590.89it/s]
Padding...: 100%|██████████| 4067/4067 [00:06<00:00, 587.95it/s]
