In [None]:
# Mount Google Drive at /content/drive; the CSV files read later in this
# notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install bayesian-optimization
from bayes_opt import BayesianOptimization
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, Subset, WeightedRandomSampler
from torch.nn.utils.rnn import pack_sequence
from sklearn.metrics import f1_score,roc_auc_score
import torch.optim as optim
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.metrics import precision_recall_fscore_support as score

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting bayesian-optimization
  Downloading bayesian_optimization-1.4.3-py3-none-any.whl (18 kB)
Collecting colorama>=0.4.6
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama, bayesian-optimization
Successfully installed bayesian-optimization-1.4.3 colorama-0.4.6


In [None]:
class VitalSignsDataset(Dataset):
    """Sliding-window dataset over one of the pre-split sepsis CSV files.

    Each item is a ``(window, label)`` pair:

    * ``window`` -- ``slice_size`` consecutive rows of every column except
      ``SepsisLabel``, as a float32 array of shape ``(slice_size, n_features)``.
    * ``label`` -- the ``SepsisLabel`` of the window's last row, as a float32
      array of shape ``(1,)``.

    A window is only generated when its patient has at least ``slice_size``
    rows from the window start onwards, so windows do not run past the end of
    a patient's record. This assumes that all rows of a patient are stored
    contiguously in the CSV -- TODO confirm against the data files.

    Args:
        slice_size: Number of consecutive rows per window (must be >= 1).
        method: Which split to load: ``'train'``, ``'test'`` or ``'val'``.

    Raises:
        ValueError: If ``method`` is not a known split or ``slice_size`` < 1.
    """

    # Folder that contains one CSV per split.
    DATA_DIR = '/content/drive/MyDrive/DataScience/project/no_additional_features.zip (Unzipped Files)'
    # Accepted ``method`` values; the file name is derived from the split name.
    SPLITS = ('train', 'test', 'val')

    def __init__(self, slice_size, method):
        # Fail early and clearly instead of hitting an AttributeError on
        # ``self.df`` further down when the split name is mistyped.
        if method not in self.SPLITS:
            raise ValueError(
                f"method must be one of {self.SPLITS}, got {method!r}"
            )
        if slice_size < 1:
            raise ValueError(f"slice_size must be >= 1, got {slice_size!r}")

        self.method = method
        self.slice_size = slice_size
        self.df = pd.read_csv(
            f"{self.DATA_DIR}/{method}_set_interpolation_with_multivariate.csv"
        )

        # For every row, count how many rows of the same patient follow it.
        rows_after = self.df.groupby("Patient_ID").cumcount(ascending=False)
        # A row can start a window only if slice_size - 1 rows follow it.
        # Positional row numbers are stored because ``vitals`` and ``labels``
        # below are plain NumPy arrays indexed by position.
        self.idx_to_idx = np.flatnonzero(
            rows_after.to_numpy() >= self.slice_size - 1
        )

        # Every column except the label is used as a feature.
        # NOTE(review): this includes Patient_ID -- confirm that is intended.
        self.vitals = self.df.drop(["SepsisLabel"], axis=1).to_numpy(np.float32)
        self.labels = self.df[["SepsisLabel"]].to_numpy(np.float32)

    def __len__(self):
        """Return the number of windows that can be drawn from the split."""
        return len(self.idx_to_idx)

    def __getitem__(self, idx):
        """Return ``(window, label)`` for the ``idx``-th valid window."""
        start = self.idx_to_idx[idx]
        stop = start + self.slice_size
        # The label belongs to the last row of the window.
        return self.vitals[start:stop], self.labels[stop - 1]

    def get_labels(self):
        """Return the list of class labels, ``[0, 1]``."""
        return [0, 1]

In [None]:
class TimeSeriesModel(nn.Module):
    """Two-layer LSTM followed by a linear layer producing one logit per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state. This value is now
            honoured; it used to be accepted but replaced by a fixed 40.
    """

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
        )
        # The head consumes the LSTM output, so its width must follow hidden_size.
        self.fc1 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Map a ``(batch, seq, input_size)`` tensor to ``(batch, 1)`` raw logits.

        No sigmoid is applied here; only the output of the last time step is
        passed to the linear layer.
        """
        sequence_out, _ = self.lstm(x)
        return self.fc1(sequence_out[:, -1])

In [None]:
def compute_prediction_utility(labels, predictions, dt_early=-12, dt_optimal=-6, dt_late=3.0, max_u_tp=1, min_u_fn=-2, u_fp=-0.05, u_tn=0):
    """Return the total time-dependent utility of one patient's predictions.

    The onset time is ``t_sepsis = argmax(labels) - dt_optimal`` when any label
    is set, otherwise the patient is treated as non-septic. Per time step ``t``:

    * non-septic patient: ``u_fp`` for a positive prediction, ``u_tn`` otherwise;
    * septic patient, positive prediction: rises linearly up to ``max_u_tp`` at
      ``t_sepsis + dt_optimal`` (never below ``u_fp``), then falls linearly to 0
      at ``t_sepsis + dt_late``;
    * septic patient, negative prediction: 0 up to ``t_sepsis + dt_optimal``,
      then falls linearly to ``min_u_fn`` at ``t_sepsis + dt_late``;
    * any step after ``t_sepsis + dt_late`` contributes 0.

    Args:
        labels: Sequence of binary labels, one per time step.
        predictions: Sequence of binary predictions, same length as ``labels``.
        dt_early, dt_optimal, dt_late: Offsets relative to ``t_sepsis`` that
            delimit the reward window.
        max_u_tp, min_u_fn, u_fp, u_tn: Utility values for the four outcomes.

    Returns:
        Sum of the per-step utilities as a NumPy float.

    Raises:
        ValueError: If ``labels`` and ``predictions`` differ in length.
    """
    # Check inputs for errors.
    if len(predictions) != len(labels):
        raise ValueError(
            "Numbers of predictions and labels must be the same "
            f"(got {len(predictions)} predictions for {len(labels)} labels)."
        )

    # Does the patient eventually have sepsis?
    is_septic = bool(np.any(labels))
    if is_septic:
        t_sepsis = np.argmax(labels) - dt_optimal
    else:
        t_sepsis = float('inf')

    n = len(labels)

    # Define slopes and intercept points for utility functions of the form
    # u = m * t + b.
    m_1 = float(max_u_tp) / float(dt_optimal - dt_early)
    b_1 = -m_1 * dt_early
    m_2 = float(-max_u_tp) / float(dt_late - dt_optimal)
    b_2 = -m_2 * dt_late
    m_3 = float(min_u_fn) / float(dt_late - dt_optimal)
    b_3 = -m_3 * dt_optimal

    # Compare predicted and true conditions.
    u = np.zeros(n)
    for t in range(n):
        if t > t_sepsis + dt_late:
            # Past the end of the reward window: this step stays at 0.
            continue

        predicted = bool(predictions[t])
        before_optimal = t <= t_sepsis + dt_optimal

        if is_septic and predicted:
            # TP
            if before_optimal:
                u[t] = max(m_1 * (t - t_sepsis) + b_1, u_fp)
            else:
                u[t] = m_2 * (t - t_sepsis) + b_2
        elif is_septic:
            # FN
            if before_optimal:
                u[t] = 0
            else:
                u[t] = m_3 * (t - t_sepsis) + b_3
        elif predicted:
            # FP
            u[t] = u_fp
        else:
            # TN
            u[t] = u_tn

    # Find total utility for patient.
    return np.sum(u)

In [None]:
def Uscore(y_pred, y_actual, X_actual):
    """Return the normalised utility score of ``y_pred`` over all patients.

    For every patient three utilities are computed with
    ``compute_prediction_utility``: for the observed predictions, for the best
    possible predictions (positive from 12 steps before to 3 steps after the
    onset time) and for never predicting sepsis. The result is
    ``(observed - inaction) / (best - inaction)`` summed over patients.

    Args:
        y_pred: Binary predictions, one per row of ``y_actual`` and in the same
            row order. May be a torch tensor, NumPy array or sequence.
        y_actual: pandas Series of binary labels.
        X_actual: DataFrame sharing ``y_actual``'s index and containing a
            ``Patient_ID`` column.

    Returns:
        The normalised score. If ``best`` equals ``inaction`` (e.g. no patient
        is septic) the denominator is zero and the result is not finite.

    Raises:
        ValueError: If the number of predictions differs from the number of labels.
    """
    dt_early, dt_optimal, dt_late = -12, -6, 3

    # Attach the patient id to every label through the shared index.
    labelled = y_actual.to_frame().join(X_actual['Patient_ID'])
    labels = labelled.iloc[:, 0].to_numpy()

    # Work on a flat NumPy array so rows can be selected by position.
    if hasattr(y_pred, "detach"):
        # Torch tensor: drop autograd history and move to host memory first.
        y_pred = y_pred.detach().cpu().numpy()
    predictions = np.asarray(y_pred).reshape(-1)
    if len(predictions) != len(labels):
        raise ValueError(
            f"got {len(predictions)} predictions for {len(labels)} labels"
        )

    # Positional row numbers per patient. Unlike slicing between the first and
    # last index label, this stays correct when the index is not 0..n-1 or
    # when a patient's rows are not stored next to each other.
    rows_by_patient = labelled.groupby('Patient_ID').indices

    # Initialise utilities
    num_patients = len(rows_by_patient)
    observed_utilities = np.zeros(num_patients)
    best_utilities = np.zeros(num_patients)
    inaction_utilities = np.zeros(num_patients)

    for k, rows in enumerate(rows_by_patient.values()):
        patient_actual = labels[rows]
        patient_pred = predictions[rows]
        num_rows = len(patient_actual)

        best_predictions = np.zeros(num_rows)
        inaction_predictions = np.zeros(num_rows)

        if np.any(patient_actual):
            t_sepsis = np.argmax(patient_actual) - dt_optimal
            first = max(0, t_sepsis + dt_early)
            last = min(t_sepsis + dt_late + 1, num_rows)
            best_predictions[first:last] = 1

        observed_utilities[k] = compute_prediction_utility(
            patient_actual, patient_pred,
            dt_early=dt_early, dt_optimal=dt_optimal, dt_late=dt_late)
        best_utilities[k] = compute_prediction_utility(
            patient_actual, best_predictions,
            dt_early=dt_early, dt_optimal=dt_optimal, dt_late=dt_late)
        inaction_utilities[k] = compute_prediction_utility(
            patient_actual, inaction_predictions,
            dt_early=dt_early, dt_optimal=dt_optimal, dt_late=dt_late)

    unnormalized_observed_utility = np.sum(observed_utilities)
    unnormalized_best_utility = np.sum(best_utilities)
    unnormalized_inaction_utility = np.sum(inaction_utilities)

    return (unnormalized_observed_utility - unnormalized_inaction_utility) / (unnormalized_best_utility - unnormalized_inaction_utility)

In [None]:
# batch size 1024, epochs 50
def rf_val(batch_size, epochs, Window_Size, hiddensize, learningrate):
    """Train a ``TimeSeriesModel`` and return its utility score minus one.

    This is the objective handed to the Bayesian optimiser: it trains on the
    'train' split with class-balanced sampling, predicts every row of the
    'test' split and scores the predictions with ``Uscore``.

    Args:
        batch_size: Training batch size (cast to int).
        epochs: Number of passes over the training sampler (cast to int).
        Window_Size: Number of rows per training window (cast to int).
        hiddensize: LSTM hidden width passed to the model (cast to int).
        learningrate: Adam learning rate.

    Returns:
        ``Uscore - 1`` (written as ``-(1 - Uscore)``); larger is better.

    NOTE(review): the score is computed on the 'test' split, so the tuned
    hyper-parameters are selected on test data -- confirm this is intended.
    NOTE(review): training uses windows of ``Window_Size`` rows while
    evaluation feeds single-row windows -- confirm this is intended.
    """
    eval_batch_size = 4096  # rows per forward pass during evaluation

    batch_size = int(batch_size)
    epochs = int(epochs)
    Window_Size = int(Window_Size)
    hiddensize = int(hiddensize)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_dataset = VitalSignsDataset(slice_size=Window_Size, method="train")

    # Draw training windows with probability proportional to these per-class
    # weights so that both classes are seen at comparable rates.
    class_weights = {"0": 0.023471489401092338, "1": 0.9765285105989077}
    # The label of a window is the label of its last row; read all of them in
    # one vectorised lookup instead of iterating the dataset item by item.
    window_end_rows = train_dataset.idx_to_idx + Window_Size - 1
    train_labels = train_dataset.labels[window_end_rows, 0].astype(int)
    sample_weights = [class_weights[str(label)] for label in train_labels]
    sampler = WeightedRandomSampler(sample_weights, len(sample_weights))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, sampler=sampler)

    # Take the feature count from the data instead of hard-coding it.
    model = TimeSeriesModel(
        input_size=train_dataset.vitals.shape[1], hidden_size=hiddensize
    )
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=learningrate, betas=(0.9, 0.999))
    loss_fn = nn.BCEWithLogitsLoss()

    # Honour the tuned ``epochs`` value (it used to be overridden by a fixed 50).
    for epoch in range(epochs):
        model.train()
        for X_batch, y_batch in train_loader:
            X_batch = X_batch.to(device)
            y_batch = y_batch.to(device)

            y_pred = model(X_batch)
            loss = loss_fn(y_pred, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # One single-row window per CSV row, so predictions line up with the rows
    # of the frame that the dataset has already loaded.
    test_dataset = VitalSignsDataset(slice_size=1, method="test")
    y_test = test_dataset.df.SepsisLabel
    X_actual = test_dataset.df.drop('SepsisLabel', axis=1)
    test_loader = DataLoader(test_dataset, batch_size=eval_batch_size)

    # Evaluate in inference mode, in bounded batches and without autograd
    # bookkeeping, rather than pushing the whole split through in one pass.
    model.eval()
    logit_chunks = []
    with torch.no_grad():
        for X_batch, _ in test_loader:
            logit_chunks.append(model(X_batch.to(device)).cpu())
    logits = torch.cat(logit_chunks).flatten()

    # The model emits raw logits (it is trained with BCEWithLogitsLoss), so
    # convert to probabilities before applying the 0.5 decision threshold.
    y_pred = (torch.sigmoid(logits) > 0.5).float()

    return -(1 - Uscore(y_pred, y_test, X_actual))

# Main

In [None]:

# Search space: one (lower, upper) interval per argument of rf_val.
# rf_val casts batch_size, epochs, Window_Size and hiddensize with int().
pbounds = dict(
    batch_size=(12, 60),
    epochs=(48, 100),
    Window_Size=(10, 15),
    hiddensize=(30, 60),
    learningrate=(5e-6, 1e-5),
)

optimizer = BayesianOptimization(f=rf_val, pbounds=pbounds, random_state=42)

# 10 initial probes followed by 100 optimisation iterations; each one runs a
# full training and evaluation cycle through rf_val.
optimizer.maximize(init_points=10, n_iter=100)

|   iter    |  target   | Window... | batch_... |  epochs   | hidden... | learni... |
-------------------------------------------------------------------------------------


KeyboardInterrupt: ignored