In [1]:
import pickle
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import torch.nn as nn
import torch.optim as optim
from focal_loss.focal_loss import FocalLoss
from torch.utils.data import Dataset, DataLoader, TensorDataset, WeightedRandomSampler, RandomSampler
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from big_utils import *

# Filepath to embeddings
fname = '/mnt/mimic/data/HAIM/mimic_extras/embeddings.csv'


In [2]:
quantization_config = BitsAndBytesConfig(load_in_4bit=True, 
                                         bnb_4bit_use_double_quant=True,
                                         bnb_4bit_quant_type="nf4",
                                         bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
gemma = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", quantization_config=quantization_config)

# Read data & extract labels and features
df = pd.read_csv(fname)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Load train/val sets and create data loaders
batch_size = 8

Data = DataSplit(df)
Data.split_data('all')

X, V = Data.get_data()

torch.manual_seed(42)

train_set = CustomDataset(X.values.tolist(), Data.y_train.tolist())
val_set = CustomDataset(V.values.tolist(), Data.y_val.tolist())

transposed_Y = list(map(list, zip(*Data.y_train.tolist())))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_death_small48['y'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_alive_big48['y'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_death_big48['y'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the

In [4]:
print(Data.y_train.tolist()[:10])


[[nan, nan, nan, nan, nan, 1.0, nan, 0.0, 1.0, nan, 0, 0], [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 1.0, nan, 0, 0], [nan, nan, nan, nan, nan, nan, nan, 0.0, nan, nan, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, -1.0, 0.0, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 0.0, 1.0, 0, 0], [nan, nan, nan, nan, 0.0, nan, 1.0, nan, 1.0, -1.0, 0, 0], [nan, nan, -1.0, nan, nan, 1.0, 1.0, 0.0, nan, nan, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 1.0, nan, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 1.0, 0.0, 0, 0]]


In [5]:
for y in transposed_Y:
    print(y[:10])

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, -1.0, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, 0.0, nan, nan, nan]
[1.0, nan, 1.0, nan, 1.0, 1.0, nan, 1.0, 1.0, 1.0]
[nan, nan, nan, nan, nan, nan, 1.0, 1.0, nan, nan]
[0.0, nan, 0.0, 0.0, 0.0, 0.0, nan, 0.0, 0.0, 0.0]
[1.0, nan, 1.0, nan, -1.0, 0.0, 1.0, nan, 1.0, 1.0]
[nan, nan, nan, nan, 0.0, 1.0, -1.0, nan, nan, 0.0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [6]:
weight_per_class = []
for y in transposed_Y[:-2]:
    y = torch.tensor(y)
    w0 = len(y)/(2*sum(y == 0))
    w1 = len(y)/(2*sum(y == 1))
    w2 = len(y)/(2*sum(y == -1))
    weight_per_class.append(torch.tensor([w0, w1, w2], dtype = torch.float).to("cuda"))

for y in transposed_Y[-2:]:
    y = torch.tensor(y)
    w0 = len(y)/(2*sum(y == 0))
    w1 = len(y)/(2*sum(y == 1))
    weight_per_class.append(torch.tensor([w0, w1], dtype = torch.float).to("cuda"))
print(weight_per_class)

[tensor([531.0000,  28.2259, 566.4000], device='cuda:0'), tensor([447.1579,  29.7063, 189.8547], device='cuda:0'), tensor([24.2570,  9.5087,  6.3724], device='cuda:0'), tensor([26.6124,  5.7757, 19.4862], device='cuda:0'), tensor([7.0215, 5.6005, 6.4645], device='cuda:0'), tensor([54.2875,  1.5393,  9.8220], device='cuda:0'), tensor([40.7971,  1.6119, 24.8785], device='cuda:0'), tensor([ 1.6313,  6.9998, 55.2585], device='cuda:0'), tensor([3.9512, 2.3762, 6.4547], device='cuda:0'), tensor([ 6.2528,  1.6355, 11.2530], device='cuda:0'), tensor([0.5520, 5.3042], device='cuda:0'), tensor([ 0.5133, 19.2981], device='cuda:0')]


In [7]:
sampler = RandomSampler(train_set, replacement=False)

train_loader = DataLoader(train_set, batch_size=batch_size, sampler=sampler, num_workers=5)
val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=5)

In [8]:
# Setting model and hyperparameters
vd_model = AutoEncoder(1024,2048)
ts_model = AutoEncoder(451,2048)
n_rad_model = AutoEncoder(768,2048)
vd_optimizer_bce = optim.Adam(vd_model.parameters(), lr=0.0005, weight_decay=0.0003)
ts_optimizer_bce = optim.Adam(ts_model.parameters(), lr=0.0005, weight_decay=0.0003)
n_rad_optimizer_bce = optim.Adam(n_rad_model.parameters(), lr=0.003, weight_decay=0.003)
vd_optimizer_mse = optim.Adam(vd_model.parameters(), lr=0.0005, weight_decay=0.0003)
ts_optimizer_mse = optim.Adam(vd_model.parameters(), lr=0.0005, weight_decay=0.0003)
n_rad_optimizer_mse = optim.Adam(vd_model.parameters(), lr=0.003, weight_decay=0.003)
optimizers = [vd_optimizer_bce, ts_optimizer_bce, n_rad_optimizer_bce, vd_optimizer_mse, ts_optimizer_mse, n_rad_optimizer_mse]
#optimizers = [vd_optimizer, ts_optimizer, n_rad_optimizer]
optimizers = [n_rad_optimizer_bce, n_rad_optimizer_mse]

In [9]:
loss_mse = nn.MSELoss()
loss_fns = []
for weight in weight_per_class:
    loss_fns.append(nn.CrossEntropyLoss(weight=weight))

In [10]:
print(vd_model)

AutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=1024, out_features=2048, bias=True)
    (1): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=2048, out_features=1024, bias=True)
    (1): ReLU()
  )
)


In [11]:
# vd_model.to('cuda')
# for batch_index, (x, y) in enumerate(train_loader, 1):
#         #print('stuck')
#         inputs, labels = x, y.to('cuda')
#         vd_inputs = inputs['vd'].to('cuda')
#         has_nan = torch.isnan(vd_inputs).any()
#         if has_nan:
#                 print('orig', has_nan)
#                 break
#         #print(vd_inputs.shape)
#         enc = vd_model.encoder(vd_inputs)
#         has_nan = torch.isnan(enc).any()
#         if has_nan:
#                 print('enc',has_nan)
#                 break
#         #print(enc)

In [12]:
num_epochs = 10
beta = 0.1

fine_tuned_vd, fine_tuned_ts, fine_tuned_n_rad, train_losses, val_losses = training_loop(vd_model, ts_model, n_rad_model, optimizers, loss_mse, loss_fns, train_loader, val_loader, num_epochs, gemma, beta)

Starting training
tensor(7.0012, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.2924, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(7.0305, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(9.5531, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.2198, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(9.5751, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(8.9036, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.9636, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(9.0000, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(6.4226, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.5981, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(6.4825, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(3.8935, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.5545, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(3.9489, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(4.9673, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x7f9ff7584470>>
Traceback (most recent call last):
  File "/home/edgelab/miniconda3/envs/gemma/lib/python3.12/site-packages/ipykernel/ipkernel.py", line 770, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


tensor(2.1532, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.3828, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(2.1914, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(1.8883, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(1.2233, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(2.0106, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(2.4978, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(9.0313, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(3.4009, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(0.9757, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.9574, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(1.0714, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(1.1836, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(0.3836, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(1.2220, device='cuda:0', grad_fn=<AddBackward0>)
True
tensor(2.1644, device='cuda:0', grad_fn=<DivBackward0>)
True
tensor(1.3014, device='c