In [1]:
import pickle
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import torch.nn as nn
import torch.optim as optim
from focal_loss.focal_loss import FocalLoss
from torch.utils.data import Dataset, DataLoader, TensorDataset, WeightedRandomSampler, RandomSampler
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"
from big_utils import *

# Filepath to embeddings
fname = '/mnt/mimic/data/HAIM/mimic_extras/embeddings.csv'


In [2]:
quantization_config = BitsAndBytesConfig(load_in_4bit=True, 
                                         bnb_4bit_use_double_quant=True,
                                         bnb_4bit_quant_type="nf4",
                                         bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
gemma = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", quantization_config=quantization_config)

# Read data & extract labels and features
df = pd.read_csv(fname)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Load train/val sets and create data loaders
batch_size = 8

Data = DataSplit(df)
Data.split_data('all')

X, V = Data.get_data()

torch.manual_seed(42)

train_set = CustomDataset(X.values.tolist(), Data.y_train.tolist())
val_set = CustomDataset(V.values.tolist(), Data.y_val.tolist())

transposed_Y = list(map(list, zip(*Data.y_train.tolist())))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_death_small48['y'] = 1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_alive_big48['y'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_death_big48['y'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the

In [4]:
print(Data.y_train.tolist()[:10])


[[nan, nan, nan, nan, 1.0, nan, nan, nan, 1.0, 1.0, 0, 0], [nan, nan, nan, nan, nan, nan, nan, 0.0, 1.0, 1.0, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 1.0, nan, 0, 0], [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 1.0, nan, 0, 0], [nan, nan, nan, nan, nan, nan, nan, 0.0, nan, nan, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, -1.0, 0.0, 0, 0], [nan, nan, nan, nan, nan, 1.0, nan, 0.0, 0.0, 1.0, 0, 0], [nan, nan, nan, nan, 0.0, nan, 1.0, nan, 1.0, -1.0, 0, 0], [nan, nan, -1.0, nan, nan, 1.0, 1.0, 0.0, nan, nan, 0, 0]]


In [5]:
for y in transposed_Y:
    print(y[:10])

[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, -1.0]
[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]
[1.0, nan, nan, nan, nan, nan, nan, nan, 0.0, nan]
[nan, nan, 1.0, nan, 1.0, nan, 1.0, 1.0, nan, 1.0]
[nan, nan, nan, nan, nan, nan, nan, nan, 1.0, 1.0]
[nan, 0.0, 0.0, nan, 0.0, 0.0, 0.0, 0.0, nan, 0.0]
[1.0, 1.0, 1.0, nan, 1.0, nan, -1.0, 0.0, 1.0, nan]
[1.0, 1.0, nan, nan, nan, nan, 0.0, 1.0, -1.0, nan]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [6]:
weight_per_class = []
for y in transposed_Y[:-2]:
    y = torch.tensor(y)
    mask = ~torch.isnan(y)
    w0 = len(y[mask])/(2*sum(y[mask] == 0))
    w1 = len(y)/(2*sum(y == 1))
    w2 = len(y)/(2*sum(y == -1))
    weight_per_class.append(torch.tensor([w0, w1, w2], dtype = torch.float).to("cuda"))

for y in transposed_Y[-2:]:
    y = torch.tensor(y)
    w0 = len(y)/(2*sum(y == 0))
    w1 = len(y)/(2*sum(y == 1))
    weight_per_class.append(torch.tensor([w0, w1], dtype = torch.float).to("cuda"))
print(weight_per_class)

[tensor([ 10.3571,  29.7640, 566.4758], device='cuda:0'), tensor([  9.1859,  30.1991, 182.9245], device='cuda:0'), tensor([3.7322, 9.3533, 6.3534], device='cuda:0'), tensor([ 3.4092,  5.7080, 19.2552], device='cuda:0'), tensor([1.6664, 5.5757, 6.3904], device='cuda:0'), tensor([20.9051,  1.5318,  9.9890], device='cuda:0'), tensor([13.8496,  1.6084, 24.7509], device='cuda:0'), tensor([ 0.6262,  7.2866, 54.1163], device='cuda:0'), tensor([1.6278, 2.3627, 6.4884], device='cuda:0'), tensor([ 2.7703,  1.6306, 11.3699], device='cuda:0'), tensor([0.5517, 5.3336], device='cuda:0'), tensor([ 0.5127, 20.1385], device='cuda:0')]


In [7]:
sampler = RandomSampler(train_set, replacement=False)

train_loader = DataLoader(train_set, batch_size=batch_size, sampler=sampler, num_workers=5)
val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=5)

In [8]:
# Setting model and hyperparameters
vd_model = AutoEncoder(1024,2048)
ts_model = AutoEncoder(451,2048)
n_rad_model = AutoEncoder(768,2048)
vd_optimizer = optim.Adam(vd_model.parameters(), lr=0.0005, weight_decay=0.0003)
ts_optimizer = optim.Adam(ts_model.parameters(), lr=0.0005, weight_decay=0.0003)
n_rad_optimizer = optim.Adam(n_rad_model.parameters(), lr=0.003, weight_decay=0.003)
optimizers = [vd_optimizer, ts_optimizer, n_rad_optimizer]
#optimizers = [n_rad_optimizer]

In [9]:
loss_mse = nn.MSELoss()
loss_fns = []
for weight in weight_per_class:
    loss_fns.append(nn.CrossEntropyLoss(weight=weight))

In [10]:
print(vd_model)

AutoEncoder(
  (encoder): Sequential(
    (0): Linear(in_features=1024, out_features=2048, bias=True)
    (1): ReLU()
  )
  (decoder): Sequential(
    (0): Linear(in_features=2048, out_features=1024, bias=True)
    (1): ReLU()
  )
)


In [11]:
n_rad_model.to('cuda')
for batch_index, (x, y) in enumerate(train_loader, 1):
        #print('stuck')
        inputs, labels = x, y.to('cuda')
        vd_inputs = inputs['n_rad'].to('cuda')
        has_nan = torch.isnan(vd_inputs).any()
        if has_nan:
                print('orig', has_nan)
                break
        print(vd_inputs.shape)
        enc = n_rad_model.encoder(vd_inputs)
        has_nan = torch.isnan(enc).any()
        if has_nan:
                print('enc',has_nan)
                break
        print(enc)

torch.Size([8, 768])
tensor([[0.0074, 0.0000, 0.0000,  ..., 0.0000, 0.0271, 0.1658],
        [0.0720, 0.0000, 0.1096,  ..., 0.0000, 0.1047, 0.0000],
        [0.1739, 0.0940, 0.0000,  ..., 0.0000, 0.0793, 0.0000],
        ...,
        [0.1107, 0.1327, 0.0000,  ..., 0.0010, 0.0852, 0.0000],
        [0.1733, 0.0655, 0.0000,  ..., 0.0934, 0.1671, 0.0352],
        [0.1070, 0.0841, 0.0000,  ..., 0.0000, 0.0449, 0.0000]],
       device='cuda:0', grad_fn=<ReluBackward0>)
torch.Size([8, 768])
tensor([[0.0503, 0.1842, 0.0275,  ..., 0.0553, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0010, 0.0133],
        [0.0000, 0.1979, 0.2553,  ..., 0.0958, 0.0000, 0.0000],
        ...,
        [0.0886, 0.0704, 0.0000,  ..., 0.0000, 0.0263, 0.0000],
        [0.2032, 0.0000, 0.1534,  ..., 0.1251, 0.0459, 0.0735],
        [0.0616, 0.0000, 0.0000,  ..., 0.0000, 0.2095, 0.2468]],
       device='cuda:0', grad_fn=<ReluBackward0>)
torch.Size([8, 768])
tensor([[0.1316, 0.0506, 0.0418,  ..., 0.00

In [12]:
num_epochs = 10
beta = 0.1

fine_tuned_vd, fine_tuned_ts, fine_tuned_n_rad, train_losses, val_losses = training_loop(vd_model, ts_model, n_rad_model, optimizers, loss_mse, loss_fns, train_loader, val_loader, num_epochs, gemma, beta)

Starting training
tensor(True, device='cuda:0')
tensor(True, device='cuda:0')
tensor(False, device='cuda:0')
tensor(False, device='cuda:0')
torch.Size([8, 768])
torch.Size([8, 768])


RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn