# Notebook for testing mutlimodal capability of Gemma

In [27]:
import pickle
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from focal_loss.focal_loss import FocalLoss
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os
from pytictoc import TicToc
from utils import *
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Filepath to embeddings
fname = "/mnt/mimic/data/HAIM/mimic_extras/embeddings.csv"

# YES-TOKEN: 3276
# NO-TOKEN: 956

In [28]:
quantization_config = BitsAndBytesConfig(load_in_4bit=True, 
                                         bnb_4bit_use_double_quant=True,
                                         bnb_4bit_quant_type="nf4",
                                         bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b")
gemma = AutoModelForCausalLM.from_pretrained("google/gemma-2b", device_map="auto", quantization_config=quantization_config)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Setup functions for training

*Main*

In [24]:
input_text = "Based on the following image, output yes if the patient is likely to die and no otherwise."
input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
word_embs = gemma.get_input_embeddings().weight[input_ids.input_ids].to("cuda")

In [39]:
batch_size = 8
x_train, x_val, _, y_train, y_val, _ = data_split(df, pkl_list)

np.random.seed(42)

x_train_small = select_random_subset(x_train)
y_train_small = select_random_subset(y_train)

x_val_small = select_random_subset(x_val)
y_val_small = select_random_subset(y_val)

#x_train_small, y_train_small = extract_points(x_train, y_train, 100)
#x_val_small, y_val_small = extract_points(x_val, y_val, 20)

train_set = CustomDataset(x_train_small, y_train_small)
val_set = CustomDataset(x_val_small, y_val_small)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=5)
val_loader = DataLoader(val_set, batch_size=batch_size, num_workers=5)

w0 = len(y_train_small)/(2*sum(y_train_small == 0))
w1 = len(y_train_small)/(2*sum(y_train_small == 1))
weights = torch.tensor([w0, w1], dtype = torch.float).to("cuda")

model = ProjectionNN(embedding_size, projection_size)
optimizer = optim.Adam(model.parameters(), lr=0.0003)
loss_fn = nn.CrossEntropyLoss(weight=weights)
#loss_fn = FocalLoss(gamma=5)

num_epochs = 10

fine_tuned, train_losses, train_accs, val_losses, val_accs = training_loop(model, gemma, optimizer, loss_fn, train_loader, val_loader, num_epochs)

# Save model and results
folder = 'results_balanced_bce_001'
torch.save(fine_tuned, f"{folder}/finetuned.pth")

with open(f"{folder}/train_losses.pkl", 'wb') as f1:
    pickle.dump(train_losses, f1)

with open(f"{folder}/train_accs.pkl", 'wb') as f2:
    pickle.dump(train_accs, f2)

with open(f"{folder}/val_losses.pkl", 'wb') as f3:
    pickle.dump(val_losses, f3)

with open(f"{folder}/val_accs.pkl", 'wb') as f4:
    pickle.dump(val_accs, f4)



Starting training
Epoch 1/10: Train loss: 1.501, Train acc.: 0.808, Val. loss: 0.772, Val. acc.: 0.884
Epoch 2/10: Train loss: 0.855, Train acc.: 0.871, Val. loss: 0.698, Val. acc.: 0.952
Epoch 3/10: Train loss: 0.731, Train acc.: 0.882, Val. loss: 0.637, Val. acc.: 0.912
Epoch 4/10: Train loss: 0.672, Train acc.: 0.893, Val. loss: 0.598, Val. acc.: 0.937
Epoch 5/10: Train loss: 0.617, Train acc.: 0.900, Val. loss: 0.629, Val. acc.: 0.925
Epoch 6/10: Train loss: 0.590, Train acc.: 0.916, Val. loss: 0.564, Val. acc.: 0.886
Epoch 7/10: Train loss: 0.542, Train acc.: 0.920, Val. loss: 0.521, Val. acc.: 0.939
Epoch 8/10: Train loss: 0.559, Train acc.: 0.915, Val. loss: 0.558, Val. acc.: 0.963
Epoch 9/10: Train loss: 0.561, Train acc.: 0.918, Val. loss: 0.525, Val. acc.: 0.938
Epoch 10/10: Train loss: 0.545, Train acc.: 0.930, Val. loss: 0.525, Val. acc.: 0.943


RuntimeError: Parent directory results_balanced_bce_001 does not exist.

# Testing out gemma instruct on text generation

In [None]:
train, _, _, _ = data_split(df, pkl_list)

In [None]:
model = ProjectionNN().to('cuda')
emb = torch.tensor(train[0]).float().to(device='cuda')
projected = model(emb).to(device='cuda').to(torch.float16)
concatted = torch.cat((word_embs, projected), dim=1).to(torch.float16)
test = custom_output(concatted,gemma)
print(torch.softmax(test, dim=-1))