In [None]:
# PEFT (LoRA) fine-tuning of LLaMA-2 on the MIMIC dataset

In [None]:
! pip install transformers datasets torch peft accelerate bitsandbytes


Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.45.2-py3-none-manylinux_2_24_x86_64.whl.metadata (5.8 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_6

In [None]:
import numpy as np
import pandas as pd

import torch

import matplotlib.pyplot as plt
import seaborn as sns

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    Trainer,
    TrainingArguments,
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from datasets import load_dataset


In [None]:
# HuggingFace login
# Interactive login to the Hugging Face Hub: the CLI prompts for an access token
# and caches it on disk for later downloads.
# NOTE(review): presumably required because the Llama-2 checkpoint is gated -- confirm.
# Clear this cell's output before sharing; it echoes the token's name.

!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGrained).
The token `Colab_MIMIC` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `Colab_

In [None]:
# load model

model_name = "meta-llama/Llama-2-7b-hf"
# define tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
# add padding token: preprocessing pads to a fixed length, which needs a pad
# token, so the EOS token is reused for that purpose
tokenizer.pad_token = tokenizer.eos_token

# 4-bit quantisation is requested through a BitsAndBytesConfig object; passing
# load_in_4bit directly to from_pretrained is deprecated and slated for removal.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",  # let accelerate place the weights on the available device(s)
    quantization_config=quantization_config,
    torch_dtype=torch.float16,
)




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [None]:
# load dataset
dataset = load_dataset("ChayanM/mimic-llama2-150k", split="train")

# Hold out 10% of the examples; the fixed seed keeps the shuffle reproducible.
dataset_splits = dataset.train_test_split(test_size=0.1, shuffle=True, seed=42)
dataset_train = dataset_splits["train"]
dataset_test = dataset_splits["test"]


In [None]:
# Preprocessing function for "Findings"
def preprocess_function(examples, max_length=512):
    """Tokenize the 'FINDINGS:' text and build causal-LM labels.

    Args:
        examples: Mapping with a 'FINDINGS:' entry. With ``Dataset.map(batched=True)``
            this is a list of strings (one per row); a single string (non-batched
            call) is accepted as well.
        max_length: Fixed sequence length every example is padded/truncated to.

    Returns:
        The tokenizer output with an added 'labels' entry. Labels equal
        'input_ids' except at padded positions, which are set to -100 so the
        loss ignores them.
    """
    findings = examples['FINDINGS:']
    is_single = findings is None or isinstance(findings, str)
    raw_texts = [findings] if is_single else list(findings)

    # Apply the fallback per example: in batched mode the column is a list, so a
    # truthiness check on the whole column would never catch an empty/None row.
    texts = [text if text else "No findings available." for text in raw_texts]

    encoding = tokenizer(texts, padding="max_length", truncation=True, max_length=max_length)

    # The pad token is the EOS token, so padding cannot be told apart by id.
    # Use the attention mask instead and mark padded positions with -100.
    encoding['labels'] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(encoding['input_ids'], encoding['attention_mask'])
    ]

    if is_single:
        # Undo the one-element batch that was created for a non-batched call.
        return {key: values[0] for key, values in encoding.items()}
    return encoding

# Tokenize both splits in batches; every original column is dropped so only the
# fields returned by preprocess_function remain.
map_kwargs = dict(batched=True, remove_columns=dataset.column_names)
tokenized_datasets_train = dataset_train.map(preprocess_function, **map_kwargs)
tokenized_datasets_test = dataset_test.map(preprocess_function, **map_kwargs)


Map:   0%|          | 0/134780 [00:00<?, ? examples/s]

Map:   0%|          | 0/14976 [00:00<?, ? examples/s]

In [None]:
# LoRA Configuration
lora_config = LoraConfig(
    r=8,  # Low-rank adaptation
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    # Declare the task so PEFT wraps the model with its causal-LM specific class.
    task_type="CAUSAL_LM",
)

# The base model is loaded in 4-bit; PEFT recommends preparing a quantized model
# for training before the adapters are attached.
model = prepare_model_for_kbit_training(model)

# NOTE: `model` is rebound here, so re-running this cell wraps the model again;
# re-run the model-loading cell first.
model = get_peft_model(model, lora_config)


In [None]:
# train

training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # 2 x 4 = 8 sequences per device per optimizer step
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=1000,  # a checkpoint is written to output_dir every 1000 steps
    save_total_limit=2,
    fp16=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets_train,
    eval_dataset=tokenized_datasets_test,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class=tokenizer,
)

trainer.train()

# Persist the final adapter weights and the tokenizer side by side.
# NOTE: these lines are skipped if training is interrupted.
model.save_pretrained("llama_mimic_finetuned")
tokenizer.save_pretrained("llama_mimic_finetuned")


  trainer = Trainer(


Step,Training Loss
10,19.4279
20,1.2161
30,0.5218
40,0.4447
50,0.3813
60,0.3223
70,0.3179
80,0.2674
90,0.2727
100,0.2945


KeyboardInterrupt: 

In [None]:
# evaluate
# Runs the Trainer's evaluation pass over the eval_dataset it was constructed
# with (the tokenized test split) and shows the returned metrics.

# Switch the model to evaluation mode before measuring.
model.eval()

result = trainer.evaluate()
# Bare last expression so the notebook displays the metrics.
result

Step,Training Loss
10,19.4279
20,1.2161
30,0.5218
40,0.4447
50,0.3813
60,0.3223
70,0.3179
80,0.2674
90,0.2727
100,0.2945


KeyboardInterrupt: 

In [None]:
# save results
# Bundle the files of the step-1000 checkpoint into one tar archive inside
# ./results. The checkpoint name is hard-coded, so it must already exist
# (save_steps=1000 writes it after 1000 training steps).
!tar -cvf ./results/ckpnt1000.tar ./results/checkpoint-1000/*

./results/checkpoint-1000/adapter_config.json
./results/checkpoint-1000/adapter_model.safetensors
./results/checkpoint-1000/optimizer.pt
./results/checkpoint-1000/README.md
./results/checkpoint-1000/rng_state.pth
./results/checkpoint-1000/scheduler.pt
./results/checkpoint-1000/special_tokens_map.json
./results/checkpoint-1000/tokenizer_config.json
./results/checkpoint-1000/tokenizer.json
./results/checkpoint-1000/tokenizer.model
./results/checkpoint-1000/trainer_state.json
./results/checkpoint-1000/training_args.bin


In [None]:
# Peek at the first held-out example to see the raw text format of 'FINDINGS:'.
dataset_test[0]

{'FINDINGS:': '<s>[INST] What are the key findings of this report? [INST] there is a moderate left pleural effusion. there is increased density in the retrocardiac area consistent with partial atelectasis or consolidation. the right lung is clear. the right costophrenic sulcus is blunted. mediastinal structures are unremarkable. the bony thorax is grossly intact </s>'}

In [None]:
# inference
# NOTE(review): the training rows are wrapped in an "[INST] ... [INST]" prompt
# (see the sample displayed from dataset_test), while this prompt is raw text --
# confirm that is intended.

input_text = 'there is a moderate left pleural effusion. there is increased density in the retrocardiac area consistent with partial atelectasis or consolidation.'

# Call the tokenizer directly so the attention mask is returned with the ids.
inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
input_ids = inputs["input_ids"]

# Generate predictions
# The pad token is the EOS token, so pass the attention mask and pad id
# explicitly instead of letting generate() guess them.
# max_length counts the prompt tokens as well as the generated ones.
outputs = model.generate(
    input_ids,
    attention_mask=inputs["attention_mask"],
    pad_token_id=tokenizer.pad_token_id,
    max_length=100,
)
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(generated_text)



there is a moderate left pleural effusion. there is increased density in the retrocardiac area consistent with partial atelectasis or consolidation. there is no focal consolidation concerning for pneumonia. there is no pneumothorax. the cardiomediastinal silhouette is within normal limits. there is no acute osseous abnormality. 
