<a href="https://colab.research.google.com/github/pampamda/Chexpert_LLaVA-1.5-7B/blob/finetune/finetune_script/Unsloth_llava.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Fine-tune LLaVA-1.5-7B on CheXpert reports (Unsloth + LoRA)

In [None]:
from google.colab import drive
import os
from pathlib import Path

# Attach Google Drive to the Colab VM. The data, checkpoint and export paths
# configured in the next cell all live under this mount point.
print("\nMounting Google Drive...")
drive.mount('/content/drive')


Mounting Google Drive...
Mounted at /content/drive


In [None]:
# Project root on the mounted Drive; every location below is relative to it.
DRIVE_DATA_PATH = "/content/drive/MyDrive/Group_Project/IS"
CSV_FILE = "data/processed_chexpert_reports_2000.csv"
IMAGE_FOLDER = "data/filtered_chexpert_images_2000"
MODEL_FOLDER = "llava_finetuned_mvp2000_v2"
HF_FOLDER = "hf/llava_mvp2000_v2"

# Hugging Face Hub identifier of the base model that gets fine-tuned.
MODEL_ID = "llava-hf/llava-1.5-7b-hf"

# Resolve all relative locations against the Drive root in a single pass.
csv_path, image_folder_path, model_output_path, hf_output_path = (
    os.path.join(DRIVE_DATA_PATH, relative_location)
    for relative_location in (CSV_FILE, IMAGE_FOLDER, MODEL_FOLDER, HF_FOLDER)
)

# Echo the two output locations so a wrong folder is noticed before training.
print("checkpoint folder: ", model_output_path)
print("hf folder: ", hf_output_path)

checkpoint folder:  /content/drive/MyDrive/Group_Project/IS/llava_finetuned_mvp2000_v2
hf folder:  /content/drive/MyDrive/Group_Project/IS/hf/llava_mvp2000_v2


In [None]:
!pip install -U transformers
!pip install peft
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

Collecting transformers
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)
[0mInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.56.2
    Uninstalling transformers-4.56.2:
      Successfully uninstalled transformers-4.56.2
Successfully installed transformers-4.57.1
[0mCollecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-f0kr6rh5/unsloth_3fd45a6a13374793b1b21061f886d592
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-f0kr6rh5/unsloth_3fd45a6a13374793b1b21061f886d592
  Resolved https://github.com/unslothai/unsloth.git to commit 3d98df6e0f7da49437731eadd5bde767fc1b20af
  Installing build dependencies ... [?25l[?25hd

In [None]:
import torch
from unsloth import FastLanguageModel

# Load the base model and its processor through unsloth.
print(f"\nUsing model: {MODEL_ID}")

max_seq_length = 1024 # Set max sequence length
dtype = torch.float16 # Set dtype

try:
    model, processor = FastLanguageModel.from_pretrained(
        model_name = MODEL_ID,
        max_seq_length = max_seq_length,
        dtype = dtype,
        load_in_4bit = True, # Use 4-bit quantization
        device_map="auto",
    )
except Exception as e:
    # Print the short message, then re-raise so the full traceback is shown
    # and "Run all" stops here instead of hiding the root cause.
    print(f"❌ Model loading failed: {e}")
    raise
else:
    # The loader returns a processor; the text tokenizer is an attribute of it.
    tokenizer = processor.tokenizer

    print("✅ Model loaded successfully with unsloth!")
    # NOTE(review): the two sizes printed below differ in the recorded output
    # (32064 vs 32000) -- presumably padded embeddings / added tokens; confirm.
    print("Model config vocab size:", model.config.vocab_size)
    print("Tokenizer vocab size:", tokenizer.vocab_size)


Using model: llava-hf/llava-1.5-7b-hf
==((====))==  Unsloth 2025.10.3: Fast Clip patching. Transformers: 4.56.2.
   \\   /|    NVIDIA L4. Num GPUs = 1. Max memory: 22.161 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 8.9. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/4.04G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/136 [00:00<?, ?B/s]

processor_config.json:   0%|          | 0.00/173 [00:00<?, ?B/s]

chat_template.json:   0%|          | 0.00/701 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/41.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/552 [00:00<?, ?B/s]

✅ Model loaded successfully with unsloth!
Model config vocab size: 32064
Tokenizer vocab size: 32000


In [None]:
# Build the LoRA configuration, preferring an unsloth-provided LoraConfig and
# falling back to peft's.
print("\nConfigure LoRA parameters with unsloth's LoraConfig...")

try:
    # Attempt to import unsloth's LoraConfig
    from unsloth.lora import LoraConfig as FastLoraConfig
    print("Successfully imported LoraConfig from unsloth.lora.")
    using_unsloth_config = True
except ImportError:
    print("Could not import LoraConfig from unsloth.lora.")
    # Fallback to peft.LoraConfig
    try:
        from peft import LoraConfig as FastLoraConfig
    except ImportError as err:
        # Fail here, loudly: continuing would leave `lora_config` undefined and
        # the notebook would only break later with an unrelated NameError.
        raise ImportError(
            "Could not import LoraConfig from either unsloth.lora or peft."
        ) from err
    print("Falling back to using peft.LoraConfig.")
    using_unsloth_config = False

lora_config = FastLoraConfig(
    r=16,  # LoRA rank
    lora_alpha=32,  # LoRA alpha
    # Query/value attention projections only.
    # NOTE(review): these names are matched by suffix, and the 9,961,472
    # trainable parameters reported after get_peft_model are consistent with
    # adapters on both the language model and the vision tower -- confirm that
    # adapting the vision encoder is intended.
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,  # Dropout rate
    bias="none",  # don't train bias
    task_type="CAUSAL_LM"  # task type: causal language model
)

print("\nLoRA configuration details:")
print(f"  - Using unsloth's config: {using_unsloth_config}")
print(f"  - Rank (r): {lora_config.r}")
print(f"  - Alpha: {lora_config.lora_alpha}")
print(f"  - Target modules: {len(lora_config.target_modules)} layers")
print(f"  - Dropout: {lora_config.lora_dropout}")
print("\n✅ LoRA configuration prepared.")



Configure LoRA parameters with unsloth's LoraConfig...
Could not import LoraConfig from unsloth.lora.
Falling back to using peft.LoraConfig.

LoRA configuration details:
  - Using unsloth's config: False
  - Rank (r): 16
  - Alpha: 32
  - Target modules: 2 layers
  - Dropout: 0.05

✅ LoRA configuration prepared.


In [None]:
from torch.utils.data import Dataset
from PIL import Image
import os
import torch

class ChestXrayDataset(Dataset):
    """Map-style dataset producing supervised fine-tuning tensors for one image/question/answer record.

    Each record in ``data_list`` is a dict with:

    * ``"image_path"`` or ``"image"``: path of the image file (joined with
      ``image_root`` when it is relative and ``image_root`` is non-empty);
    * ``"conversations"``: a sequence whose first two entries are dicts with a
      ``"value"`` key, holding the question and the reference answer.

    Args:
        data_list: Sequence of records as described above.
        processor: Callable that accepts ``text``/``images`` plus tokenizer
            options and returns a mapping with ``input_ids``,
            ``attention_mask`` and optionally ``pixel_values``.
        image_root: Optional directory prepended to relative image paths.
        max_length: Length every sequence is padded/truncated to.
    """

    def __init__(self, data_list, processor, image_root="", max_length=1024):
        self.data = data_list
        self.processor = processor
        self.image_root = image_root
        self.max_length = max_length

    def __len__(self):
        """Return the number of records."""
        return len(self.data)

    @staticmethod
    def _build_labels(input_ids, attention_mask, prompt_len):
        """Copy ``input_ids`` and set every non-answer position to -100.

        Padding positions (``attention_mask == 0``) and the first
        ``prompt_len`` real tokens are masked. The prompt span is located from
        the first real token, so the result is the same whether the padding
        sits on the left or on the right.
        """
        labels = input_ids.clone()
        is_real = attention_mask.bool()
        # Padding must never contribute to the loss.
        labels[~is_real] = -100
        real_positions = is_real.nonzero().flatten()
        if prompt_len > 0 and real_positions.numel() > 0:
            first_real = int(real_positions[0])
            labels[first_real:first_real + prompt_len] = -100
        return labels

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask``, ``labels`` (and ``pixel_values`` if produced) for record ``idx``.

        Raises:
            KeyError: If the record has neither ``"image_path"`` nor ``"image"``.
        """
        item = self.data[idx]
        # get image path (supports either "image_path" or "image")
        img_path = item.get("image_path") or item.get("image")
        if not img_path:
            raise KeyError(f"Sample {idx} has neither an 'image_path' nor an 'image' entry")
        if self.image_root and not os.path.isabs(img_path):
            img_path = os.path.join(self.image_root, img_path)

        # convert() returns an independent in-memory image, so the underlying
        # file can be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")

        # extract question and answer
        conversations = item['conversations']
        question = conversations[0]['value']
        answer = conversations[1]['value']

        # If question already contains "<image>", don't add another one.
        if "<image>" in question:
            prompt_text = f"User: {question}\nAssistant:"
        else:
            prompt_text = f"User: <image>\n{question}\nAssistant:"

        full_text = prompt_text + " " + answer

        # Identical settings for both calls so the prompt tokens line up with
        # the start of the full sequence.
        tokenize_kwargs = dict(
            images=image,
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
        )
        prompt_tokens = self.processor(text=prompt_text, **tokenize_kwargs)
        full_tokens = self.processor(text=full_text, **tokenize_kwargs)

        # get tensors (remove batch dim and clone)
        input_ids = full_tokens["input_ids"][0].clone()
        attention_mask = full_tokens["attention_mask"][0].clone()

        pixel_values = full_tokens.get("pixel_values")
        if pixel_values is not None:
            pixel_values = pixel_values[0].clone()

        # Count prompt tokens from the attention mask rather than by comparing
        # against the pad id, which is ambiguous when pad and eos share an id.
        prompt_len = int(prompt_tokens["attention_mask"][0].sum().item())

        labels = self._build_labels(input_ids, attention_mask, prompt_len)

        out = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }
        if pixel_values is not None:
            out["pixel_values"] = pixel_values

        return out

In [None]:
import json
from transformers import (
    TrainingArguments,
    Trainer
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

print("\nLoading training data and creating dataset...")
# Derive the location from DRIVE_DATA_PATH instead of repeating the absolute
# path, so changing the project root in the config cell also moves this file.
output_json = os.path.join(DRIVE_DATA_PATH, "data/training_data_2000.json")

with open(output_json, 'r', encoding='utf-8') as f:
    training_data = json.load(f)

# Fail early with a clear message rather than starting a run on no data.
if not training_data:
    raise ValueError(f"No training samples found in {output_json}")

# Keep the dataset's padding length tied to the sequence length the model was
# loaded with.
train_dataset = ChestXrayDataset(training_data, processor, max_length=max_seq_length)


Loading training data and creating dataset...


In [None]:
import math

# Wrap the model with LoRA adapters once; re-running the cell must not stack a
# second set of adapters on an already wrapped model.
if getattr(model, "peft_config", None) is None:
    print("\nApplying LoRA configuration to the model...")
    model = get_peft_model(model, lora_config)
    print("✅ LoRA applied successfully using get_peft_model!")
else:
    print("\nModel is already a PEFT model. Skipping get_peft_model.")

print("\nParameter statistics after applying LoRA:")
model.print_trainable_parameters()

OUTPUT_DIR = model_output_path

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=1, # Keep 1 epoch for MVP
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    learning_rate=2e-5,
    warmup_steps=100,
    fp16=True,
    logging_steps=5,
    save_strategy="epoch",
    optim="paged_adamw_32bit",
    load_best_model_at_end=False,
    report_to="none",
    # The dataset returns pixel_values; keep every column it produces.
    remove_unused_columns=False,
)

print("\nTraining configuration:")
print(f"  - Training epochs: {training_args.num_train_epochs}")
print(f"  - Actual batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"  - Learning rate: {training_args.learning_rate}")
# Estimate the number of optimizer steps. A trailing partial accumulation
# group still produces an update, so round up instead of flooring (flooring
# gave 498 here while the recorded run performs 499 steps).
batches_per_epoch = math.ceil(len(train_dataset) / training_args.per_device_train_batch_size)
updates_per_epoch = max(math.ceil(batches_per_epoch / training_args.gradient_accumulation_steps), 1)
total_steps = math.ceil(training_args.num_train_epochs * updates_per_epoch)
print(f"  - Total training steps: ~{total_steps}")
print(f"  - Output directory: {OUTPUT_DIR}")

print("\nInstantiating Trainer...")
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    # `tokenizer=` is deprecated on Trainer; `processing_class=` is its replacement.
    processing_class=processor.tokenizer, # Use tokenizer from the unsloth processor
)

print("✅ Trainer created successfully with PEFT model and unsloth processor/tokenizer.")



Applying LoRA configuration to the model...
✅ LoRA applied successfully using get_peft_model!

Parameter statistics after applying LoRA:
trainable params: 9,961,472 || all params: 7,073,388,544 || trainable%: 0.1408

Training configuration:
  - Training epochs: 1
  - Actual batch size: 4
  - Learning rate: 2e-05
  - Total training steps: ~498
  - Output directory: /content/drive/MyDrive/Group_Project/IS/data/llava_finetuned_mvp2000_v2

Instantiating Trainer...
✅ Trainer created successfully with PEFT model and unsloth processor/tokenizer.


  trainer = Trainer(


In [None]:
import math

print("\n" + "🚀" * 30)
print("Starting training!")
print("🚀" * 30)

print("\nEstimated training time (with unsloth acceleration):")
# `total_steps` is normally set by the Trainer-setup cell; recompute it only
# when this cell is run without that cell having been executed.
if 'total_steps' not in globals():
    print("Warning: total_steps variable not found. Recalculating...")
    # Recalculate total steps based on current training_args and train_dataset
    try:
        # Round up: a trailing partial accumulation group still yields an update.
        batches_per_epoch = math.ceil(len(train_dataset) / training_args.per_device_train_batch_size)
        updates_per_epoch = max(math.ceil(batches_per_epoch / training_args.gradient_accumulation_steps), 1)
        total_steps = math.ceil(training_args.num_train_epochs * updates_per_epoch)
        print(f"  - Total training steps: ~{total_steps}")
    except Exception as e:
        print(f"Could not calculate total steps: {e}")
        total_steps = "Unknown" # Set to unknown if calculation fails
else:
    print(f"  - Total training steps: {total_steps}")


print("  - Training speedup expected compared to previous runs.")
print("\nStarting training, please keep Colab tab open...\n")

try:
    train_result = trainer.train()
except Exception as e:
    print(f"\n❌ Training failed: {e}")
    print("\nPossible reasons:")
    print("1. Insufficient VRAM → reduce batch_size or r")
    print("2. Data issues → check images and reports (check dataset __getitem__ output)")
    print("3. Interrupted → re-run")
    # Re-raise so the notebook shows the full traceback and stops here,
    # instead of replacing the real error with a SystemExit.
    raise
else:
    # Only reached when training finished without an exception.
    print("\n" + "🎉" * 30)
    print("Training complete!")
    print("🎉" * 30)
    print(f"\nTraining statistics:")
    print(f"  - Total steps: {train_result.global_step}")
    print(f"  - Training loss: {train_result.training_loss:.4f}")

The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'eos_token_id': 2, 'bos_token_id': 1}.



🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀
Starting training!
🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀🚀

Estimated training time (with unsloth acceleration):
  - Total training steps: 498
  - Training speedup expected compared to previous runs.

Starting training, please keep Colab tab open...



==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 1,995 | Num Epochs = 1 | Total steps = 499
O^O/ \_/ \    Batch size per device = 1 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (1 x 4 x 1) = 4
 "-____-"     Trainable parameters = 9,961,472 of 7,073,388,544 (0.14% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
5,7.8904
10,7.4716
15,7.714
20,6.3711
25,8.3319
30,7.5501
35,7.7114
40,8.0412
45,7.2858
50,7.6207


Step,Training Loss
5,7.8904
10,7.4716
15,7.714
20,6.3711
25,8.3319
30,7.5501
35,7.7114
40,8.0412
45,7.2858
50,7.6207



🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉
Training complete!
🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉🎉

Training statistics:
  - Total steps: 499
  - Training loss: 3.9836


In [None]:
# Write the trained model (through the Trainer) and the processor files into
# the same directory, OUTPUT_DIR, which the Trainer-setup cell set to
# model_output_path.
trainer.save_model(OUTPUT_DIR)
processor.save_pretrained(OUTPUT_DIR)

print(f"✅ Model saved to: {OUTPUT_DIR}")

✅ Model saved to: /content/drive/MyDrive/Group_Project/IS/data/llava_finetuned_mvp2000_v2


# Save the merged model in Hugging Face format

In [None]:
# Inspection only: display the class of `model` before the merge in the next
# cell rebinds it. No state is changed here.
type(model)

In [None]:
# Fold the LoRA adapters into the base weights, then export the result
# together with the processor files.
# The guard keeps this cell re-runnable: after the first run `model` is bound
# to the merged model, which presumably no longer exposes merge_and_unload(),
# so an unguarded second run would raise AttributeError.
# NOTE(review): the base model was loaded with load_in_4bit=True -- verify that
# the exported weights have the precision you expect.
if hasattr(model, "merge_and_unload"):
    model = model.merge_and_unload()
save_dir = hf_output_path

model.save_pretrained(save_dir)
processor.save_pretrained(save_dir)