### Mount Google Drive

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

### Setup Project





In [None]:
# Upload and unzip project code

from google.colab import files
import os
import sys

# Upload the zip file
if not os.path.exists('/content/fineTune.zip'):
  uploaded = files.upload()
  zip_name = list(uploaded.keys())[0]
else:
  zip_name = '/content/fineTune.zip'
  print("FineTune.zip already exists")

# Unzip into the main content directory
!unzip -q {zip_name}

# IMPORTANT: Add your project's root directory to the Python path
# Add your project's root directory to Python's search path
project_root = '/content/fineTune'
if project_root not in sys.path:
    sys.path.append(project_root)
    print(f"✅ Added '{project_root}' to Python's path.")

In [None]:
# Capture the kernel's current working directory via the %pwd line magic.
cwd = %pwd
print(f'Current Working Directory: {cwd}')

In [None]:
# %rm -r /content/fineTune

### Install Dependencies

In [None]:
# Install all required packages

%%capture
import os, re
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    import torch; v = re.match(r"[0-9\.]{3,}", str(torch.__version__)).group(0)
    xformers = "xformers==" + ("0.0.32.post2" if v == "2.8.0" else "0.0.29.post3")
    !pip install --no-deps bitsandbytes accelerate {xformers} peft trl triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets==4.0.0" "huggingface_hub>=0.34.0" hf_transfer
    !pip install --no-deps unsloth
!pip install transformers==4.55.4
!pip install --no-deps trl==0.22.2


!pip install -q -r /content/fineTune/colab_requirements.txt

%%capture
!pip install "timm==1.0.19"   # Only for Gemma 3N
!pip install "gdown==5.2.0"

import unsloth
# print(unsloth.__version__)
import torch; torch._dynamo.config.recompile_limit = 64;

### GPU | Memory

In [None]:
# Report GPU status through the project-local `utils` helpers.
# NOTE(review): the helpers' output format is defined outside this notebook.
from utils import get_gpu_status, get_gpu_usage_stats
get_gpu_status()

In [None]:
# Record the versions of the key dependencies via the project-local helper.
# NOTE(review): which packages are reported is decided inside `utils` — confirm there.
from utils import capture_key_dependency_versions
capture_key_dependency_versions()

### Setup *Hugging Face*

In [None]:
# Log in to Hugging Face

from huggingface_hub import login
from google.colab import userdata

# The token is read from Colab's secret store rather than hard-coded here.
hf_token = userdata.get('HF_TOKEN')
if not hf_token:
    raise ValueError("HF_TOKEN secret is empty; add it in the Colab 'Secrets' panel.")
login(token=hf_token)

# Never echo any part of the token: cell outputs are stored with the notebook
# and are easily shared or committed by accident.
print("✅ Logged in to Hugging Face.")

### Modify *'configs.yaml'* for Google Colab

In [None]:
# Modify config paths for a hybrid Colab + Drive environment

import yaml
import os

CONFIG_FILE_PATH = '/content/fineTune/configs/configs.yaml'

# --- DEFINE STORAGE LOCATIONS ---
# 1. Temporary, fast storage on the Colab machine itself
COLAB_DISK_BASE_PATH = '/content/fineTune'

# 2. Permanent, persistent storage in your Google Drive
DRIVE_BASE_PATH = '/content/drive/MyDrive/FineTunning/Gemma'                  # A dedicated folder in your Drive

# Create the permanent storage directory in Google Drive if it doesn't exist
from fineTune.utils import make_clean_dir

make_clean_dir(DRIVE_BASE_PATH)
make_clean_dir(f'{DRIVE_BASE_PATH}/models')
make_clean_dir(f'{DRIVE_BASE_PATH}/logs')


with open(CONFIG_FILE_PATH, 'r') as f:
    config = yaml.safe_load(f)

# --- UPDATE PATHS IN THE CONFIG ---

# 1. DATASET PATHS: Point to the large, fast, TEMPORARY Colab disk
config['dataset']['paths']['base_dir'] = f'{COLAB_DISK_BASE_PATH}/dataset/vizWiz'
config['dataset']['paths']['temp_zip_dir'] = f'{COLAB_DISK_BASE_PATH}/dataset/vizWiz/zips'
config['dataset']['paths']['images_dir'] = f'{COLAB_DISK_BASE_PATH}/dataset/vizWiz/images'
config['dataset']['paths']['annotations_dir'] = f'{COLAB_DISK_BASE_PATH}/dataset/vizWiz/annotations'
config['dataset']['paths']['processed_dir'] = f'{COLAB_DISK_BASE_PATH}/dataset/vizWiz/processed_for_tuning'

# 2. FINE-TUNING OUTPUTS: Point to your PERMANENT Google Drive
config['fineTune']['adapters_output_dir'] = f'{DRIVE_BASE_PATH}/models/gemma-3n-adapters'
config['fineTune']['trainer_output_dir'] = f'{DRIVE_BASE_PATH}/models/training_checkpoints'
config['fineTune']['merged_model_output_dir'] = f'{DRIVE_BASE_PATH}/models/gemma-3n-merged-model'
config['fineTune']['logging_dir'] = f'{DRIVE_BASE_PATH}/logs'
config['fineTune']['dataset_path'] = f'{COLAB_DISK_BASE_PATH}/dataset/vizWiz/processed_for_tuning'

# Write the updated configuration back to the file
with open(CONFIG_FILE_PATH, 'w') as f:
    yaml.dump(config, f, default_flow_style=False)

print("✅ Configuration file updated for a hybrid Colab + Google Drive environment.")
print("*"* 20)
print("--- New Config ---")
print("*"* 20)
!cat {CONFIG_FILE_PATH}

In [None]:
pwd

In [None]:
# List the contents of the current working directory.
%ls

## Prepare Data

In [None]:
# Run the data preparation and transformation
# `prepare_data` is a project module, not defined in this notebook.
# NOTE(review): presumably it reads the dataset paths from configs.yaml — confirm.
import prepare_data

# Run the main function from `prepare_data.py`
prepare_data.main()

## Visualize Data

In [None]:
# `visualize_data` is a project module; its module-level settings are
# overridden here before calling its entry point.
import visualize_data

# NOTE(review): the split is called 'validation' here but the trainer cell
# indexes dataset["val"] — confirm the two names refer to the same split.
visualize_data.VIEW_SPLIT = "train"   # The split to view ('train' or 'validation').
visualize_data.VIEW_INDEX = 12        # The index of the example to view.

# Run the main function from `visualize_data.py`
visualize_data.main()

## Train

#### Imports

In [None]:
# --- Unsloth must be imported before transformers, trl, peft ---
from unsloth import FastVisionModel  # FastLanguageModel
from unsloth.trainer import UnslothVisionDataCollator
from unsloth import get_chat_template

import os
import sys
import io
import time
from dataclasses import dataclass
from typing import Any
from PIL import Image

import torch
import torch.cuda
import yaml
from data_pipeline import load_dataset
from trl import SFTConfig, SFTTrainer

# --- Local Utilities ---
from utils import (
    DetailedLoggingCallback,
    display_evaluation_summary,
    display_training_summary,
    get_gpu_usage_stats,
    make_clean_dir,
    push_merged_model_to_hub,
    save_merged_model_locally,
    setupTensorboard,
)


#### Load Configuration

In [None]:
# --- Load Configuration ---
# Absolute path (the same file the Colab config cell rewrites), so loading
# does not depend on the kernel's current working directory.
CONFIG_FILE_PATH = "/content/fineTune/configs/configs.yaml"

try:
    with open(CONFIG_FILE_PATH, "r") as f:
        config = yaml.safe_load(f)
except Exception as e:
    print(f"❌ ERROR: Failed to load configuration file '{CONFIG_FILE_PATH}': {e}")
    # Re-raise rather than calling sys.exit(1): in a notebook sys.exit only
    # raises SystemExit, which hides the original traceback. Re-raising keeps
    # the real error visible and still stops this cell.
    raise

#### Define Constants

In [None]:
# --- Define Constants ---
# Pull the settings used by the training cells out of the loaded YAML config.
_fine_tune_cfg = config["fineTune"]

MODEL_ID = _fine_tune_cfg["model_to_use"]
PROCESSED_DATASET_PATH = config["dataset"]["paths"]["processed_dir"]
FINAL_ADAPTERS_OUTPUT_DIR = _fine_tune_cfg["adapters_output_dir"]
TRAINER_OUTPUT_DIR = _fine_tune_cfg["trainer_output_dir"]
LOGGING_DIR = _fine_tune_cfg["logging_dir"]

# Placeholder defaults for the GPU memory bookkeeping; `start_gpu_memory` is
# measured again right before training starts.
start_gpu_memory: float = 0.0
max_memory: float = 1.0

#### Prepare Directories

In [None]:
# --- Prepare Directories ---
# Run the project's `make_clean_dir` helper on each training output location:
# adapters first, then trainer checkpoints, then logs.
for _output_dir in (FINAL_ADAPTERS_OUTPUT_DIR, TRAINER_OUTPUT_DIR, LOGGING_DIR):
    make_clean_dir(_output_dir)

#### Load Model and Processor

In [None]:
# --- Load Model and Processor ---
# FastVisionModel.from_pretrained returns a (model, processor) pair for the
# model named in the config (`fineTune.model_to_use`).
print(f"--- Loading model with Unsloth: {MODEL_ID} ---")
model, vision_processor = FastVisionModel.from_pretrained(
    model_name=MODEL_ID,
    load_in_4bit=True,                      # Load the weights 4-bit quantized
    use_gradient_checkpointing="unsloth",   # True or "unsloth" for long context,
    # max_seq_length=1024,                  # You can define this here
    # dtype=torch.bfloat16,                 # Use bfloat16
    # device_map="auto",
)
print("✅ Model and processor loaded and optimized by Unsloth.")

#### Configure LoRA (PEFT Adapters) with Unsloth

In [None]:
# --- Configure LoRA (PEFT Adapters) with Unsloth ---
# Unsloth patches the model to prepare it for LoRA.
# The returned object replaces `model`, so re-running this cell without
# reloading the base model would wrap an already-wrapped model.
model = FastVisionModel.get_peft_model(
    model=model,
    finetune_vision_layers=True,      # False if not finetuning vision layers
    finetune_language_layers=True,    # False if not finetuning language layers
    finetune_attention_modules=True,  # False if not finetuning attention layers
    finetune_mlp_modules=True,        # False if not finetuning MLP layers
    r=16,                             # The larger, the higher the accuracy, but might overfit
    lora_alpha=16,                    # Recommended alpha == r at least
    lora_dropout=0,                   # Alternative value to try: 0.05
    bias="none",
    random_state=3407,
    use_rslora=False,                 # Rank-stabilized LoRA is available but disabled here
    loftq_config=None,                # LoftQ is available but disabled here
    target_modules="all-linear",      # Optional now! Can specify a list if needed
    # These modules are passed as `modules_to_save` in addition to the LoRA targets.
    modules_to_save=[
        "lm_head",
        "embed_tokens",
    ],
)
print("✅ LoRA configured.")

#### Load Dataset

In [None]:
# --- Load Dataset ---
# `load_dataset` here is the project's own function from `data_pipeline`
# (see the imports cell); it takes the whole config dict.
# Later cells index the result with "train" and "val".
dataset = load_dataset(config)

# Disabled: eager in-memory image decoding step.
# print("\n--- Decoding images in memory for the trainer ---")
# dataset = dataset.map(decode_images, batched=False)
# print("✅ Images decoded successfully.")

In [None]:
# Inspect a sample from the dataset to understand the structure of the 'messages' field

print("--- Inspecting a sample from the training dataset ---")

# Access a sample from the training dataset
sample = dataset["train"][0]
print("\nStructure of the first sample:")
print(sample)

# Specifically look at the 'messages' field
messages = sample.get("messages")
print("\nStructure of the 'messages' field:")
print(messages)

# Inspect the content within each message, focusing on image information
print("\nContent within each message:")
# `messages` is None when the sample has no such field; iterate over nothing
# in that case instead of raising a TypeError.
for i, message in enumerate(messages or []):
    print(f"\nMessage {i}:")
    print(message)
    content = message.get("content")
    # Only walk `content` when it is a list of parts. A plain-string content
    # would otherwise be iterated character by character and fail on `.get`.
    if not isinstance(content, (list, tuple)):
        continue
    for j, item in enumerate(content):
        print(f"  Item {j}:")
        print(item)
        if isinstance(item, dict) and item.get("type") == "image":
            print("    >>> Found image information here. Checking type:")
            print(f"    >>> Type of 'image' field: {type(item.get('image'))}")
            print(f"    >>> Value of 'image' field: {item.get('image')}")

#### Create custom logging callback

In [None]:
# --- Create custom logging callback ---
# `DetailedLoggingCallback` is a project helper from `utils`; it receives the
# same directory that the trainer uses as `logging_dir`.
logging_callback = DetailedLoggingCallback(logging_dir=LOGGING_DIR)

#### Configure Training using SFTConfig

In [None]:
# --- Configure Training using SFTConfig ---

# Switch the Unsloth model into training mode before building the trainer.
FastVisionModel.for_training(model)

print("\n--- Configuring the SFTTrainer ---")
training_args = SFTConfig(
    output_dir=TRAINER_OUTPUT_DIR,
    per_device_train_batch_size=1,                            # One sample per device per step
    gradient_accumulation_steps=4,                            # Effective batch size = 1 x 4 = 4 per device
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},   # Modern way to configure
    max_grad_norm=0.3,                                        # max gradient norm based on QLoRA paper
    warmup_ratio=0.03,
    max_steps=30,                                             # Short smoke-test run
    # num_train_epochs=1,                                     # Set this instead of max_steps for full training runs
    per_device_eval_batch_size=4,
    learning_rate=2e-4,                                       # Alternative to try: 2e-5
    save_strategy="steps",                                    # Options: epoch, steps
    eval_strategy="steps",                                    # Options: no, epoch, steps
    logging_steps=10,                                         # Log every 10 steps (alternatives: 1, 50)
    logging_dir=LOGGING_DIR,                                  # Specify logging directory
    logging_first_step=True,                                  # Log the first step
    eval_steps=10,                                            # Evaluate every 10 steps
    save_steps=30,                                            # Save a checkpoint every 30 steps (a multiple of eval_steps)
    load_best_model_at_end=True,                              # Load best model at end of training
    metric_for_best_model="eval_loss",                        # Use eval loss to determine best model
    greater_is_better=False,                                  # Lower eval loss is better
    optim="adamw_torch_fused",                                # Alternatives: adamw_8bit, adamw_torch
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    seed=3407,
    # bf16=True,
    push_to_hub=False,
    report_to=["tensorboard"],                                # Use list format for multiple reporters
    # packing=False,
    # Settings below go together with the vision data collator passed to the trainer.
    dataloader_pin_memory=False,                              # Pinned host memory disabled
    dataset_text_field="",
    remove_unused_columns=False,                              # Keep all columns for debugging
    dataset_kwargs={"skip_prepare_dataset": True},
    max_length=2048,  # Alternative to try: 1024
)

# --- Chat Template ---
# NOTE(review): the template name is "gemma-3" while the output folders are
# named gemma-3n — confirm this is the intended template for MODEL_ID.
processor = get_chat_template(vision_processor, "gemma-3")

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["val"],
    processing_class=processor.tokenizer,
    data_collator=UnslothVisionDataCollator(model, processor),
    callbacks=[logging_callback],                             # Add custom logging callback
    args=training_args,
    # formatting_func=formatting_func,
)
print("✅ SFTTrainer configured with enhanced logging.")

#### Start Training

In [None]:
# --- Start Training ---
# Baseline for the memory report: reserved GPU memory (GiB) right after the
# peak counters are reset, i.e. before any training step has run.
start_gpu_memory: float = 0.0
if torch.cuda.is_available():
    torch.cuda.reset_peak_memory_stats()                                                # Reset stats to get a clean measurement for the training phase
    start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)

print("\n🚀 --- Starting the fine-tuning process --- 🚀")
print("-" * 47)
print(f"TensorBoard logs will be saved to: {LOGGING_DIR}")
print(f"Training checkpoints will be saved to: {TRAINER_OUTPUT_DIR}")

# Configure and setup TensorBoard based on config settings
setupTensorboard(config, LOGGING_DIR)

print("-" * 60)

# Must be set before trainer.train() is called.
# NOTE(review): presumably makes Unsloth return logits during train/eval — confirm.
os.environ["UNSLOTH_RETURN_LOGITS"] = "1"
# Wall-clock timing of the whole train() call (includes evaluation and saving).
training_start_time = time.time()
trainer_stats = trainer.train()
training_end_time = time.time()
training_duration = training_end_time - training_start_time

print(f"\n🏁 --- Fine-tuning complete --- 🏁")
print(
    f"Total training time: {training_duration/3600:.2f} hours ({training_duration/60:.1f} minutes)"
)

# --- Calculate and Display GPU Memory Usage (After training) ---
# `get_gpu_usage_stats` is a project helper; the keys read below are the only
# part of its return value this notebook relies on.
memory_stats = get_gpu_usage_stats(start_gpu_memory=start_gpu_memory)

print("\n--- GPU Memory Usage Summary ---")
# The function handles the check, but we can check again for a cleaner print message
if torch.cuda.is_available():
    print(f"Peak reserved memory = {memory_stats['peak_memory_gb']} GB.")
    print(
        f"Peak reserved memory for training = {memory_stats['training_memory_gb']} GB."
    )
    print(
        f"Peak reserved memory % of max memory = {memory_stats['peak_memory_percent']} %."
    )
    print(
        f"Peak reserved memory for training % of max memory = {memory_stats['training_memory_percent']} %."
    )
else:
    print("No GPU was used, so no memory stats were recorded.")

print("-" * 60)

#### Analyze and Display Results

In [None]:
# --- Analyze and Display Results ---
# Both summaries come from project helpers in `utils`.
# NOTE(review): if these helpers already display their output and return
# nothing, the print() calls below will just print "None" — confirm.
print("\n--- Training Performance Summary ---")
training_summary = display_training_summary(trainer_stats)
print(training_summary)

print("\n--- Evaluation Performance Summary ---")
evaluation_summary = display_evaluation_summary(trainer)
print(evaluation_summary)

#### Save Model

##### Save the Fine-Tuning Artifacts

In [None]:
# --- Save the Fine-Tuning Artifacts ---
print(
    f"\n--- Saving lightweight LoRA adapters and processor to: {FINAL_ADAPTERS_OUTPUT_DIR} ---"
)
# This saves the tiny adapter files (your "blueprint" for the changes)
trainer.save_model(FINAL_ADAPTERS_OUTPUT_DIR)
# This saves the processor files (tokenizer.json, preprocessor_config.json, etc.)
# into the exact same directory, making it a complete, self-contained model folder.
# NOTE(review): this saves `vision_processor`, not the `processor` returned by
# get_chat_template — confirm the chat template is included in what is saved.
vision_processor.save_pretrained(FINAL_ADAPTERS_OUTPUT_DIR)
print(f"✅ Fine-tuned artifacts saved successfully to {FINAL_ADAPTERS_OUTPUT_DIR}")

##### Export To HuggingFace Hub

In [None]:
# --- Export Merged Model to Hugging Face Hub ---
# Get the export configuration from your YAML file.
# `or {}` covers keys that are present but empty in the YAML (loaded as None),
# which a plain .get(key, {}) default does not.
export_config = (config.get("fineTune") or {}).get("export") or {}
# Conditionally push the full, merged model to the Hub
if export_config.get("push_to_hub", False):
    push_merged_model_to_hub(model, vision_processor, config)

In [None]:
# # Start the fine-tuning process...
# print("🚀 Running Model training process...")
# %run /content/fineTune/train.py

In [None]:
# help(SFTTrainer)