In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import os
import sys
import subprocess

# 1️⃣ Uninstall any existing bitsandbytes build
#    to avoid mismatched CUDA binaries
print("🎯 Uninstalling old bitsandbytes...")
subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "bitsandbytes"], check=True)

# 2️⃣ Install bitsandbytes >=0.43.2 which adds upstream support for CUDA 12.4+
#    (see HF docs: supports CUDA 11.0–12.5, including 12.4+)
print("🎯 Installing bitsandbytes>=0.43.2 for CUDA 12.4 support...")
subprocess.run([sys.executable, "-m", "pip", "install", "--upgrade", "bitsandbytes>=0.43.2"], check=True)

# 3️⃣ Ensure the CUDA 12.4 runtime path is on LD_LIBRARY_PATH
#    so bitsandbytes can locate libcudart and its own libbitsandbytes_cuda124.
#    NOTE(review): the variable is changed for this process's environment, so it is
#    inherited by the diagnostic subprocess below; whether the already-running kernel
#    picks it up depends on the loader — confirm, or restart as advised in step 4.
cuda_lib="/usr/local/cuda-12.4/lib64"
prev_ld = os.environ.get("LD_LIBRARY_PATH", "")
ld_entries = [entry for entry in prev_ld.split(":") if entry]
if cuda_lib not in ld_entries:
    # Prepend once only, and join non-empty entries: a stray leading/trailing ":"
    # creates an empty entry, which the dynamic loader treats as the current directory.
    ld_entries.insert(0, cuda_lib)
os.environ["LD_LIBRARY_PATH"] = ":".join(ld_entries)
print(f"🎯 LD_LIBRARY_PATH set to include {cuda_lib}")

# 4️⃣ Restart prompt reminder (mandatory in Colab to pick up the new .so files)
#    In standalone scripts this is not needed, but in notebooks you must restart runtime.
print("\n⚠️  If you’re in a Colab notebook, **please restart the runtime now**\n    (Runtime → Restart runtime) and re-run this cell/script.\n")

# 5️⃣ Diagnostic: confirm bitsandbytes is seeing CUDA 12.4
print("🔍 Running bitsandbytes diagnostic...")
res = subprocess.run([sys.executable, "-m", "bitsandbytes"], capture_output=True, text=True)
print(res.stdout)
if res.stderr:
    # stderr is captured too; surface it so import/loader errors are not lost.
    print(res.stderr, file=sys.stderr)
if "CUDA" not in res.stdout:
    raise RuntimeError("bitsandbytes did not detect CUDA—check your LD_LIBRARY_PATH and installation.")


In [None]:
!pip install datasets peft torch torchvision 

In [None]:
!pip install -q transformers torch evaluate scikit-learn pillow

In [None]:
import os
import ast
import pandas as pd
from PIL import Image
import torch
from sklearn.model_selection import train_test_split  # scikit-learn split
import evaluate                                        # metrics
from transformers import AutoProcessor, AutoModelForVision2Seq , BitsAndBytesConfig, get_linear_schedule_with_warmup, Seq2SeqTrainingArguments, Seq2SeqTrainer, DataCollatorForSeq2Seq
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model, PeftModel, TaskType
from datasets import Dataset

# Load the training split. Each CSV row carries an image path and a stringified
# Python literal in `vqa_response` that is parsed and exploded below.
CSV_PATH = "/kaggle/input/images-with-vqas/train_80_withoutyesno.csv"
train_df = pd.read_csv(CSV_PATH)

# Re-root the repository-relative image paths onto the Kaggle input mount.
train_df["image_path"] = train_df["image_path"].str.replace(
    r"^Dataset/final_dataset/",
    "/kaggle/input/images-with-vqas/final_dataset/final_dataset/",
    regex=True
)

# Parse the literal and fan it out to one row per list element.
train_df["vqa_response"] = train_df["vqa_response"].apply(ast.literal_eval)
train_df = train_df.explode("vqa_response")
# explode() turns an empty list into NaN; drop those rows so the two-column split
# below does not fail on a non-sequence entry.
train_df = train_df.dropna(subset=["vqa_response"]).reset_index(drop=True)
# Assumes every element is a two-item (question, answer) entry — TODO confirm against the CSV.
train_df[["question", "answer"]] = pd.DataFrame(train_df["vqa_response"].tolist(), index=train_df.index)

# Keep only rows whose image file is actually present on disk.
exists = train_df["image_path"].apply(os.path.exists)
print(f"Skipping {(~exists).sum()} rows with missing images")
train_df = train_df[exists]

train_df = train_df[["image_path", "question", "answer"]].reset_index(drop=True)
print(f"Total examples: {len(train_df)}")

In [None]:
# Load the held-out split. Each CSV row carries an image path and a stringified
# Python literal in `vqa_response` that is parsed and exploded below.
CSV_PATH = "/kaggle/input/images-with-vqas/test_20_withoutyesno.csv"
test_df = pd.read_csv(CSV_PATH)

# Re-root the repository-relative image paths onto the Kaggle input mount.
test_df["image_path"] = test_df["image_path"].str.replace(
    r"^Dataset/final_dataset/",
    "/kaggle/input/images-with-vqas/final_dataset/final_dataset/",
    regex=True
)

# Parse the literal and fan it out to one row per list element.
test_df["vqa_response"] = test_df["vqa_response"].apply(ast.literal_eval)
test_df = test_df.explode("vqa_response")
# explode() turns an empty list into NaN; drop those rows so the two-column split
# below does not fail on a non-sequence entry.
test_df = test_df.dropna(subset=["vqa_response"]).reset_index(drop=True)
# Assumes every element is a two-item (question, answer) entry — TODO confirm against the CSV.
test_df[["question", "answer"]] = pd.DataFrame(test_df["vqa_response"].tolist(), index=test_df.index)

# Keep only rows whose image file is actually present on disk.
exists = test_df["image_path"].apply(os.path.exists)
print(f"Skipping {(~exists).sum()} rows with missing images")
test_df = test_df[exists]

test_df = test_df[["image_path", "question", "answer"]].reset_index(drop=True)
print(f"Total examples: {len(test_df)}")

In [None]:
# Supplementary training rows read from a second Kaggle input dataset;
# the row count is printed as a quick sanity check.
other_df = pd.read_csv(
    "/kaggle/input/other-data3/train.csv"
)
print(other_df.shape[0])

In [None]:
# Stack the supplementary rows on top of the exploded VQA rows with a fresh 0..n-1 index.
# NOTE(review): this rebinds `train_df` from itself, so re-running the cell appends
# `other_df` again; presumably `other_df` shares the image_path/question/answer
# columns — verify against the CSV.
train_df = pd.concat(
    (other_df, train_df),
    ignore_index=True,
)

In [None]:
# Row count of the training frame at this point in the notebook.
print(train_df.shape[0])

In [None]:
import os
import pandas as pd
from PIL import Image
from transformers import AutoProcessor, AutoModelForVisualQuestionAnswering, Trainer, TrainingArguments
import torch
from accelerate import Accelerator
from sklearn.model_selection import train_test_split
from peft import LoraConfig, get_peft_model
from transformers.data.data_collator import default_data_collator

# Accelerator instance (not otherwise used in this cell).
accelerator = Accelerator()

# bitsandbytes settings: 4-bit NF4 weights with float16 compute.
quantization_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    load_in_4bit=True,
)

# Pick the GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# BLIP-1 VQA checkpoint shared by the processor and the model.
BLIP_CHECKPOINT = "Salesforce/blip-vqa-base"
processor = AutoProcessor.from_pretrained(BLIP_CHECKPOINT, use_fast=True)
model = AutoModelForVisualQuestionAnswering.from_pretrained(
    BLIP_CHECKPOINT,
    quantization_config=quantization_config,
)
# NOTE(review): calling .to() on a bitsandbytes-quantized model is only accepted by
# some transformers/bitsandbytes version combinations — confirm for this environment.
model = model.to(device)

In [None]:
from torch.utils.data import DataLoader
class VQADataset(torch.utils.data.Dataset):
    """Map-style dataset that encodes one (image, question, answer) row per item.

    Args:
        df: DataFrame with `image_path`, `question` and `answer` columns.
        processor: callable taking `images=` / `text=` that returns a mapping of
            batched tensors, and exposing a `.tokenizer` used for the answers.
        max_length: fixed token length that questions and answers are padded or
            truncated to.

    Each item is a dict holding the processor's outputs (batch dimension removed) plus:
        decoder_input_ids: the padded answer token ids.
        decoder_attention_mask: 1 for real answer tokens, 0 for padding.
        labels: the answer token ids with padding positions replaced by -100.
    """

    def __init__(self, df, processor, max_length=32):
        self.df = df
        self.processor = processor
        self.max_length = max_length

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        # Decode inside a context manager so the file handle is released right away.
        with Image.open(row.image_path) as img:
            image = img.convert("RGB")
        question, answer = row.question, row.answer

        enc = self.processor(
            images = image, text = question,
            padding="max_length", truncation=True,
            max_length=self.max_length, return_tensors="pt",
            return_attention_mask=True
        )

        answer_enc = self.processor.tokenizer(
            answer,
            padding="max_length", truncation=True,
            max_length=self.max_length, return_tensors="pt",
            return_attention_mask=True
        )
        answer_ids = answer_enc["input_ids"].squeeze(0)
        answer_mask = answer_enc["attention_mask"].squeeze(0)

        enc = {k: v.squeeze(0) for k, v in enc.items()}
        # Pass the decoder inputs explicitly: when only `labels` is given, BLIP's VQA head
        # reuses it as decoder input, which would rule out -100 in the labels.
        enc["decoder_input_ids"] = answer_ids
        enc["decoder_attention_mask"] = answer_mask
        # -100 is the default ignore_index of cross-entropy, so padding adds nothing to the loss.
        enc["labels"] = answer_ids.masked_fill(answer_mask == 0, -100)

        return enc

In [None]:
# Show the first five rows of the training frame.
train_df.head(5)

In [None]:
import time
from transformers import ProgressCallback
# LoRA adapter configuration.
# target_modules are matched against module-name suffixes. BLIP-1 has no
# q_proj/k_proj/v_proj layers: its vision tower uses a fused `qkv` projection and its
# text encoder/decoder use `query`/`key`/`value`, so those names are listed here.
# NOTE(review): confirm the match with `model.print_trainable_parameters()`.
lora_config = LoraConfig(
    r=16,  # Rank of low-rank matrices
    lora_alpha=32,  # Scaling factor
    target_modules=["qkv", "query", "key", "value", "mlp.fc1", "mlp.fc2", "output.dense"],
    lora_dropout=0.1,  # Dropout for regularization
    bias="none"  # No bias adaptation
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)
print("LoRA applied to the model")

# Define training arguments
training_args = TrainingArguments(
    output_dir="/kaggle/working/blip_finetuned",
    run_name="blip_vqa_lora_finetune",  # Unique run name
    num_train_epochs=3,
    per_device_train_batch_size=4,  # Reduced for memory stability
    gradient_accumulation_steps=4,  # Simulate larger batch size (effective batch size = 16)
    learning_rate=5e-5,
    weight_decay=0.01,
    logging_dir='/kaggle/working/logs',
    logging_steps=10,
    save_strategy="epoch",
    fp16=True,  # Mixed precision for efficiency
    remove_unused_columns=False,  # Keep all dataset columns
    report_to="none"  # Disable W&B and other logging integrations
)

# Train on the whole frame in a single run. VQADataset opens images lazily per item, so
# slicing the frame into chunks saves no memory, while calling trainer.train() once per
# chunk restarts the learning-rate schedule and step counter each time, reuses the same
# checkpoint directory names, and runs all epochs on one chunk before seeing the next.
n_rows = len(train_df)
train_ds = VQADataset(train_df, processor)

# Create Trainer instance with default data collator
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_ds,
    data_collator=default_data_collator,  # Handle tensor stacking
    callbacks=[ProgressCallback]
)

start_time = time.time()
print("Starting training...")
trainer.train()
elapsed = (time.time() - start_time) / 60
print(f"Training completed in {elapsed:.2f} minutes.")
print(f"Trained on rows 0–{n_rows-1} ({n_rows} examples).")

trainer.save_model("/kaggle/working/blip_finetuned/model")
print("Model saved to '/kaggle/working/blip_finetuned/model'")