In [None]:
# Mount Google Drive at /content/drive so the dataset archive stored under
# MyDrive can be read by the next cell. `google.colab` only exists on Colab.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
# Extract the image archive from Drive into /content/preprocessed_img (-q: quiet output).
!unzip -q /content/drive/MyDrive/abo-images-small.zip -d /content/preprocessed_img


In [None]:
import os
import torch
import pickle
from PIL import Image
import pandas as pd
from tqdm import tqdm
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
from transformers import BlipProcessor, BlipForQuestionAnswering, default_data_collator

In [None]:
import torch.nn as nn
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from torch.utils.data import Dataset

In [None]:
# Load the full question/answer table and carve out a held-out test split.
try:
    df = pd.read_csv('/content/inference_final.csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook exit() shuts the kernel
    # down rather than reporting a normal, traceable error for this cell.
    raise FileNotFoundError(
        "Error: The file 'inference_final.csv' was not found. Make sure it is in the correct directory."
    ) from err

# Train/test fractions: 90% of the rows for training, 10% for testing.
train_size = 0.9
test_size = 0.1

# random_state pins the shuffle so the split is reproducible across runs.
train_df, test_df = train_test_split(df, train_size=train_size, test_size=test_size, random_state=42)

# Persist both splits. Absolute paths are used because later cells read these
# files back from /content regardless of the current working directory.
train_df.to_csv('/content/train_1_new.csv', index=False)
print(f"Training data saved to 'train_1_new.csv' with {len(train_df)} rows.")

test_df.to_csv('/content/test_1_new.csv', index=False)
print(f"Testing data saved to 'test_1_new.csv' with {len(test_df)} rows.")

In [None]:
# Set device: first CUDA GPU when available, otherwise CPU
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Memory optimization: allocator setting plus release of cached CUDA blocks
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
torch.cuda.empty_cache()

# Paths and parameters
IMAGE_ROOT = "/content/preprocessed_img/abo-images-small"
TRAIN_CSV = "/content/train_1_new.csv"
TEST_CSV = "/content/test_1_new.csv"
MODEL_SAVE_PATH = "/content/Model/blip-saved-model-final"
BATCH_SIZE = 8  # Samples per DataLoader batch
ACCUMULATION_STEPS = 2  # Effective batch size = BATCH_SIZE * ACCUMULATION_STEPS = 16
NUM_EPOCHS = 4  # NOTE(review): the training cell assigns NUM_EPOCHS again before its loop
PATIENCE = 0  # Intended early-stopping patience; 0 = most aggressive setting

# Silence the tokenizers fork/parallelism warning when DataLoader workers are used
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
from torch.utils.data import Dataset

class VQADataset(Dataset):
    """Labelled VQA samples (image + question + answer) encoded for BLIP.

    Every row of ``dataframe`` is read through the ``path``, ``question`` and
    ``answer`` columns. Images are looked up below ``image_root`` and both the
    prompt and the answer are padded/truncated to fixed lengths.
    """

    def __init__(self, dataframe, image_root, processor, max_question_len=32, max_answer_len=8):
        self.dataframe = dataframe
        self.image_root = image_root
        self.processor = processor
        self.max_question_len = max_question_len
        self.max_answer_len = max_answer_len

    def __len__(self):
        return len(self.dataframe)

    def _load_image(self, row):
        """Open the row's image as RGB, trying ``.jpeg`` first and then ``.jpg``.

        The extension stored in ``row['path']`` is discarded before the lookup.
        Raises FileNotFoundError when neither candidate exists.
        """
        stem, _ = os.path.splitext(row['path'])
        base_path = os.path.join(self.image_root, stem)
        candidates = (base_path + suffix for suffix in ('.jpeg', '.jpg'))
        found = next((path for path in candidates if os.path.exists(path)), None)
        if found is None:
            raise FileNotFoundError(f"Image not found for {row['path']} at {base_path}.[jpeg/jpg]")
        return Image.open(found).convert("RGB")

    def __getitem__(self, idx):
        """Return a dict with pixel_values, input_ids, attention_mask and labels."""
        row = self.dataframe.iloc[idx]
        image = self._load_image(row)

        prompt = "Answer in a single word: " + row['question']

        # Image and prompt are encoded together; the prompt is fixed-length.
        encoded = self.processor(
            images=image,
            text=prompt,
            padding="max_length",
            truncation=True,
            max_length=self.max_question_len,
            return_tensors="pt",
        )

        # The answer is tokenized on its own and used as the training target.
        answer_encoding = self.processor.tokenizer(
            str(row['answer']),
            padding="max_length",
            truncation=True,
            max_length=self.max_answer_len,
            return_tensors="pt"
        )
        answer_ids = answer_encoding.input_ids

        # return_tensors="pt" adds a leading batch axis of size 1; drop it so
        # the collator can stack samples into a batch.
        sample = {
            name: getattr(encoded, name).squeeze(0)
            for name in ('pixel_values', 'input_ids', 'attention_mask')
        }
        sample['labels'] = answer_ids.squeeze(0)
        return sample


class VQATestDataset(Dataset):
    """Unlabelled VQA samples (image + question) encoded for BLIP inference.

    Rows are read through the ``path`` and ``question`` columns; no labels
    are produced.
    """

    def __init__(self, dataframe, image_root, processor, max_question_len=32):
        self.dataframe, self.image_root = dataframe, image_root
        self.processor, self.max_question_len = processor, max_question_len

    def __len__(self):
        return len(self.dataframe)

    def _load_image(self, row):
        """Open the row's image as RGB, preferring ``.jpeg`` over ``.jpg``.

        Raises FileNotFoundError when neither file exists under image_root.
        """
        relative_stem = os.path.splitext(row['path'])[0]
        base_path = os.path.join(self.image_root, relative_stem)
        for candidate in (f"{base_path}.jpeg", f"{base_path}.jpg"):
            if not os.path.exists(candidate):
                continue
            return Image.open(candidate).convert("RGB")
        raise FileNotFoundError(f"Image not found for {row['path']} at {base_path}.[jpeg/jpg]")

    def __getitem__(self, idx):
        """Return a dict with pixel_values, input_ids and attention_mask."""
        row = self.dataframe.iloc[idx]
        image = self._load_image(row)

        prompt = "Answer in a single word: " + row['question']

        processor_kwargs = dict(
            images=image,
            text=prompt,
            padding="max_length",
            truncation=True,
            max_length=self.max_question_len,
            return_tensors="pt",
        )
        encoded = self.processor(**processor_kwargs)

        # Strip the size-1 batch axis added by return_tensors="pt".
        pixel_values = encoded.pixel_values.squeeze(0)
        input_ids = encoded.input_ids.squeeze(0)
        attention_mask = encoded.attention_mask.squeeze(0)
        return {
            'pixel_values': pixel_values,
            'input_ids': input_ids,
            'attention_mask': attention_mask,
        }


In [None]:
# Display the (rows, columns) size of the current training split.
train_df.shape

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Reload the train/test CSVs written by the initial split cell.
full_train_df = pd.read_csv("/content/train_1_new.csv")
test_df       = pd.read_csv("/content/test_1_new.csv")

# Step 1: shuffle the whole training file (frac=1 keeps every row), then hold
# out 15% of it for validation; the remaining 85% is the training set.
train_val_df = full_train_df.sample(frac=1, random_state=42)
train_df, val_from_train_df = train_test_split(
    train_val_df,
    test_size=0.15,
    random_state=42,
    shuffle=True
)

# Step 2: move 5% of the test file into validation. The sampled rows are
# removed from test_df so the validation set (which drives early stopping and
# checkpoint selection) never overlaps the rows the model is scored on.
val_from_test_df = test_df.sample(frac=0.05, random_state=42)
test_df = test_df.drop(index=val_from_test_df.index).reset_index(drop=True)

# Step 3: build the final splits with clean 0..n-1 indices.
val_df = pd.concat([val_from_train_df, val_from_test_df]).reset_index(drop=True)
train_df = train_df.reset_index(drop=True)

# Sanity checks: no training row was lost, and the sizes are reported.
assert len(train_df) + len(val_from_train_df) == len(full_train_df)
print(f"Final Dataframe sizes: Train: {len(train_df)}, Val: {len(val_df)}, Test: {len(test_df)}")


In [None]:
# Initialize model and processor from the pretrained BLIP VQA checkpoint.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
# NOTE(review): the base weights are loaded in fp16 while the training cell
# uses a GradScaler, which cannot unscale fp16 gradients. Training therefore
# presumably relies on PEFT keeping the trainable LoRA weights in fp32 —
# confirm for the installed PEFT version.
model = BlipForQuestionAnswering.from_pretrained(
    "Salesforce/blip-vqa-base",
    torch_dtype=torch.float16,
    low_cpu_mem_usage=True
)

# LoRA adapters are attached to modules whose names match "query" / "value".
lora_config = LoraConfig(
    r=10,
    lora_alpha=16,
    target_modules=["query", "value"],
    lora_dropout=0.1,
    bias="none",
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

# Print trainable parameters to verify LoRA
model.print_trainable_parameters()

# Move model to device FIRST
model = model.to(DEVICE)

# Wrap with DataParallel ONLY ONCE if multiple GPUs are available
if torch.cuda.device_count() > 1:
    print(f"Using {torch.cuda.device_count()} GPUs for DataParallel")
    model = nn.DataParallel(model)


# Training batches are reshuffled every epoch; without shuffle=True the model
# would see the samples in exactly the same order in each epoch.
train_loader = DataLoader(
    VQADataset(train_df, IMAGE_ROOT, processor),
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=default_data_collator,
    num_workers=2,
    pin_memory=True,
    prefetch_factor=2
)

# Validation order does not matter for the averaged loss, so no shuffling.
val_loader = DataLoader(
    VQADataset(val_df, IMAGE_ROOT, processor),
    batch_size=BATCH_SIZE,
    collate_fn=default_data_collator,
    num_workers=2
)

# The test loader must keep test_df's row order: predictions are written back
# to test_df positionally by the inference cell.
test_loader = DataLoader(
    VQATestDataset(test_df, IMAGE_ROOT, processor),
    batch_size=BATCH_SIZE,
    num_workers=2
)

In [None]:
# Optimizer and GradScaler (mixed-precision training)
optimizer = torch.optim.AdamW(model.parameters(), lr=4e-5)
scaler = torch.amp.GradScaler()

# Training setup
tracking_info = []  # one (train_loss, val_loss) tuple per completed epoch
best_val_loss = float("inf")
# Initialised up front: if the first validation loss is NaN the "improved"
# branch is skipped and the counter would otherwise be undefined.
early_stop_counter = 0

# save_pretrained lives on the wrapped model, not on the DataParallel wrapper.
model_to_save = model.module if isinstance(model, torch.nn.DataParallel) else model

# Training loop with gradient accumulation
NUM_EPOCHS = 5  # NOTE: this overrides the NUM_EPOCHS set in the configuration cell
num_train_batches = len(train_loader)
for epoch in range(NUM_EPOCHS):
    model.train()
    optimizer.zero_grad()
    train_loss = 0

    for i, batch in enumerate(tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS}")):
        batch = {k: v.to(DEVICE) for k, v in batch.items()}

        with torch.amp.autocast('cuda'):
            outputs = model(**batch)
            # .mean() collapses the per-GPU losses returned under DataParallel;
            # dividing keeps the accumulated gradient an average over the window.
            loss = outputs.loss.mean() / ACCUMULATION_STEPS

        scaler.scale(loss).backward()

        # Step at the end of every accumulation window, and also on the final
        # batch so a trailing partial window is applied instead of being
        # discarded by the zero_grad() at the start of the next epoch.
        is_last_batch = (i + 1) == num_train_batches
        if (i + 1) % ACCUMULATION_STEPS == 0 or is_last_batch:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Undo the accumulation scaling so the logged value is the raw batch loss.
        train_loss += loss.item() * ACCUMULATION_STEPS

    # Validation (same autocast context as training so the losses are comparable)
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Validating"):
            batch = {k: v.to(DEVICE) for k, v in batch.items()}
            with torch.amp.autocast('cuda'):
                outputs = model(**batch)
            # .mean() keeps this working when DataParallel returns one loss per GPU.
            val_loss += outputs.loss.mean().item()

    avg_val_loss = val_loss / len(val_loader)
    epoch_train_loss = train_loss / len(train_loader)
    print(f"Epoch {epoch+1}/{NUM_EPOCHS} — " f"Train Loss: {epoch_train_loss:.4f} | Val Loss: {avg_val_loss:.4f}")
    tracking_info.append((epoch_train_loss, avg_val_loss))

    # Early stopping: checkpoint on improvement, otherwise stop once more than
    # PATIENCE consecutive epochs have failed to improve (PATIENCE = 0 stops at
    # the first non-improving epoch).
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        model_to_save.save_pretrained(MODEL_SAVE_PATH)
        early_stop_counter = 0
    else:
        early_stop_counter += 1
        if early_stop_counter > PATIENCE:
            break

# Save results
with open("training_logs.pkl", "wb") as f:
    pickle.dump(tracking_info, f)

In [None]:
def generate_predictions(model, processor, test_loader, device):
    """Generate a decoded answer string for every sample in ``test_loader``.

    Args:
        model: model exposing ``generate``; an object with a ``.module``
            attribute (e.g. a DataParallel wrapper) is unwrapped first.
        processor: object whose ``batch_decode`` turns token ids into strings.
        test_loader: iterable of dict batches whose values support ``.to(device)``.
        device: device the batch values are moved to before generation.

    Returns:
        List of decoded strings, in the order the loader yields the samples.
    """
    model.eval()
    # generate() is not reachable through a wrapper exposing .module; unwrap once.
    generator = model.module if hasattr(model, "module") else model

    decoded = []
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Generating Predictions"):
            on_device = {key: tensor.to(device) for key, tensor in batch.items()}
            token_ids = generator.generate(**on_device)
            decoded += processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded

# Load model and generate predictions
# Specify `local_files_only=True` to load from the local path
# NOTE(review): the training cell saves a PEFT-wrapped model to MODEL_SAVE_PATH,
# so this directory presumably holds only LoRA adapter files; loading it with
# BlipForQuestionAnswering then depends on transformers resolving the base
# checkpoint itself — confirm this works with local_files_only=True.
model = BlipForQuestionAnswering.from_pretrained(MODEL_SAVE_PATH, local_files_only=True).to(DEVICE)
test_preds = generate_predictions(model, processor, test_loader, DEVICE)

# Save to CSV
# Predictions are attached positionally, so test_loader must yield rows in
# test_df order. The file is written relative to the working directory.
test_df["predicted_answer"] = test_preds
test_df.to_csv("test_predictions_final.csv", index=False)

In [None]:
import matplotlib.pyplot as plt
import pickle

# Read back the per-epoch (train_loss, val_loss) tuples written by the training cell.
# pickle is only safe here because the file is produced by this notebook itself.
with open("training_logs.pkl", "rb") as log_file:
    tracking_info = pickle.load(log_file)

# Split the tuples into one series per curve; epochs are numbered from 1.
train_losses = [entry[0] for entry in tracking_info]
val_losses = [entry[1] for entry in tracking_info]
epochs = range(1, len(tracking_info) + 1)

# Draw both curves on a single axes using the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(epochs, train_losses, label='Training Loss', marker='o')
ax.plot(epochs, val_losses, label='Validation Loss', marker='x')
ax.set_title('Training and Validation Loss Over Epochs')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend()
ax.grid(True)
ax.set_xticks(epochs)  # one tick per epoch
fig.tight_layout()
plt.show()

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import pandas as pd

# Load the predictions CSV produced by the inference cell
df = pd.read_csv("/content/test_predictions_final.csv")

# Compare case-insensitively: cast both columns to str and lowercase them
for column in ('answer', 'predicted_answer'):
    df[column] = df[column].astype(str).str.lower()

# Plain Python lists for the element-wise comparison
predictions = df['predicted_answer'].tolist()
refs = df['answer'].tolist()

# Exact-match indicators: 1 where prediction == reference, else 0.
# Every reference is labelled 1, so false positives are impossible: recall
# equals the exact-match accuracy and precision is 1.0 whenever at least one
# prediction matches (0 otherwise, via zero_division=0).
y_pred_bin = [1 if pred == ref else 0 for pred, ref in zip(predictions, refs)]
y_true_bin = [1 for _ref in refs]

acc = accuracy_score(y_true_bin, y_pred_bin)
prf_scores = precision_recall_fscore_support(
    y_true_bin, y_pred_bin, average="binary", zero_division=0
)
prec, rec, f1 = prf_scores[:3]

# Print metrics
for line in (
    f"Exact-match Accuracy: {acc:.3f}",
    f"Exact-match Precision: {prec:.3f}",
    f"Exact-match Recall:    {rec:.3f}",
    f"Exact-match F1:        {f1:.3f}",
):
    print(line)


In [None]:
!pip install bert score

In [None]:
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
from bert_score import score as bert_score
import pandas as pd

# Load the predictions CSV (path is relative to the working directory)
df = pd.read_csv("test_predictions_final.csv")

# Case-insensitive comparison: both columns become lowercase strings
df['answer'] = df['answer'].astype(str).str.lower()
df['predicted_answer'] = df['predicted_answer'].astype(str).str.lower()

# Extract lists for comparison
predictions = df['predicted_answer'].tolist()
refs = df['answer'].tolist()

# Exact-match indicators against an all-ones target. With every reference
# labelled 1, recall equals the exact-match accuracy and precision is 1.0 as
# soon as any prediction matches.
y_pred_bin = []
for pred, ref in zip(predictions, refs):
    y_pred_bin.append(int(pred == ref))
y_true_bin = [1] * len(refs)

acc = accuracy_score(y_true_bin, y_pred_bin)
prf_scores = precision_recall_fscore_support(
    y_true_bin, y_pred_bin, average="binary", zero_division=0
)
prec, rec, f1 = prf_scores[0], prf_scores[1], prf_scores[2]

# BERTScore: candidates first, references second; report the mean F1
P, R, F1 = bert_score(predictions, refs, lang="en", rescale_with_baseline=True)
bert_f1 = F1.mean().item()

# Print metrics
report = [
    f"Exact-match Accuracy: {acc:.3f}",
    f"Exact-match Precision: {prec:.3f}",
    f"Exact-match Recall:    {rec:.3f}",
    f"Exact-match F1:        {f1:.3f}",
    f"BERTScore F1:          {bert_f1:.3f}",
]
print("\n".join(report))