In [1]:
from google.colab import drive
drive.mount('/content/drive')

import os
import sys
from pathlib import Path

# Name of the project folder to locate somewhere under the mounted Drive.
root_folder_name = "image-captioning-flickr8k"

# Take the first match of the recursive search; None means nothing matched.
project_root = next(Path("/content/drive/MyDrive").rglob(root_folder_name), None)
if project_root is None:
    # Fail here with a clear message instead of a NameError (or a stale value
    # left over from a previous run) on the lines below.
    raise FileNotFoundError(
        f"Could not find a folder named '{root_folder_name}' under /content/drive/MyDrive"
    )

os.chdir(project_root)
# Guard against stacking duplicate entries when this cell is re-run.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))
print(f"[INFO] Working directory: {project_root}")

Mounted at /content/drive
[INFO] Working directory: /content/drive/MyDrive/TalentSprint/Project/MMRCS/Image-Captioning/image-captioning-flickr8k


In [2]:
# setup_paths.py
import os
import sys
import kagglehub
from pathlib import Path


def add_project_root_to_path():
    """Put the parent of this file's directory on ``sys.path`` exactly once.

    The location is derived from the module-level ``__file__``; nothing is
    appended when the resolved path is already listed.
    """
    project_root = str(Path(__file__).resolve().parents[1])
    if project_root in sys.path:
        return
    sys.path.append(project_root)

def setup_paths(dataset, data_source, dataset_url, force_download=True):
    """Resolve the image directory for a Flickr captioning dataset.

    Mounts Google Drive on a best-effort basis when ``google.colab`` is
    importable, verifies that the current working directory lies inside
    ``/content/drive/``, and then builds the image path either from the Drive
    project folder or from a KaggleHub download.

    Args:
        dataset: Dataset name, case-insensitive. The Kaggle source accepts
            ``"flickr8k"`` and ``"flickr30k"``; the Drive source uses the name
            to build ``<cwd>/<dataset>_dataset/Images``.
        data_source: ``"google_drive"`` or ``"kaggle"`` (case-insensitive).
        dataset_url: Kaggle owner handle (e.g. ``"adityajn105"``); only used
            when ``data_source`` is ``"kaggle"``.
        force_download: Forwarded to ``kagglehub.dataset_download``. Defaults
            to ``True``, which keeps the previous always-download behaviour.

    Returns:
        The image directory as a string.

    Raises:
        RuntimeError: If the working directory is not under ``/content/drive/``.
        ValueError: If ``data_source`` is not recognised, or ``dataset`` is not
            recognised for the Kaggle source.
    """
    try:
        from google.colab import drive
    except ImportError:
        drive = None  # not running on Colab; nothing to mount

    if drive is not None:
        try:
            drive.mount('/content/drive', force_remount=False)
        except Exception as exc:
            # Mounting stays best-effort, but the reason is reported instead of
            # being hidden, and KeyboardInterrupt/SystemExit now propagate.
            print(f"[WARN] Could not mount Google Drive: {exc}")

    # Ensure the working directory is inside Google Drive.
    root = Path(os.getcwd()).resolve()
    if "/content/drive/" not in str(root):
        raise RuntimeError("Script is not running from inside Google Drive.")
    print(f"[INFO] Root directory detected: {root}")

    dataset = dataset.lower()
    data_source = data_source.lower()

    if data_source == "google_drive":
        image_path = root / f"{dataset}_dataset" / "Images"
    elif data_source == "kaggle":
        # Location of the image folder inside each downloaded Kaggle archive.
        kaggle_image_dirs = {
            "flickr8k": Path("Images"),
            "flickr30k": Path("Images/flickr30k_images"),
        }
        if dataset not in kaggle_image_dirs:
            raise ValueError("Invalid dataset. Use 'flickr8k' or 'flickr30k'.")

        # Kaggle handles are "<owner>/<dataset>" with a forward slash, so build
        # the handle as a string rather than through an OS-specific Path.
        dataset_source = f"{str(dataset_url).strip('/')}/{dataset}"

        download_dir = kagglehub.dataset_download(
            dataset_source, force_download=force_download
        )
        print(f"[INFO] Kaggle dataset downloaded to: {download_dir}")
        image_path = Path(download_dir) / kaggle_image_dirs[dataset]
    else:
        raise ValueError("Invalid data_source. Use 'google_drive' or 'kaggle'.")

    image_path = str(image_path)

    print("\n✅ Paths configured:")
    print(f"📂 Image path:     {image_path}")
    return image_path


In [3]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from pathlib import Path
from PIL import Image

class ImageFolderDataset(torch.utils.data.Dataset):
    """Unlabelled dataset over the ``*.jpg`` files directly inside a folder.

    Args:
        image_dir: Directory holding the images (not searched recursively).
        transform: Optional callable applied to each RGB image before it is
            returned.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = Path(image_dir)
        # Sort so an index refers to the same file on every run; the order
        # produced by glob depends on the filesystem.
        self.image_files = sorted(self.image_dir.glob("*.jpg"))
        self.transform = transform

    def __len__(self):
        """Return the number of image files that were found."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and return it, transformed if configured."""
        image_path = self.image_files[idx]
        # The context manager releases the file handle once decoding is done;
        # ``convert`` hands back an independent in-memory image.
        with Image.open(image_path) as source:
            image = source.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image

def compute_mean_std(loader):
    """Compute per-channel mean and std statistics over all images in a loader.

    Each batch is flattened to ``(batch, channels, pixels)``. For every image
    the per-channel mean and (torch-default, unbiased) std over its pixels are
    taken, and those per-image values are averaged over all images. The
    returned std is therefore the mean of per-image stds, not the std of the
    pooled pixel population.

    Args:
        loader: Iterable yielding tensors whose first dimension is the batch
            and second dimension is the channel, e.g. ``(N, C, H, W)``.

    Returns:
        Tuple ``(mean, std)`` of tensors with one entry per channel.

    Raises:
        ValueError: If the loader yields no images.
    """
    mean_sum = None
    std_sum = None
    total_images_count = 0

    for images in loader:
        batch_samples = images.size(0)
        # reshape (unlike view) also accepts non-contiguous batches.
        pixels = images.reshape(batch_samples, images.size(1), -1)
        batch_mean = pixels.mean(2).sum(0)
        batch_std = pixels.std(2).sum(0)
        mean_sum = batch_mean if mean_sum is None else mean_sum + batch_mean
        std_sum = batch_std if std_sum is None else std_sum + batch_std
        total_images_count += batch_samples

    if total_images_count == 0:
        # Previously this surfaced as an opaque ZeroDivisionError.
        raise ValueError("Cannot compute mean/std: the loader yielded no images.")

    return mean_sum / total_images_count, std_sum / total_images_count




In [4]:
# Which dataset to use: "flickr8k" or "flickr30k".
dataset = "flickr8k"
# Where the data comes from: "kaggle" or "google_drive".
data_source = "kaggle"
# Kaggle owner handle; only consulted for the Kaggle source.
dataset_url = "adityajn105"

image_path = setup_paths(
    dataset=dataset,
    data_source=data_source,
    dataset_url=dataset_url,
)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[INFO] Root directory detected: /content/drive/My Drive/TalentSprint/Project/MMRCS/Image-Captioning/image-captioning-flickr8k
[INFO] Kaggle dataset downloaded to: /kaggle/input/flickr8k

✅ Paths configured:
📂 Image path:     /kaggle/input/flickr8k/Images


In [None]:

# The image directory is whatever setup_paths resolved in the previous cell.
image_dir = image_path

# Resize to a fixed 256x256 and convert to tensors only; normalisation is
# deliberately left out because this cell computes the statistics for it.
transform = transforms.Compose(
    [transforms.Resize((256, 256)), transforms.ToTensor()]
)

dataset = ImageFolderDataset(image_dir, transform=transform)
loader = DataLoader(dataset, shuffle=False, num_workers=2, batch_size=32)

mean, std = compute_mean_std(loader)

print(f"\nComputed dataset mean: {mean}")
print(f"Computed dataset std: {std}")


Computed dataset mean: tensor([0.4580, 0.4461, 0.4039])
Computed dataset std: tensor([0.2320, 0.2230, 0.2272])


In [None]:
# Which dataset to use: "flickr8k" or "flickr30k".
dataset = "flickr30k"
# Where the data comes from: "kaggle" or "google_drive".
data_source = "kaggle"
# Kaggle owner handle; only consulted for the Kaggle source.
dataset_url = "adityajn105"

image_path = setup_paths(
    dataset=dataset,
    data_source=data_source,
    dataset_url=dataset_url,
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
[INFO] Root directory detected: /content/drive/My Drive/TalentSprint/Project/MMRCS/Image-Captioning/image-captioning-flickr8k
Downloading from https://www.kaggle.com/api/v1/datasets/download/adityajn105/flickr30k?dataset_version_number=1...


100%|██████████| 8.16G/8.16G [01:16<00:00, 114MB/s]

Extracting files...





[INFO] Kaggle dataset downloaded to: /root/.cache/kagglehub/datasets/adityajn105/flickr30k/versions/1

✅ Paths configured:
📂 Image path:     /root/.cache/kagglehub/datasets/adityajn105/flickr30k/versions/1/Images/flickr30k_images


In [None]:
# The image directory is whatever setup_paths resolved in the previous cell.
image_dir = image_path

# Resize to a fixed 256x256 and convert to tensors only; normalisation is
# deliberately left out because this cell computes the statistics for it.
transform = transforms.Compose(
    [transforms.Resize((256, 256)), transforms.ToTensor()]
)

dataset = ImageFolderDataset(image_dir, transform=transform)
loader = DataLoader(dataset, shuffle=False, num_workers=2, batch_size=32)

mean, std = compute_mean_std(loader)

print(f"\nComputed dataset mean: {mean}")
print(f"Computed dataset std: {std}")


Computed dataset mean: tensor([0.4441, 0.4211, 0.3847])
Computed dataset std: tensor([0.2416, 0.2311, 0.2328])
