# Saving images for use in Colab

In [1]:
import os
import json
import torch
from torchvision import transforms
from PIL import Image
from tqdm import tqdm

  Referenced from: <EB3FF92A-5EB1-3EE8-AF8B-5923C1265422> /opt/anaconda3/envs/reid-attack/lib/python3.11/site-packages/torchvision/image.so
  warn(


In [2]:
def preprocess_image(img_path, transform):
    try:
        img = Image.open(img_path).convert("L")
        img = transform(img)
        return img
    except Exception as e:
        print(f"Failed to load {img_path}: {e}")
        return None

def save_image_batches(
    json_path,
    image_root,
    transform,
    batch_size=512,
    save_dir="cnn",
    device="cpu"
):
    os.makedirs(save_dir, exist_ok=True)

    with open(json_path, "r") as f:
        data = json.load(f)
    
    images, paths, labels, ages, sexes = [], [], [], [], []
    batch_idx = 0

    for entry in tqdm(data, desc="Processing images"):
        img_path = os.path.join(image_root, entry["Path"])
        img_tensor = preprocess_image(img_path, transform)

        if img_tensor is None:
            continue

        images.append(img_tensor)
        paths.append(entry["Path"])
        labels.append(entry["Label"])
        ages.append(entry["Age"])
        sexes.append(1 if entry["Sex"] == "Male" else 0)

        if len(images) == batch_size:
            # Save batch
            save_path = os.path.join(save_dir, f"batch_{batch_idx}.pt")
            torch.save({
                "images": torch.stack(images).to(device),
                "paths": paths,
                "labels": torch.tensor(labels, dtype=torch.long),
                "ages": torch.tensor(ages, dtype=torch.float32),
                "sexes": torch.tensor(sexes, dtype=torch.long)
            }, save_path)

            images, paths, labels, ages, sexes = [], [], [], [], []
            batch_idx += 1

    # Save any remaining
    if images:
        save_path = os.path.join(save_dir, f"batch_{batch_idx}.pt")
        torch.save({
            "images": torch.stack(images).to(device),
            "paths": paths,
            "labels": torch.tensor(labels, dtype=torch.long),
            "ages": torch.tensor(ages, dtype=torch.float32),
            "sexes": torch.tensor(sexes, dtype=torch.long)
        }, save_path)

    print(f"Saved {batch_idx + 1} batches to {save_dir}")

In [3]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Lambda(lambda x: x.repeat(3, 1, 1)),
    transforms.Normalize([0.5], [0.5])
])

In [10]:
# CNN

image_root = '../../data/CheXpert_Sample'

save_image_batches(
    'cnn_images.json',
    image_root=image_root,
    transform=transform,
    batch_size=512,
    save_dir="cnn",
    device="cpu"
)

Processing images: 100%|██████████| 4000/4000 [00:09<00:00, 428.95it/s]


Saved 8 batches to cnn


In [12]:
# Overfit CNN

save_image_batches(
    'cnn_overfit_images.json',
    image_root=image_root,
    transform=transform,
    batch_size=40,
    save_dir='cnn_overfit',
    device='cpu'
)

Processing images: 100%|██████████| 40/40 [00:00<00:00, 393.17it/s]

Saved 2 batches to cnn_overfit





In [13]:
# ViT

save_image_batches(
    'vit_images.json',
    image_root=image_root,
    transform=transform,
    batch_size=512,
    save_dir='vit',
    device='cpu'
)

Processing images: 100%|██████████| 4000/4000 [00:08<00:00, 498.06it/s]


Saved 8 batches to vit


In [14]:
# Overfit ViT

save_image_batches(
    'vit_overfit_images.json',
    image_root=image_root,
    transform=transform,
    batch_size=100,
    save_dir='vit_overfit',
    device='cpu'
)

Processing images: 100%|██████████| 100/100 [00:00<00:00, 484.18it/s]

Saved 2 batches to vit_overfit





In [4]:
# Regression

image_root = '../../data/CheXpert_Sample'

save_image_batches(
    'reg_images.json',
    image_root=image_root,
    transform=transform,
    batch_size=512,
    save_dir='reg',
    device='cpu'
)

Processing images: 100%|██████████| 2500/2500 [00:06<00:00, 370.04it/s]


Saved 5 batches to reg


In [5]:
# Overfit CNN wtih features

image_root = '../../data/CheXpert_Feats'

save_image_batches(
    'feats_images.json',
    image_root=image_root,
    transform=transform,
    batch_size=512,
    save_dir='feats',
    device='cpu'
)

Processing images: 100%|██████████| 40/40 [00:00<00:00, 536.41it/s]

Saved 1 batches to feats



