In [1]:
import zipfile
import os

# Path
zip_path = "/content/Breast Thermography.zip"
extract_path = "/content/Breast_Thermography"

# Extract ZIP
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print("Extracted to:", extract_path)


Extracted to: /content/Breast_Thermography


In [2]:
!pip install --quiet torchvision

import os
from pathlib import Path
from PIL import Image, ImageFilter
from torchvision import transforms
from torchvision.utils import save_image
import random
from tqdm import tqdm


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m94.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m77.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Define complex yet safe augmentations for thermal images
def get_transform():
    return transforms.Compose([
        transforms.RandomRotation(15),                             # slight rotation
        transforms.RandomHorizontalFlip(),                         # horizontal flip
        transforms.RandomResizedCrop(224, scale=(0.9, 1.1)),       # random zoom and crop
        transforms.ColorJitter(brightness=0.1, contrast=0.1),      # contrast & brightness change
        transforms.GaussianBlur(3, sigma=(0.1, 1.5)),              # subtle blur (simulates sensor noise)
        transforms.ToTensor()
    ])


In [4]:
def expand_dataset(input_dir, output_dir, target_total_images=10000):
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    all_original_images = []

    # Step 1: Collect all image paths
    for cls in ['Benign', 'Malignant']:
        cls_path = os.path.join(input_dir, cls)
        for folder in os.listdir(cls_path):
            folder_path = os.path.join(cls_path, folder)
            if not os.path.isdir(folder_path): continue
            for img_file in os.listdir(folder_path):
                if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    all_original_images.append(os.path.join(folder_path, img_file))

    original_count = len(all_original_images)
    aug_per_image = target_total_images // original_count
    print(f"Original images: {original_count}, Target: {target_total_images}, Aug/Img: {aug_per_image}")

    transform = get_transform()

    # Step 2: Augment and save
    for img_path in tqdm(all_original_images, desc="Augmenting dataset"):
        rel_path = img_path.replace(input_dir, "").strip("/")
        class_name = rel_path.split("/")[0]
        folder_name = rel_path.split("/")[1]
        image_name = os.path.splitext(os.path.basename(img_path))[0]

        out_folder = os.path.join(output_dir, class_name, folder_name)
        Path(out_folder).mkdir(parents=True, exist_ok=True)

        # Load and save original image
        image = Image.open(img_path).convert("L")
        image.save(os.path.join(out_folder, f"{image_name}.jpg"))

        for i in range(aug_per_image):
            try:
                aug_img = transform(image)
                save_image(aug_img, os.path.join(out_folder, f"{image_name}_aug{i}.jpg"))
            except Exception as e:
                print(f"Failed to augment {img_path}: {e}")


In [5]:
# Run it
expand_dataset(
    input_dir="/content/Breast_Thermography/Breast Thermography",
    output_dir="/content/Augmented_Breast_Thermography",
    target_total_images=10000
)


Original images: 357, Target: 10000, Aug/Img: 28


Augmenting dataset: 100%|██████████| 357/357 [01:15<00:00,  4.74it/s]


In [7]:
import shutil
from google.colab import files

# Zip the folder
shutil.make_archive("/content/Augmented_Breast_Thermography", 'zip', "/content/Augmented_Breast_Thermography")

# Download the zip
files.download("/content/Augmented_Breast_Thermography.zip")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>