<a href="https://colab.research.google.com/github/rehabib/PRECISE-Africa-Breast-Ultrasound-Segmentation-and-Classification-Challenge-PACE-/blob/main/PACE_Augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Step 1: Install Albumentations
!pip install albumentations --quiet
!pip install opencv-python --quiet


In [None]:
# Mount Google Drive at /content/drive. The challenge archives are read from
# /content/drive/MyDrive and the augmented outputs are written back under it.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:

# Step 2: Import Libraries
import albumentations as A
import cv2
import os
from tqdm import tqdm


In [None]:
import zipfile
import os

def unzip_if_needed(zip_path, extract_dir):
    """Extract ``zip_path`` into ``extract_dir`` unless that path already exists.

    The existence of ``extract_dir`` is the only check performed: when it is
    present the archive is not opened at all and a message is printed instead.

    Args:
        zip_path: Path of the zip archive to unpack.
        extract_dir: Directory the archive members are extracted into.
    """
    if os.path.exists(extract_dir):
        print(f"Folder {extract_dir} already exists.")
        return

    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(extract_dir)
    print(f"Extracted {zip_path} to {extract_dir}")

# Each archive on Drive, next to the /content directory it is unpacked into.
train_zip = "/content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Train.zip"
train_dir = "/content/PRECISE_Challenge2025_Train"

val_zip = "/content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Val.zip"
val_dir = "/content/PRECISE_Challenge2025_Val"

# Training archive first, then validation.
for archive_path, target_dir in ((train_zip, train_dir), (val_zip, val_dir)):
    unzip_if_needed(archive_path, target_dir)

Extracted /content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Train.zip to /content/PRECISE_Challenge2025_Train
Extracted /content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Val.zip to /content/PRECISE_Challenge2025_Val


In [None]:
"""transform = A.Compose([
    A.Rotate(limit=10, border_mode=cv2.BORDER_REFLECT_101, p=0.7),
    A.ShiftScaleRotate(
        shift_limit=0.1, scale_limit=0.08, rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101, p=0.7
    ),
    A.Affine(scale=1.0, shear=12, mode=cv2.BORDER_REFLECT_101, p=0.5),
    A.HorizontalFlip(p=0.5),
])
"""
# Step 3: Augmentation pipeline for the "normal" class.
# `alpha_affine` is no longer passed to ElasticTransform: the recorded run of
# this cell emitted a warning on that line, and current Albumentations releases
# do not document such a parameter (TODO: confirm against the installed version).
# NOTE(review): no random seed is set, so the augmented files differ run to run.
transform = A.Compose([
    A.Rotate(limit=23, border_mode=cv2.BORDER_REFLECT_101, p=0.7),
    A.ShiftScaleRotate(
        shift_limit=0.1, scale_limit=0.1, rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101, p=0.7,
    ),
    A.HorizontalFlip(p=0.5),
    A.ElasticTransform(alpha=1, sigma=30, p=0.3),
])

# Step 4: Paths
image_dir = '/content/PRECISE_Challenge2025_Train/PRECISE_Challenge2025_Train/normal'
save_image_dir = '/content/drive/MyDrive/PACE 2025/aug_normal_images'
save_mask_dir = '/content/drive/MyDrive/PACE 2025/aug_normal_masks'
os.makedirs(save_image_dir, exist_ok=True)
os.makedirs(save_mask_dir, exist_ok=True)

# Step 5: Augment paired BUSI images
num_aug = 5  # augmented copies written per source image

image_suffix = '_BUSI.png'

# Sorted so files are processed in a stable order from run to run.
for filename in tqdm(sorted(os.listdir(image_dir))):
    # Only the BUSI images themselves; masks end in '_BUSI_mask.png'.
    if not filename.endswith(image_suffix):
        continue

    base_name = filename[:-len(image_suffix)]
    mask_name = base_name + '_BUSI_mask.png'

    img_path = os.path.join(image_dir, filename)
    mask_path = os.path.join(image_dir, mask_name)

    if not os.path.exists(mask_path):
        print(f"Mask not found for {filename}, skipping.")
        continue

    image = cv2.imread(img_path)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None (it does not raise) when a file cannot be
    # decoded; passing None on would fail inside the transform instead.
    if image is None or mask is None:
        print(f"Could not read image or mask for {filename}, skipping.")
        continue

    for i in range(num_aug):
        # Image and mask go through one call so both receive the same transform.
        augmented = transform(image=image, mask=mask)
        aug_img = augmented['image']
        aug_mask = augmented['mask']

        # Save augmented image and mask
        img_save_path = os.path.join(save_image_dir, f"{base_name}_aug{i}_BUSI.png")
        mask_save_path = os.path.join(save_mask_dir, f"{base_name}_aug{i}_BUSI_mask.png")

        img_written = cv2.imwrite(img_save_path, aug_img)
        mask_written = cv2.imwrite(mask_save_path, aug_mask)

        # cv2.imwrite reports failure through its return value.
        if not (img_written and mask_written):
            print(f"Failed to write augmentation {i} for {filename}.")


  A.ElasticTransform(alpha=1, sigma=28, alpha_affine=10, p=0.3),
100%|██████████| 454/454 [01:53<00:00,  3.98it/s]


In [None]:
import os
import cv2
import albumentations as A

# Paths
malignant_dir = "/content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Train/PRECISE_Challenge2025_Train/malignant"
save_img_dir = "/content/drive/MyDrive/PACE 2025/aug_malignant"
save_mask_dir = "/content/drive/MyDrive/PACE 2025/aug_malignant_mask"

os.makedirs(save_img_dir, exist_ok=True)
os.makedirs(save_mask_dir, exist_ok=True)

# Augmentation pipeline.
# `alpha_affine` is no longer passed to ElasticTransform: the recorded run of
# this cell emitted a warning on that line, and current Albumentations releases
# do not document such a parameter (TODO: confirm against the installed version).
# NOTE(review): no random seed is set, so the augmented files differ run to run.
transform = A.Compose([
    A.Rotate(limit=23, border_mode=cv2.BORDER_REFLECT_101, p=0.7),
    A.ShiftScaleRotate(
        shift_limit=0.1, scale_limit=0.1, rotate_limit=10,
        border_mode=cv2.BORDER_REFLECT_101, p=0.7,
    ),
    A.HorizontalFlip(p=0.45),
    A.ElasticTransform(alpha=1, sigma=30, p=0.3),
])

# List BUSI malignant files only (sorted for a stable processing order)
malignant_files = sorted(
    f for f in os.listdir(malignant_dir)
    if f.endswith(".png") and "mask" not in f and "BUSI" in f
)
print("Found BUSI malignant images:", len(malignant_files))

num_aug = 3  # number of augmentations per image

skipped_files = []  # images left out because the image or its mask was unusable

for img_file in malignant_files:
    # Work on the filename stem so only the trailing ".png" is rewritten,
    # never a ".png" occurring elsewhere in the directory path or name.
    stem = img_file[:-len(".png")]
    img_path = os.path.join(malignant_dir, img_file)
    mask_path = os.path.join(malignant_dir, stem + "_mask.png")

    if not os.path.exists(mask_path):
        skipped_files.append(img_file)  # mask is missing
        continue

    # Read image + mask
    image = cv2.imread(img_path)
    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)

    # cv2.imread returns None (it does not raise) when a file cannot be decoded.
    if image is None or mask is None:
        skipped_files.append(img_file)
        continue

    for i in range(num_aug):
        # Image and mask go through one call so both receive the same transform.
        augmented = transform(image=image, mask=mask)
        aug_img = augmented["image"]
        aug_mask = augmented["mask"]

        base_name = f"{stem}_aug{i}.png"
        mask_name = f"{stem}_aug{i}_mask.png"

        cv2.imwrite(os.path.join(save_img_dir, base_name), aug_img)
        cv2.imwrite(os.path.join(save_mask_dir, mask_name), aug_mask)

# Report what was left out instead of dropping it silently.
if skipped_files:
    print(f"Skipped {len(skipped_files)} image(s) with a missing or unreadable image/mask:")
    for skipped_name in skipped_files:
        print("  ", skipped_name)

print("Augmentation finished! Saved to:")
print("Images ->", save_img_dir)
print("Masks  ->", save_mask_dir)


  A.ElasticTransform(alpha=1, sigma=30, alpha_affine=10, p=0.3),


Found BUSI malignant images: 359
Augmentation finished! Saved to:
Images -> /content/drive/MyDrive/PACE 2025/aug_malignant
Masks  -> /content/drive/MyDrive/PACE 2025/aug_malignant_mask


In [None]:
import os

# Paths — the folders the "normal" augmentation cell writes to
# (aug_normal_images / aug_normal_masks). The previous values pointed at
# "aug_images" / "aug_masks", which no cell in this notebook creates.
aug_images = "/content/drive/MyDrive/PACE 2025/aug_normal_images"
aug_masks = "/content/drive/MyDrive/PACE 2025/aug_normal_masks"

# Collect augmented file names
image_names = {f for f in os.listdir(aug_images) if f.endswith(".png")}
mask_names = {f for f in os.listdir(aug_masks) if f.endswith(".png")}

num_aug_images = len(image_names)
num_aug_masks = len(mask_names)

print(f"Augmented images: {num_aug_images}")
print(f"Augmented masks: {num_aug_masks}")

# Equal counts alone do not prove pairing (one missing mask plus one stray
# mask would still match), so compare by name: image "X.png" <-> mask "X_mask.png".
expected_masks = {name[:-len(".png")] + "_mask.png" for name in image_names}
images_without_mask = expected_masks - mask_names
masks_without_image = mask_names - expected_masks

# ✅ Check every image has its mask and no mask is orphaned
if not images_without_mask and not masks_without_image:
    print("✅ Perfect: Every augmented image has a mask.")
else:
    print("⚠️ Warning: mismatch! Some masks or images are missing.")
    print(f"Images without a mask: {len(images_without_mask)}")
    print(f"Masks without an image: {len(masks_without_image)}")


Augmented images: 1135
Augmented masks: 1135
✅ Perfect: Every augmented image has a mask.


In [None]:
import os

malignant_dir = "/content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Train/PRECISE_Challenge2025_Train/malignant"

# Every PNG in the folder whose name does not contain "mask".
malignant_files = [
    name
    for name in os.listdir(malignant_dir)
    if name.endswith(".png") and "mask" not in name
]

# Group the images by the source tag that appears in the filename.
files_by_source = {
    tag: [name for name in malignant_files if tag in name]
    for tag in ("BUSI", "BUSBRA", "BREAST")
}
busi_files = files_by_source["BUSI"]
busra_files = files_by_source["BUSBRA"]
breast_files = files_by_source["BREAST"]

print("Total Malignant:", len(malignant_files))
for tag, names in files_by_source.items():
    print(f"{tag}:", len(names))


Total Malignant: 956
BUSI: 359
BUSBRA: 511
BREAST: 86


In [None]:
import os

benign_dir = "/content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Train/PRECISE_Challenge2025_Train/benign"

# List the benign folder itself. This previously listed `malignant_dir`, so the
# "benign" statistics were computed from the malignant images (and the cell
# failed with a NameError on a fresh kernel if the malignant cell had not run).
benign_files = [f for f in os.listdir(benign_dir) if f.endswith(".png") and "mask" not in f]

# Separate by source, using the dataset tag that appears in the filename
busi_files = [f for f in benign_files if "BUSI" in f]
busra_files = [f for f in benign_files if "BUSBRA" in f]
breast_files = [f for f in benign_files if "BREAST" in f]

print("Total Benign:", len(benign_files))
print("BUSI:", len(busi_files))
print("BUSBRA:", len(busra_files))
print("BREAST:", len(breast_files))

Total Benign: 1937
BUSI: 725
BUSBRA: 1079
BREAST: 133


In [None]:
import os
import shutil
from tqdm import tqdm

# Paths
orig_root = "/content/drive/MyDrive/PACE 2025/PRECISE_Challenge2025_Train/PRECISE_Challenge2025_Train"
aug_normal_images = "/content/drive/MyDrive/PACE 2025/aug_normal_images"
aug_normal_masks = "/content/drive/MyDrive/PACE 2025/aug_normal_masks"
aug_malignant_images = "/content/drive/MyDrive/PACE 2025/aug_malignant"
aug_malignant_masks = "/content/drive/MyDrive/PACE 2025/aug_malignant_mask"

final_root = "/content/drive/MyDrive/PACE 2025/final_dataset"
os.makedirs(final_root, exist_ok=True)

# Classes we care about
classes = ["benign", "malignant", "normal"]


def _copy_augmented_pairs(image_dir, mask_dir, dst_dir):
    """Copy every augmented image together with its mask into ``dst_dir``.

    The mask of ``<name>.png`` is looked up as ``<name>_mask.png`` in
    ``mask_dir``. An image is copied only when that mask exists, so the
    destination never receives an image without its mask.

    Args:
        image_dir: Folder holding the augmented images.
        mask_dir: Folder holding the augmented masks.
        dst_dir: Class folder of the final dataset that receives both files.

    Returns:
        The list of image filenames that were skipped because no mask was found.
    """
    os.makedirs(dst_dir, exist_ok=True)
    unpaired = []
    for fname in tqdm(sorted(os.listdir(image_dir))):
        if not fname.endswith(".png"):
            continue  # only PNG images have a "<name>_mask.png" counterpart

        mask_name = fname[:-len(".png")] + "_mask.png"
        img_src = os.path.join(image_dir, fname)
        mask_src = os.path.join(mask_dir, mask_name)

        if not os.path.exists(mask_src):
            unpaired.append(fname)
            continue

        shutil.copy(img_src, os.path.join(dst_dir, fname))
        shutil.copy(mask_src, os.path.join(dst_dir, mask_name))
    return unpaired


# 1. Copy original dataset into final_dataset
for cls in classes:
    src_dir = os.path.join(orig_root, cls)
    dst_dir = os.path.join(final_root, cls)
    os.makedirs(dst_dir, exist_ok=True)

    print(f"Copying original {cls}...")
    for fname in tqdm(os.listdir(src_dir)):
        src_path = os.path.join(src_dir, fname)
        # shutil.copy only handles regular files; skip any nested directory.
        if os.path.isfile(src_path):
            shutil.copy(src_path, os.path.join(dst_dir, fname))

# 2. Add augmented NORMAL (image+mask pairs)
normal_dst = os.path.join(final_root, "normal")
print("Copying augmented normal...")
unpaired_normal = _copy_augmented_pairs(aug_normal_images, aug_normal_masks, normal_dst)

# 3. Add augmented MALIGNANT (only BUSI augmented)
malignant_dst = os.path.join(final_root, "malignant")
print("Copying augmented malignant...")
unpaired_malignant = _copy_augmented_pairs(aug_malignant_images, aug_malignant_masks, malignant_dst)

# Surface anything that was left out rather than dropping it silently.
for label, unpaired in (("normal", unpaired_normal), ("malignant", unpaired_malignant)):
    if unpaired:
        print(f"⚠️ Skipped {len(unpaired)} augmented {label} image(s) with no matching mask.")

print("✅ Final dataset prepared at:", final_root)


Copying original benign...


100%|██████████| 3882/3882 [04:17<00:00, 15.07it/s]


Copying original malignant...


100%|██████████| 1912/1912 [00:56<00:00, 34.07it/s]


Copying original normal...


100%|██████████| 454/454 [00:17<00:00, 25.83it/s]


Copying augmented normal...


100%|██████████| 1135/1135 [01:01<00:00, 18.37it/s]


Copying augmented malignant...


100%|██████████| 1077/1077 [00:46<00:00, 23.32it/s]

✅ Final dataset prepared at: /content/drive/MyDrive/PACE 2025/final_dataset



