In [None]:
import zipfile

def unzip_file(zip_filename, output_dir):
    """Extract every member of a zip archive into a directory and report it.

    Args:
        zip_filename: Path of the zip archive to read.
        output_dir: Directory the archive members are extracted into.

    Returns:
        None. A confirmation line naming ``output_dir`` is printed once the
        extraction has finished.
    """
    archive = zipfile.ZipFile(zip_filename, mode='r')
    try:
        archive.extractall(path=output_dir)
    finally:
        # Release the file handle even when extraction fails part-way.
        archive.close()
    print(f"Files extracted to {output_dir}")

# Example usage: extract the Cells archive into the /content/ directory.
unzip_file(zip_filename='/content/Cells.zip', output_dir='/content/')

Files extracted to /content/


In [None]:
import os
import cv2
import numpy as np
import albumentations as A
from tqdm import tqdm

def _convert_color(array, code):
    """Apply an OpenCV colour conversion, leaving 2-D arrays untouched.

    A two-dimensional array is single-channel (grayscale) data; it has no
    channel order to convert and ``cv2.cvtColor`` would reject it for the
    BGR/RGB conversion codes used in this cell.
    """
    if array.ndim == 2:
        return array
    return cv2.cvtColor(array, code)


def _load_image_and_mask(image_file, mask_file):
    """Read one image/mask pair from disk.

    Returns:
        ``(image, mask)`` with colour data converted from OpenCV's BGR order
        to RGB, or ``None`` when either file cannot be decoded.
    """
    image = cv2.imread(image_file)
    # IMREAD_UNCHANGED keeps the mask exactly as stored (no forced conversion).
    mask = cv2.imread(mask_file, cv2.IMREAD_UNCHANGED)

    # cv2.imread signals failure by returning None, so this check has to run
    # BEFORE any colour conversion touches the arrays.
    if image is None or mask is None:
        return None

    return _convert_color(image, cv2.COLOR_BGR2RGB), _convert_color(mask, cv2.COLOR_BGR2RGB)


def augment_and_save_images_with_masks(input_image_folder, input_mask_folder, output_image_folder, output_mask_folder, total_samples=2000):
    """Generate augmented image/mask pairs and write them to disk.

    Images (``.jpg``, ``.jpeg``, ``.png``) and masks (``.jpeg`` only) are
    listed, sorted by file name and paired by position. Each pair is passed
    through a random horizontal flip and a random rotation of up to 30
    degrees; the same transform call is applied to image and mask together.
    Results are saved as ``augmented_<n>.jpg`` (image) and
    ``augmented_<n>.jpeg`` (mask), numbered from 0.

    The requested total is spread evenly over the readable pairs. Pairs that
    cannot be read are skipped and the remaining pairs make up the shortfall,
    so exactly ``total_samples`` pairs are produced whenever at least one pair
    is usable and every write succeeds.

    Args:
        input_image_folder: Directory containing the source images.
        input_mask_folder: Directory containing the source masks.
        output_image_folder: Directory for augmented images (created if missing).
        output_mask_folder: Directory for augmented masks (created if missing).
        total_samples: Number of augmented pairs to produce.

    Returns:
        None.
    """
    # Get sorted lists of image and mask files; pairing is purely positional.
    image_files = sorted(f for f in os.listdir(input_image_folder) if f.lower().endswith(('.jpg', '.jpeg', '.png')))
    mask_files = sorted(f for f in os.listdir(input_mask_folder) if f.lower().endswith('.jpeg'))

    if len(image_files) != len(mask_files):
        print("Warning: Mismatch in the number of images and masks!")

    # Create output directories if they don't exist
    os.makedirs(output_image_folder, exist_ok=True)
    os.makedirs(output_mask_folder, exist_ok=True)

    # zip() stops at the shorter list, so this is the real amount of work.
    pairs = list(zip(image_files, mask_files))
    if not pairs or total_samples <= 0:
        # Avoids dividing by zero below when a folder is empty.
        print("Nothing to augment: no image/mask pairs found or total_samples <= 0.")
        return

    # Geometric augmentations only; image and mask keep their original size.
    transform = A.Compose([
        A.HorizontalFlip(p=0.5),
        A.Rotate(limit=30, p=0.5),
    ])

    index = 0
    for position, (img_name, mask_name) in enumerate(tqdm(pairs)):
        if index >= total_samples:
            break  # Requested number of samples reached

        loaded = _load_image_and_mask(
            os.path.join(input_image_folder, img_name),
            os.path.join(input_mask_folder, mask_name),
        )
        if loaded is None:
            print(f"Skipping: {img_name} (image/mask not found or invalid)")
            continue
        image, mask = loaded

        # Ceiling of (samples still needed) / (pairs still to visit): spreads
        # the total evenly and lets later pairs absorb earlier skips.
        samples_left = total_samples - index
        pairs_left = len(pairs) - position
        quota = (samples_left + pairs_left - 1) // pairs_left

        for _ in range(quota):
            augmented = transform(image=image, mask=mask)

            output_image_file = os.path.join(output_image_folder, f"augmented_{index}.jpg")
            output_mask_file = os.path.join(output_mask_folder, f"augmented_{index}.jpeg")

            # OpenCV writes BGR, so colour arrays are converted back first.
            # NOTE(review): the mask is stored as JPEG, which is lossy and can
            # alter exact label colours; a lossless format would avoid that
            # but would change the output file names downstream code expects.
            image_written = cv2.imwrite(output_image_file, _convert_color(augmented['image'], cv2.COLOR_RGB2BGR))
            mask_written = cv2.imwrite(output_mask_file, _convert_color(augmented['mask'], cv2.COLOR_RGB2BGR))

            # cv2.imwrite reports failure through its return value; do not
            # count a pair that did not fully reach the disk.
            if not (image_written and mask_written):
                print(f"Warning: failed to write augmented pair {index} (from {img_name})")
                continue

            index += 1

if __name__ == "__main__":
    # Source data: training images and their masks.
    input_image_folder = "/content/Cells/Train/images/"
    input_mask_folder = "/content/Cells/Train/masks/"

    # Destinations for the augmented images and masks.
    output_image_folder = "/content/cell_bi/aug_images_bi/"
    output_mask_folder = "/content/cell_bi/aug_masks_bi/"

    # Number of augmented image-mask pairs requested.
    total_samples = 2000

    # Run the augmentation with every argument spelled out by name.
    augment_and_save_images_with_masks(
        input_image_folder=input_image_folder,
        input_mask_folder=input_mask_folder,
        output_image_folder=output_image_folder,
        output_mask_folder=output_mask_folder,
        total_samples=total_samples,
    )