In [None]:
import os
import zipfile
import shutil
import cv2

Once the images are downloaded, we store them in a directory so that they can be split into two sets: one for training and one for validation.

In [None]:
# Source folders holding the unsplit images, one folder per class
dir_normales = 'data/normales'
dir_metastasis = 'data/metastasis'

# Destination folders for the training split
train_normales = 'data/train/normales'
train_metastasis = 'data/train/metastasis'

# Destination folders for the validation split
val_normales = 'data/val/normales'
val_metastasis = 'data/val/metastasis'

# Make sure every destination folder exists (no error if already present)
for _split_dir in (train_normales, train_metastasis, val_normales, val_metastasis):
    os.makedirs(_split_dir, exist_ok=True)


In [None]:
def split_and_copy_data(source, train_dir, val_dir, test_size=0.2):
    """
    Split the files of a source directory into training and validation sets
    and copy each set into its own directory.

    Only regular files located directly inside ``source`` are considered;
    subdirectories are ignored. The file list is sorted before it is split so
    that the seeded split does not depend on the (arbitrary) order in which
    ``os.listdir`` returns the entries. The destination directories are
    created if they do not exist yet.

    Parameters:
    source (str): The source directory containing the original files.
    train_dir (str): The target directory for training files.
    val_dir (str): The target directory for validation files.
    test_size (float): The proportion of the dataset to include in the validation split.
    """
    # NOTE(review): train_test_split is not imported in the visible part of
    # this file; presumably sklearn.model_selection.train_test_split --
    # confirm the import exists before running this cell.

    # Sorted listing of the regular files in the source folder
    files = sorted(
        f for f in os.listdir(source) if os.path.isfile(os.path.join(source, f))
    )

    # Split the files into training and validation sets (fixed seed)
    train_files, val_files = train_test_split(files, test_size=test_size, random_state=42)

    # Copy each subset into its destination folder
    for target_dir, names in ((train_dir, train_files), (val_dir, val_files)):
        os.makedirs(target_dir, exist_ok=True)
        for name in names:
            shutil.copy(os.path.join(source, name), os.path.join(target_dir, name))

# Apply the function to the normal and metastasis folders
# split_and_copy_data(dir_normales, train_normales, val_normales)
# split_and_copy_data(dir_metastasis, train_metastasis, val_metastasis)


In [None]:
def count_images(directory):
    """
    Count the files in each category subdirectory of the given directory.

    Every immediate subdirectory of ``directory`` is treated as a category.
    Only regular files inside a category are counted, so nested folders do
    not inflate the totals. Entries of ``directory`` that are not
    directories are ignored. Files are not filtered by extension.

    Parameters:
    directory (str): The directory containing subdirectories for each category.

    Returns:
    dict: A dictionary with categories as keys and the number of files as
    values, with keys inserted in sorted category order.
    """
    counts = {}
    # Sort so the resulting dictionary order does not depend on listing order
    for category in sorted(os.listdir(directory)):
        path = os.path.join(directory, category)
        if not os.path.isdir(path):
            continue
        with os.scandir(path) as entries:
            counts[category] = sum(1 for entry in entries if entry.is_file())
    return counts

# Tally the entries per category for the training split and the validation split
train_counts = count_images(directory='data/train')
val_counts = count_images(directory='data/val')

#print("Training:", train_counts)
#print("Validation:", val_counts)


In [None]:
def _augmented_variants(image):
    """Return the original image followed by its rotated and flipped copies."""
    return [
        image,
        cv2.rotate(image, cv2.ROTATE_90_CLOCKWISE),         # 90-degree rotation
        cv2.rotate(image, cv2.ROTATE_180),                  # 180-degree rotation
        cv2.rotate(image, cv2.ROTATE_90_COUNTERCLOCKWISE),  # 270-degree rotation
        cv2.flip(image, 1),                                 # horizontal flip
        cv2.flip(image, 0),                                 # vertical flip
    ]


def augment_images(source_directory, target_directory):
    """
    Augment the images of the source directory and save the results to the
    target directory.

    For every ``.png``/``.jpg``/``.jpeg`` file (case-insensitive) in
    ``source_directory`` six images are written: the original, three
    rotations (90, 180 and 270 degrees) and a horizontal and a vertical flip.
    Output files are named ``<stem>_aug_<index>.<extension>``, where
    ``<stem>`` is everything before the last dot of the original file name and
    ``<index>`` is a counter that runs across the whole call. Files are
    processed in sorted name order so the numbering is reproducible.

    Images that cannot be read and files that cannot be written are reported
    and skipped; only successfully written files are counted.

    Parameters:
    source_directory (str): The directory containing the original images.
    target_directory (str): The directory where the augmented images will be saved.

    Returns:
    int: The number of image files that were written successfully.
    """
    os.makedirs(target_directory, exist_ok=True)

    next_index = 0  # suffix used in output names; advances for every attempt
    created = 0     # number of files actually written

    for filename in sorted(os.listdir(source_directory)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        image_path = os.path.join(source_directory, filename)
        image = cv2.imread(image_path)
        if image is None:
            print(f"Skipping unreadable image: {image_path}")
            continue

        # Split on the LAST dot so names such as "scan.01.png" keep "scan.01"
        stem, _, extension = filename.rpartition('.')

        for variant in _augmented_variants(image):
            output_filename = f"{stem}_aug_{next_index}.{extension}"
            output_path = os.path.join(target_directory, output_filename)
            next_index += 1
            # Count only writes that are reported as successful
            if cv2.imwrite(output_path, variant):
                created += 1
            else:
                print(f"Failed to write image: {output_path}")

    print(f"Total images created: {created}")
    return created

# Source directory
# source_directory = "Raw/train/metastasis"
# Target directory
# target_directory = "Raw/train/metastasis_augmented"

# augment_images(source_directory, target_directory)
