In [1]:
import os
import shutil

# Collapse the multi-class dataset into two classes: 'monkeypox' vs 'others'.
# Define source and destination directories
source_dir = 'dataset1'
dest_dir = 'dataset_binary'

# Create destination directories
monkeypox_dst = os.path.join(dest_dir, 'monkeypox')
others_dst = os.path.join(dest_dir, 'others')
os.makedirs(monkeypox_dst, exist_ok=True)
os.makedirs(others_dst, exist_ok=True)

# Copy monkeypox images
monkeypox_src = os.path.join(source_dir, 'Monkeypox')
for filename in sorted(os.listdir(monkeypox_src)):
    src_path = os.path.join(monkeypox_src, filename)
    # os.listdir also returns sub-folders; shutil.copy only handles files.
    if os.path.isfile(src_path):
        shutil.copy(src_path, monkeypox_dst)

# Copy other class images into 'others'
other_classes = ['Normal', 'Measles', 'Chickenpox']
# Names already written to 'others' during this run. Several source folders
# are merged into one destination, so a base name that occurs in two classes
# would otherwise silently overwrite the earlier copy.
used_names = set()
for cls in other_classes:
    cls_src = os.path.join(source_dir, cls)
    # Sorted so that the collision handling below is deterministic across runs.
    for filename in sorted(os.listdir(cls_src)):
        src_path = os.path.join(cls_src, filename)
        if not os.path.isfile(src_path):
            continue
        # Keep the original name when it is free; otherwise prefix the class
        # name until the destination name is unique.
        dst_name = filename
        while dst_name in used_names:
            dst_name = f"{cls}_{dst_name}"
        used_names.add(dst_name)
        shutil.copy(src_path, os.path.join(others_dst, dst_name))


In [5]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Parameters
IMG_SIZE = 224
BATCH_SIZE = 1  # Set to 1 for precise control: one saved image per next() call
TARGET_COUNT = 1500  # Desired total images per class

# Paths
input_dir = 'dataset_binary'
output_dir = 'dataset_augmented'

# NOTE(review): this cell augments the full dataset, and a later cell splits
# 'dataset_augmented' at random per file. Augmented variants of one source
# image can therefore end up in train, val and test simultaneously. Splitting
# first and augmenting only the training split would avoid that leakage.

# Define data augmentation parameters
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Same rescale but no random transform; used to write the untouched originals.
# Built once here instead of once per image inside the loop.
plain_datagen = ImageDataGenerator(rescale=1./255)

# Process each class separately
for class_name in ['monkeypox', 'others']:
    input_path = os.path.join(input_dir, class_name)
    output_path = os.path.join(output_dir, class_name)
    os.makedirs(output_path, exist_ok=True)

    # Guard against re-runs: nothing below clears output_path, so running the
    # cell again may pile new files on top of the previous run's output.
    leftover = len(os.listdir(output_path))
    if leftover:
        print(f"{class_name}: {output_path} already contains {leftover} files. "
              f"Skipping; empty the folder to regenerate.")
        continue

    # Create a generator
    generator = datagen.flow_from_directory(
        input_dir,
        classes=[class_name],
        class_mode=None,
        target_size=(IMG_SIZE, IMG_SIZE),
        batch_size=BATCH_SIZE,
        shuffle=False,
        save_to_dir=output_path,
        save_prefix='aug',
        save_format='jpeg'
    )

    # Count the images the generator actually found. A raw os.listdir count
    # can include entries the generator ignores, which would throw off both
    # the originals pass and the number of augmented images.
    existing_images = generator.samples
    if existing_images == 0:
        print(f"{class_name}: No images found in {input_path}. Skipping.")
        continue

    images_needed = max(TARGET_COUNT - existing_images, 0)

    # Save original images (one pass over the class, augmentation disabled).
    # This runs even when no augmentation is needed, so that the class is
    # still present in output_dir for the later split.
    generator.image_data_generator = plain_datagen
    for _ in range(existing_images):
        next(generator)

    if images_needed == 0:
        print(f"{class_name}: Already has {existing_images} images. No augmentation needed.")
        continue

    # Reset the generator with augmentation
    generator.image_data_generator = datagen

    # Generate and save augmented images
    print(f"Augmenting {class_name}: Generating {images_needed} images...")
    for _ in range(images_needed):
        next(generator)

    # Report what is actually on disk rather than assuming the target was hit.
    total_saved = len(os.listdir(output_path))
    print(f"{class_name}: Augmentation complete. Total images: {total_saved}")


Found 279 images belonging to 1 classes.
Augmenting monkeypox: Generating 1221 images...
monkeypox: Augmentation complete. Total images: 1500
Found 491 images belonging to 1 classes.
Augmenting others: Generating 1009 images...
others: Augmentation complete. Total images: 1500


In [6]:
import os
import shutil
import random

def split_dataset(source_dir, dest_dir, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15, seed=None):
    """Copy each class folder of ``source_dir`` into train/val/test sub-folders.

    For every class in ``['monkeypox', 'others']`` the files found in
    ``source_dir/<class>`` are shuffled and copied to
    ``dest_dir/<split>/<class>`` for ``split`` in ``train``, ``val``, ``test``.

    Args:
        source_dir: Directory containing one sub-folder per class.
        dest_dir: Directory that receives the ``train``/``val``/``test`` tree.
        train_ratio: Fraction of each class copied to ``train``.
        val_ratio: Fraction of each class copied to ``val``.
        test_ratio: Fraction of each class copied to ``test``. When the three
            ratios sum to 1 the test split takes every remaining file, so no
            file is lost to integer truncation; when they sum to less than 1
            the surplus files are left out.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            shuffles with the global ``random`` state, as before.

    Raises:
        ValueError: If any ratio is negative or the ratios sum to more than 1.
    """
    ratios = (train_ratio, val_ratio, test_ratio)
    ratio_sum = sum(ratios)
    tolerance = 1e-9  # absorbs float error, e.g. 0.7 + 0.15 + 0.15
    if any(r < 0 for r in ratios) or ratio_sum > 1 + tolerance:
        raise ValueError(
            f"Ratios must be non-negative and sum to at most 1, got {ratios}"
        )
    uses_every_file = abs(ratio_sum - 1) <= tolerance

    # A private RNG keeps a seeded call from disturbing the global random state.
    shuffler = random.Random(seed) if seed is not None else random

    classes = ['monkeypox', 'others']
    for cls in classes:
        cls_path = os.path.join(source_dir, cls)
        # Sort first: os.listdir order is arbitrary, so a seed alone would not
        # make the shuffle reproducible. Sub-folders are skipped because
        # shutil.copyfile only handles regular files.
        images = sorted(
            name for name in os.listdir(cls_path)
            if os.path.isfile(os.path.join(cls_path, name))
        )
        shuffler.shuffle(images)

        total = len(images)
        train_end = int(total * train_ratio)
        val_end = train_end + int(total * val_ratio)
        test_end = total if uses_every_file else val_end + int(total * test_ratio)

        splits = {
            'train': images[:train_end],
            'val': images[train_end:val_end],
            'test': images[val_end:test_end]
        }

        for split, split_images in splits.items():
            split_dir = os.path.join(dest_dir, split, cls)
            os.makedirs(split_dir, exist_ok=True)
            for img in split_images:
                src = os.path.join(cls_path, img)
                dst = os.path.join(split_dir, img)
                shutil.copyfile(src, dst)

# Split the augmented images into train/val/test folders using the function's
# default ratios.
source_directory = 'dataset_augmented'
destination_directory = 'dataset_split'
split_dataset(source_dir=source_directory, dest_dir=destination_directory)
