In [4]:
import os

# Root of the source dataset; every name in `folders` is a class sub-folder of it.
dataset_path = r'D:\Curated X-Ray Dataset'
folders = ['COVID-19', 'Normal', 'Pneumonia-Bacterial', 'Pneumonia-Viral']

# Count the number of images in each folder.
# Only regular files are counted: os.listdir() also returns sub-directories,
# which would otherwise inflate the count and, through max_images, the target
# size used when oversampling.
image_counts = {}
for folder in folders:
    folder_path = os.path.join(dataset_path, folder)
    image_counts[folder] = sum(
        os.path.isfile(os.path.join(folder_path, entry))
        for entry in os.listdir(folder_path)
    )

print(image_counts)

# Size of the largest class.
max_images = max(image_counts.values())
print(f"Maximum number of images in a folder: {max_images}")

{'COVID-19': 1281, 'Normal': 3270, 'Pneumonia-Bacterial': 3001, 'Pneumonia-Viral': 1656}
Maximum number of images in a folder: 3270


In [5]:
import shutil
import random

# Define the path to the output dataset: the root folder under which one
# sub-folder per class is created (os.path.join(output_path, folder) below).
output_path = r'D:\Dataset\Raw Normal Oversampled'

# Create the output directory if it doesn't exist; exist_ok=True keeps this
# call from failing when the directory is already there.
os.makedirs(output_path, exist_ok=True)

# Function to oversample images
def oversample_images(folder, target_count):
    """Copy one class folder to the output dataset and pad it with duplicates.

    Every regular file in ``dataset_path/folder`` is copied to
    ``output_path/folder``. While the output folder holds fewer than
    ``target_count`` files, a randomly chosen source file is copied again
    under the name ``"<index>_<original name>"``.

    Files already present in the output folder count towards the target, so
    calling the function again on a populated folder only tops it up instead
    of adding a fresh batch of duplicates.

    Args:
        folder: Name of the class sub-folder, relative to ``dataset_path``.
        target_count: Number of files the output folder should contain.

    Raises:
        IndexError: From ``random.choice`` if duplicates are needed but the
            source folder contains no files.
    """
    folder_path = os.path.join(dataset_path, folder)
    output_folder_path = os.path.join(output_path, folder)
    os.makedirs(output_folder_path, exist_ok=True)

    # Keep regular files only: shutil.copy cannot copy a directory. Sort the
    # names because os.listdir returns them in arbitrary order, which would
    # make random.choice pick different files even with a fixed seed.
    images = sorted(
        entry for entry in os.listdir(folder_path)
        if os.path.isfile(os.path.join(folder_path, entry))
    )

    # Copy existing images to the output folder
    for image in images:
        shutil.copy(os.path.join(folder_path, image), os.path.join(output_folder_path, image))

    # What the output folder holds now (originals plus anything left by an
    # earlier run); this, not the source count, decides how much is missing.
    existing = {
        entry for entry in os.listdir(output_folder_path)
        if os.path.isfile(os.path.join(output_folder_path, entry))
    }

    # Oversample by duplicating images
    next_index = len(images)
    while len(existing) < target_count:
        image_to_copy = random.choice(images)
        new_image_name = f"{next_index}_{image_to_copy}"
        next_index += 1
        if new_image_name in existing:
            # Never overwrite a file that is already counted; try the next index.
            continue
        shutil.copy(os.path.join(folder_path, image_to_copy), os.path.join(output_folder_path, new_image_name))
        existing.add(new_image_name)

# Seed the global RNG so the duplicates picked by random.choice can be
# reproduced when this cell is run again (given the same file listing order).
OVERSAMPLING_SEED = 42
random.seed(OVERSAMPLING_SEED)

# Oversample each folder.
# Every class goes through oversample_images, including the largest one: for a
# folder that already holds max_images files it only copies the files. The
# helper is used instead of shutil.copytree because copytree raises
# FileExistsError when the destination folder already exists, which made this
# cell fail whenever it was re-run.
for folder in image_counts:
    oversample_images(folder, max_images)

print("Oversampling complete.")

Oversampling complete.
