In [3]:
import os
import random
import shutil

# Source dataset root and the destination root that receives the split copy
original_dataset_path = r'D:\Dataset\Raw Normal Oversampled'
split_dataset_path = r'D:\Dataset\Raw Normal Splitted Oversampled'

# Fraction of every class that goes into each split
train_percentage, val_percentage, test_percentage = 0.8, 0.1, 0.1

# Make sure the three top-level split directories exist under the destination root
for split_folder in ('train', 'val', 'test'):
    split_folder_path = os.path.join(split_dataset_path, split_folder)
    os.makedirs(split_folder_path, exist_ok=True)

# Iterate through each class folder and copy its files into train/val/test.
#
# The split is made reproducible on purpose: the directory listing is sorted
# (os.listdir order is arbitrary) and shuffled with a dedicated, seeded RNG.
# Because the destination folders are reused (exist_ok=True) and never
# cleared, a non-deterministic shuffle would let a re-run copy a file into a
# different split than before, leaving the same image in e.g. both train and
# test. With a fixed seed a re-run reproduces the same assignment.
# NOTE(review): the source folder name suggests the data was oversampled
# before splitting; if it contains duplicated images, copies of one image can
# still land in different splits - confirm upstream.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

for class_folder in ['COVID-19', 'Normal', 'Pneumonia-Bacterial', 'Pneumonia-Viral']:
    class_path = os.path.join(original_dataset_path, class_folder)

    # Create corresponding class folders in train, val, and test
    for split_folder in ['train', 'val', 'test']:
        class_split_path = os.path.join(split_dataset_path, split_folder, class_folder)
        os.makedirs(class_split_path, exist_ok=True)

    # Keep regular files only: os.listdir also returns sub-directories, which
    # shutil.copy cannot copy. Sorting gives the seeded shuffle a stable input.
    files = sorted(
        name for name in os.listdir(class_path)
        if os.path.isfile(os.path.join(class_path, name))
    )
    split_rng.shuffle(files)

    # Train/val sizes are truncated; the test split takes whatever remains so
    # that every file is assigned to exactly one split.
    train_count = int(len(files) * train_percentage)
    val_count = int(len(files) * val_percentage)
    test_count = len(files) - train_count - val_count

    files_by_split = {
        'train': files[:train_count],
        'val': files[train_count:train_count + val_count],
        'test': files[train_count + val_count:],
    }
    assert len(files_by_split['test']) == test_count

    # Copy files to the corresponding split folders
    for split_folder, split_files in files_by_split.items():
        destination = os.path.join(split_dataset_path, split_folder, class_folder)
        for file_name in split_files:
            shutil.copy(os.path.join(class_path, file_name), destination)


In [4]:
import cv2
import os
from pathlib import Path

def resize_and_pad(image, size, pad_color=0):
    """Resize ``image`` to fit inside ``size`` keeping its aspect ratio, then pad.

    Args:
        image: Image array; only the first two entries of ``image.shape``
            (height, width) are read here.
        size: Target ``(height, width)`` of the returned image.
        pad_color: Constant border value handed to ``cv2.copyMakeBorder``.

    Returns:
        The resized image centred on a canvas of the requested size. When the
        required padding is odd, the extra pixel goes to the bottom/right.

    Raises:
        ValueError: If ``image`` is ``None`` or has a zero-sized dimension.
    """
    if image is None:
        raise ValueError("resize_and_pad received None instead of an image")

    h, w = image.shape[:2]
    sh, sw = size
    if h == 0 or w == 0:
        raise ValueError(f"resize_and_pad received an empty image of shape {image.shape}")

    # Scale and keep aspect ratio.
    # Decide which side is the limiting one by comparing the image's aspect
    # ratio with the *target's* (w/h > sw/sh, written without division).
    # Checking only "w > h" is correct for square targets but overshoots the
    # canvas - and yields negative borders - when the target is not square.
    aspect = w / h
    if w * sh > h * sw:  # image is relatively wider than the target: fit width
        new_w = sw
        new_h = int(sw / aspect)
    else:  # image is relatively taller (or same ratio): fit height
        new_h = sh
        new_w = int(sh * aspect)

    # int() truncation can produce 0 for extreme aspect ratios, which
    # cv2.resize rejects; also never exceed the canvas.
    new_w = min(sw, max(1, new_w))
    new_h = min(sh, max(1, new_h))

    resized = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_AREA)

    # Centre the resized image on the target canvas
    pad_h = sh - new_h
    pad_w = sw - new_w
    new_image = cv2.copyMakeBorder(
        resized,
        top=pad_h // 2,
        bottom=(pad_h + 1) // 2,
        left=pad_w // 2,
        right=(pad_w + 1) // 2,
        borderType=cv2.BORDER_CONSTANT,
        value=pad_color
    )
    return new_image

def process_images(input_dir, output_dir, size=(224, 224)):
    """Resize and pad every image under ``input_dir`` into ``output_dir``.

    The directory tree below ``input_dir`` is walked recursively and mirrored
    under ``output_dir``. Only files ending in ``.png``, ``.jpg`` or ``.jpeg``
    (case-insensitive) are processed; everything else is ignored.

    Files that cannot be decoded, and files that cannot be written, are
    reported and skipped so a single bad file does not abort the whole run.

    Args:
        input_dir: Root directory containing the source images.
        output_dir: Root directory that receives the processed images.
        size: Target ``(height, width)`` forwarded to ``resize_and_pad``.

    Returns:
        None.
    """
    input_path = Path(input_dir)
    output_path = Path(output_dir)

    for subdir, _, files in os.walk(input_path):
        for file in files:
            if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
                continue

            input_file_path = Path(subdir) / file
            relative_path = input_file_path.relative_to(input_path)
            output_file_path = output_path / relative_path

            # Create output directory if it doesn't exist
            output_file_path.parent.mkdir(parents=True, exist_ok=True)

            # cv2.imread signals failure by returning None rather than
            # raising, so check before handing the result on.
            image = cv2.imread(str(input_file_path))
            if image is None:
                print(f"Skipped unreadable image: {input_file_path}")
                continue

            resized_image = resize_and_pad(image, size)

            # cv2.imwrite reports failure through its boolean return value;
            # only announce success when the file was actually written.
            if not cv2.imwrite(str(output_file_path), resized_image):
                print(f"Failed to save: {output_file_path}")
                continue
            print(f"Processed and saved: {output_file_path}")

# Where the split dataset is read from, and where the resized copy is written
input_directory = r'D:\Dataset\Raw Normal Splitted Oversampled'
output_directory = r'D:\Dataset\Resized Normal Oversampled'

# Resize and pad every image, mirroring the folder layout (default target size)
process_images(input_dir=input_directory, output_dir=output_directory)

Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1281_COVID-19 (950).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1293_COVID-19 (723).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1303_COVID-19 (273).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1312_COVID-19 (290).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1314_COVID-19 (233).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1318_COVID-19 (1134).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1319_COVID-19 (54).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1335_COVID-19 (125).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1343_COVID-19 (467).jpg
Processed and saved: D:\Dataset\Resized Normal Oversampled\test\COVID-19\1367_COVID-19 (263).jpg
Processed and saved: D:\Datase