In [13]:
import os

from PIL import Image
import cv2

import numpy as np

import random
from tqdm import tqdm

In [15]:
# Every data location is derived from the current working directory.
# This assumes the kernel is started in the notebook's own folder, so that
# '../data' points at the repository's data directory.
base_dir = os.getcwd()
data_dir = os.path.join(base_dir, '..', 'data')

# Raw dataset, read by the resize step
dataset_path = os.path.join(data_dir, 'Categorized Wound Images Dataset')

# Pipeline stages: processed (resized) -> balanced (augmented) -> train / val
processed_dir = os.path.join(data_dir, 'processed')
balanced_dir = os.path.join(data_dir, 'balanced')
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'val')

# Argument names used when calling balance_dataset; they point at the same
# locations as processed_dir and balanced_dir respectively.
input_dir = processed_dir
output_dir = balanced_dir

# Create whichever stage directories are missing and report the outcome
for directory in (balanced_dir, train_dir, val_dir, input_dir, output_dir, processed_dir):
    if os.path.exists(directory):
        print(f'Directory already exists: {directory}')
    else:
        os.makedirs(directory)
        print(f'Directory created: {directory}')

# Later cells consume these paths, e.g.:
#   balance_dataset(input_dir, output_dir, target_size)
#   split_data(balanced_dir, train_dir, val_dir, split_ratio=0.8)

Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\balanced
Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\train
Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\val
Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\processed
Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\balanced
Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\processed


In [16]:
# Make sure the folder that receives the resized images exists before
# anything is written into it.
if os.path.exists(processed_dir):
    print(f'Directory already exists: {processed_dir}')
else:
    os.makedirs(processed_dir)
    print(f'Directory created: {processed_dir}')

Directory already exists: c:\Users\MOS\Koen\Wondherkenning\Wound-Classification-DL\src\..\data\processed


In [17]:
# Size passed to PIL's Image.resize for every dataset image
target_size = (128, 128)


def _resize_and_save(input_path, output_path, size):
    """Resize one image to `size` and write it to `output_path`.

    Any failure while opening, resizing or saving is reported on stdout and
    otherwise ignored, so a single bad file does not stop the batch.
    """
    try:
        with Image.open(input_path) as img:
            img.resize(size).save(output_path)
    except Exception as e:
        print(f'Error processing {input_path}: {e}')


# Only sub-directories of the dataset root are processed; each one is
# mirrored under processed_dir with the same name.
folders = []
for entry in os.listdir(dataset_path):
    if os.path.isdir(os.path.join(dataset_path, entry)):
        folders.append(entry)

for folder in folders:
    folder_path = os.path.join(dataset_path, folder)
    output_folder = os.path.join(processed_dir, folder)

    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    for file in os.listdir(folder_path):
        _resize_and_save(
            os.path.join(folder_path, file),
            os.path.join(output_folder, file),
            target_size,
        )

In [18]:
def augment_image(img):
    """Apply one randomly chosen augmentation to an image.

    `random.choice` picks one of four operations: a 90-degree clockwise
    rotation, a flip with flip code 1, a 5x5 Gaussian blur, or a
    (10, 10) pixel shift performed by `shift_image`.

    Args:
        img: Image array as accepted by the OpenCV calls used here.

    Returns:
        The transformed array when it has three axes with exactly three
        entries on the last one; otherwise the unmodified input `img`.
    """
    def _rotate(frame):
        return cv2.rotate(frame, cv2.ROTATE_90_CLOCKWISE)

    def _mirror(frame):
        return cv2.flip(frame, 1)

    def _blur(frame):
        return cv2.GaussianBlur(frame, (5, 5), 0)

    def _shift(frame):
        return shift_image(frame, shift_x=10, shift_y=10)

    chosen = random.choice((_rotate, _mirror, _blur, _shift))
    result = chosen(img)

    # Guard clause: anything that is not a 3-channel image falls back to
    # the original input.
    dims = result.shape
    if len(dims) != 3 or dims[2] != 3:
        return img
    return result

def shift_image(img, shift_x, shift_y):
    """Translate an image by a pixel offset while keeping its size.

    Args:
        img: Image array whose first two axes are (rows, cols). A trailing
            channel axis is optional, so both colour and single-channel
            images are accepted.
        shift_x: Horizontal offset in pixels placed in the translation
            column of the affine matrix.
        shift_y: Vertical offset in pixels placed in the translation
            column of the affine matrix.

    Returns:
        The result of `cv2.warpAffine` with the same width and height as
        `img`; uncovered border pixels are filled using
        `cv2.BORDER_REFLECT_101`.
    """
    # Only the first two axes are needed. Slicing (rather than unpacking
    # exactly three values) keeps 2-D single-channel images from raising
    # ValueError.
    rows, cols = img.shape[:2]
    # 2x3 affine matrix: identity plus a translation column
    M = np.float32([[1, 0, shift_x], [0, 1, shift_y]])
    # warpAffine expects the output size as (width, height)
    return cv2.warpAffine(img, M, (cols, rows), borderMode=cv2.BORDER_REFLECT_101)

def balance_dataset(input_dir, output_dir, target_size):
    """Copy every class folder and pad it with augmented images.

    Each sub-directory of `input_dir` is mirrored under `output_dir`. All
    images that OpenCV can read are re-written there under their original
    names. Augmented copies named ``aug_<5 digits>.jpg`` are then added
    until the output folder holds at least `target_size` files. Folders
    that already hold that many files are left as they are.

    Args:
        input_dir: Directory containing one sub-directory per class.
        output_dir: Destination root; created if missing.
        target_size: Minimum number of files each output class folder
            should contain after balancing.

    Raises:
        OSError: If an augmented image cannot be written. Without this the
            fill loop would never terminate.
    """
    os.makedirs(output_dir, exist_ok=True)

    for folder in tqdm(os.listdir(input_dir)):
        folder_path = os.path.join(input_dir, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders are not classes
            continue

        output_folder = os.path.join(output_dir, folder)
        os.makedirs(output_folder, exist_ok=True)

        # Copy originals, remembering only those OpenCV could decode so the
        # augmentation step never samples an unreadable file.
        usable = []
        for name in os.listdir(folder_path):
            input_path = os.path.join(folder_path, name)
            image = cv2.imread(input_path)
            if image is None:
                # cv2.imread signals failure by returning None, not raising
                print(f'Skipping unreadable image: {input_path}')
                continue
            cv2.imwrite(os.path.join(output_folder, name), image)
            usable.append(name)

        if not usable:
            print(f'No readable images in {folder_path}; skipping augmentation')
            continue

        # List the folder once and keep a running count instead of
        # re-listing it on every iteration.
        count = len(os.listdir(output_folder))
        while count < target_size:
            source_name = random.choice(usable)
            image = cv2.imread(os.path.join(folder_path, source_name))
            if image is None:
                # File became unreadable since the copy pass; stop sampling it
                usable.remove(source_name)
                if not usable:
                    break
                continue

            output_path = os.path.join(output_folder, f'aug_{random.randint(10000, 99999)}.jpg')
            if os.path.exists(output_path):
                # Random name collision: try another name rather than
                # overwrite, so the count stays accurate.
                continue

            if not cv2.imwrite(output_path, augment_image(image)):
                raise OSError(f'Could not write augmented image: {output_path}')
            count += 1

# input_dir and output_dir are defined in the path-setup cell
# (data/processed -> data/balanced).
# NOTE: `target_size` is rebound here. The resize cell uses it as the size
# tuple passed to Image.resize; from this point on it is the minimum number
# of files per class folder. Re-run the resize cell before reusing the
# name as an image size.
target_size = 1000

balance_dataset(input_dir, output_dir, target_size)

100%|██████████| 10/10 [00:12<00:00,  1.28s/it]


In [19]:
def split_data(source_dir, train_dir, val_dir, split_ratio=0.8, seed=None):
    """Split each class folder into training and validation sets.

    For every sub-directory of `source_dir` the file names are shuffled.
    The first ``int(len(files) * split_ratio)`` names are written to the
    matching folder under `train_dir` and the rest to the matching folder
    under `val_dir`. Files are written by `process_and_save_image`.

    Files already present in `train_dir` / `val_dir` are never removed, so
    running the split twice with different shuffles can leave the same
    file name in both sets. Start from empty destinations or pass a fixed
    `seed` to avoid that.

    Args:
        source_dir: Directory containing one sub-directory per class.
        train_dir: Destination root for the training subset.
        val_dir: Destination root for the validation subset.
        split_ratio: Fraction of each class assigned to training.
        seed: Optional seed for a reproducible split. When None (default)
            NumPy's global random state is used, as before.
    """
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # A private generator makes seeded runs independent of whatever else has
    # consumed NumPy's global random state.
    if seed is None:
        shuffle = np.random.shuffle
    else:
        shuffle = np.random.RandomState(seed).shuffle

    # Sorted listings give the shuffle a stable starting order; os.listdir
    # returns entries in arbitrary order.
    for folder in tqdm(sorted(os.listdir(source_dir)), desc='Processing Folders'):
        folder_path = os.path.join(source_dir, folder)
        if not os.path.isdir(folder_path):
            # Stray files next to the class folders are not classes
            continue

        files = sorted(os.listdir(folder_path))
        shuffle(files)
        split_index = int(len(files) * split_ratio)

        subsets = (
            (train_dir, files[:split_index]),
            (val_dir, files[split_index:]),
        )
        for dest_root, subset in subsets:
            dest_folder = os.path.join(dest_root, folder)
            os.makedirs(dest_folder, exist_ok=True)
            for file in subset:
                process_and_save_image(
                    os.path.join(folder_path, file),
                    os.path.join(dest_folder, file),
                )

def process_and_save_image(input_path, output_path):
    """Re-save an image in RGB mode.

    Opens `input_path` with PIL, converts it to 'RGB' and saves the result
    to `output_path`. Any exception raised along the way is printed and
    swallowed, so the function always returns None.

    Args:
        input_path: Path of the image to read.
        output_path: Path the converted image is written to.
    """
    try:
        with Image.open(input_path) as source:
            source.convert('RGB').save(output_path)
    except Exception as err:
        print(f'Error processing file {input_path}: {err}')

# balanced_dir, train_dir and val_dir are defined in the path-setup cell.
# With split_ratio=0.8 the first int(0.8 * n) shuffled files of each class
# folder go to train_dir and the remainder to val_dir.

split_data(balanced_dir, train_dir, val_dir, split_ratio=0.8)

Processing Folders: 100%|██████████| 10/10 [00:58<00:00,  5.81s/it]
