# Data Augmentation

In [None]:
import os
import cv2
import random
import PIL.Image
import numpy as np
from PIL import ImageEnhance
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [None]:
# Dataset root folder and the class sub-folders it is expected to contain
DATA_DIR = r"D:\Comparative-Study-of-CNN-Architectures-for-Leukemia\dataset\Original"
CLASSES = ['Benign', 'Early', 'Pre', 'Pro']

# Image dimensions (height and width share the same value)
img_height = img_width = 224

# Count every file found anywhere below a directory
def count_images(directory):
    """Return the total number of files under ``directory``, including subfolders.

    Every file is counted regardless of its extension. ``os.walk`` ignores
    listing errors by default, so a path that does not exist yields 0.
    """
    total = 0
    for _root, _subdirs, filenames in os.walk(directory):
        total += len(filenames)
    return total

# Tally the files currently present in each class folder
class_counts = dict(
    zip(
        CLASSES,
        (count_images(os.path.join(DATA_DIR, class_name)) for class_name in CLASSES),
    )
)
print("Original class counts:", class_counts)

In [None]:
# Data augmentation function
def augment_images(image_paths, target_dir, num_augmented):
    """Write up to ``num_augmented`` augmented JPEG variants of the given images.

    Each source image receives a random brightness change, a random contrast
    change, a random rotation and a JPEG re-compression pass, and is then fed
    through the module-level ``datagen`` generator, from which the final
    samples are drawn. Results are saved in ``target_dir`` as
    ``aug_<index>.jpg``; indices already present on disk are skipped so that
    output from an earlier run is never overwritten.

    Args:
        image_paths: Sized collection of source image paths.
        target_dir: Directory the augmented files are written to.
        num_augmented: Number of augmented images to produce.

    Returns:
        The number of augmented images written. This is 0 when nothing was
        requested or when there are no source images.
    """
    # Nothing to do. This also avoids a division by zero for an empty source
    # list and a stray image being written for a zero-sized request.
    if num_augmented <= 0 or len(image_paths) == 0:
        return 0

    # Ceiling division: the smallest per-image quota that still reaches the
    # requested total, so the work is spread over as many sources as possible.
    per_image = -(-num_augmented // len(image_paths))

    augmented_count = 0
    file_index = 0
    for image_path in image_paths:
        # Close the file handle promptly. convert() loads the pixel data and
        # returns an independent image, and normalising to RGB keeps palette
        # or alpha images compatible with the enhancers and the JPEG writer.
        with PIL.Image.open(image_path) as source:
            img = source.convert('RGB')

        # Random brightness between -10% and +10%
        img = adjust_brightness(img, factor=random.uniform(0.9, 1.1))

        # Random contrast between -10% and +10%
        img = adjust_contrast(img, factor=random.uniform(0.9, 1.1))

        # Random rotation (the angle is chosen inside apply_rotation)
        img = apply_rotation(img)

        # JPEG re-compression with a random quality from 50 to 100 inclusive
        img = add_jpeg_noise(np.array(img), noise_factor=random.randint(50, 100))

        # The generator expects a batch, so add a leading batch axis
        batch = np.expand_dims(np.asarray(img), 0)

        # Apply the rest of the augmentations
        aug_iter = datagen.flow(batch, batch_size=1)
        remaining = num_augmented - augmented_count
        for _ in range(min(per_image, remaining)):
            aug_img = next(aug_iter)[0].astype(np.uint8)

            # Advance to the next file name that is not taken yet
            while True:
                out_path = os.path.join(target_dir, f'aug_{file_index}.jpg')
                file_index += 1
                if not os.path.exists(out_path):
                    break

            PIL.Image.fromarray(aug_img).save(out_path)
            augmented_count += 1

        if augmented_count >= num_augmented:
            break

    return augmented_count

# Brightness adjustment function
def adjust_brightness(img, factor=1.0):
    """Return the result of ``ImageEnhance.Brightness(img).enhance(factor)``.

    Args:
        img: Image handed to ``ImageEnhance.Brightness``.
        factor: Enhancement factor forwarded to ``enhance``; per the Pillow
            documentation 1.0 reproduces the original image.
    """
    return ImageEnhance.Brightness(img).enhance(factor)

# Contrast adjustment function
def adjust_contrast(img, factor=1.0):
    """Return the result of ``ImageEnhance.Contrast(img).enhance(factor)``.

    Args:
        img: Image handed to ``ImageEnhance.Contrast``.
        factor: Enhancement factor forwarded to ``enhance``; per the Pillow
            documentation 1.0 reproduces the original image.
    """
    return ImageEnhance.Contrast(img).enhance(factor)

# Rotation function
def apply_rotation(img):
    """Rotate ``img`` by an angle drawn uniformly from -20 to +20.

    The angle comes from ``random.uniform(-20, 20)`` and is passed straight
    to ``img.rotate``, whose return value is handed back to the caller.
    """
    return img.rotate(random.uniform(-20, 20))

# Add JPEG noise function
def add_jpeg_noise(img, noise_factor=50):
    """Add JPEG compression artefacts by encoding and decoding ``img`` in memory.

    Args:
        img: Image array accepted by ``cv2.imencode``.
            NOTE(review): OpenCV treats 3-channel arrays as BGR; callers that
            pass RGB data get the same channel order back, but the codec's
            colour conversion then uses swapped weights - confirm this is
            acceptable.
        noise_factor: JPEG quality handed to the encoder, from 0 (worst) to
            100 (best). Despite the name, lower values produce stronger
            artefacts. Values are coerced to an int and limited to 0..100.

    Returns:
        The image decoded with ``cv2.IMREAD_COLOR``.

    Raises:
        ValueError: If OpenCV reports that encoding or decoding failed.
    """
    quality = min(100, max(0, int(noise_factor)))
    encode_param = [int(cv2.IMWRITE_JPEG_QUALITY), quality]

    # imencode reports failure through its first return value; do not ignore it
    ok, encoded_img = cv2.imencode('.jpg', img, encode_param)
    if not ok:
        raise ValueError("JPEG encoding failed")

    # imdecode signals failure by returning None rather than raising
    decoded_img = cv2.imdecode(encoded_img, cv2.IMREAD_COLOR)
    if decoded_img is None:
        raise ValueError("JPEG decoding failed")
    return decoded_img

# Generator that applies the remaining augmentations (shifts and flips)
datagen = ImageDataGenerator(**{
    # Rotation is handled manually, so it stays disabled here
    "rotation_range": 0,
    # Shift ranges along the width and height
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    # Flips along both axes are enabled
    "horizontal_flip": True,
    "vertical_flip": True,
    # Fill strategy for pixels uncovered by a shift
    "fill_mode": 'nearest',
})

In [None]:
# Directories and images for each class
IMAGE_EXTENSIONS = ('.jpg', '.png')


def _list_images(directory):
    """Return the full paths of the image files directly inside ``directory``.

    File names are matched against ``IMAGE_EXTENSIONS`` case-insensitively,
    so ``.JPG`` and ``.PNG`` files are picked up as well. The result is
    sorted because ``os.listdir`` returns entries in arbitrary order.
    """
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if name.lower().endswith(IMAGE_EXTENSIONS)
    ]


benign_dir = os.path.join(DATA_DIR, 'Benign')
benign_images = _list_images(benign_dir)

early_dir = os.path.join(DATA_DIR, 'Early')
early_images = _list_images(early_dir)

pre_dir = os.path.join(DATA_DIR, 'Pre')
pre_images = _list_images(pre_dir)

pro_dir = os.path.join(DATA_DIR, 'Pro')
pro_images = _list_images(pro_dir)


In [None]:
# Calculate how many augmented images we need for each class
TARGET_IMAGES_PER_CLASS = 1000

# Clamp at zero: a class that already meets the target needs no new images
num_augmented_benign = max(0, TARGET_IMAGES_PER_CLASS - len(benign_images))
num_augmented_early = max(0, TARGET_IMAGES_PER_CLASS - len(early_images))
num_augmented_pre = max(0, TARGET_IMAGES_PER_CLASS - len(pre_images))
num_augmented_pro = max(0, TARGET_IMAGES_PER_CLASS - len(pro_images))

# Augment images for each class
for class_images, class_dir, needed in (
    (benign_images, benign_dir, num_augmented_benign),
    (early_images, early_dir, num_augmented_early),
    (pre_images, pre_dir, num_augmented_pre),
    (pro_images, pro_dir, num_augmented_pro),
):
    if not class_images:
        # Without source images there is nothing to augment from
        print(f"Skipping {class_dir}: no source images found")
        continue
    if needed > 0:
        augment_images(class_images, class_dir, needed)

print("New image counts:")
print(f"Benign: {count_images(benign_dir)}")
print(f"Early: {count_images(early_dir)}")
print(f"Pre: {count_images(pre_dir)}")
print(f"Pro: {count_images(pro_dir)}")