<a href="https://colab.research.google.com/github/k4karthi/Indian-Sign-Language-to-Text-Conversion/blob/main/dataset_augmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install albumentations

In [None]:
import albumentations as A
import cv2
import os
import random
from tqdm import tqdm


In [None]:

# Augmentation pipeline: a fixed sequence of low-intensity transforms.
# Every transform has its own `p`, so one call may apply several of them
# or none at all.
_augmentation_steps = [
    # Geometry
    A.Rotate(limit=5, p=0.5),
    # Photometric adjustments
    A.RandomBrightnessContrast(
        brightness_limit=0.05,
        contrast_limit=0.05,
        p=0.4,
    ),
    A.HueSaturationValue(
        hue_shift_limit=2,
        sat_shift_limit=5,
        val_shift_limit=2,
        p=0.3,
    ),
    # Blur
    A.MotionBlur(blur_limit=3, p=0.2),
    A.GaussianBlur(blur_limit=(3, 3), p=0.1),
    # Local contrast equalisation
    A.CLAHE(clip_limit=1.0, tile_grid_size=(8, 8), p=0.2),
]
augmentation = A.Compose(_augmentation_steps)

# Set paths
# NOTE: both values are placeholders; point them at real directories
# before running this cell.
input_dir = "path to input directory"
output_dir = "path to output directory"
os.makedirs(output_dir, exist_ok=True)

# Number of classes to pick at random from the dataset. Change 'n' as needed.
n = 10

# Every sub-directory of input_dir is treated as one class.
all_classes = [folder for folder in os.listdir(input_dir) if os.path.isdir(os.path.join(input_dir, folder))]

# random.sample raises ValueError when asked for more items than the
# population holds, so cap the request at the number of classes available.
n = min(n, len(all_classes))
selected_classes = random.sample(all_classes, n)

# Target number of images per class (originals + augmented). Change as needed.
target_count = 1000

# File names are lower-cased before the suffix check, so upper-case
# extensions such as ".PNG" or ".JPG" are matched as well.
image_extensions = (".jpg", ".jpeg", ".png")

for class_folder in tqdm(selected_classes):
    class_path = os.path.join(input_dir, class_folder)
    output_class_path = os.path.join(output_dir, class_folder)
    os.makedirs(output_class_path, exist_ok=True)

    candidates = [img for img in os.listdir(class_path) if img.lower().endswith(image_extensions)]

    # Copy original images first, remembering which ones could be decoded.
    images = []
    for img_name in candidates:
        img_path = os.path.join(class_path, img_name)
        image = cv2.imread(img_path)
        if image is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # rather than raising; skip it so it is never picked for augmentation.
            tqdm.write(f"Skipping unreadable image: {img_path}")
            continue
        # The image is written back unchanged, so no colour conversion is needed.
        cv2.imwrite(os.path.join(output_class_path, img_name), image)
        images.append(img_name)

    original_count = len(images)
    if original_count == 0:
        # Nothing to sample from; random.choice would raise IndexError.
        tqdm.write(f"No usable images found in {class_path}; skipping augmentation.")
        continue

    # Augment until the class reaches target_count images
    augment_needed = target_count - original_count
    for i in range(max(augment_needed, 0)):
        img_name = random.choice(images)
        img_path = os.path.join(class_path, img_name)
        # OpenCV loads BGR; convert to RGB before running the pipeline.
        image = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        augmented = augmentation(image=image)["image"]
        # splitext keeps the full stem even when the name contains extra dots;
        # the loop index keeps every augmented file name unique.
        stem = os.path.splitext(img_name)[0]
        aug_img_name = f"{stem}_aug{i}.jpg"
        cv2.imwrite(os.path.join(output_class_path, aug_img_name), cv2.cvtColor(augmented, cv2.COLOR_RGB2BGR))


print(f"Augmentation completed for {len(selected_classes)} classes with {target_count} images each.")
