## Dataset Preparation

In [None]:
!pip install roboflow

from roboflow import Roboflow
rf = Roboflow(api_key="U0RpVkRKwFrFYW5RlBkn")
project = rf.workspace("owais-ahmed-xq0js").project("sample-augmented-dataset")
version = project.version(1)
dataset = version.download("yolov8")
                

In [None]:
!pip install albumentations opencv-python

In [6]:
import os
import cv2
import albumentations as A
from tqdm import tqdm

# Paths
image_dir = r"dataset\semi_prepared_dataset\train\images"
label_dir = r"dataset\semi_prepared_dataset\train\labels"
augmented_image_dir = r"dataset\semi_prepared_dataset\augmented_dataset\images"
augmented_label_dir = r"dataset\semi_prepared_dataset\augmented_dataset\labels"

# Create directories if they don't exist
os.makedirs(augmented_image_dir, exist_ok=True)
os.makedirs(augmented_label_dir, exist_ok=True)

# Define augmentation pipeline with brightness and exposure
augmentation_pipeline = A.Compose([
    A.RandomBrightnessContrast(brightness_limit=0.3, contrast_limit=0.3, p=1.0),
])

# Function to copy labels
def copy_labels(src_label_path, dest_label_path):
    with open(src_label_path, 'r') as file:
        lines = file.readlines()
    with open(dest_label_path, 'w') as file:
        file.writelines(lines)

# Initial image count
image_count = len(os.listdir(image_dir))

# Augment until the dataset reaches 1,000 images
current_image_index = 1
while image_count < 1000:
    for img_name in tqdm(os.listdir(image_dir)):
        if img_name.endswith('.jpg') or img_name.endswith('.png'):
            image_path = os.path.join(image_dir, img_name)
            label_path = os.path.join(label_dir, img_name.replace('.jpg', '.txt').replace('.png', '.txt'))
            
            # Load image
            image = cv2.imread(image_path)
            
            # Apply augmentation
            augmented = augmentation_pipeline(image=image)
            
            # Save augmented image
            augmented_img_name = f"aug_{current_image_index}_{img_name}"
            augmented_img_path = os.path.join(augmented_image_dir, augmented_img_name)
            cv2.imwrite(augmented_img_path, augmented['image'])

            # Copy label to the augmented labels directory
            augmented_label_path = os.path.join(augmented_label_dir, augmented_img_name.replace('.jpg', '.txt').replace('.png', '.txt'))
            copy_labels(label_path, augmented_label_path)
            
            # Increment the image count and index
            image_count += 1
            current_image_index += 1

            # Break the loop if the target of 1,000 images is reached
            if image_count >= 1000:
                break

print("Dataset augmentation completed successfully! Total images:", image_count)


100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 56.75it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 99.63it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 98.54it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 99.08it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 63.66it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 98.20it/s]
100%|█████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 101.57it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 97.73it/s]
100%|███████████████████████████████████

Dataset augmentation completed successfully! Total images: 1000



