In [17]:
import os
import numpy as np
import tensorflow as tf
from tqdm import tqdm

In [18]:
def count_images(dataset_path):
    """Count the files stored in each label sub-folder of a dataset.

    Every sub-directory of ``dataset_path`` is treated as one class label.
    Entries of ``dataset_path`` that are not directories are ignored, and
    inside a label folder only regular files are counted, so a nested
    directory is not mistaken for an image.

    Args:
        dataset_path: Directory containing one sub-directory per label.

    Returns:
        A ``(class_counts, total_images)`` tuple: ``class_counts`` maps each
        label (sub-directory name) to its file count and ``total_images`` is
        the sum of those counts.
    """
    class_counts = {}
    # Sorted so the resulting dict (and anything printed from it) has the same
    # order on every platform; os.listdir makes no ordering guarantee.
    for label in sorted(os.listdir(dataset_path)):
        label_path = os.path.join(dataset_path, label)
        if not os.path.isdir(label_path):
            continue
        class_counts[label] = sum(
            1
            for entry in os.listdir(label_path)
            if os.path.isfile(os.path.join(label_path, entry))
        )
    return class_counts, sum(class_counts.values())

In [19]:
def load_and_preprocess_image(image_path):
    """Read an image file, decode it to three channels and resize it to 128x128.

    No further normalization is applied: the resized tensor is returned as
    produced by ``tf.image.resize``.
    """
    raw_bytes = tf.io.read_file(image_path)
    # channels=3 asks the decoder for a 3-channel result.
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    return tf.image.resize(decoded, [128, 128])

In [20]:
def augment_images(input_folder, output_folder, num_augmented=2):
    """Write randomly augmented copies of every image under ``input_folder``.

    ``input_folder`` is expected to hold one sub-directory per class label.
    For each image file, ``num_augmented`` variants are produced by a random
    horizontal flip followed by random brightness and random contrast
    changes, and saved as ``output_folder/<label>/aug_<i>_<original name>``.

    Args:
        input_folder: Directory whose sub-directories contain the source images.
        output_folder: Destination root directory; created if it is missing.
        num_augmented: Number of augmented copies written per source image.

    Returns:
        None. The result is the set of files written under ``output_folder``.
    """
    os.makedirs(output_folder, exist_ok=True)

    for label in tqdm(os.listdir(input_folder), desc="Augmenting Data"):
        label_path = os.path.join(input_folder, label)
        # Check before creating anything: a stray file next to the label
        # folders must not produce an empty output directory of the same name.
        if not os.path.isdir(label_path):
            continue

        output_label_path = os.path.join(output_folder, label)
        os.makedirs(output_label_path, exist_ok=True)

        for img_name in os.listdir(label_path):
            img_path = os.path.join(label_path, img_name)
            if not os.path.isfile(img_path):
                continue  # nested directories cannot be decoded as images

            # The loader does not normalize, so pixels are on the 0-255 scale.
            # random_brightness adds its delta directly to float pixel values,
            # so max_delta=0.2 is only a meaningful shift once pixels are in
            # [0, 1].
            img = load_and_preprocess_image(img_path) / 255.0

            for i in range(num_augmented):
                augmented_img = tf.image.random_flip_left_right(img)
                augmented_img = tf.image.random_brightness(augmented_img, max_delta=0.2)
                augmented_img = tf.image.random_contrast(augmented_img, lower=0.8, upper=1.2)

                # Brightness/contrast can push values outside [0, 1]; clip and
                # go back to the 0-255 scale expected for 8-bit image files.
                pixels = np.clip(augmented_img.numpy(), 0.0, 1.0) * 255.0

                aug_img_name = f"aug_{i}_{img_name}"
                aug_img_path = os.path.join(output_label_path, aug_img_name)
                # scale=False: the default rescales every image to its own
                # min/max, which would largely undo the brightness and
                # contrast changes applied above.
                tf.keras.preprocessing.image.save_img(aug_img_path, pixels, scale=False)

In [21]:
# Define dataset paths
# The dataset root can be overridden with the TRAFFIC_DATA_DIR environment
# variable so the notebook also runs on other machines; when the variable is
# unset, the original local path is used.
dataset_path = os.environ.get(
    'TRAFFIC_DATA_DIR',
    r'C:\Users\krish\Downloads\archive (3)\traffic_Data',
)
data_path = os.path.join(dataset_path, 'DATA')  # labelled training images, one sub-folder per class
test_path = os.path.join(dataset_path, 'TEST')

In [23]:
# Count images before augmentation
# NOTE: run this cell before the augmentation/merge cell. That cell moves the
# augmented files into data_path, so running this afterwards would record the
# enlarged dataset as the "before" baseline.
print("Dataset statistics before augmentation:")
# original_total is reused later to compute the percentage increase.
original_counts, original_total = count_images(data_path)
print("Class-wise counts:", original_counts)
print("Total images:", original_total)

Dataset statistics before augmentation:
Class-wise counts: {'0': 118, '1': 40, '10': 70, '11': 138, '12': 96, '13': 36, '14': 128, '15': 22, '16': 142, '17': 130, '18': 8, '19': 4, '2': 80, '20': 18, '21': 12, '22': 18, '23': 14, '24': 100, '25': 4, '26': 126, '27': 28, '28': 446, '29': 44, '3': 260, '30': 150, '31': 42, '32': 14, '33': 4, '34': 26, '35': 156, '36': 40, '37': 58, '38': 30, '39': 34, '4': 98, '40': 32, '41': 18, '42': 32, '43': 82, '44': 30, '45': 24, '46': 18, '47': 12, '48': 10, '49': 42, '5': 194, '50': 56, '51': 8, '52': 36, '53': 4, '54': 324, '55': 162, '56': 110, '57': 6, '6': 78, '7': 152, '8': 8, '9': 2}
Total images: 4174


In [25]:
# Perform augmentation
# NOTE: this cell is not idempotent. The merge below moves the augmented files
# into data_path, so running the cell again also augments the aug_* files
# produced by the previous run and grows the dataset again.
augmented_path = os.path.join(dataset_path, 'AUGMENTED_DATA')
augment_images(data_path, augmented_path, num_augmented=2)

# Merge augmented data back to original dataset
for label in os.listdir(augmented_path):
    aug_label_path = os.path.join(augmented_path, label)
    if not os.path.isdir(aug_label_path):
        continue  # ignore stray files; only label folders hold images
    orig_label_path = os.path.join(data_path, label)
    os.makedirs(orig_label_path, exist_ok=True)

    for img_name in os.listdir(aug_label_path):
        src = os.path.join(aug_label_path, img_name)
        dst = os.path.join(orig_label_path, img_name)
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises FileExistsError on Windows in that case.
        os.replace(src, dst)

Augmenting Data: 100%|█████████████████████████████████████████████████████████████████| 58/58 [01:34<00:00,  1.62s/it]


In [26]:
# Count images after augmentation
print("\nDataset statistics after augmentation:")
final_counts, final_total = count_images(data_path)
print("Class-wise counts:", final_counts)
print("Total images:", final_total)

# Print percentage increase relative to the count recorded before augmentation.
# Guard the division: an empty starting dataset has no defined percentage.
if original_total:
    percentage_increase = ((final_total - original_total) / original_total) * 100
    print(f"Percentage increase in dataset size: {percentage_increase:.2f}%")
else:
    percentage_increase = None
    print("Percentage increase in dataset size: n/a (no images before augmentation)")


Dataset statistics after augmentation:
Class-wise counts: {'0': 354, '1': 120, '10': 210, '11': 414, '12': 288, '13': 108, '14': 384, '15': 66, '16': 426, '17': 390, '18': 24, '19': 12, '2': 240, '20': 54, '21': 36, '22': 54, '23': 42, '24': 300, '25': 12, '26': 378, '27': 84, '28': 1338, '29': 132, '3': 780, '30': 450, '31': 126, '32': 42, '33': 12, '34': 78, '35': 468, '36': 120, '37': 174, '38': 90, '39': 102, '4': 294, '40': 96, '41': 54, '42': 96, '43': 246, '44': 90, '45': 72, '46': 54, '47': 36, '48': 30, '49': 126, '5': 582, '50': 168, '51': 24, '52': 108, '53': 12, '54': 972, '55': 486, '56': 330, '57': 18, '6': 234, '7': 456, '8': 24, '9': 6}
Total images: 12522
Percentage increase in dataset size: 200.00%
