## **1. Install and Set Up Dependencies**

In [None]:
# Install the third-party packages imported by the augmentation cell
# (the leading "!" runs the line as a shell command in the notebook).
!pip install albumentations opencv-python tqdm
# Mount Google Drive at /content/drive; the dataset path configured in the
# next cell lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## **2. Augmentation, Balancing, and ZIP**

In [None]:
import os
import cv2
import random
import albumentations as A
from tqdm import tqdm
import zipfile
from collections import defaultdict

# === CONFIG ===
# Paths and targets used by the rest of this cell; edit before running.
input_root = "/content/drive/MyDrive/x/y"     # Original dataset folder (one sub-folder per class); adjust to your Drive path
output_root = "/content/augmented_dataset"    # Output folder; gets one sub-folder per class
zip_path = "/content/augmented_dataset.zip"   # Final zip file location
target_images_per_class = 500                 # Desired number of images per class (originals + augmented)
image_size = 1280                             # Intended output side length in pixels; keep in sync with the Resize step of the pipeline

# === AUGMENTATION PIPELINE ===
# Light, label-preserving augmentations applied in order to an RGB image.
# Every step except the final Resize is applied with the probability `p`
# given below, so a single call may leave the image almost unchanged.
# NOTE(review): recent Albumentations releases document `std_range` instead of
# `var_limit` for GaussNoise and recommend Affine over ShiftScaleRotate -
# confirm against the version that `pip install albumentations` resolves to.
transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.3),
    A.RandomBrightnessContrast(brightness_limit=0.15, contrast_limit=0.15, p=0.3),
    A.Rotate(limit=10, p=0.2),  # Light rotation ±10 degrees
    A.GaussNoise(var_limit=(5.0, 15.0), p=0.1),  # Light noise
    A.RandomScale(scale_limit=0.1, p=0.2),  # Light zoom in/out (changes the image size)
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.05, rotate_limit=5, p=0.2),
    # Always last: bring every output back to one fixed square size. Driven by
    # the config value so that changing `image_size` actually takes effect.
    A.Resize(image_size, image_size),
])

# === AUGMENTATION AND BALANCING PROCESS ===
# For every class sub-folder of `input_root`:
#   1. copy the readable original images into `output_root/<class>`;
#   2. generate augmented variants of randomly chosen originals until the
#      class holds `target_images_per_class` images.
os.makedirs(output_root, exist_ok=True)

for class_name in os.listdir(input_root):
    class_input_path = os.path.join(input_root, class_name)
    # Stray files in the dataset root (e.g. .DS_Store, a README) are not
    # classes; calling os.listdir() on them would raise NotADirectoryError.
    if not os.path.isdir(class_input_path):
        continue

    class_output_path = os.path.join(output_root, class_name)
    os.makedirs(class_output_path, exist_ok=True)

    # Take all the original pictures
    image_files = [
        f for f in os.listdir(class_input_path)
        if f.lower().endswith(('.jpg', '.jpeg', '.png'))
    ]
    original_count = len(image_files)

    print(f"\nClass '{class_name}': {original_count} ori img")

    # Copy the originals to the output folder and remember which ones are
    # really present there. Only those count towards the class total and are
    # used as augmentation sources; a corrupt file must not be counted as an
    # image the class "already has".
    usable_files = []
    for f in image_files:
        src = os.path.join(class_input_path, f)
        dst = os.path.join(class_output_path, f)
        if os.path.exists(dst):
            # Already copied by an earlier run.
            usable_files.append(f)
            continue
        img = cv2.imread(src)
        if img is None:
            print(f"Skipping unreadable image: {src}")
            continue
        if cv2.imwrite(dst, img):
            usable_files.append(f)
        else:
            print(f"Could not write image: {dst}")

    # Calculate how much additional augmentation is needed
    need_aug = target_images_per_class - len(usable_files)
    if need_aug <= 0:
        print("OK")
        continue

    # random.choice() raises IndexError on an empty list, so an empty (or
    # fully unreadable) class is reported and skipped instead of crashing.
    if not usable_files:
        print(f"No readable images in class '{class_name}', nothing to augment")
        continue

    print(f"Adding {need_aug} augmentation...")

    # Augmentation is performed from the original images available. The loop
    # counts produced images rather than attempts, so a failed read does not
    # leave the class short of the target.
    produced = 0
    with tqdm(total=need_aug, desc=f"Augmenting {class_name}") as progress:
        while produced < need_aug and usable_files:
            img_name = random.choice(usable_files)
            img_path = os.path.join(class_input_path, img_name)
            image = cv2.imread(img_path)
            if image is None:
                # Drop the bad source so it cannot be picked again; the loop
                # ends on its own if no usable source is left.
                usable_files.remove(img_name)
                continue
            # OpenCV loads BGR; the pipeline is fed RGB and the result is
            # converted back before writing.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            aug_image = transform(image=image)['image']
            aug_image = cv2.cvtColor(aug_image, cv2.COLOR_RGB2BGR)
            new_filename = f"{os.path.splitext(img_name)[0]}_aug{produced + 1}.jpg"
            cv2.imwrite(os.path.join(class_output_path, new_filename), aug_image)
            produced += 1
            progress.update(1)

    if produced < need_aug:
        print(f"Only {produced} of {need_aug} augmented images created for class '{class_name}'")

# === ZIP ALL CLASS FOLDERS ===
def zip_folder(folder_path, output_zip):
    """Pack every file below ``folder_path`` into a new ZIP archive.

    Args:
        folder_path: Directory whose contents are archived recursively.
        output_zip: Path of the archive to create; an existing file at this
            path is overwritten.

    Entries are stored under their path relative to ``folder_path``, so the
    archive unpacks to the folder's contents rather than to the folder itself.
    Directories that contain no files produce no entry.
    """
    # The archive file already exists on disk while the tree is walked. If it
    # is located inside ``folder_path`` it must be skipped, otherwise a
    # half-written copy of the archive would be stored inside itself.
    archive_abs = os.path.abspath(output_zip)
    with zipfile.ZipFile(output_zip, 'w') as zipf:
        for root, dirs, files in os.walk(folder_path):
            # Sorting in place makes os.walk descend in a stable order, which
            # keeps the entry order of the archive reproducible.
            dirs.sort()
            for file in sorted(files):
                file_path = os.path.join(root, file)
                if os.path.abspath(file_path) == archive_abs:
                    continue
                arcname = os.path.relpath(file_path, folder_path)
                zipf.write(file_path, arcname)

# Pack the whole augmented dataset folder into one ZIP archive and report
# where the archive was written.
zip_folder(output_root, zip_path)
print(f"\nZIP done: {zip_path}")
