In [None]:
import os
import zipfile
import cv2
import albumentations as A
import random
from tqdm import tqdm
from collections import defaultdict
import uuid

# ========== 1. Extract Dataset ==========
# Unpack the dataset archive into the working dataset directory.
zip_path = "/content/data.zip"
extract_path = "/content/data_train"

with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall(extract_path)

# Source folders (read) and "balanced" output folders (written by later steps).
images_path = os.path.join(extract_path, "images")
labels_path = os.path.join(extract_path, "labels")
final_images_path = os.path.join(extract_path, "balanced/images")
final_labels_path = os.path.join(extract_path, "balanced/labels")

# Create both output folders; an already existing folder is not an error.
for output_dir in (final_images_path, final_labels_path):
    os.makedirs(output_dir, exist_ok=True)

# ========== 2. Define Augmentation Pipeline ==========
# Individual augmentation steps, each applied independently with its own probability `p`.
augmentation_steps = [
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.Blur(p=0.2),
    A.HueSaturationValue(p=0.3),
    A.CLAHE(p=0.3),
    A.GaussNoise(p=0.3),
    A.ShiftScaleRotate(shift_limit=0.05, scale_limit=0.1, rotate_limit=15, p=0.5),
]

# Boxes are passed in YOLO format; their class ids travel in the `class_labels` field.
bbox_settings = A.BboxParams(
    format='yolo',
    label_fields=['class_labels'],
    min_visibility=0.2,
)

transform = A.Compose(augmentation_steps, bbox_params=bbox_settings)

# ========== 3. Organize images by class ==========
# Map each class id to the image files whose label file lists that class first.
# Images without a label file, or with a label file that has no content, are skipped.
class_to_files = defaultdict(list)

# Sorted so the grouping order does not depend on the filesystem's listing order.
for file in sorted(os.listdir(images_path)):
    # Case-insensitive match so files such as "IMG_1.JPG" are not silently ignored.
    if not file.lower().endswith((".jpg", ".png", ".jpeg")):
        continue

    label_file = os.path.join(labels_path, file.rsplit(".", 1)[0] + ".txt")
    if not os.path.exists(label_file):
        continue

    with open(label_file, "r") as f:
        # Drop blank lines so a leading empty line cannot raise IndexError below.
        rows = [line.split() for line in f if line.strip()]

    if rows:
        # Take the first class in the label; float() tolerates ids written as "3.0".
        cls = int(float(rows[0][0]))
        class_to_files[cls].append(file)

# ========== 4. Balance each class to 30 ==========
# For every class: downsample to TARGET images when there are too many, otherwise
# keep all originals and top up with augmented copies of randomly chosen originals.
TARGET = 30
# How many times to re-run the augmentation when it drops every bounding box.
MAX_AUG_ATTEMPTS = 10

for cls, files in class_to_files.items():
    count = len(files)

    if count >= TARGET:
        chosen_files = random.sample(files, TARGET)
    else:
        # Entries at index >= count are the duplicates that get augmented below.
        chosen_files = files + [random.choice(files) for _ in range(TARGET - count)]

    # Process and save
    for idx, file in enumerate(chosen_files):
        image_file = os.path.join(images_path, file)
        label_file = os.path.join(labels_path, file.rsplit(".", 1)[0] + ".txt")

        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable/corrupt files.
            print(f"❌ Could not read image: {image_file}")
            continue

        # Load original bboxes, skipping blank or malformed lines.
        bboxes, class_labels = [], []
        with open(label_file, "r") as f:
            for line in f:
                parts = line.split()
                if len(parts) != 5:
                    continue
                cls_id, x, y, bw, bh = map(float, parts)
                bboxes.append([x, y, bw, bh])
                class_labels.append(int(cls_id))

        # If this is an augmented duplicate
        if idx >= count:
            # Retry when the transform removed every box, so the copy still contains
            # at least one object; after MAX_AUG_ATTEMPTS the last result is used as-is.
            for _ in range(MAX_AUG_ATTEMPTS):
                augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
                if len(augmented["bboxes"]) > 0 or not bboxes:
                    break
            image = augmented["image"]
            bboxes = augmented["bboxes"]
            class_labels = augmented["class_labels"]

        # Save new files; the random suffix keeps names unique across duplicates.
        new_name = f"class{cls}_{idx}_{uuid.uuid4().hex[:6]}.jpg"
        cv2.imwrite(os.path.join(final_images_path, new_name), image)

        new_label = os.path.join(final_labels_path, new_name.replace(".jpg", ".txt"))
        with open(new_label, "w") as f:
            for cls_id, bbox in zip(class_labels, bboxes):
                x, y, bw, bh = bbox
                # int() guards against the augmentation returning class ids as floats.
                f.write(f"{int(cls_id)} {x:.6f} {y:.6f} {bw:.6f} {bh:.6f}\n")

# ========== 5. Generate Background Images ==========
# Background images are written with an EMPTY label file, so they must not contain
# annotated objects. Only images with no label file, or with an empty one, are used
# as sources; drawing from annotated images would mark real objects as background.
BACKGROUND_COUNT = 30  # number of object-free (background) images to generate

all_images = [f for f in sorted(os.listdir(images_path)) if f.lower().endswith((".jpg", ".png", ".jpeg"))]

background_candidates = []
for file in all_images:
    label_file = os.path.join(labels_path, file.rsplit(".", 1)[0] + ".txt")
    if os.path.exists(label_file):
        with open(label_file, "r") as f:
            if f.read().strip():
                continue  # the image has annotated objects, so it is not a background
    background_candidates.append(file)

if not background_candidates:
    print("⚠️ No object-free images found; skipping background generation.")
else:
    for i in range(BACKGROUND_COUNT):
        file = random.choice(background_candidates)
        image_file = os.path.join(images_path, file)

        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread returns None (no exception) for unreadable/corrupt files.
            print(f"❌ Could not read image: {image_file}")
            continue

        # Apply augmentation for variety
        augmented = transform(image=image, bboxes=[], class_labels=[])
        image = augmented["image"]

        new_name = f"background_{i}_{uuid.uuid4().hex[:6]}.jpg"
        cv2.imwrite(os.path.join(final_images_path, new_name), image)

        # Empty label file (contains no boxes)
        new_label = os.path.join(final_labels_path, new_name.replace(".jpg", ".txt"))
        with open(new_label, "w"):
            pass

print("✅ Balancing finished + Background images created!")

  original_init(self, **validated_kwargs)


✅ Balancing finished + Background images created!


In [None]:
# Unpack the second dataset archive into its own directory.
zip_path = "/content/data2.zip"
extract_path = "/content/data_train2"

with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall(extract_path)

# Source folders and "balanced" output folders under the second dataset.
images_path = os.path.join(extract_path, "images")
labels_path = os.path.join(extract_path, "labels")
final_images_path = os.path.join(extract_path, "balanced/images")
final_labels_path = os.path.join(extract_path, "balanced/labels")

# Create both output folders; an already existing folder is not an error.
for output_dir in (final_images_path, final_labels_path):
    os.makedirs(output_dir, exist_ok=True)

In [None]:
import os
import cv2
import albumentations as A
from tqdm import tqdm

# ===================== Paths =====================
# Inputs come from the second dataset; outputs go into the first dataset's
# "balanced" folders.
images_path = "/content/data_train2/images"
labels_path = "/content/data_train2/labels"
output_images = "/content/data_train/balanced/images"
output_labels = "/content/data_train/balanced/labels"

for output_dir in (output_images, output_labels):
    os.makedirs(output_dir, exist_ok=True)

# ===================== Augmentation Pipeline =====================
# Individual augmentation steps, each applied independently with its own probability `p`.
augmentation_steps = [
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.5),
    A.Rotate(limit=20, p=0.5),
    A.Blur(p=0.2),
    A.HueSaturationValue(p=0.3),
]

# Boxes are passed in YOLO format; their class ids travel in the `class_labels` field.
bbox_settings = A.BboxParams(format='yolo', label_fields=['class_labels'])

transform = A.Compose(augmentation_steps, bbox_params=bbox_settings)

# ===================== Process Each Image =====================
# For every readable image that has a label file, write 8 augmented image/label
# pairs named "<stem>_aug_<i>.jpg" / "<stem>_aug_<i>.txt" into the output folders.
image_files = [
    name
    for name in os.listdir(images_path)
    if name.lower().endswith(('.jpg', '.jpeg', '.png'))
]

for file in tqdm(image_files, desc="Processing images"):
    base_name, _extension = os.path.splitext(file)
    image_path = os.path.join(images_path, file)
    label_path = os.path.join(labels_path, base_name + ".txt")

    # Unreadable images are reported and skipped.
    image = cv2.imread(image_path)
    if image is None:
        print(f"❌ Could not read image: {image_path}")
        continue

    # Images without a label file are reported and skipped.
    if not os.path.exists(label_path):
        print(f"⚠️ Label not found: {label_path}, skipping...")
        continue

    # Parse the label file; lines that do not have exactly five fields are ignored.
    bboxes, class_labels = [], []
    try:
        with open(label_path, "r") as label_file:
            for raw_line in label_file:
                fields = raw_line.strip().split()
                if len(fields) == 5:
                    cls_id, x, y, w, h = map(float, fields)
                    bboxes.append([x, y, w, h])
                    class_labels.append(int(cls_id))
    except Exception as e:
        print(f"❌ Error reading label {label_path}: {e}")
        continue

    # Generate 8 augmented versions
    for i in range(8):
        augmented = transform(image=image, bboxes=bboxes, class_labels=class_labels)
        output_stem = f"{base_name}_aug_{i}"

        # Save image
        cv2.imwrite(os.path.join(output_images, output_stem + ".jpg"), augmented["image"])

        # Save label: one "<class> <x> <y> <w> <h>" line per surviving box
        with open(os.path.join(output_labels, output_stem + ".txt"), "w") as label_out:
            for cls_id, (x, y, w, h) in zip(augmented["class_labels"], augmented["bboxes"]):
                label_out.write(f"{cls_id} {x:.6f} {y:.6f} {w:.6f} {h:.6f}\n")

print("✅ Done! Each image now has 8 augmented versions.")
print(f"Augmented images saved to: {output_images}")
print(f"Augmented labels saved to: {output_labels}")

Processing images: 100%|██████████| 14/14 [00:01<00:00,  8.94it/s]

✅ Done! Each image now has 8 augmented versions.
Augmented images saved to: /content/data_train/balanced/images
Augmented labels saved to: /content/data_train/balanced/labels





In [None]:
# Download the train/validation split helper and run it on the balanced dataset
# (90% of the images go to train, the rest to validation).
# The script is saved under a plain Linux path: a Windows-style target such as
# D:\yolo\... has its backslashes stripped by the shell and ends up as an oddly
# named file in the working directory.
!wget -O /content/train_val_split.py https://raw.githubusercontent.com/EdjeElectronics/Train-and-Deploy-YOLO-Models/refs/heads/main/utils/train_val_split.py
!python /content/train_val_split.py --datapath="/content/data_train/balanced" --train_pct=0.9

--2025-09-23 08:51:37--  https://raw.githubusercontent.com/EdjeElectronics/Train-and-Deploy-YOLO-Models/refs/heads/main/utils/train_val_split.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3203 (3.1K) [text/plain]
Saving to: ‘D:yolotrain_val_split.py’


2025-09-23 08:51:37 (60.8 MB/s) - ‘D:yolotrain_val_split.py’ saved [3203/3203]

Created folder at /content/data/train/images.
Created folder at /content/data/train/labels.
Created folder at /content/data/validation/images.
Created folder at /content/data/validation/labels.
Number of image files: 502
Number of annotation files: 502
Images moving to train: 451
Images moving to validation: 51


In [None]:
import yaml
import os

def create_data_yaml(path_to_classes_txt, path_to_data_yaml):
  """Write a dataset config YAML built from a classes.txt labelmap.

  Args:
    path_to_classes_txt: Path to a text file with one class name per line;
      blank lines are ignored and surrounding whitespace is stripped.
    path_to_data_yaml: Path of the YAML file to create (overwritten if present).

  Returns:
    None. If the labelmap file does not exist, a message is printed and no
    YAML file is written.
  """
  # Guard clause: nothing to do without a labelmap.
  if not os.path.exists(path_to_classes_txt):
    print(f'classes.txt file not found! Please create a classes.txt labelmap and move it to {path_to_classes_txt}')
    return None

  # Collect the non-empty, stripped lines as class names.
  with open(path_to_classes_txt, 'r') as labelmap:
    stripped_lines = (raw_line.strip() for raw_line in labelmap)
    classes = [name for name in stripped_lines if name]

  # Key order is preserved in the output because sort_keys=False is passed below.
  data = dict(
      path='data',
      train='train/images',
      val='validation/images',
      nc=len(classes),
      names=classes,
  )

  with open(path_to_data_yaml, 'w') as yaml_file:
    yaml.dump(data, yaml_file, sort_keys=False)
  print(f'Created config file at {path_to_data_yaml}')

  return None

# Define path to classes.txt and run function
path_to_classes_txt = '/content/data_train/classes.txt'
# Write the config where the training command reads it (data="/content/data/data.yaml");
# a bare 'data.yaml' would land in the current working directory instead.
path_to_data_yaml = '/content/data/data.yaml'

# Make sure the target folder exists so this cell also works on its own.
os.makedirs(os.path.dirname(path_to_data_yaml), exist_ok=True)

create_data_yaml(path_to_classes_txt, path_to_data_yaml)

print('\nFile contents:\n')
# Print the generated config; it is absent when classes.txt was not found.
if os.path.exists(path_to_data_yaml):
  with open(path_to_data_yaml, 'r') as f:
    print(f.read())


Created config file at data.yaml

File contents:



In [None]:
# Train a detection model starting from yolo11n.pt for 250 epochs at image size 640.
# Requires the dataset config to exist at /content/data/data.yaml.
!yolo detect train data="/content/data/data.yaml" model=yolo11n.pt epochs=250 imgsz=640

Ultralytics 8.3.203 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/data/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=250, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train2, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=Tr

In [None]:
# Run inference with the trained weights on the validation images and save the results.
# NOTE(review): the run folder "train2" is hard-coded and both paths are relative to
# the working directory; confirm they match the training run that produced best.pt.
!yolo detect predict model=runs/detect/train2/weights/best.pt source=data/validation/images save=True

Traceback (most recent call last):
  File "/usr/local/bin/yolo", line 8, in <module>
    sys.exit(entrypoint())
             ^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ultralytics/cfg/__init__.py", line 960, in entrypoint
    model = YOLO(model, task=task)
            ^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ultralytics/models/yolo/model.py", line 83, in __init__
    super().__init__(model=model, task=task, verbose=verbose)
  File "/usr/local/lib/python3.12/dist-packages/ultralytics/engine/model.py", line 153, in __init__
    self._load(model, task=task)
  File "/usr/local/lib/python3.12/dist-packages/ultralytics/engine/model.py", line 297, in _load
    self.model, self.ckpt = load_checkpoint(weights)
                            ^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/ultralytics/nn/tasks.py", line 1501, in load_checkpoint
    ckpt, weight = torch_safe_load(weight)  # load ckpt
                   ^^^^^^^^^

In [None]:
import glob
from IPython.display import Image, display
# Display up to ten of the saved prediction images, each followed by a blank line.
prediction_images = glob.glob('/content/runs/detect/predict/*.jpg')
for image_path in prediction_images[:10]:
  rendered = Image(filename=image_path, height=400)
  display(rendered)
  print('\n')

In [None]:
# Install Ultralytics into the kernel's environment (%pip targets the running kernel),
# pinned to the version this notebook was run with so results stay reproducible.
# NOTE: this cell sits after the cells that call `yolo`; on a fresh runtime run it first.
%pip install -q ultralytics==8.3.203



Collecting ultralytics
  Downloading ultralytics-8.3.203-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.203-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.203 ultralytics-thop-2.0.17
