In [1]:
# Mount Google Drive at /content/drive; later cells read the dataset archive
# from, and copy the training backup to, paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install ultralytics opencv-python matplotlib tensorflow numpy pandas

Collecting ultralytics
  Downloading ultralytics-8.3.127-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading n

In [3]:
!unzip "/content/drive/MyDrive/Traffic_Sign_Recognition/ML_Dataset/gtsdb.zip" -d "/content/gtsdb"

Archive:  /content/drive/MyDrive/Traffic_Sign_Recognition/ML_Dataset/gtsdb.zip
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00000.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00001.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00002.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00003.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00004.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00005.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00006.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00007.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00008.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00009.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00010.ppm  
  inflating: /content/gtsdb/TestIJCNN2013/TestIJCNN2013Download/00011.ppm  
  inflati

In [None]:
import os
import shutil
from PIL import Image
import torch
from ultralytics import YOLO
import pandas as pd
from collections import Counter


def _parse_gtsdb_annotations(input_file):
    """Read a ``;``-separated annotation file into a per-image dict.

    Each usable line has six fields: ``image;x_min;y_min;x_max;y_max;class_id``.
    Lines with a different field count are ignored silently; lines whose
    numeric fields do not parse as integers are reported and skipped.

    Returns:
        dict mapping image file name -> list of
        ``(x_min, y_min, x_max, y_max, class_id)`` integer tuples, in file
        order. An image whose lines were all malformed maps to an empty list.
    """
    annotations = {}
    with open(input_file, "r") as f:
        for line in f:
            parts = line.strip().split(";")
            if len(parts) != 6:
                continue

            # Register the image before parsing so that an image with only
            # malformed lines is still reported as skipped by the caller.
            boxes = annotations.setdefault(parts[0], [])

            try:
                x_min, y_min, x_max, y_max, class_id = map(int, parts[1:])
            except ValueError:
                print(f"Skipping malformed line: {line.strip()}")
                continue
            boxes.append((x_min, y_min, x_max, y_max, class_id))
    return annotations


def _bbox_to_yolo_line(bbox, img_width, img_height, img_file):
    """Convert one pixel-space box to a ``class xc yc w h`` label line.

    Coordinates are clamped to the image, then the centre and size are
    divided by the image dimensions. Returns ``None`` (after printing the
    reason) when the clamped box is empty or the normalised values fall
    outside the accepted range.
    """
    x_min, y_min, x_max, y_max, class_id = bbox

    x_min = max(0, min(x_min, img_width - 1))
    y_min = max(0, min(y_min, img_height - 1))
    x_max = max(0, min(x_max, img_width - 1))
    y_max = max(0, min(y_max, img_height - 1))

    width = x_max - x_min
    height = y_max - y_min
    if width <= 0 or height <= 0:
        print(f"Invalid bbox in {img_file}: {x_min},{y_min},{x_max},{y_max}")
        return None

    x_center = (x_min + width / 2) / img_width
    y_center = (y_min + height / 2) / img_height
    norm_width = width / img_width
    norm_height = height / img_height

    if not (0 <= x_center <= 1 and 0 <= y_center <= 1 and
            0 < norm_width <= 1 and 0 < norm_height <= 1):
        print(f"Invalid normalized bbox in {img_file}: {x_center:.2f},{y_center:.2f},{norm_width:.2f},{norm_height:.2f}")
        return None

    return f"{class_id} {x_center:.6f} {y_center:.6f} {norm_width:.6f} {norm_height:.6f}"


def prepare_gtsdb_dataset(
    input_file="/content/gtsdb/gt.txt",
    base_images_dir="/content/gtsdb/TrainIJCNN2013/TrainIJCNN2013",
    output_dir="/content/gtsdb_yolo",
    val_every=10,
):
    """Convert annotated images into ``train``/``val`` folders of JPEGs and label files.

    For every image listed in ``input_file`` the function writes
    ``<split>/images/<stem>.jpg`` and ``<split>/labels/<stem>.txt`` under
    ``output_dir``. Images that are missing on disk, fail to process, or end
    up with no valid box are skipped and counted.

    Args:
        input_file: Path to the ``;``-separated annotation file.
        base_images_dir: Directory containing the images named in ``input_file``.
        output_dir: Root of the generated dataset (created if needed).
        val_every: Every ``val_every``-th annotated image (by order of first
            appearance in ``input_file``, starting with the first) goes to
            ``val``; the rest go to ``train``. The default of 10 gives
            roughly a 90/10 split.

    Returns:
        ``output_dir``.

    Raises:
        ValueError: If ``val_every`` is smaller than 1.
        OSError: If ``input_file`` cannot be opened or the output
            directories cannot be created.
    """
    if val_every < 1:
        raise ValueError("val_every must be >= 1")

    dirs = {
        'train': os.path.join(output_dir, 'train'),
        'val': os.path.join(output_dir, 'val')
    }

    for d in dirs.values():
        os.makedirs(os.path.join(d, 'images'), exist_ok=True)
        os.makedirs(os.path.join(d, 'labels'), exist_ok=True)

    annotations = _parse_gtsdb_annotations(input_file)

    processed_count = 0
    skipped_count = 0

    for i, (img_file, bboxes) in enumerate(annotations.items()):
        img_path = os.path.join(base_images_dir, img_file)

        if not os.path.exists(img_path):
            print(f"Warning: Missing image {img_file}")
            skipped_count += 1
            continue

        # Derive output names from the stem so that inputs with an extension
        # other than lowercase ".ppm" still get a ".txt" label and ".jpg" image.
        stem = os.path.splitext(img_file)[0]
        split = 'val' if i % val_every == 0 else 'train'

        try:
            with Image.open(img_path) as img:
                img_width, img_height = img.size

                valid_bboxes = []
                for bbox in bboxes:
                    label_line = _bbox_to_yolo_line(bbox, img_width, img_height, img_file)
                    if label_line is not None:
                        valid_bboxes.append(label_line)

                if valid_bboxes:
                    with open(os.path.join(dirs[split], 'labels', stem + ".txt"), 'w') as f:
                        f.write("\n".join(valid_bboxes))

                    img.convert("RGB").save(os.path.join(dirs[split], 'images', stem + ".jpg"), "JPEG")
                    processed_count += 1
                else:
                    print(f"Skipping {img_file} - no valid bboxes")
                    skipped_count += 1

        except Exception as e:
            # Best effort: one unreadable image must not abort the whole conversion.
            print(f"Error processing {img_file}: {str(e)}")
            skipped_count += 1

    print(f"✅ Dataset preparation complete! Processed: {processed_count}, Skipped: {skipped_count}")
    return output_dir



def analyze_dataset(dataset_path):
    """Print a consistency and class-balance report for a train/val dataset.

    Expects ``<dataset_path>/<split>/images`` and ``<dataset_path>/<split>/labels``
    for ``split`` in ``train`` and ``val``. The report contains:

    * image and label file counts per split, and any image/label files
      without a same-named counterpart;
    * a per-class table of box counts in train and val, covering every class
      id seen in either split;
    * warnings for classes absent from one split, strongly unbalanced
      between splits, or with fewer than 5 boxes in total.

    Args:
        dataset_path: Root directory of the dataset.

    Returns:
        None. The report is printed.

    Raises:
        OSError: If one of the expected directories does not exist.
    """
    image_exts = ('.jpg', '.png', '.jpeg')

    print("\n📊 Dataset Analysis:")

    for split in ['train', 'val']:
        img_dir = os.path.join(dataset_path, split, 'images')
        label_dir = os.path.join(dataset_path, split, 'labels')

        # List each directory once and use the same extension set for the
        # count and for the pairing check, so the two cannot disagree.
        image_names = [f for f in os.listdir(img_dir) if f.endswith(image_exts)]
        label_names = [f for f in os.listdir(label_dir) if f.endswith('.txt')]
        print(f"{split.upper()}: {len(image_names)} images, {len(label_names)} label files")

        img_files = {os.path.splitext(f)[0] for f in image_names}
        label_files = {os.path.splitext(f)[0] for f in label_names}

        missing_labels = img_files - label_files
        missing_images = label_files - img_files

        if missing_labels:
            print(f"  ⚠️ Missing labels for {len(missing_labels)} images")
        if missing_images:
            print(f"  ⚠️ Missing images for {len(missing_images)} labels")

    def count_classes(label_dir):
        """Count boxes per class id over all ``.txt`` files in ``label_dir``."""
        class_counts = Counter()
        for label_file in os.listdir(label_dir):
            if not label_file.endswith('.txt'):
                continue
            try:
                with open(os.path.join(label_dir, label_file)) as f:
                    for line in f:
                        parts = line.strip().split()
                        if len(parts) == 5:  # class, x, y, w, h
                            class_id = int(parts[0])
                            class_counts[class_id] += 1
            except Exception as e:
                # Report and move on; one bad label file should not hide the rest.
                print(f"Error reading {label_file}: {e}")
        return class_counts

    print("\n🔢 Class Distribution:")
    train_counts = count_classes(os.path.join(dataset_path, 'train', 'labels'))
    val_counts = count_classes(os.path.join(dataset_path, 'val', 'labels'))

    # Use the union of class ids: a class that only occurs in the validation
    # split must appear in the table (with a train count of 0), not vanish.
    class_ids = sorted(set(train_counts) | set(val_counts))
    class_df = pd.DataFrame({
        'Class ID': class_ids,
        'Train Count': [train_counts.get(k, 0) for k in class_ids],
        'Val Count': [val_counts.get(k, 0) for k in class_ids]
    }, dtype=int)

    print("\nClass Distribution Summary:")
    print(class_df.to_string(index=False))

    print("\n🔍 Potential Issues:")

    missing_val = class_df[class_df['Val Count'] == 0]
    if not missing_val.empty:
        print(f"- {len(missing_val)} classes missing in validation set")

    missing_train = class_df[class_df['Train Count'] == 0]
    if not missing_train.empty:
        print(f"- {len(missing_train)} classes missing in training set")

    imbalance = class_df[
        (class_df['Train Count'] > 10 * class_df['Val Count']) |
        (class_df['Val Count'] > 10 * class_df['Train Count'])]
    if not imbalance.empty:
        print(f"- Significant train/val imbalance in {len(imbalance)} classes")

    rare_classes = class_df[class_df['Train Count'] + class_df['Val Count'] < 5]
    if not rare_classes.empty:
        print(f"- {len(rare_classes)} rare classes (<5 total samples)")


def create_yaml_config(dataset_path):
    """Write ``gtsdb.yaml`` into ``dataset_path`` and return its path.

    The file starts with a blank line, then lists ``path``, ``train`` and
    ``val`` (the latter two as ``<dataset_path>/<split>/images``), followed
    by a ``names`` mapping of class ids 0-42 to sign names.

    Args:
        dataset_path: Dataset root; also the directory the file is written to.

    Returns:
        Path of the written ``gtsdb.yaml``.
    """
    # Index in this tuple == class id written to the YAML file.
    class_names = (
        "Speed limit (20km/h)",
        "Speed limit (30km/h)",
        "Speed limit (50km/h)",
        "Speed limit (60km/h)",
        "Speed limit (70km/h)",
        "Speed limit (80km/h)",
        "End of speed limit (80km/h)",
        "Speed limit (100km/h)",
        "Speed limit (120km/h)",
        "No passing",
        "No passing for vehicles over 3.5 metric tons",
        "Right-of-way at next intersection",
        "Priority road",
        "Yield",
        "Stop",
        "No vehicles",
        "Vehicles over 3.5 metric tons prohibited",
        "No entry",
        "General caution",
        "Dangerous curve to the left",
        "Dangerous curve to the right",
        "Double curve",
        "Bumpy road",
        "Slippery road",
        "Road narrows on the right",
        "Road work",
        "Traffic signals",
        "Pedestrians",
        "Children crossing",
        "Bicycles crossing",
        "Beware of ice/snow",
        "Wild animals crossing",
        "End of all speed and passing limits",
        "Turn right ahead",
        "Turn left ahead",
        "Ahead only",
        "Go straight or right",
        "Go straight or left",
        "Keep right",
        "Keep left",
        "Roundabout mandatory",
        "End of no passing",
        "End of no passing by vehicles over 3.5 metric tons",
    )

    header_lines = [
        "",
        f"path: {dataset_path}",
        f"train: {dataset_path}/train/images",
        f"val: {dataset_path}/val/images",
        "",
        "# GTSDB 43 classes",
        "names:",
    ]
    name_lines = [f"  {class_id}: {label}" for class_id, label in enumerate(class_names)]
    yaml_text = "\n".join(header_lines + name_lines) + "\n"

    target = os.path.join(dataset_path, "gtsdb.yaml")
    with open(target, 'w') as handle:
        handle.write(yaml_text)

    print(f"✅ YAML config created at {target}")
    return target


def train_model(config_path):
    """Train a model loaded from ``yolov8m.pt`` on the dataset described by ``config_path``.

    Uses CUDA device ``'0'`` when ``torch.cuda.is_available()`` is true and
    the CPU otherwise; batch size (16 vs 8) and dataloader workers (4 vs 0)
    follow that choice.

    Args:
        config_path: Path to the dataset YAML file passed as ``data``.

    Returns:
        Whatever ``model.train(...)`` returns.
    """
    use_gpu = torch.cuda.is_available()
    device = '0' if use_gpu else 'cpu'
    print(f"\n🚀 Training on {'GPU' if use_gpu else 'CPU'}")

    detector = YOLO("yolov8m.pt")

    # Core run settings.
    run_settings = dict(
        data=config_path,
        epochs=80,
        imgsz=640,
        batch=16 if use_gpu else 8,
        device=device,
        optimizer='Adam',
        lr0=0.001,
        patience=10,
        workers=4 if use_gpu else 0,
        verbose=True,
    )

    # Augmentation settings: rotation, shear, perspective and vertical flip
    # are set to 0; translate, scale and horizontal flip are non-zero.
    augmentation = dict(
        degrees=0.0,
        translate=0.1,
        scale=0.5,
        shear=0.0,
        perspective=0.0,
        flipud=0.0,
        fliplr=0.5,
    )

    return detector.train(**run_settings, **augmentation)


# Pipeline driver: convert the dataset, report on it, write the dataset YAML,
# then train. Each step consumes the value returned by the previous one.
if __name__ == "__main__":
    print("🚀 Preparing dataset...")
    # Root directory of the converted train/val dataset.
    dataset_path = prepare_gtsdb_dataset()

    print("\n🔍 Analyzing dataset...")
    # Prints the report only; nothing is returned.
    analyze_dataset(dataset_path)

    print("\n📄 Creating YAML config...")
    # Path of the gtsdb.yaml written inside dataset_path.
    config_path = create_yaml_config(dataset_path)

    print("\n🏋️ Training model...")
    train_results = train_model(config_path)

    print("\n🎉 Training completed successfully!")

🚀 Preparing dataset...
✅ Dataset preparation complete! Processed: 506, Skipped: 0

🔍 Analyzing dataset...

📊 Dataset Analysis:
TRAIN: 455 images, 455 label files
VAL: 51 images, 51 label files

🔢 Class Distribution:

Class Distribution Summary:
 Class ID  Train Count  Val Count
        0            3          1
        1           43          5
        2           57          2
        3           17          4
        4           29          2
        5           34          3
        6           15          2
        7           33          4
        8           40          7
        9           29          3
       10           56          7
       11           23          3
       12           52          2
       13           46          6
       14           18          4
       15           10          0
       16            7          0
       17           25          0
       18           21          6
       19            1          1
       20            7          2
       

[34m[1mtrain: [0mScanning /content/gtsdb_yolo/train/labels.cache... 455 images, 0 backgrounds, 0 corrupt: 100%|██████████| 455/455 [00:00<?, ?it/s]

[34m[1mtrain: [0m/content/gtsdb_yolo/train/images/00340.jpg: 1 duplicate labels removed
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 824.8±779.2 MB/s, size: 140.1 KB)



[34m[1mval: [0mScanning /content/gtsdb_yolo/val/labels.cache... 51 images, 0 backgrounds, 0 corrupt: 100%|██████████| 51/51 [00:00<?, ?it/s]


Plotting labels to runs/detect/train2/labels.jpg... 
[34m[1moptimizer:[0m Adam(lr=0.001, momentum=0.937) with parameter groups 77 weight(decay=0.0), 84 weight(decay=0.0005), 83 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train2[0m
Starting training for 80 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/80         0G      1.893      10.86      1.095         30        640:   2%|▏         | 1/57 [00:56<52:56, 56.72s/it]

In [5]:
# Archive the training output folder and copy the archive to Google Drive.
# Relies on `os` and `shutil` imported in an earlier cell.
backup_dir = "/content/drive/MyDrive/yolov8_gtsdb_backup"
source_folder = "/content/runs"
zip_path = "/content/runs_backup"

os.makedirs(backup_dir, exist_ok=True)

# make_archive appends ".zip" to the base name and returns the path of the
# archive it wrote, which is then copied into the Drive backup folder.
shutil.copy(shutil.make_archive(zip_path, 'zip', source_folder), backup_dir)

print("✅ Zipped 'runs' folder uploaded to Google Drive!")

✅ Zipped 'runs' folder uploaded to Google Drive!
