## Connects Colab to Google Drive to access the dataset and save results.

In [1]:
from google.colab import drive
import os

# Mount Google Drive inside the Colab VM at the path below.
mount_point = '/content/drive'
drive.mount(mount_point)

Mounted at /content/drive


## Installs the Ultralytics library (YOLO) for training and inference

In [2]:
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.204-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.17-py3-none-any.whl.metadata (14 kB)
Downloading ultralytics-8.3.204-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.17-py3-none-any.whl (28 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.204 ultralytics-thop-2.0.17


## Updates dataset paths in data.yaml so YOLO knows where train/val/test images and labels are stored.

In [3]:
import yaml
import os

# Rewrite data.yaml so the dataset root and the train/val/test image folders
# point at the copy stored on Google Drive.
dataset_path = '/content/drive/MyDrive/SAGER-TASK/dataset'
yaml_path = os.path.join(dataset_path, 'data.yaml')

with open(yaml_path, 'r') as file:
    # safe_load returns None for an empty file; fall back to an empty mapping
    # so the key assignments below cannot raise TypeError.
    data = yaml.safe_load(file) or {}

# Split folders are given relative to data['path'].
data['path'] = dataset_path
data['train'] = 'images/train'
data['val'] = 'images/val'
data['test'] = 'images/test'

# sort_keys=False keeps the existing key order of the file.
with open(yaml_path, 'w') as file:
    yaml.dump(data, file, sort_keys=False)

print(" data.yaml file has been updated ")


 data.yaml file has been updated 


## **Train YOLO11m Model**

Validates the dataset structure and clears cached label files.

Loads YOLO11m pre-trained weights.

Trains with tuned hyperparameters (learning rate, optimizer, augmentations).

Uses regularization (dropout, weight decay) and early stopping.

Validates model at different confidence thresholds.

Saves summary and results in project folder.

In [4]:

from ultralytics import YOLO
import os, glob, yaml
import torch



# --- Experiment configuration: dataset location, data.yaml, output folder ---
DATASET_DIR = "/content/drive/MyDrive/SAGER-TASK/dataset"
YAML_PATH   = os.path.join(DATASET_DIR, "data.yaml")
RUNS_DIR    = "/content/drive/MyDrive/SAGER-TASK/run"
EXP_NAME    = "ResultsYolo11mFinal"
TEST_DIR    = os.path.join(DATASET_DIR, "images/test")

# --- Normalise data.yaml before training ---
print("Validating dataset structure.")
with open(YAML_PATH, "r") as f:
    # An empty file loads as None; start from an empty mapping in that case.
    y = yaml.safe_load(f) or {}

# Force the dataset root and the split folders (relative to the root).
for key, value in (
    ("path", DATASET_DIR),
    ("train", "images/train"),
    ("val", "images/val"),
    ("test", "images/test"),
):
    y[key] = value

# Supply a default single-class map only when the file defines no names.
y.setdefault("names", {0: "rust"})

# sort_keys=False preserves the key order when the file is written back.
with open(YAML_PATH, "w") as f:
    yaml.dump(y, f, sort_keys=False)

# Basic label stats
def count_labels(split_name, dataset_dir=None):
    """Count label files and labelled instances for one dataset split.

    Args:
        split_name: Sub-folder of ``labels`` to scan, e.g. ``"train"``.
        dataset_dir: Dataset root that contains the ``labels`` folder.
            When omitted, the module-level ``DATASET_DIR`` is used.

    Returns:
        A ``(n_files, n_instances)`` tuple: the number of ``.txt`` files in
        ``<dataset_dir>/labels/<split_name>`` and the total number of
        non-blank lines across those files. Returns ``(0, 0)`` when that
        folder does not exist or is not a directory.
    """
    root = DATASET_DIR if dataset_dir is None else dataset_dir
    label_dir = os.path.join(root, "labels", split_name)
    if not os.path.isdir(label_dir):
        return 0, 0
    files = [f for f in os.listdir(label_dir) if f.endswith(".txt")]
    inst = 0
    for lf in files:
        with open(os.path.join(label_dir, lf), "r") as fh:
            # Blank or whitespace-only lines are not annotations; skipping
            # them keeps stray empty lines from inflating the count.
            inst += sum(1 for line in fh if line.strip())
    return len(files), inst

# Per-split label statistics: (number of label files, total label lines).
train_files, train_instances = count_labels("train")
val_files,   val_instances   = count_labels("val")
print(f" Dataset stats: Train={train_files} files, {train_instances} instances | Val={val_files} files, {val_instances} instances")

# Skip the average when the train split has no label files (avoids dividing
# by zero). The average is taken over label files, not over image files.
if train_files:
    avg_inst = train_instances / train_files
    print(f" Avg instances per train image: {avg_inst:.2f}")
    if avg_inst < 1.5:
        print(" LOW INSTANCE COUNT per image — double-check labeling completeness.")


# Delete every *.cache file found anywhere below <dataset>/labels.
cache_pattern = os.path.join(DATASET_DIR, "labels", "**", "*.cache")
for p in glob.glob(cache_pattern, recursive=True):
    os.remove(p)
print("Caches cleared")

# Training
# Training: load the yolo11m.pt checkpoint and train it on the dataset
# described by YAML_PATH. Settings are grouped by purpose and passed to
# model.train() as keyword arguments.
model = YOLO("yolo11m.pt")

# Run length, input size and data loading.
# NOTE(review): rect=True is believed to make Ultralytics disable mosaic and
# dataloader shuffling — confirm, since mosaic=0.15 below may have no effect.
schedule_args = dict(
    epochs=150,
    patience=50,
    imgsz=832,
    batch=8,
    workers=2,
    rect=True,
)

# Optimizer, learning-rate range and warmup.
optimizer_args = dict(
    optimizer="AdamW",
    lr0=0.001,
    lrf=0.01,
    momentum=0.9,
    weight_decay=0.0005,
    warmup_epochs=3,
    warmup_momentum=0.8,
    warmup_bias_lr=0.1,
)

# Colour, geometric and mixing augmentations.
augmentation_args = dict(
    hsv_h=0.015, hsv_s=0.60, hsv_v=0.45,
    degrees=8,
    translate=0.10,
    scale=0.50,
    shear=2.0,
    perspective=0.0005,
    fliplr=0.5,
    mosaic=0.15,
    mixup=0.0,
    erasing=0.30,
)

# Regularization and runtime.
# NOTE(review): Ultralytics documents `dropout` for classification training;
# verify that it changes anything for this detection model.
runtime_args = dict(
    dropout=0.10,
    amp=True,
    device=0,
)

# Output location: results are written to RUNS_DIR/EXP_NAME, and
# exist_ok=True lets a rerun reuse that folder.
output_args = dict(
    project=RUNS_DIR,
    name=EXP_NAME,
    exist_ok=True,
    plots=True,
    save=True,
    val=True,
)

results = model.train(
    data=YAML_PATH,
    **schedule_args,
    **optimizer_args,
    **augmentation_args,
    **runtime_args,
    **output_args,
)


# Validation across confidences: evaluate the best checkpoint at several
# confidence thresholds and remember the one with the highest mAP@0.5.
print("\n Validating at multiple confidence thresholds.")
best_model = YOLO(os.path.join(RUNS_DIR, EXP_NAME, "weights", "best.pt"))

conf_thresholds = [0.10, 0.15, 0.20, 0.25, 0.30, 0.35]
# Fallback values, kept only if no threshold produces a positive mAP50.
best_conf, best_map = 0.25, 0.0

for conf in conf_thresholds:
    print(f"\n--- Testing confidence: {conf} ---")
    val_results = best_model.val(
        data=YAML_PATH,
        conf=conf,
        iou=0.50,
        plots=True,
        device=0
    )
    # Detection metrics expose mAP@0.5 as `.box.map50`. The earlier
    # getattr(val_results, "box_map50", 0.0) lookup named an attribute that
    # does not exist, so it always yielded 0.0 and no threshold was selected.
    current_map = float(val_results.box.map50)
    if current_map > best_map:
        best_map, best_conf = current_map, conf



print("Training finished!")
print(f"Best mAP50: {best_map:.3f} at confidence {best_conf}")
print(f"Train images: {train_files}, Val images: {val_files}")
print(f"Results saved in {os.path.join(RUNS_DIR, EXP_NAME)}")

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Validating dataset structure.
📊 Dataset stats: Train=70 files, 147 instances | Val=20 files, 36 instances
📦 Avg instances per train image: 2.10
Caches cleared
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11m.pt to 'yolo11m.pt': 100% ━━━━━━━━━━━━ 38.8MB 285.5MB/s 0.1s
Ultralytics 8.3.204 🚀 Python-3.12.11 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/M

## Run Inference on Test Set

In [5]:

from ultralytics import YOLO
import os

# Run the best checkpoint of the training run over the test images.
# Relies on RUNS_DIR, EXP_NAME, TEST_DIR and best_conf from the training cell.
print("Running inference on test set...")

best_model_path = os.path.join(RUNS_DIR, EXP_NAME, "weights", "best.pt")

if not os.path.exists(best_model_path):
    # No checkpoint on disk: report it and skip prediction.
    print("Best model not found.")
else:
    model = YOLO(best_model_path)
    # Predictions go to RUNS_DIR/<EXP_NAME>_predictions; the save flags
    # request saved outputs, text results and confidences.
    predict_args = dict(
        source=TEST_DIR,
        conf=best_conf,
        iou=0.5,
        save=True,
        save_txt=True,
        save_conf=True,
        project=RUNS_DIR,
        name=f"{EXP_NAME}_predictions",
        exist_ok=True,
    )
    results = model.predict(**predict_args)
    print("Inference done. Results saved.")


Running inference on test set...

image 1/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img90.jpg: 640x832 2 rusts, 51.8ms
image 2/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img91.jpg: 672x832 2 rusts, 53.0ms
image 3/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img92.jpg: 384x832 1 rust, 76.6ms
image 4/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img93.jpg: 704x832 2 rusts, 75.8ms
image 5/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img94.jpg: 448x832 1 rust, 50.4ms
image 6/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img95.jpg: 832x768 1 rust, 53.8ms
image 7/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img96.jpg: 576x832 5 rusts, 49.9ms
image 8/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img97.jpg: 832x832 2 rusts, 61.3ms
image 9/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img98.jpg: 384x832 1 rust, 31.6ms
image 10/10 /content/drive/MyDrive/SAGER-TASK/dataset/images/test/img9