In [5]:
from ultralytics import YOLO
import torch
import os


In [None]:
# Sanity check: is the training images folder reachable via this relative path
# (resolved against the kernel's current working directory)?
train_images_dir = '2025_cone_dataset/train/images'
dataset_found = os.path.exists(train_images_dir)
print(f"Dataset exists: {dataset_found}")

Dataset exists: False


In [None]:
'''
Training the teacher model
Note: ~3000 epochs is a rough estimate for grokking to occur; this run is
configured for 500 epochs (see `epochs` below) and may stop earlier via `patience`.
'''
# Load the pretrained YOLO26x checkpoint that the teacher is fine-tuned from
model = YOLO("yolo26x.pt")

# Training parameters
results = model.train(
    data="2025_cone_dataset.yaml",
    epochs=500,           # Upper bound on the number of epochs
    imgsz=1024,           # The teacher is not the deployed model, so train at high resolution for generalisation
    batch=-1,             # '-1' requests automatic batch sizing (NOTE(review): AutoBatch targets GPU memory — confirm behaviour on CPU)
    patience=50,          # End training if validation metrics do not improve for 50 epochs
    weight_decay=0.1,     # Weight-decay (L2) regularisation strength
    dropout=0.1,          # NOTE(review): Ultralytics documents dropout as classification-only — confirm it affects a detect model
    optimizer="MuSGD",    # Author's choice for large models
    lr0=0.01,             # Initial learning rate
    cos_lr=True,          # Cosine learning-rate schedule
    close_mosaic=20,      # Turn off mosaic augmentation for the last 20 epochs to refine
    overlap_mask=True,    # NOTE(review): documented as a segmentation-only setting — likely no effect on detection
    augment=True,         # NOTE(review): documented as a predict-time flag — confirm it changes training augmentation
    val=True,             # Perform validation during training
    save_period=10,       # Save weights every 10 epochs
)

model.save("Yolo26x_Teacher.pt")

print("Model saved and ready for distillation!")

# `__file__` is not defined inside a notebook kernel, so referencing it would raise
# NameError after training completes. Fall back to the working directory there,
# while keeping the script-relative behaviour if this code is exported to a .py file.
try:
    current_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    current_dir = os.getcwd()

if results is not None and hasattr(results, 'save_dir'):
    # Preferred: locate best.pt inside the directory reported by the training results
    best_weights = os.path.join(results.save_dir, 'weights', 'best.pt')
else:
    # NOTE(review): this fallback assumes the run was written to
    # runs/Cone_Teacher/grokking_run, but no project/name is passed to train() above — confirm.
    best_weights = os.path.join(current_dir, 'runs', 'Cone_Teacher', 'grokking_run', 'weights', 'best.pt')

final_model = YOLO(best_weights)

# Run evaluation on the TEST split
metrics = final_model.val(split='test')

print(f"Final Test mAP50: {metrics.box.map50}")

[KDownloading https://github.com/ultralytics/assets/releases/download/v8.4.0/yolo26x.pt to 'yolo26x.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 113.2MB 9.9MB/s 11.4ss 11.3s<0.0s
Ultralytics 8.4.7 🚀 Python-3.12.2 torch-2.5.1 CPU (Apple M4 Pro)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, angle=1.0, augment=True, auto_augment=randaugment, batch=-1, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=20, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=2025_cone_dataset.yaml, degrees=0.0, deterministic=True, device=cpu, dfl=1.5, dnn=False, dropout=0.2, dynamic=False, embed=None, epochs=3000, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo26x.pt, momen

RuntimeError: Dataset '2025_cone_dataset.yaml' error ❌ Dataset '2025_cone_dataset.yaml' images not found, missing path '/Users/perryhui/Desktop/2025_cone_dataset/datasets/2025_cone_dataset/val/images'
Note dataset download directory is '/Users/perryhui/Desktop/2025_cone_dataset/datasets'. You can update this in '/Users/perryhui/Library/Application Support/Ultralytics/settings.json'

In [None]:
'''
Knowledge Distillation
Training a smaller YOLO26n with knowledge from the teacher model trained above
'''

# Teacher weights saved by the teacher-training cell
teacher = YOLO("Yolo26x_Teacher.pt")

# Pretrained nano checkpoint that will be trained as the student
student = YOLO("yolo26n.pt")

# Train the student. The original called `model.train(...)`, which would have
# re-trained the teacher network from the previous cell and left `student` untouched.
# NOTE(review): `teacher`, `teacher_imgsz` and `distill_loss` are not among the
# documented stock Ultralytics train arguments — confirm the installed build accepts them.
results = student.train(
    data="2025_cone_dataset.yaml",
    teacher=teacher.model,         # Pass the underlying PyTorch model
    imgsz=640,                     # Student resolution
    teacher_imgsz=1024,            # Resolution the teacher was trained at
    epochs=500,                    # Epoch budget for the student run
    batch=128,                     # High batch size for stable gradients
    optimizer="AdamW",             # Author's choice for small models instead of MuSGD
    distill_loss="cwd",            # Channel-wise distillation
)

student.save("Yolo26n_Final.pt")
