# Worm Detection - YOLOv8 Optimized Training (v2)

**Optimized for small object detection**

| Parameter | v1 | v2 (this) |
|---|---|---|
| Image Size | 416 | **640** |
| Batch Size | 32 | **16** |
| Model | YOLOv8m | **YOLOv8l** |
| Epochs | 100 | **150** |
| Mosaic | 1.0 all epochs | **1.0 → off at epoch 120** |
| Mixup | 0.1 | **0.0** |
| Box Loss | default (7.5) | **10.0** |
| IoU Threshold | 0.7 | **0.6** |

**Why**: Worms occupy ~0.017% of the image area. At 416px, the smallest worms collapse to ~4px.  
At 640px, with a larger model and tuned loss weights, small-object mAP50-95 is expected to improve significantly.

## Step 1: Setup

In [None]:
# Install the Ultralytics package (source of the YOLO class imported in Step 5); -q keeps pip output quiet.
!pip install -q ultralytics

## Step 2: Load Dataset

In [None]:
import os
import shutil

# Find the dataset under /kaggle/input and stage it in /kaggle/working.
# Two layouts are handled for each attached input:
#   1. .zip archives, which are unpacked into /kaggle/working/
#   2. a folder (the input root or its 'yolov8_dataset' child) that contains
#      data.yaml, which is copied to /kaggle/working/yolov8_dataset
kaggle_input = '/kaggle/input'
dataset_dirs = os.listdir(kaggle_input)
print('Available datasets:', dataset_dirs)

for d in dataset_dirs:
    full_path = os.path.join(kaggle_input, d)
    if not os.path.isdir(full_path):
        # A plain file directly under the input root cannot be listed; skip it
        # instead of crashing with NotADirectoryError.
        continue

    # Check for zip files
    # NOTE(review): assumes each archive unpacks to a 'yolov8_dataset/' folder,
    # since that is the only location read afterwards — confirm.
    for f in os.listdir(full_path):
        if f.endswith('.zip'):
            print(f'Extracting {f}...')
            shutil.unpack_archive(os.path.join(full_path, f), '/kaggle/working/')

    # Check for direct dataset folder
    for sub in ['', 'yolov8_dataset']:
        check = os.path.join(full_path, sub, 'data.yaml')
        if os.path.exists(check):
            src = os.path.join(full_path, sub) if sub else full_path
            print(f'Found dataset at {src}')
            shutil.copytree(src, '/kaggle/working/yolov8_dataset', dirs_exist_ok=True)

dataset_path = '/kaggle/working/yolov8_dataset'
train_images_dir = os.path.join(dataset_path, 'train/images')
val_images_dir = os.path.join(dataset_path, 'val/images')

# Fail early with an actionable message if nothing was staged above.
for required_dir in (train_images_dir, val_images_dir):
    if not os.path.isdir(required_dir):
        raise FileNotFoundError(
            f'Expected folder not found: {required_dir}. '
            f'Check that a dataset is attached under {kaggle_input} and that it '
            f'provides train/images and val/images.'
        )

print(f'\nTrain images: {len(os.listdir(train_images_dir))}')
print(f'Val images: {len(os.listdir(val_images_dir))}')

## Step 3: Fix data.yaml paths

In [None]:
yaml_path = os.path.join(dataset_path, 'data.yaml')

# Render the single-class dataset config up front, then overwrite data.yaml
# with it so its paths point at the staged copy of the dataset.
yaml_text = f"""# Worm Detection Dataset (Optimized)
path: {dataset_path}
train: train/images
val: val/images

nc: 1
names: ['worm']
"""

with open(yaml_path, 'w') as yaml_out:
    yaml_out.write(yaml_text)

print('data.yaml updated.')

# Echo the file back from disk to confirm what was actually written.
with open(yaml_path) as yaml_in:
    print(yaml_in.read())

## Step 4: Analyze Worm Sizes in Training Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Collect the size of every annotated box in the training labels.
# Columns 3 and 4 of each label line are read as width and height.
# NOTE(review): assumes YOLO bbox rows "class cx cy w h" with values normalized
# to [0, 1] — confirm the labels are not in polygon/segmentation format.
widths = []
heights = []
areas = []

label_dir = os.path.join(dataset_path, 'train/labels')
for lbl_file in os.listdir(label_dir):
    if not lbl_file.endswith('.txt'):
        continue
    with open(os.path.join(label_dir, lbl_file)) as f:
        for line in f:
            parts = line.split()
            if len(parts) >= 5:
                w, h = float(parts[3]), float(parts[4])
                widths.append(w)
                heights.append(h)
                areas.append(w * h)

# Without this guard the min()/max() calls below fail with an opaque
# "arg is an empty sequence" error when no annotation was parsed.
if not widths:
    raise ValueError(f'No annotations found in {label_dir}; cannot analyze worm sizes.')

# Computed once and reused by the summary and the per-resolution report.
mean_w = np.mean(widths)
mean_h = np.mean(heights)
mean_area = np.mean(areas)

print(f'Total annotations: {len(widths)}')
print(f'Width (normalized):  min={min(widths):.4f}, avg={mean_w:.4f}, max={max(widths):.4f}')
print(f'Height (normalized): min={min(heights):.4f}, avg={mean_h:.4f}, max={max(heights):.4f}')
print(f'Area (normalized):   min={min(areas):.6f}, avg={mean_area:.6f}, max={max(areas):.6f}')

# At different resolutions: scale the normalized sizes to pixels.
for res in [416, 640, 768]:
    avg_w = mean_w * res
    avg_h = mean_h * res
    min_w = min(widths) * res
    min_h = min(heights) * res
    print(f'\nAt {res}px: avg worm = {avg_w:.1f}x{avg_h:.1f}px, min = {min_w:.1f}x{min_h:.1f}px')

# Plot distribution in pixels at the 640px training resolution.
widths_px = np.asarray(widths) * 640
heights_px = np.asarray(heights) * 640
areas_px = np.asarray(areas) * 640 * 640

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
axes[0].hist(widths_px, bins=30, color='#a78bfa', edgecolor='black')
axes[0].set_title('Worm Width (px at 640)')
axes[0].axvline(x=10, color='red', linestyle='--', label='10px threshold')
axes[0].legend()

axes[1].hist(heights_px, bins=30, color='#a78bfa', edgecolor='black')
axes[1].set_title('Worm Height (px at 640)')
axes[1].axvline(x=10, color='red', linestyle='--', label='10px threshold')
axes[1].legend()

axes[2].hist(areas_px, bins=30, color='#a78bfa', edgecolor='black')
axes[2].set_title('Worm Area (px² at 640)')

plt.tight_layout()
plt.savefig('worm_size_analysis.png', dpi=100)
plt.show()

## Step 5: Train (150 epochs: mosaic on for the first 120, off for the final 30)

In [None]:
from ultralytics import YOLO
import torch

# Report the GPU in use. The device-properties field is `total_memory` (bytes);
# `total_mem` does not exist and raised AttributeError before training started.
print(f'GPU: {torch.cuda.get_device_name(0)}')
print(f'VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')

# Use YOLOv8l (large) for better small-object features
model = YOLO('yolov8l.pt')

# Single training run: strong augmentation for the first 120 epochs, then
# mosaic is switched off for the last 30 (epochs=150, close_mosaic=30).
results = model.train(
    data=yaml_path,

    # === RESOLUTION (critical for small objects) ===
    imgsz=640,              # Up from 416 → 2.4x more pixels

    # === TRAINING ===
    epochs=150,
    batch=16,               # Reduced from 32 to fit 640px in VRAM
    patience=30,            # More patience for small object convergence

    # === DEVICE ===
    device=0,
    workers=4,
    amp=True,               # Mixed precision

    # === AUGMENTATION (tuned for small objects) ===
    augment=True,
    hsv_h=0.01,             # Reduced — worms are grey, less color variation needed
    hsv_s=0.3,
    hsv_v=0.3,
    degrees=10,             # Mild rotation
    translate=0.1,
    scale=0.2,              # Less scale variation (avoid shrinking worms further)
    fliplr=0.5,
    flipud=0.3,
    mosaic=1.0,             # Full mosaic early (will reduce at close_mosaic)
    mixup=0.0,              # Disabled — mixup blurs small objects
    copy_paste=0.1,         # Paste worms from one image to another

    # === LOSS WEIGHTS (tuned for small objects) ===
    box=10.0,               # Up from 7.5 — emphasize box precision
    cls=0.5,                # Down — single class, less important
    dfl=1.5,                # Distribution focal loss for tighter boxes

    # === DETECTION ===
    iou=0.6,                # Down from 0.7 — relaxed IoU for small objects

    # === OPTIMIZER ===
    optimizer='AdamW',
    lr0=0.001,
    lrf=0.01,
    warmup_epochs=5,
    weight_decay=0.0005,
    cos_lr=True,

    # === MOSAIC SCHEDULE ===
    close_mosaic=30,        # Disable mosaic for last 30 epochs
                            # Lets model learn precise localization

    # === SAVE ===
    save=True,
    save_period=25,
    project='worm_detection',
    name='yolov8l_optimized',
)

print('\nTraining complete!')

## Step 6: Evaluate

In [None]:
import cv2

# Reload the best checkpoint written by the training run and score it on the
# validation split, first as-is and then with test-time augmentation.
best_model_path = 'worm_detection/yolov8l_optimized/weights/best.pt'
model = YOLO(best_model_path)

# Plain validation pass
metrics = model.val(data=yaml_path, imgsz=640)
box = metrics.box
banner = '=' * 60

print(f"\n{banner}")
print(f"  YOLOv8l OPTIMIZED RESULTS")
print(f"{banner}")
print(f"  mAP50:      {box.map50:.4f}")
print(f"  mAP50-95:   {box.map:.4f}")
print(f"  Precision:  {box.mp:.4f}")
print(f"  Recall:     {box.mr:.4f}")
print(f"{banner}")

# Second pass with TTA (Test-Time Augmentation) for comparison
print('\nWith Test-Time Augmentation (TTA):')
metrics_tta = model.val(data=yaml_path, imgsz=640, augment=True)
box_tta = metrics_tta.box
print(f"  mAP50:      {box_tta.map50:.4f}")
print(f"  mAP50-95:   {box_tta.map:.4f}")

## Step 7: Visualize Predictions

In [None]:
# Show predictions for the first (up to) 8 validation images in a 2x4 grid.
val_images = sorted(os.listdir(os.path.join(dataset_path, 'val/images')))[:8]

fig, axes = plt.subplots(2, 4, figsize=(20, 10))
panels = axes.ravel()  # row-major, so panel i sits at row i // 4, column i % 4

for ax, img_name in zip(panels, val_images):
    img_path = os.path.join(dataset_path, 'val/images', img_name)

    # Named `preds` so the training `results` object is not overwritten.
    preds = model(img_path, conf=0.4, imgsz=640)
    annotated = preds[0].plot(line_width=1)
    # Convert BGR -> RGB before handing the image to matplotlib.
    annotated = cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)

    ax.imshow(annotated)
    n_det = len(preds[0].boxes)
    ax.set_title(f'{n_det} worms', fontsize=10)
    ax.axis('off')

# With fewer than 8 images the remaining panels would show empty axis frames.
for ax in panels[len(val_images):]:
    ax.axis('off')

plt.suptitle('Validation Predictions (YOLOv8l Optimized, 640px)', fontsize=14)
plt.tight_layout()
plt.savefig('val_predictions_v2.png', dpi=100)
plt.show()

## Step 8: Speed Benchmark

In [None]:
import time

# Benchmark single-image inference speed on the first validation image.
# Each timed call receives a file path, so the measurement also covers
# whatever the model call does to load that image.
test_img = os.path.join(dataset_path, 'val/images', val_images[0])

# Warmup: the first calls are excluded from the timing.
for _ in range(3):
    model(test_img, imgsz=640, verbose=False)

# Benchmark with perf_counter: a monotonic, high-resolution clock meant for
# measuring short durations, unlike time.time() which follows the system clock.
times = []
for _ in range(20):
    t0 = time.perf_counter()
    model(test_img, imgsz=640, verbose=False)
    times.append(time.perf_counter() - t0)

avg_ms = np.mean(times) * 1000
fps = 1000 / avg_ms

# NOTE(review): the "T4" label is hard-coded; confirm it matches the GPU
# reported in the training cell.
print(f'Inference speed (640px, T4):')
print(f'  Average: {avg_ms:.1f} ms/frame')
print(f'  FPS: {fps:.0f}')
print(f'\nFor 30fps video tracking: {"REAL-TIME" if fps >= 30 else "Need optimized export"}')

## Step 9: Export Models

In [None]:
# Export best model to the deployment formats used downstream.
model = YOLO(best_model_path)

# ONNX (cross-platform, fast)
model.export(format='onnx', imgsz=640, simplify=True, half=True)

# TorchScript (for PyTorch deployment)
model.export(format='torchscript', imgsz=640)

# CoreML (for Mac M1 deployment)
# Best-effort: a failure here must not abort the notebook. Catch Exception
# rather than using a bare except, so Ctrl-C / kernel interrupts still
# propagate, and print the reason instead of discarding it.
try:
    model.export(format='coreml', imgsz=640, half=True)
except Exception as exc:
    print('CoreML export not available on this system (will export locally on Mac)')
    print(f'  Reason: {type(exc).__name__}: {exc}')

print('\nExported models:')
export_dir = 'worm_detection/yolov8l_optimized/weights'
for f in sorted(os.listdir(export_dir)):
    # For an entry that is itself a directory, getsize reports the directory
    # entry's size, not the total size of its contents.
    size = os.path.getsize(os.path.join(export_dir, f)) / (1024*1024)
    print(f'  {f}: {size:.1f} MB')

## Step 10: Package for Download

In [None]:
# Copy everything important to /kaggle/working for easy download
output_files = {
    'best.pt': best_model_path,
}

# Copy model
shutil.copy(best_model_path, '/kaggle/working/best_worm_yolov8l.pt')

# Copy last.pt as backup
last_path = best_model_path.replace('best.pt', 'last.pt')
if os.path.exists(last_path):
    shutil.copy(last_path, '/kaggle/working/last_worm_yolov8l.pt')

# Copy ONNX if exported
onnx_path = best_model_path.replace('.pt', '.onnx')
if os.path.exists(onnx_path):
    shutil.copy(onnx_path, '/kaggle/working/best_worm_yolov8l.onnx')

# Copy training plots
results_dir = 'worm_detection/yolov8l_optimized'
plot_files = ['results.png', 'confusion_matrix.png', 'confusion_matrix_normalized.png',
              'P_curve.png', 'R_curve.png', 'F1_curve.png', 'PR_curve.png',
              'labels.jpg', 'labels_correlogram.jpg']
for pf in plot_files:
    src = os.path.join(results_dir, pf)
    if os.path.exists(src):
        shutil.copy(src, f'/kaggle/working/{pf}')

print('\nFiles ready for download:')
for f in sorted(os.listdir('/kaggle/working/')):
    fp = f'/kaggle/working/{f}'
    if os.path.isfile(fp):
        size = os.path.getsize(fp) / (1024*1024)
        if size > 0.01:
            print(f'  {f}: {size:.1f} MB')

## Training Curves & Metrics

In [None]:
from IPython.display import Image as IPImage, display

# Display the key training plots inline; plots missing from the run directory
# are skipped.
for plot_name in ('results.png', 'confusion_matrix.png', 'PR_curve.png', 'F1_curve.png'):
    plot_path = os.path.join(results_dir, plot_name)
    if not os.path.exists(plot_path):
        continue
    print(f'\n--- {plot_name} ---')
    display(IPImage(filename=plot_path, width=800))

---

## Summary

**Download `best_worm_yolov8l.pt`** and place it in your project folder.  
Then run the **worm tracking pipeline** locally to:
- Detect and count worms across the video
- Track individual worm IDs (ByteTrack)
- Calculate velocity and movement for each worm
- Visualize worm travel paths
- Detect growing/splitting worms