# 🎯 YOLOv8 Training for Pallet Box Detection

This notebook trains YOLOv8 object detection models for counting boxes on pallets.

**Training Plan:**
1. Train YOLOv8n (nano) - fast baseline
2. Train YOLOv8s (small) - improved accuracy
3. Compare models and select best
4. Optimize confidence threshold for counting accuracy

**Dataset:**
- Combined: 6,875 images (70/20/10 split)
- Train: 4,812 | Valid: 1,374 | Test: 689
- Single class: `box`


## 1. Setup and Imports


In [None]:
import os
import sys
from pathlib import Path
import shutil

# ML Libraries
from ultralytics import YOLO
import torch

# Data handling
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

# Utilities
import yaml
import json
from datetime import datetime

# Project layout: PROJECT_ROOT is the parent of the current working directory
PROJECT_ROOT = Path('../').resolve()
MODELS_DIR = PROJECT_ROOT / 'models'
DATA_CONFIG = PROJECT_ROOT / 'data' / 'combined_data.yaml'

# Pick the compute device in order of preference: CUDA, then MPS, then CPU
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Echo the environment so the run can be reproduced from the notebook output
print(
    f"âœ“ Using device: {device}",
    f"âœ“ PyTorch version: {torch.__version__}",
    f"âœ“ Data config: {DATA_CONFIG}",
    f"âœ“ Models will be saved to: {MODELS_DIR}",
    sep="\n",
)


In [None]:
# Load the dataset YAML and echo its main fields as a sanity check
with open(DATA_CONFIG, 'r') as f:
    data_config = yaml.safe_load(f)

print("Data Configuration:")
# (label shown to the user, key looked up in the YAML); missing keys print 'N/A'
for field_label, config_key in (
    ("Train paths", "train"),
    ("Val paths", "val"),
    ("Test paths", "test"),
    ("Classes", "names"),
    ("Number of classes", "nc"),
):
    print(f"  {field_label}: {data_config.get(config_key, 'N/A')}")


## 2. Train YOLOv8n (Nano) - Baseline Model

Fast, lightweight model for initial baseline.


In [None]:
# Hyperparameters shared by the training runs in this notebook
EPOCHS = 50
BATCH_SIZE = 16
IMG_SIZE = 640
PATIENCE = 10  # Early stopping patience

# Banner summarising the configuration of the baseline run
print(
    "=" * 60,
    "TRAINING YOLOV8N (NANO) - BASELINE",
    "=" * 60,
    f"Epochs: {EPOCHS}",
    f"Batch Size: {BATCH_SIZE}",
    f"Image Size: {IMG_SIZE}",
    f"Early Stopping Patience: {PATIENCE}",
    "=" * 60,
    sep="\n",
)


In [None]:
# Start from the pretrained nano weights
model_nano = YOLO('yolov8n.pt')

# Fine-tune on the dataset described by DATA_CONFIG
results_nano = model_nano.train(
    # Data and schedule
    data=str(DATA_CONFIG),
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    epochs=EPOCHS,
    patience=PATIENCE,
    device=device,
    # Output location: <PROJECT_ROOT>/runs/detect/yolov8n_boxes
    project=str(PROJECT_ROOT.joinpath('runs', 'detect')),
    name='yolov8n_boxes',
    exist_ok=True,
    # Artifacts and logging
    save=True,
    plots=True,
    val=True,
    verbose=True,
)

print("\nâœ“ YOLOv8n training complete!")


In [None]:
# Copy the best checkpoint of the YOLOv8n run into MODELS_DIR under a stable name
best_model_path = PROJECT_ROOT / 'runs' / 'detect' / 'yolov8n_boxes' / 'weights' / 'best.pt'
target_path = MODELS_DIR / 'yolov8n_boxes_best.pt'

if best_model_path.exists():
    # shutil.copy raises FileNotFoundError when the destination folder is
    # missing, and nothing earlier in the notebook creates MODELS_DIR.
    target_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(best_model_path, target_path)
    print(f"✓ Best model saved to: {target_path}")
else:
    print(f"⚠ Best model not found at: {best_model_path}")


## 3. Train YOLOv8s (Small) - Improved Model

Larger model for potentially better accuracy.


In [None]:
print("=" * 60, "TRAINING YOLOV8S (SMALL) - IMPROVED MODEL", "=" * 60, sep="\n")

# Start from the pretrained small weights
model_small = YOLO('yolov8s.pt')

# Fine-tune with the same hyperparameters as the nano run
results_small = model_small.train(
    # Data and schedule
    data=str(DATA_CONFIG),
    imgsz=IMG_SIZE,
    batch=BATCH_SIZE,
    epochs=EPOCHS,
    patience=PATIENCE,
    device=device,
    # Output location: <PROJECT_ROOT>/runs/detect/yolov8s_boxes
    project=str(PROJECT_ROOT.joinpath('runs', 'detect')),
    name='yolov8s_boxes',
    exist_ok=True,
    # Artifacts and logging
    save=True,
    plots=True,
    val=True,
    verbose=True,
)

print("\nâœ“ YOLOv8s training complete!")


In [None]:
# Copy the best checkpoint of the YOLOv8s run into MODELS_DIR under a stable name
best_model_path = PROJECT_ROOT / 'runs' / 'detect' / 'yolov8s_boxes' / 'weights' / 'best.pt'
target_path = MODELS_DIR / 'yolov8s_boxes_best.pt'

if best_model_path.exists():
    # shutil.copy raises FileNotFoundError when the destination folder is
    # missing, so make sure MODELS_DIR exists before copying.
    target_path.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(best_model_path, target_path)
    print(f"✓ Best model saved to: {target_path}")
else:
    print(f"⚠ Best model not found at: {best_model_path}")


## 4. Model Evaluation & Comparison


In [None]:
# Load trained models for evaluation
model_nano_eval = YOLO(str(MODELS_DIR / 'yolov8n_boxes_best.pt'))
model_small_eval = YOLO(str(MODELS_DIR / 'yolov8s_boxes_best.pt'))

print("âœ“ Models loaded for evaluation")


In [None]:
# Run validation for each reloaded model on the 'val' split of DATA_CONFIG
print("Evaluating YOLOv8n on validation set...")
metrics_nano = model_nano_eval.val(split='val', data=str(DATA_CONFIG))

print()
print("Evaluating YOLOv8s on validation set...")
metrics_small = model_small_eval.val(split='val', data=str(DATA_CONFIG))


In [None]:
# Side-by-side table of the validation detection metrics for both models
print("=" * 60, "MODEL COMPARISON - DETECTION METRICS", "=" * 60, sep="\n")

# One column per metric; each column holds [nano value, small value]
comparison_data = {
    'Model': ['YOLOv8n (Nano)', 'YOLOv8s (Small)'],
    **{
        column: [getattr(metrics_nano.box, attr), getattr(metrics_small.box, attr)]
        for column, attr in (
            ('mAP@0.5', 'map50'),
            ('mAP@0.5:0.95', 'map'),
            ('Precision', 'mp'),
            ('Recall', 'mr'),
        )
    },
}

df_comparison = pd.DataFrame(comparison_data)
print(df_comparison.to_string(index=False))

# Pick the winner by mAP@0.5; a tie goes to the nano model
if metrics_nano.box.map50 >= metrics_small.box.map50:
    best_model_name = 'YOLOv8n'
else:
    best_model_name = 'YOLOv8s'
print(f"\nâœ“ Best model by mAP@0.5: {best_model_name}")


## 5. Counting Accuracy Evaluation

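The business metric is counting accuracy (how often the predicted number of boxes matches the ground truth), so the models are also evaluated on counts rather than on detection metrics alone.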


In [None]:
def evaluate_counting_accuracy(
    model,
    data_path,
    split='test',
    conf_threshold=0.4,
    dataset_names=('Boxes.v1i.yolov8', 'Final_Object_Detection.v1i.yolov8'),
):
    """
    Evaluate how well a detector counts objects on one dataset split.

    Dataset folders are looked up two directory levels above ``data_path``,
    as ``<root>/<dataset_name>/<split>/images`` and ``.../labels``. Only
    ``*.jpg`` images that have a same-named ``.txt`` label file are used.
    The ground-truth count of an image is the number of non-blank lines in
    its label file; the predicted count is the number of boxes the model
    returns at ``conf_threshold``.

    Args:
        model: Object exposing ``predict(source, conf=..., verbose=...)``
            that returns a sequence whose first item has a ``boxes``
            attribute (either ``None`` or something supporting ``len``).
        data_path: Path to the dataset YAML file. Only its location is used
            (to derive the dataset root); the file itself is not read.
        split: Name of the split folder inside each dataset directory.
        conf_threshold: Confidence threshold forwarded to ``model.predict``.
        dataset_names: Dataset folder names to search under the root.

    Returns:
        dict with keys ``count_accuracy`` (share of images with an exact
        count match), ``off_by_1`` / ``off_by_2`` (share of images whose
        absolute count error is at most 1 / 2), ``mae`` (mean absolute count
        error) and ``results_df`` (one row per image with columns ``image``,
        ``true_count``, ``pred_count``, ``error``, ``abs_error``).

    Raises:
        FileNotFoundError: If no image/label pair is found for ``split``.
    """
    from pathlib import Path

    data_root = Path(data_path).parent.parent

    # Gather (image, label) pairs from every dataset that has this split
    pairs = []
    searched_dirs = []
    for dataset_name in dataset_names:
        split_dir = data_root / dataset_name / split
        images_dir = split_dir / 'images'
        labels_dir = split_dir / 'labels'
        searched_dirs.append(images_dir)

        if not images_dir.exists():
            continue
        # glob order depends on the filesystem; sort so the rows of
        # results_df come out in the same order on every run.
        for img_path in sorted(images_dir.glob('*.jpg')):
            label_path = labels_dir / f"{img_path.stem}.txt"
            if label_path.exists():
                pairs.append((img_path, label_path))

    # Without this guard an empty split builds a column-less DataFrame and
    # fails below with an opaque KeyError('true_count').
    if not pairs:
        searched = ', '.join(str(d) for d in searched_dirs) or '(no dataset folders given)'
        raise FileNotFoundError(
            f"No '*.jpg' images with matching label files found for split "
            f"'{split}'. Searched: {searched}"
        )

    rows = []
    for img_path, label_path in pairs:
        # Ground truth: one annotation per non-blank label line
        with open(label_path, 'r') as f:
            true_count = sum(1 for line in f if line.strip())

        # Prediction: number of boxes kept at the given confidence threshold
        pred = model.predict(str(img_path), conf=conf_threshold, verbose=False)
        boxes = pred[0].boxes
        pred_count = len(boxes) if boxes is not None else 0

        error = pred_count - true_count
        rows.append({
            'image': img_path.name,
            'true_count': true_count,
            'pred_count': pred_count,
            'error': error,
            'abs_error': abs(error),
        })

    df_results = pd.DataFrame(rows)
    abs_error = df_results['abs_error']

    return {
        'count_accuracy': (df_results['true_count'] == df_results['pred_count']).mean(),
        'off_by_1': (abs_error <= 1).mean(),
        'off_by_2': (abs_error <= 2).mean(),
        'mae': abs_error.mean(),
        'results_df': df_results,
    }

print("✓ Counting accuracy function defined")


In [None]:
# Count-level evaluation of both models on the 'test' split
print("=" * 60, "COUNTING ACCURACY EVALUATION (Test Set)", "=" * 60, sep="\n")

print("\nEvaluating YOLOv8n counting accuracy...")
count_metrics_nano = evaluate_counting_accuracy(
    model_nano_eval, str(DATA_CONFIG), split='test', conf_threshold=0.4
)

print("Evaluating YOLOv8s counting accuracy...")
count_metrics_small = evaluate_counting_accuracy(
    model_small_eval, str(DATA_CONFIG), split='test', conf_threshold=0.4
)

print("\n" + "=" * 60, "COUNTING METRICS COMPARISON", "=" * 60, sep="\n")

# Ratio metrics are shown as percentages with one decimal; MAE with two decimals.
# Every column lists the nano value first, then the small value.
count_comparison = {
    'Model': ['YOLOv8n', 'YOLOv8s'],
    **{
        header: [
            f"{model_metrics[key]*100:.1f}%"
            for model_metrics in (count_metrics_nano, count_metrics_small)
        ]
        for header, key in (
            ('Count Accuracy', 'count_accuracy'),
            ('Off-by-1 Accuracy', 'off_by_1'),
            ('Off-by-2 Accuracy', 'off_by_2'),
        )
    },
    'Count MAE': [
        f"{model_metrics['mae']:.2f}"
        for model_metrics in (count_metrics_nano, count_metrics_small)
    ],
}

df_count_comparison = pd.DataFrame(count_comparison)
print(df_count_comparison.to_string(index=False))


## 6. Confidence Threshold Optimization


In [None]:
# Find the optimal confidence threshold for the better model.
# "Better" uses the same rule as the training summary's 'best_model': higher
# count accuracy from the test-set evaluation, ties going to YOLOv8s. Hard-coding
# YOLOv8s here could pair a threshold tuned for one model with a summary that
# names the other model as best.
nano_counts_better = count_metrics_nano['count_accuracy'] > count_metrics_small['count_accuracy']
best_model = model_nano_eval if nano_counts_better else model_small_eval

print("="*60)
print("CONFIDENCE THRESHOLD OPTIMIZATION")
print("="*60)
print(f"Model under optimization: {'YOLOv8n' if nano_counts_better else 'YOLOv8s'}")

thresholds = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]
threshold_results = []

for thresh in thresholds:
    print(f"Testing threshold: {thresh}...")
    # 'valid' is the split folder name searched by evaluate_counting_accuracy
    # (presumably the validation folder of the dataset export; confirm on disk).
    metrics = evaluate_counting_accuracy(best_model, str(DATA_CONFIG), split='valid', conf_threshold=thresh)
    threshold_results.append({
        'threshold': thresh,
        'count_accuracy': metrics['count_accuracy'],
        'off_by_1': metrics['off_by_1'],
        'mae': metrics['mae']
    })

df_thresholds = pd.DataFrame(threshold_results)
print("\nThreshold Analysis:")
print(df_thresholds.to_string(index=False))

# Best threshold = highest exact-count accuracy; idxmax keeps the first
# (lowest) threshold when several are tied. Cast to a plain float so later
# labels and JSON output do not depend on NumPy scalar formatting.
best_idx = df_thresholds['count_accuracy'].idxmax()
best_threshold = float(df_thresholds.loc[best_idx, 'threshold'])
print(f"\n✓ Optimal confidence threshold: {best_threshold}")


In [None]:
# Plot the three counting metrics against the confidence threshold and save the figure
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
ax1, ax2, ax3 = axes

# One entry per panel:
# (axis, df_thresholds column, scale factor, line format, best-threshold line colour, y label, title)
panel_specs = [
    (ax1, 'count_accuracy', 100, 'b-o', 'red',
     'Count Accuracy (%)', 'Count Accuracy vs Confidence Threshold'),
    (ax2, 'off_by_1', 100, 'g-o', 'red',
     'Off-by-1 Accuracy (%)', 'Off-by-1 Accuracy vs Confidence Threshold'),
    (ax3, 'mae', 1, 'r-o', 'blue',
     'Count MAE', 'MAE vs Confidence Threshold'),
]

for ax, column, scale, line_fmt, marker_color, y_label, title in panel_specs:
    ax.plot(df_thresholds['threshold'], df_thresholds[column] * scale, line_fmt, linewidth=2, markersize=8)
    # Vertical dashed line marks the selected threshold on every panel
    ax.axvline(best_threshold, color=marker_color, linestyle='--', label=f'Best: {best_threshold}')
    ax.set_xlabel('Confidence Threshold', fontsize=12)
    ax.set_ylabel(y_label, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

plt.tight_layout()

# Save under PROJECT_ROOT rather than a cwd-relative '../docs', and create the
# folder first: savefig fails when the target directory does not exist.
docs_dir = PROJECT_ROOT / 'docs'
docs_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(docs_dir / 'threshold_analysis.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Figure saved to docs/threshold_analysis.png")


## 7. Sample Predictions


In [None]:
# Visualise predictions of the selected model on up to six test images
test_images_dir = PROJECT_ROOT / 'Boxes.v1i.yolov8' / 'test' / 'images'
# glob order depends on the filesystem; sort so the same images are shown on every run
sample_images = sorted(test_images_dir.glob('*.jpg'))[:6]

if not sample_images:
    print(f"⚠ No test images found in: {test_images_dir}")

fig, axes = plt.subplots(2, 3, figsize=(18, 12))

for ax, img_path in zip(axes.flatten(), sample_images):
    # Run prediction at the tuned confidence threshold
    results = best_model.predict(str(img_path), conf=best_threshold, verbose=False)

    # Annotated image comes back in BGR channel order; reverse it to RGB for imshow
    annotated_img = results[0].plot()[:, :, ::-1]

    # Number of detected boxes (boxes may be None when nothing is detected)
    pred_count = len(results[0].boxes) if results[0].boxes is not None else 0

    ax.imshow(annotated_img)
    ax.set_title(f'Predicted: {pred_count} boxes', fontsize=12, fontweight='bold')

# Hide the frame and ticks on every panel, including panels left without an
# image when fewer than six samples are available.
for ax in axes.flatten():
    ax.axis('off')

plt.suptitle('Sample Predictions with Best Model', fontsize=16, fontweight='bold', y=1.02)
plt.tight_layout()

# Save under PROJECT_ROOT rather than a cwd-relative '../docs', and create the
# folder first: savefig fails when the target directory does not exist.
docs_dir = PROJECT_ROOT / 'docs'
docs_dir.mkdir(parents=True, exist_ok=True)
plt.savefig(docs_dir / 'sample_predictions.png', dpi=150, bbox_inches='tight')
plt.show()

print("✓ Figure saved to docs/sample_predictions.png")


## 8. Training Summary


In [None]:
# Collect the run configuration and final metrics, write them to JSON, and print a recap.
# All metric values are cast to plain floats so the JSON output never depends on
# how NumPy scalar types are serialised.
training_summary = {
    'timestamp': datetime.now().isoformat(),
    'models_trained': ['yolov8n', 'yolov8s'],
    'epochs': EPOCHS,
    'batch_size': BATCH_SIZE,
    'image_size': IMG_SIZE,
    'device': device,
    'yolov8n_metrics': {
        'mAP50': float(metrics_nano.box.map50),
        'mAP50_95': float(metrics_nano.box.map),
        'precision': float(metrics_nano.box.mp),
        'recall': float(metrics_nano.box.mr),
        'count_accuracy': float(count_metrics_nano['count_accuracy']),
        'count_mae': float(count_metrics_nano['mae'])
    },
    'yolov8s_metrics': {
        'mAP50': float(metrics_small.box.map50),
        'mAP50_95': float(metrics_small.box.map),
        'precision': float(metrics_small.box.mp),
        'recall': float(metrics_small.box.mr),
        'count_accuracy': float(count_metrics_small['count_accuracy']),
        'count_mae': float(count_metrics_small['mae'])
    },
    # Best model = higher count accuracy; a tie goes to YOLOv8s
    'best_model': 'yolov8s' if count_metrics_small['count_accuracy'] >= count_metrics_nano['count_accuracy'] else 'yolov8n',
    'optimal_confidence_threshold': float(best_threshold)
}

# Write under PROJECT_ROOT rather than a cwd-relative '../docs', and create the
# folder first: open(..., 'w') fails when the target directory does not exist.
docs_dir = PROJECT_ROOT / 'docs'
docs_dir.mkdir(parents=True, exist_ok=True)
with open(docs_dir / 'training_summary.json', 'w', encoding='utf-8') as f:
    json.dump(training_summary, f, indent=2)

# Metrics of the winning model, looked up once instead of inside every f-string field
best_metrics = training_summary[f"{training_summary['best_model']}_metrics"]

print("="*70)
print("                    TRAINING SUMMARY")
print("="*70)
print(f"""
📊 MODELS TRAINED
{'─'*50}
• YOLOv8n (Nano) - Baseline
• YOLOv8s (Small) - Improved

📈 BEST MODEL: {training_summary['best_model'].upper()}
{'─'*50}
• mAP@0.5: {best_metrics['mAP50']:.4f}
• Count Accuracy: {best_metrics['count_accuracy']*100:.1f}%
• Count MAE: {best_metrics['count_mae']:.2f}
• Optimal Threshold: {best_threshold}

💾 SAVED MODELS
{'─'*50}
• models/yolov8n_boxes_best.pt
• models/yolov8s_boxes_best.pt

✓ Training summary saved to docs/training_summary.json
""")

print("="*70)
print("                    YOLOv8 TRAINING COMPLETE ✓")
print("="*70)
