In [1]:
# ============================================================================
# CELL 1: SETUP & INSTALLATION
# ============================================================================

import os
from google.colab import drive
import torch

print("="*70)
print("YOLO11-POSE HAND KEYPOINTS TRAINING - SETUP")
print("="*70)

# Mount Google Drive
print("\n[1/4] Mounting Google Drive...")
drive.mount('/content/drive')

# Create output directory
output_dir = '/content/drive/MyDrive/Intro To AI/yolo_hand_pose'
os.makedirs(output_dir, exist_ok=True)
print(f"‚úì Output directory: {output_dir}")

# Install Ultralytics
print("\n[2/4] Installing Ultralytics YOLO...")
!pip install -q ultralytics
print("‚úì Ultralytics installed")

# Verify GPU
print("\n[3/4] Checking GPU...")
if torch.cuda.is_available():
    gpu_name = torch.cuda.get_device_name(0)
    print(f"‚úì GPU Available: {gpu_name}")
    print(f"‚úì CUDA Version: {torch.version.cuda}")
    print(f"‚úì GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
else:
    print("‚ö† WARNING: No GPU! Go to Runtime ‚Üí Change runtime type ‚Üí T4 GPU")
    raise RuntimeError("GPU required for training")

# Import YOLO
print("\n[4/4] Importing YOLO...")
from ultralytics import YOLO
print("‚úì YOLO imported successfully")

print("\n" + "="*70)
print("‚úÖ SETUP COMPLETE! Run Cell 2 for test training.")
print("="*70)

YOLO11-POSE HAND KEYPOINTS TRAINING - SETUP

[1/4] Mounting Google Drive...
Mounted at /content/drive
‚úì Output directory: /content/drive/MyDrive/Intro To AI/yolo_hand_pose

[2/4] Installing Ultralytics YOLO...
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m1.1/1.1 MB[0m [31m69.2 MB/s[0m eta [36m0:00:00[0m
[?25h‚úì Ultralytics installed

[3/4] Checking GPU...
‚úì GPU Available: Tesla T4
‚úì CUDA Version: 12.6
‚úì GPU Memory: 15.83 GB

[4/4] Importing YOLO...
Creating new Ultralytics Settings v0.0.6 file ‚úÖ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
‚úì YOLO imported successfully

‚úÖ SETUP COMPLETE! Run Cell 2 for test training.


In [None]:
# ============================================================================
# CELL 2: TEST RUN - 2 EPOCHS
# ============================================================================
# Smoke test: trains the pretrained pose model for two epochs so that dataset
# or configuration problems surface before the long run in Cell 3.
# Results are written to <output_dir>/test_run.

from ultralytics import YOLO
import os

output_dir = '/content/drive/MyDrive/Intro To AI/yolo_hand_pose'

print("="*70)
print("TEST RUN: 2 EPOCHS")
print("This verifies everything works before full training")
print("="*70)

# Load pretrained model
print("\nLoading yolo11n-pose.pt...")
model = YOLO('yolo11n-pose.pt')

print("\nüß™ Starting test training...")
print("Dataset will auto-download (369 MB) on first run")
print("Expected time: 5-10 minutes")
print("-"*70)

# Test training
test_results = model.train(
    data='hand-keypoints.yaml',      # Auto-downloads dataset
    epochs=2,                         # Just 2 epochs
    imgsz=640,
    batch=16,
    device=0,
    project=output_dir,               # Run folder is created under the Drive output dir
    name='test_run',
    exist_ok=True,                    # Reuse the test_run folder on repeated runs
    cache=True,                       # Cache for speed
    amp=True,                         # Mixed precision
    workers=2,
    verbose=True,
)

print("\n" + "="*70)
print("‚úÖ TEST RUN SUCCESSFUL!")
print("="*70)
print(f"Test model saved: {output_dir}/test_run/weights/best.pt")
# The epoch count is deliberately not repeated here: Cell 3 owns that setting
# (it trains with epochs=30), and the previous "(50 epochs)" text was stale.
print("\nEverything works! Now run Cell 3 for full training.")
print("="*70)

TEST RUN: 2 EPOCHS
This verifies everything works before full training

Loading yolo11n-pose.pt...
[KDownloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-pose.pt to 'yolo11n-pose.pt': 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 6.0MB 73.7MB/s 0.1s

üß™ Starting test training...
Dataset will auto-download (369 MB) on first run
Expected time: 5-10 minutes
----------------------------------------------------------------------
Ultralytics 8.3.226 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=hand-keypoints.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=True, fliplr=0.5

In [None]:
# ============================================================================
# CELL 3: FULL TRAINING - 30 EPOCHS (~90-120 minutes)
# ============================================================================
# Trains yolo11n-pose on the hand-keypoints dataset, then exports the best
# weights to CoreML and ONNX. Everything is written to <output_dir>/<RUN_NAME>.

from ultralytics import YOLO
import os
from datetime import datetime

output_dir = '/content/drive/MyDrive/Intro To AI/yolo_hand_pose'

# Single source of truth for the run length and run folder, so the banner,
# the trainer arguments and the export paths cannot drift apart (the banner
# used to announce 50 epochs while the trainer was given 30).
EPOCHS = 30
RUN_NAME = 'hand_pose_full'

print("="*70)
print(f"FULL TRAINING: {EPOCHS} EPOCHS")
print("="*70)
print(f"Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("Estimated duration: 90-120 minutes on T4 GPU")
print("="*70)

# Load fresh pretrained model
print("\nLoading yolo11n-pose.pt...")
model = YOLO('yolo11n-pose.pt')

print("\nüöÄ Starting full training...")
print("-"*70)

# Full training
results = model.train(
    data='hand-keypoints.yaml',
    epochs=EPOCHS,
    imgsz=640,
    batch=16,
    device=0,
    project=output_dir,
    name=RUN_NAME,
    exist_ok=True,       # Reuse the run folder instead of creating a numbered copy
    cache=True,
    amp=True,
    workers=4,
    patience=10,
    save_period=2,       # Also keep an epochN.pt snapshot every 2 epochs
    verbose=True,
    plots=True,
)

# ============================================================================
# EXPORT MODELS FOR MACBOOK DEPLOYMENT
# ============================================================================
print("\n" + "="*70)
print("EXPORTING MODELS FOR DEPLOYMENT")
print("="*70)

# Reload from the saved best checkpoint so the export uses best.pt rather than
# whatever state the in-memory model ended training with.
best_model_path = f"{output_dir}/{RUN_NAME}/weights/best.pt"
model = YOLO(best_model_path)

# Export 1: CoreML (optimized for MacBook/Apple Silicon)
# Exports are best-effort: a failure is reported but does not abort the cell,
# so the other format is still attempted.
print("\n[1/2] Exporting to CoreML format (for MacBook)...")
try:
    coreml_path = model.export(format='coreml')
    print(f"‚úÖ CoreML model: {coreml_path}")
    print("    ‚Üí Best for MacBook M1/M2/M3 (uses Neural Engine)")
except Exception as e:
    print(f"‚ö†Ô∏è  CoreML export failed: {e}")

# Export 2: ONNX (cross-platform)
print("\n[2/2] Exporting to ONNX format (cross-platform)...")
try:
    onnx_path = model.export(format='onnx')
    print(f"‚úÖ ONNX model: {onnx_path}")
    print("    ‚Üí Works on Mac/Windows/Linux")
except Exception as e:
    print(f"‚ö†Ô∏è  ONNX export failed: {e}")

# ============================================================================
# TRAINING COMPLETE
# ============================================================================
print("\n" + "="*70)
print("TRAINING & EXPORT COMPLETE!")
print("="*70)

# Display metrics: only the mAP / precision / recall entries of results_dict
if hasattr(results, 'results_dict'):
    print("\nFINAL METRICS:")
    for key, value in results.results_dict.items():
        if 'mAP' in key or 'precision' in key or 'recall' in key:
            print(f"  {key}: {value:.4f}")

# File summary (a static description of the expected layout, not a listing)
print("\n" + "="*70)
print("FILES IN GOOGLE DRIVE")
print("="*70)
print(f"üìÅ {output_dir}/{RUN_NAME}/")
print(f"   ‚îú‚îÄ‚îÄ weights/")
print(f"   ‚îÇ   ‚îú‚îÄ‚îÄ best.pt           ‚¨Ö PyTorch model")
print(f"   ‚îÇ   ‚îú‚îÄ‚îÄ best.mlpackage/   ‚¨Ö CoreML for MacBook ‚≠ê")
print(f"   ‚îÇ   ‚îú‚îÄ‚îÄ best.onnx         ‚¨Ö ONNX cross-platform")
print(f"   ‚îÇ   ‚îî‚îÄ‚îÄ last.pt           (for resuming)")
print(f"   ‚îú‚îÄ‚îÄ results.png")
print(f"   ‚îî‚îÄ‚îÄ confusion_matrix.png")

print("\nüí° FOR MACBOOK: Download 'best.mlpackage' folder")
print("   (CoreML is optimized for Apple Silicon)")
print("\n‚úÖ Go to Google Drive and download your models!")
print(f"End time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("="*70)

In [2]:
# ============================================================================
# CELL 4: RESUME TRAINING FROM CHECKPOINT
# ============================================================================
# Picks the 'hand_pose_full' run back up from its last.pt checkpoint on Drive.
# Self-contained (mounts Drive and checks the GPU itself), so it can be the
# first cell executed after a runtime restart.

from google.colab import drive
from ultralytics import YOLO
import torch

print("="*70)
print("RESUMING TRAINING FROM CHECKPOINT")
print("="*70)

# Step 1: Drive must be mounted before the checkpoint path is readable
print("\n[1/3] Mounting Google Drive...")
drive.mount('/content/drive')

# Step 2: stop right away when the runtime has no CUDA device
print("\n[2/3] Checking GPU...")
if not torch.cuda.is_available():
    raise RuntimeError("GPU required!")
print(f"‚úì GPU: {torch.cuda.get_device_name(0)}")

# Step 3: locate the most recent checkpoint of the full run
print("\n[3/3] Resuming from last checkpoint...")
output_dir = '/content/drive/MyDrive/Intro To AI/yolo_hand_pose'
checkpoint_path = output_dir + '/hand_pose_full/weights/last.pt'

print(f"Loading: {checkpoint_path}")

# Load the checkpoint, then ask the trainer to continue that run
model = YOLO(checkpoint_path)
results = model.train(
    resume=True,  # Automatically continues from where it stopped
)

print("\n‚úÖ Training resumed and completed!")


RESUMING TRAINING FROM CHECKPOINT

[1/3] Mounting Google Drive...
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

[2/3] Checking GPU...
‚úì GPU: Tesla T4

[3/3] Resuming from last checkpoint...
Loading: /content/drive/MyDrive/Intro To AI/yolo_hand_pose/hand_pose_full/weights/last.pt
Ultralytics 8.3.226 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/usr/local/lib/python3.12/dist-packages/ultralytics/cfg/datasets/hand-keypoints.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torc

RuntimeError: Checkpoint /content/drive/MyDrive/Intro To AI/yolo_hand_pose/hand_pose_full/weights/last.pt is corrupted with NaN/Inf weights

In [3]:
# List the checkpoint files of the hand_pose_full run with human-readable sizes
# and modification times (spaces in the Drive path are backslash-escaped for the shell).
!ls -lh /content/drive/MyDrive/Intro\ To\ AI/yolo_hand_pose/hand_pose_full/weights/

total 228M
-rw------- 1 root root 18M Nov  8 20:34 best.pt
-rw------- 1 root root 18M Nov  7 20:24 epoch0.pt
-rw------- 1 root root 18M Nov  8 17:36 epoch10.pt
-rw------- 1 root root 18M Nov  8 17:50 epoch12.pt
-rw------- 1 root root 18M Nov  8 18:03 epoch14.pt
-rw------- 1 root root 18M Nov  8 18:16 epoch16.pt
-rw------- 1 root root 18M Nov  8 18:30 epoch18.pt
-rw------- 1 root root 18M Nov  8 20:34 epoch20.pt
-rw------- 1 root root 18M Nov  7 20:41 epoch2.pt
-rw------- 1 root root 18M Nov  8 16:34 epoch4.pt
-rw------- 1 root root 18M Nov  8 16:59 epoch6.pt
-rw------- 1 root root 18M Nov  8 17:23 epoch8.pt
-rw------- 1 root root 18M Nov  8 20:34 last.pt


In [None]:
# ============================================================================
# CONTINUE FROM BEST.PT WEIGHTS (SAFE CHECKPOINT BEFORE NaN)
# ============================================================================
# Starts a new training run initialised from best.pt, using more conservative
# settings after last.pt was rejected as containing NaN/Inf weights, then
# exports the resulting best weights to CoreML and ONNX.
#
# NOTE: this is not a true resume. train() is called without resume=True, so
# the optimizer state and epoch counter of the earlier run are not restored
# and `epochs` is the length of a fresh schedule.
# NOTE(review): the run reuses name='hand_pose_full' with exist_ok=True, i.e.
# it writes into the same folder it loads best.pt from. Copy the existing
# weights elsewhere first if the earlier checkpoints must be preserved.

from google.colab import drive
from ultralytics import YOLO
import torch

print("="*70)
print("RESUMING FROM EPOCH 20 (best.pt)")
print("="*70)

# Mount Drive
drive.mount('/content/drive')

# Verify GPU
if torch.cuda.is_available():
    print(f"‚úì GPU: {torch.cuda.get_device_name(0)}")
else:
    raise RuntimeError("GPU required!")

# Load the best checkpoint saved by the earlier run
output_dir = '/content/drive/MyDrive/Intro To AI/yolo_hand_pose'
checkpoint_path = f'{output_dir}/hand_pose_full/weights/best.pt'

print(f"\nLoading: {checkpoint_path}")
print("This was your best model before the NaN error at epoch 21")

model = YOLO(checkpoint_path)

print("\nüöÄ Resuming training with NaN prevention settings...")
print("Changes from original:")
print("  ‚Ä¢ Lower learning rate (0.0005 instead of 0.002)")
print("  ‚Ä¢ Optimizer set to AdamW so the lr0 override is applied")
print("  ‚Ä¢ Mixed precision disabled (more stable)")
print("  ‚Ä¢ Training 30 new epochs from the best.pt weights")
print("-"*70)

# Train with stable settings
results = model.train(
    data='hand-keypoints.yaml',
    epochs=30,                    # Length of the new schedule (not "up to epoch 30")
    imgsz=640,
    batch=16,
    device=0,
    project=output_dir,
    name='hand_pose_full',
    exist_ok=True,

    # NaN Prevention Settings
    optimizer='AdamW',            # Required: the default optimizer='auto' ignores lr0
    lr0=0.0005,                   # Lower initial learning rate (was 0.002)
    lrf=0.01,                     # Final LR as a fraction of lr0 (final = lr0 * lrf)
    warmup_epochs=2,              # Number of warmup epochs
    amp=False,                    # Disable mixed precision for stability

    # Regular settings
    cache=True,
    workers=2,
    patience=15,                  # Increased patience
    save_period=2,                # Keep saving every 2 epochs
    verbose=True,
    plots=True,
)

# Export models after completion
print("\n" + "="*70)
print("EXPORTING MODELS")
print("="*70)

# Reload best.pt from disk so the export uses the best checkpoint of this run
best_model = YOLO(f'{output_dir}/hand_pose_full/weights/best.pt')

# Export CoreML for MacBook (best-effort: a failure is reported, not raised)
print("\n[1/2] Exporting to CoreML...")
try:
    coreml_path = best_model.export(format='coreml')
    print(f"‚úÖ CoreML: {coreml_path}")
except Exception as e:
    print(f"‚ö†Ô∏è  CoreML failed: {e}")

# Export ONNX (best-effort as well)
print("\n[2/2] Exporting to ONNX...")
try:
    onnx_path = best_model.export(format='onnx')
    print(f"‚úÖ ONNX: {onnx_path}")
except Exception as e:
    print(f"‚ö†Ô∏è  ONNX failed: {e}")

print("\n" + "="*70)
print("‚úÖ TRAINING COMPLETE!")
print("="*70)
print(f"Models saved in: {output_dir}/hand_pose_full/weights/")
print("  ‚Ä¢ best.pt (PyTorch)")
print("  ‚Ä¢ best.mlpackage/ (CoreML for MacBook)")
print("  ‚Ä¢ best.onnx (cross-platform)")
print("="*70)

RESUMING FROM EPOCH 20 (best.pt)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
‚úì GPU: Tesla T4

Loading: /content/drive/MyDrive/Intro To AI/yolo_hand_pose/hand_pose_full/weights/best.pt
This was your best model before the NaN error at epoch 21

üöÄ Resuming training with NaN prevention settings...
Changes from original:
  ‚Ä¢ Lower learning rate (0.0005 instead of 0.002)
  ‚Ä¢ Mixed precision disabled (more stable)
  ‚Ä¢ Training to epoch 30
----------------------------------------------------------------------
Ultralytics 8.3.226 üöÄ Python-3.12.12 torch-2.8.0+cu126 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=False, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=True, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=hand-keypoints.yaml, degr