# 🦌 Deer Detection Model Training

This notebook automatically trains and fine-tunes a YOLOv8 model for deer detection using labeled data from your production system.

## Workflow:
1. Mount Google Drive
2. Load latest training dataset (COCO format)
3. Convert to YOLOv8 format
4. Fine-tune YOLOv8 model
5. Validate performance
6. Save trained model back to Drive

**Prerequisites:**
- Labeled detections exported from dashboard
- Dataset synced to Google Drive (`training_data/` folder)
- GPU runtime enabled (Runtime → Change runtime type → T4 GPU)

## 📦 Install Dependencies

In [None]:
# Install the training stack into the notebook runtime. Ultralytics is pinned
# to an exact version; `-q` keeps pip output quiet.
!pip install ultralytics==8.0.196 -q
# NOTE(review): roboflow is installed here but is not referenced by any cell
# visible in this notebook — confirm it is still needed.
!pip install roboflow -q

# Standard-library and YAML helpers shared by the cells below.
import os
import json
import shutil
from pathlib import Path
from datetime import datetime
import yaml

print("‚úì Dependencies installed")

## 🔗 Mount Google Drive

In [None]:
# Attach Google Drive to the Colab VM and define the project folders that the
# remaining cells read from (training data) and write to (trained models).
from google.colab import drive

drive.mount('/content/drive')

# Project locations on the mounted Drive.
DRIVE_ROOT = '/content/drive/MyDrive/Deer video detection'
TRAINING_DATA_DIR = DRIVE_ROOT + '/training_data'
MODELS_DIR = DRIVE_ROOT + '/trained_models'

# Only the models folder is created here; the training data folder is expected
# to have been synced to Drive already.
os.makedirs(MODELS_DIR, exist_ok=True)

print(
    f"‚úì Drive mounted at: {DRIVE_ROOT}",
    f"‚úì Training data: {TRAINING_DATA_DIR}",
    f"‚úì Models output: {MODELS_DIR}",
    sep="\n",
)

## 📂 Find Latest Training Dataset

In [None]:
# Gather every dataset export under TRAINING_DATA_DIR: sub-directories whose
# name starts with 'production_'. A missing folder is treated as "no exports".
_entries = os.listdir(TRAINING_DATA_DIR) if os.path.exists(TRAINING_DATA_DIR) else []
versions = [
    name
    for name in _entries
    if os.path.isdir(os.path.join(TRAINING_DATA_DIR, name))
    and name.startswith('production_')
]

if len(versions) == 0:
    raise Exception("‚ùå No training datasets found in Google Drive. Please export and sync data first.")

# Reverse lexicographic order. This puts the newest export first provided the
# folder names embed a sortable timestamp after 'production_' (assumed).
versions = sorted(versions, reverse=True)
latest_version = versions[0]
dataset_path = os.path.join(TRAINING_DATA_DIR, latest_version)

print(
    f"‚úì Found {len(versions)} dataset version(s)",
    f"‚úì Latest version: {latest_version}",
    f"‚úì Dataset path: {dataset_path}",
    sep="\n",
)

# Show what the selected export contains.
print("\nüìÅ Dataset contents:")
for entry in os.listdir(dataset_path):
    print(f"  - {entry}")

## 🔄 Convert COCO to YOLOv8 Format

In [None]:
# Convert the exported COCO dataset into the layout YOLOv8 trains from:
#   <yolo_dataset_path>/images/train/<image files>
#   <yolo_dataset_path>/labels/train/<image stem>.txt
# Each label line is: <class> <x_center> <y_center> <width> <height>, with the
# four box values normalized by the image size.
from collections import defaultdict


def _yolo_label_name(file_name):
    """Return the YOLO label file name for a COCO ``file_name``.

    Only the base name is used (images are copied flat into images/train) and
    only the real extension is swapped, so ``.jpeg``/``.JPG`` files and names
    that merely contain ".jpg" map to the right label file.
    """
    stem, _ext = os.path.splitext(os.path.basename(file_name))
    return stem + '.txt'


def _coco_bbox_to_yolo(bbox, img_width, img_height):
    """Convert a COCO ``[x, y, width, height]`` pixel box to YOLO format.

    Returns ``(x_center, y_center, width, height)`` normalized by the image
    size, or ``None`` when the image size is not positive or the box has no
    area left after being clipped to the image bounds.
    """
    if img_width <= 0 or img_height <= 0:
        return None

    x, y, w, h = bbox
    # Clip to the image so normalized values always stay inside [0, 1].
    x_min = min(max(x, 0), img_width)
    y_min = min(max(y, 0), img_height)
    x_max = min(max(x + w, 0), img_width)
    y_max = min(max(y + h, 0), img_height)

    box_w = x_max - x_min
    box_h = y_max - y_min
    if box_w <= 0 or box_h <= 0:
        return None

    return (
        (x_min + box_w / 2) / img_width,
        (y_min + box_h / 2) / img_height,
        box_w / img_width,
        box_h / img_height,
    )


# Locate the COCO annotations file. The listing is sorted so the choice is
# deterministic when several files match; the lexicographically last one wins.
annotation_candidates = sorted(
    name
    for name in os.listdir(dataset_path)
    if name.startswith('annotations_') and name.endswith('.json')
)
if not annotation_candidates:
    raise Exception("‚ùå No annotations file found in dataset")
annotations_file = os.path.join(dataset_path, annotation_candidates[-1])

with open(annotations_file, 'r', encoding='utf-8') as f:
    coco_data = json.load(f)

print(f"‚úì Loaded annotations: {annotations_file}")
print(f"  - Images: {len(coco_data['images'])}")
print(f"  - Annotations: {len(coco_data['annotations'])}")
print(f"  - Categories: {len(coco_data['categories'])}")

# Create YOLOv8 dataset structure
yolo_dataset_path = '/content/deer_dataset'
os.makedirs(f'{yolo_dataset_path}/images/train', exist_ok=True)
os.makedirs(f'{yolo_dataset_path}/labels/train', exist_ok=True)

# Copy images (regular files only; sub-directories would make copy2 fail).
images_dir = os.path.join(dataset_path, 'images')
if os.path.exists(images_dir):
    copied_count = 0
    for img_file in os.listdir(images_dir):
        src = os.path.join(images_dir, img_file)
        if not os.path.isfile(src):
            continue
        dst = os.path.join(yolo_dataset_path, 'images', 'train', img_file)
        shutil.copy2(src, dst)
        copied_count += 1
    print(f"‚úì Copied {copied_count} images")
else:
    print("‚ö†Ô∏è No images directory found")

# Lookup tables keyed by COCO image id.
image_id_to_filename = {img['id']: img['file_name'] for img in coco_data['images']}
image_id_to_size = {img['id']: (img['width'], img['height']) for img in coco_data['images']}

# Map COCO category ids to zero-based YOLO class indices using the id-sorted
# category order, which is the same order data.yaml lists the class names in.
# For ids 1..N this yields the same result as `category_id - 1`, but it also
# stays correct when ids start at 0 or have gaps.
category_id_to_class = {
    cat['id']: index
    for index, cat in enumerate(sorted(coco_data['categories'], key=lambda c: c['id']))
}

# Group annotations per image so each image gets exactly one label file.
annotations_by_image = defaultdict(list)
for ann in coco_data['annotations']:
    annotations_by_image[ann['image_id']].append(ann)

label_files_written = 0
skipped_annotations = 0
for img_id, annotations in annotations_by_image.items():
    if img_id not in image_id_to_filename:
        # Annotation refers to an image that is not listed in the export.
        skipped_annotations += len(annotations)
        continue

    img_width, img_height = image_id_to_size[img_id]
    label_path = os.path.join(
        yolo_dataset_path, 'labels', 'train',
        _yolo_label_name(image_id_to_filename[img_id]),
    )

    with open(label_path, 'w') as f:
        for ann in annotations:
            class_id = category_id_to_class.get(ann['category_id'])
            yolo_box = _coco_bbox_to_yolo(ann['bbox'], img_width, img_height)
            if class_id is None or yolo_box is None:
                # Unknown category or a box with no area inside the image.
                skipped_annotations += 1
                continue

            x_center, y_center, width, height = yolo_box
            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")
    label_files_written += 1

if skipped_annotations:
    print(f"Warning: skipped {skipped_annotations} annotation(s) with an unknown image, unknown category, or empty box")

print(f"‚úì Converted {label_files_written} label files to YOLO format")
print(f"‚úì YOLOv8 dataset ready at: {yolo_dataset_path}")

## 📝 Create Dataset Configuration

In [None]:
# Create data.yaml for YOLOv8, with a real holdout split.
#
# Validating on the training images reports inflated metrics, so the images
# are split into train/val image-list files (train.txt / val.txt). No files
# are moved: the lists point at the images already in images/train, which
# keeps this cell safe to re-run and leaves the directory layout unchanged.
import random

VAL_FRACTION = 0.2   # share of images held out for validation
SPLIT_SEED = 42      # fixed seed so the split is reproducible across runs
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.webp')

_train_images_dir = os.path.join(yolo_dataset_path, 'images', 'train')
_all_images = sorted(
    os.path.join(_train_images_dir, name)
    for name in os.listdir(_train_images_dir)
    if name.lower().endswith(_IMAGE_EXTENSIONS)
)
random.Random(SPLIT_SEED).shuffle(_all_images)

if len(_all_images) >= 2:
    # Always hold out at least one image, and always keep at least one to train on.
    _val_count = max(1, int(len(_all_images) * VAL_FRACTION))
    _val_images = _all_images[:_val_count]
    _train_images = _all_images[_val_count:]

    with open(os.path.join(yolo_dataset_path, 'train.txt'), 'w') as f:
        f.write('\n'.join(_train_images) + '\n')
    with open(os.path.join(yolo_dataset_path, 'val.txt'), 'w') as f:
        f.write('\n'.join(_val_images) + '\n')

    train_entry, val_entry = 'train.txt', 'val.txt'
    print(f"Holdout split: {len(_train_images)} train / {len(_val_images)} val images")
else:
    # Too few images to split; fall back to validating on the training set.
    train_entry = val_entry = 'images/train'
    print("Warning: fewer than 2 images found; validation will reuse the training images")

data_yaml = {
    'path': yolo_dataset_path,
    'train': train_entry,   # resolved relative to 'path'
    'val': val_entry,       # resolved relative to 'path'
    'nc': len(coco_data['categories']),  # number of classes
    # Class names in category-id order, so index i names YOLO class i.
    'names': [cat['name'] for cat in sorted(coco_data['categories'], key=lambda x: x['id'])]
}

data_yaml_path = f'{yolo_dataset_path}/data.yaml'
with open(data_yaml_path, 'w') as f:
    yaml.dump(data_yaml, f)

print("‚úì Created data.yaml configuration:")
print(yaml.dump(data_yaml, default_flow_style=False))

## 🚀 Train YOLOv8 Model

In [None]:
# Fine-tune a pretrained YOLOv8 checkpoint on the dataset described by data.yaml.
from ultralytics import YOLO

# Load pretrained YOLOv8 model (or your existing model if available)
# Options: yolov8n.pt (nano), yolov8s.pt (small), yolov8m.pt (medium)
model = YOLO('yolov8n.pt')  # Start with nano for faster training

print("‚úì Loaded YOLOv8n pretrained model")
print("\nüèãÔ∏è Starting training...\n")

# Train the model
results = model.train(
    data=data_yaml_path,
    epochs=50,              # Adjust based on dataset size
    imgsz=640,              # Image size
    batch=16,               # Batch size (adjust based on GPU memory)
    patience=10,            # Early stopping patience
    save=True,              # Save checkpoints
    project='/content/runs', # Output directory
    name='deer_detection',   # Run name
    # Reuse the same run directory when this cell is re-run. Without this a
    # second run is written to an incremented directory (deer_detection2, ...)
    # while the later cells keep reading /content/runs/deer_detection, so
    # stale weights and plots would be evaluated and exported.
    exist_ok=True,
    pretrained=True,         # Use pretrained weights
    optimizer='AdamW',       # Optimizer
    lr0=0.001,              # Initial learning rate
    # NOTE(review): Ultralytics' default config appears to document `augment`
    # as a prediction-time option — confirm it has the intended effect here.
    augment=True,
    mosaic=1.0,             # Mosaic augmentation
    verbose=True            # Verbose output
)

print("\n‚úì Training complete!")

## 📊 Evaluate Model Performance

In [None]:
# Validate the trained model and show the plots produced by the training run.
from IPython.display import Image, display

metrics = model.val()

print("\nüìä Model Performance Metrics:")
print(f"  - mAP50: {metrics.box.map50:.4f}")
print(f"  - mAP50-95: {metrics.box.map:.4f}")
print(f"  - Precision: {metrics.box.mp:.4f}")
print(f"  - Recall: {metrics.box.mr:.4f}")

# Resolve the directory the training run actually wrote to. Ultralytics may
# append a numeric suffix to the run name when the directory already exists,
# so ask the trainer instead of assuming the path; fall back to the default
# location when no trainer is attached to the model.
_trainer = getattr(model, 'trainer', None)
_run_dir = str(getattr(_trainer, 'save_dir', None) or '/content/runs/deer_detection')

# Display training plots
print("\nüìà Training Results:")
results_path = os.path.join(_run_dir, 'results.png')
if os.path.exists(results_path):
    display(Image(results_path))
else:
    print("‚ö†Ô∏è Results plot not found")

# Show confusion matrix
confusion_path = os.path.join(_run_dir, 'confusion_matrix.png')
if os.path.exists(confusion_path):
    print("\nüéØ Confusion Matrix:")
    display(Image(confusion_path))
else:
    print("‚ö†Ô∏è Confusion matrix not found")

## 💾 Save Model to Google Drive

In [None]:
# Export the trained weights, a metadata record and the training plots to a
# new timestamped folder under MODELS_DIR on Google Drive.

# Locate the artifacts of the run that was just trained. The trainer knows the
# real run directory and best-checkpoint path; the hard-coded defaults are only
# a fallback, because Ultralytics may suffix the run name on repeated runs.
_trainer = getattr(model, 'trainer', None)
_run_dir = str(getattr(_trainer, 'save_dir', None) or '/content/runs/deer_detection')
trained_model_path = str(
    getattr(_trainer, 'best', None) or os.path.join(_run_dir, 'weights', 'best.pt')
)

# Fail before creating anything on Drive if there are no weights to export.
if not os.path.isfile(trained_model_path):
    raise FileNotFoundError(
        f"Trained weights not found at {trained_model_path}. Run the training cell first."
    )

# Generate version timestamp
version_timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_version = f'deer_yolov8n_{version_timestamp}'

# Create version directory
model_output_dir = os.path.join(MODELS_DIR, model_version)
os.makedirs(model_output_dir, exist_ok=True)

# Copy trained model
output_model_path = os.path.join(model_output_dir, 'best.pt')
shutil.copy2(trained_model_path, output_model_path)

print(f"‚úì Saved model: {output_model_path}")

# Save metadata. 'base_model' and 'epochs' mirror the values passed in the
# training cell and must be kept in sync with it by hand.
metadata = {
    'version': model_version,
    'timestamp': version_timestamp,
    'base_model': 'yolov8n.pt',
    'training_dataset': latest_version,
    'epochs': 50,
    'image_count': len(coco_data['images']),
    'annotation_count': len(coco_data['annotations']),
    'metrics': {
        # Cast to plain floats so the values are JSON-serializable.
        'map50': float(metrics.box.map50),
        'map50_95': float(metrics.box.map),
        'precision': float(metrics.box.mp),
        'recall': float(metrics.box.mr)
    }
}

metadata_path = os.path.join(model_output_dir, 'metadata.json')
with open(metadata_path, 'w') as f:
    json.dump(metadata, f, indent=2)

print(f"‚úì Saved metadata: {metadata_path}")

# Copy training plots from the same run directory as the weights (best-effort:
# a missing plot is skipped rather than treated as an error).
_results_plot = os.path.join(_run_dir, 'results.png')
if os.path.exists(_results_plot):
    shutil.copy2(_results_plot, os.path.join(model_output_dir, 'results.png'))
    print("‚úì Saved training results plot")

_confusion_plot = os.path.join(_run_dir, 'confusion_matrix.png')
if os.path.exists(_confusion_plot):
    shutil.copy2(_confusion_plot, os.path.join(model_output_dir, 'confusion_matrix.png'))
    print("‚úì Saved confusion matrix")

print(f"\n‚úÖ Model saved to Google Drive: {model_version}")
print(f"\nüìç Full path: {model_output_dir}")
print("\nüéâ Training pipeline complete! You can now deploy this model to your backend.")

## 🔍 Test Model (Optional)

In [None]:
# Run the trained model on a few sample images and display the detections.
import matplotlib.pyplot as plt

# Take up to three images; the listing is sorted so the same samples are shown
# on every run, and the extension check is case-insensitive.
test_images_dir = f'{yolo_dataset_path}/images/train'
test_images = sorted(
    f for f in os.listdir(test_images_dir)
    if f.lower().endswith(('.jpg', '.jpeg', '.png'))
)[:3]

if test_images:
    print("üîç Testing model on sample images:\n")

    for img_name in test_images:
        img_path = os.path.join(test_images_dir, img_name)

        # Run inference. A separate name is used so the training `results`
        # object from the training cell is not overwritten.
        predictions = model(img_path)

        # Display results
        print(f"\nüì∑ {img_name}")
        for r in predictions:
            print(f"  - Detected {len(r.boxes)} object(s)")
            for box in r.boxes:
                conf = box.conf[0].item()
                cls = int(box.cls[0].item())
                # Report the model's own class name instead of assuming "deer".
                label = r.names[cls]
                print(f"    ‚Ä¢ Class {cls} ({label}), Confidence: {conf:.2%}")

        # Show annotated image. plot() returns a BGR array, while matplotlib
        # expects RGB, so reverse the channel axis before displaying.
        annotated = predictions[0].plot()

        plt.figure(figsize=(12, 8))
        plt.imshow(annotated[..., ::-1])
        plt.axis('off')
        plt.title(f"Detection: {img_name}")
        plt.show()
else:
    print("‚ö†Ô∏è No test images found")

## 📋 Summary

In [None]:
# Print a recap of the run: dataset used, model version, headline metrics,
# export location and suggested next steps. Reads values set by earlier cells.
_rule = "=" * 60
_scores = metadata['metrics']

_summary_lines = [
    _rule,
    "üéâ TRAINING SUMMARY",
    _rule,
    "\nüì¶ Dataset:",
    f"  - Version: {latest_version}",
    f"  - Images: {len(coco_data['images'])}",
    f"  - Annotations: {len(coco_data['annotations'])}",
    "\nü§ñ Model:",
    "  - Architecture: YOLOv8n",
    f"  - Version: {model_version}",
    "\nüìä Performance:",
    f"  - mAP50: {_scores['map50']:.4f}",
    f"  - Precision: {_scores['precision']:.4f}",
    f"  - Recall: {_scores['recall']:.4f}",
    "\nüíæ Saved to:",
    f"  - {model_output_dir}",
    "\nüöÄ Next Steps:",
    "  1. Review training metrics above",
    "  2. Deploy model using: POST /api/training/deploy-latest",
    "  3. Monitor performance in production",
    "  4. Collect more data and retrain as needed",
    "\n" + _rule,
]

# One line per entry, exactly as separate print() calls would emit them.
print("\n".join(_summary_lines))