In [3]:
# Cell 1: Environment Setup and Dependencies
import os
import sys
import platform
import time
from datetime import datetime
from pathlib import Path
import json
import yaml
import shutil
import random

# Deep learning
import torch
from ultralytics import YOLO

# Data analysis
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Progress tracking
from tqdm.notebook import tqdm

# Print Python and environment information
print(f"Python version: {platform.python_version()}")
print(f"Platform: {platform.platform()}")

# Check for CUDA
try:
    import torch
    print(f"PyTorch version: {torch.__version__}")
    print(f"CUDA available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU device: {torch.cuda.get_device_name(0)}")
        print(f"Number of GPUs: {torch.cuda.device_count()}")
        print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.2f} GB")
    else:
        print("CUDA is not available - training will use CPU")
except ImportError:
    print("PyTorch is not installed - you'll need to install it with pip install torch torchvision")

# Check for other required libraries
required_packages = ['numpy', 'matplotlib', 'pandas', 'ultralytics']
for package in required_packages:
    try:
        module = __import__(package.replace('-', '_'))
        print(f"✅ {package} is installed (version: {module.__version__})")
    except ImportError:
        print(f"❌ {package} is NOT installed - use pip install {package}")
    except AttributeError:
        print(f"✅ {package} is installed (version unknown)")

# Manually set the project root path to ensure accuracy
project_root = "/home/peter/Desktop/TU PHD/WildlifeDetectionSystem"
print(f"\nProject root path: {project_root}")

# Output the current working directory for reference
print(f"Current working directory: {os.getcwd()}")

# Create tracking directory
tracking_dir = os.path.join(project_root, "tracking")
os.makedirs(tracking_dir, exist_ok=True)

print("\nEnvironment setup check complete!")

Python version: 3.12.3
Platform: Linux-6.8.0-58-generic-x86_64-with-glibc2.39
PyTorch version: 2.6.0+cu124
CUDA available: True
CUDA version: 12.4
GPU device: NVIDIA GeForce RTX 4050 Laptop GPU
Number of GPUs: 1
GPU Memory: 5.76 GB
✅ numpy is installed (version: 2.1.1)
✅ matplotlib is installed (version: 3.10.1)
✅ pandas is installed (version: 2.2.3)
✅ ultralytics is installed (version: 8.3.106)

Project root path: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem
Current working directory: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/notebooks/training/Planned_Notebooks_v2

Environment setup check complete!


In [4]:
# Cell 2: Load Configuration from Notebook 1
# Find and load the configuration generated by the data preparation notebook
import os
import json
import yaml
from datetime import datetime

# Manually set the project root path to ensure accuracy - this makes the cell self-contained
project_root = "/home/peter/Desktop/TU PHD/WildlifeDetectionSystem"
print(f"Project root path: {project_root}")

def find_latest_config(config_dir, prefix="notebook_data_"):
    """Find the latest configuration file based on timestamp in filename"""
    config_files = [f for f in os.listdir(config_dir) if f.startswith(prefix) and f.endswith('.json')]
    if not config_files:
        return None
    
    # Sort by timestamp (assuming format notebook_data_YYYYMMDD_HHMM.json)
    latest_config = sorted(config_files, reverse=True)[0]
    return os.path.join(config_dir, latest_config)

# Define paths
config_dir = os.path.join(project_root, "config")
if not os.path.exists(config_dir):
    print(f"❌ Config directory not found: {config_dir}")
    print("Please run notebook 1 (data preparation) first")
else:
    # Try to find the latest config file
    latest_config = find_latest_config(config_dir)
    
    if latest_config and os.path.exists(latest_config):
        print(f"Found configuration from notebook 1: {latest_config}")
        
        # Load configuration
        with open(latest_config, 'r') as f:
            notebook1_config = json.load(f)
        
        # Extract key paths and parameters
        timestamp = notebook1_config["timestamp"]
        class_names = notebook1_config["class_names"]
        taxonomic_groups = notebook1_config["taxonomic_groups"]
        standard_export_path = notebook1_config["standard_export_path"]
        hierarchical_export_path = notebook1_config["hierarchical_export_path"]
        
        print(f"\nLoaded configuration with timestamp: {timestamp}")
        print(f"Number of classes: {len(class_names)}")
        print(f"Number of taxonomic groups: {len(taxonomic_groups)}")
        print(f"Standard dataset: {standard_export_path}")
        print(f"Hierarchical dataset: {hierarchical_export_path}")
        
        # Check if the datasets exist
        if os.path.exists(standard_export_path):
            print(f"✅ Standard YOLO dataset exists")
            
            # Verify data.yaml
            data_yaml_path = os.path.join(standard_export_path, 'data.yaml')
            if os.path.exists(data_yaml_path):
                with open(data_yaml_path, 'r') as f:
                    data_yaml = yaml.safe_load(f)
                print(f"   Classes in data.yaml: {data_yaml.get('nc', 'unknown')}")
            else:
                print(f"❌ data.yaml not found in standard dataset")
        else:
            print(f"❌ Standard YOLO dataset not found: {standard_export_path}")
        
        if os.path.exists(hierarchical_export_path):
            print(f"✅ Hierarchical YOLO dataset exists")
            
            # Verify data.yaml
            hierarchical_yaml_path = os.path.join(hierarchical_export_path, 'data.yaml')
            if os.path.exists(hierarchical_yaml_path):
                with open(hierarchical_yaml_path, 'r') as f:
                    hierarchical_yaml = yaml.safe_load(f)
                print(f"   Groups in data.yaml: {hierarchical_yaml.get('nc', 'unknown')}")
            else:
                print(f"❌ data.yaml not found in hierarchical dataset")
        else:
            print(f"❌ Hierarchical YOLO dataset not found: {hierarchical_export_path}")
    else:
        print(f"❌ Configuration from notebook 1 not found in {config_dir}")
        print("Please run notebook 1 (data preparation) first")

# Define the output paths for this notebook
model_save_dir = os.path.join(project_root, "models", "trained")
reports_dir = os.path.join(project_root, "reports")
timestamp_now = datetime.now().strftime("%Y%m%d_%H%M")

# Generate paths for model outputs
standard_model_name = f"wildlife_detector_{timestamp_now}"
hierarchical_model_name = f"wildlife_detector_hierarchical_{timestamp_now}"

standard_model_path = os.path.join(model_save_dir, standard_model_name)
hierarchical_model_path = os.path.join(model_save_dir, hierarchical_model_name)

print(f"\nOutput paths for trained models:")
print(f"- Standard model: {standard_model_path}")
print(f"- Hierarchical model: {hierarchical_model_path}")

# Save the training configuration for reference and tracking
training_config = {
    "notebook": "02_model_training",
    "timestamp": timestamp_now,
    "input": {
        "config": latest_config,
        "standard_dataset": standard_export_path,
        "hierarchical_dataset": hierarchical_export_path,
        "class_names": class_names,
        "taxonomic_groups": taxonomic_groups
    },
    "output": {
        "standard_model": standard_model_path,
        "hierarchical_model": hierarchical_model_path,
        "reports_dir": reports_dir
    }
}

# Save configuration
training_config_path = os.path.join(config_dir, f"training_config_{timestamp_now}.json")
with open(training_config_path, 'w') as f:
    json.dump(training_config, f, indent=2)

print(f"\nTraining configuration saved to: {training_config_path}")

Project root path: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem
Found configuration from notebook 1: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/config/notebook_data_20250510_0038.json

Loaded configuration with timestamp: 20250510_0038
Number of classes: 30
Number of taxonomic groups: 5
Standard dataset: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_export_test_01_20250510_0038
Hierarchical dataset: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_hierarchical_test_01_20250510_0038
✅ Standard YOLO dataset exists
   Classes in data.yaml: 30
✅ Hierarchical YOLO dataset exists
   Groups in data.yaml: 5

Output paths for trained models:
- Standard model: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_0114
- Hierarchical model: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_hierarchical_20250510_0114

Training configuration saved to: /home/peter/Desktop/T

In [5]:
# Cell 3: Hardware-Aware Model Selection and Optimization
# Automatically selects the optimal model size based on available hardware

import torch 

def detect_hardware_capabilities():
    """Detect hardware capabilities and recommend model size"""
    # Check for CUDA availability
    cuda_available = torch.cuda.is_available()
    
    if not cuda_available:
        print("No CUDA detected. Using CPU for training.")
        return {
            "device": "cpu",
            "recommended_model": "n",  # nano model for CPU
            "batch_size": 1,
            "image_size": 320,
            "workers": 0
        }
    
    # Get GPU memory in GB
    try:
        gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
        print(f"GPU detected with {gpu_memory:.2f} GB memory")
        
        # Determine model size based on available GPU memory
        if gpu_memory >= 24:
            # High-end GPU (RTX 3090/4090, A100, etc.)
            return {
                "device": 0,
                "recommended_model": "x",  # largest model
                "batch_size": 16,
                "image_size": 640,
                "workers": 4
            }
        elif gpu_memory >= 16:
            # Good GPU (RTX 3080, RTX A6000, etc.)
            return {
                "device": 0,
                "recommended_model": "l",  # large model
                "batch_size": 12,
                "image_size": 640,
                "workers": 4
            }
        elif gpu_memory >= 8:
            # Mid-range GPU (RTX 3070, RTX 2080, etc.)
            return {
                "device": 0,
                "recommended_model": "m",  # medium model
                "batch_size": 8,
                "image_size": 512,
                "workers": 2
            }
        elif gpu_memory >= 4:
            # Entry-level GPU (GTX 1660, RTX 3050, etc.)
            return {
                "device": 0,
                "recommended_model": "s",  # small model
                "batch_size": 4,
                "image_size": 416,
                "workers": 2
            }
        else:
            # Low-end GPU or integrated (MX series, etc.)
            return {
                "device": 0,
                "recommended_model": "n",  # nano model
                "batch_size": 2,
                "image_size": 320,
                "workers": 1
            }
    except Exception as e:
        print(f"Error detecting GPU properties: {e}")
        print("Defaulting to safe configuration")
        return {
            "device": 0 if cuda_available else "cpu",
            "recommended_model": "n",  # nano model for safety
            "batch_size": 1,
            "image_size": 320,
            "workers": 0
        }

# Detect hardware capabilities
print("Detecting hardware capabilities...")
hw_config = detect_hardware_capabilities()

# Map model size to readable name and parameters
model_sizes = {
    "n": {"name": "YOLOv8n", "description": "Nano", "params": "3.2M"},
    "s": {"name": "YOLOv8s", "description": "Small", "params": "11.2M"},
    "m": {"name": "YOLOv8m", "description": "Medium", "params": "25.9M"},
    "l": {"name": "YOLOv8l", "description": "Large", "params": "43.7M"},
    "x": {"name": "YOLOv8x", "description": "Extra Large", "params": "68.2M"}
}

# Display recommended configuration
recommended_model_size = hw_config["recommended_model"]
recommended_model = model_sizes[recommended_model_size]

print("\nRecommended Configuration Based on Hardware:")
print(f"- Model: {recommended_model['name']} ({recommended_model['description']})")
print(f"- Parameters: {recommended_model['params']}")
print(f"- Device: {'GPU' if hw_config['device'] == 0 else 'CPU'}")
print(f"- Batch Size: {hw_config['batch_size']}")
print(f"- Image Size: {hw_config['image_size']}px")
print(f"- Workers: {hw_config['workers']}")

# Allow manual override
print("\nWould you like to override the recommended model size? (y/n)")
override = input().strip().lower()
if override == 'y':
    print("Select model size (n=nano, s=small, m=medium, l=large, x=extra-large):")
    model_input = input().strip().lower()
    if model_input in model_sizes:
        recommended_model_size = model_input
        recommended_model = model_sizes[recommended_model_size]
        print(f"Using {recommended_model['name']} ({recommended_model['description']}) with {recommended_model['params']} parameters.")
    else:
        print(f"Invalid selection. Using recommended {recommended_model['name']}.")

# Define model paths
base_model_path = f"yolov8{recommended_model_size}.pt"
print(f"\nBase model path: {base_model_path}")

# Save hardware configuration to the training config
training_config["hardware"] = hw_config
training_config["model"] = {
    "size": recommended_model_size,
    "name": recommended_model["name"],
    "description": recommended_model["description"],
    "parameters": recommended_model["params"],
    "base_model_path": base_model_path
}

# Update training config file
with open(training_config_path, 'w') as f:
    json.dump(training_config, f, indent=2)

print(f"Updated training configuration with hardware and model settings.")

Detecting hardware capabilities...
GPU detected with 5.76 GB memory

Recommended Configuration Based on Hardware:
- Model: YOLOv8s (Small)
- Parameters: 11.2M
- Device: GPU
- Batch Size: 4
- Image Size: 416px
- Workers: 2

Would you like to override the recommended model size? (y/n)



Base model path: yolov8s.pt
Updated training configuration with hardware and model settings.


In [6]:
# Cell 4: Training Configuration Setup
# Define training parameters for both standard and hierarchical models

# Memory optimization settings
memory_optimizations = {
    "cpu": {
        # CPU-specific optimizations
        "device": "cpu",
        "workers": 0,
        "batch": 1,
        "cache": "disk",
        "imgsz": min(hw_config["image_size"], 320),  # Changed from "image_size" to "imgsz"
        "amp": False  # No mixed precision on CPU
    },
    "gpu_low_memory": {
        # For GPUs with less than 4GB memory
        "device": 0,
        "workers": hw_config["workers"],
        "batch": max(1, hw_config["batch_size"] // 2),  # Reduce batch size
        "cache": "disk",
        "imgsz": min(hw_config["image_size"], 384),  # Changed from "image_size" to "imgsz"
        "amp": True  # Mixed precision
    },
    "gpu_standard": {
        # For standard GPUs with sufficient memory
        "device": 0,
        "workers": hw_config["workers"],
        "batch": hw_config["batch_size"],
        "cache": "ram",
        "imgsz": hw_config["image_size"],  # Changed from "image_size" to "imgsz"
        "amp": True  # Mixed precision
    }
}

# Select memory optimization profile based on hardware
if hw_config["device"] == "cpu":
    memory_profile = "cpu"
elif hw_config["device"] == 0 and torch.cuda.get_device_properties(0).total_memory / (1024**3) < 4:
    memory_profile = "gpu_low_memory"
else:
    memory_profile = "gpu_standard"

print(f"Selected memory optimization profile: {memory_profile}")
memory_config = memory_optimizations[memory_profile]

# Base hyperparameters common to both standard and hierarchical training
base_hyperparams = {
    # Standard YOLOv8 parameters
    'epochs': 100,                    # Maximum number of epochs
    'patience': 25,                   # Early stopping patience
    'optimizer': 'AdamW',             # Optimizer (AdamW better for imbalanced data)
    'lr0': 0.001,                     # Initial learning rate
    'lrf': 0.01,                      # Final learning rate as a fraction of lr0
    'momentum': 0.937,                # SGD momentum/Adam beta1
    'weight_decay': 0.0005,           # Regularization 
    'warmup_epochs': 5,               # Warmup epochs
    'warmup_momentum': 0.8,           # Initial warmup momentum
    'warmup_bias_lr': 0.1,            # Initial warmup learning rate for bias
    
    # Loss function weights
    'box': 7.5,                       # Box loss weight
    'cls': 3.0,                       # Class loss weight
    'dfl': 1.5,                       # DFL loss weight
    
    # Data augmentation
    'hsv_h': 0.015,                   # HSV Hue augmentation
    'hsv_s': 0.7,                     # HSV Saturation augmentation (higher for wildlife)
    'hsv_v': 0.4,                     # HSV Value augmentation (for varying lighting)
    'degrees': 10.0,                  # Rotation augmentation
    'translate': 0.2,                 # Translation augmentation
    'scale': 0.6,                     # Scale augmentation (stronger for wildlife)
    'fliplr': 0.5,                    # Horizontal flip probability
    'mosaic': 1.0,                    # Mosaic augmentation
    'mixup': 0.1,                     # Mixup augmentation
    'copy_paste': 0.1,                # Copy-paste augmentation (for rare classes)
    
    # Saving and checkpointing
    'save': True,                     # Save model
    'save_period': 10,                # Save checkpoints every X epochs
    
    # Nominal batch size for gradient accumulation 
    'nbs': 16                         # Nominal batch size
}

# Merge base hyperparameters with memory optimizations
standard_hyperparams = {**base_hyperparams, **memory_config}
hierarchical_hyperparams = {**base_hyperparams, **memory_config}

# Special adjustments for hierarchical model (fewer classes, may need different parameters)
hierarchical_hyperparams.update({
    'cls': 2.0,                       # Reduced class weight (fewer classes)
    'epochs': 50,                     # Fewer epochs may be sufficient for taxonomic groups
    'patience': 15                    # Earlier stopping 
})

# Display final training configurations
print("\nStandard Model Training Configuration:")
for key, value in standard_hyperparams.items():
    if key in ['epochs', 'patience', 'optimizer', 'lr0', 'batch', 'image_size', 'device', 'workers', 'amp']:
        print(f"- {key}: {value}")

print("\nHierarchical Model Training Configuration:")
for key, value in hierarchical_hyperparams.items():
    if key in ['epochs', 'patience', 'optimizer', 'lr0', 'batch', 'image_size', 'device', 'workers', 'amp']:
        print(f"- {key}: {value}")

# Update training config with hyperparameters
training_config["hyperparameters"] = {
    "standard": standard_hyperparams,
    "hierarchical": hierarchical_hyperparams,
    "memory_profile": memory_profile
}

# Update training config file
with open(training_config_path, 'w') as f:
    json.dump(training_config, f, indent=2)

print(f"\nHyperparameters added to training configuration.")

Selected memory optimization profile: gpu_standard

Standard Model Training Configuration:
- epochs: 100
- patience: 25
- optimizer: AdamW
- lr0: 0.001
- device: 0
- workers: 2
- batch: 4
- amp: True

Hierarchical Model Training Configuration:
- epochs: 50
- patience: 15
- optimizer: AdamW
- lr0: 0.001
- device: 0
- workers: 2
- batch: 4
- amp: True

Hyperparameters added to training configuration.


In [7]:
# Cell 5: Standard Model Training
# Train the standard model with all species classes

from ultralytics import YOLO 
import time

# Function to handle out-of-memory errors during training
def train_with_fallback(model, hyperparams, fallbacks=3):
    """Train with automatic fallback to lower resource config if OOM errors occur"""
    for attempt in range(fallbacks + 1):
        try:
            print(f"\nTraining attempt {attempt + 1}/{fallbacks + 1}")
            # Start training timer
            start_time = time.time()
            
            # Train the model with current hyperparams
            results = model.train(**hyperparams)
            
            # If we get here, training was successful
            training_time = time.time() - start_time
            hours, remainder = divmod(training_time, 3600)
            minutes, seconds = divmod(remainder, 60)
            
            print(f"\nTraining completed in {int(hours)}h {int(minutes)}m {int(seconds)}s")
            return results, hyperparams
            
        except RuntimeError as e:
            if 'out of memory' in str(e).lower() and attempt < fallbacks:
                print("\n⚠️ GPU OUT OF MEMORY ERROR DETECTED ⚠️")
                print("Reducing resource usage and trying again...")
                
                # Reduce resource usage
                if hyperparams['batch'] > 1:
                    hyperparams['batch'] = hyperparams['batch'] // 2
                    print(f"Reduced batch size to {hyperparams['batch']}")
                
                if hyperparams['imgsz'] > 320:
                    hyperparams['imgsz'] = max(320, hyperparams['imgsz'] - 64)
                    print(f"Reduced image size to {hyperparams['imgsz']}")
                
                if hyperparams['device'] != 'cpu' and attempt == fallbacks - 1:
                    print("Switching to CPU as last resort")
                    hyperparams['device'] = 'cpu'
                    hyperparams['workers'] = 0
                    hyperparams['amp'] = False
                
                # Free up GPU memory
                torch.cuda.empty_cache()
                import gc
                gc.collect()
                
            else:
                print(f"\nTraining error: {e}")
                return None, hyperparams
    
    return None, hyperparams

# Create model output directories
os.makedirs(os.path.join(model_save_dir), exist_ok=True)
os.makedirs(standard_model_path, exist_ok=True)

print(f"Starting standard model training on all {len(class_names)} classes")
print(f"Using base model: {base_model_path}")
print(f"Dataset path: {standard_export_path}")
print(f"Model will be saved to: {standard_model_path}")

try:
    # Initialize YOLOv8 model
    model = YOLO(base_model_path)
    
    # Set training parameters
    standard_params = {
        **standard_hyperparams,
        'data': os.path.join(standard_export_path, 'data.yaml'),
        'project': model_save_dir,
        'name': os.path.basename(standard_model_path)
    }
    
    print("\nStarting training with the following settings:")
    print(f"- Model: {base_model_path}")
    print(f"- Epochs: {standard_params['epochs']}")
    print(f"- Batch size: {standard_params['batch']}")
    print(f"- Image size: {standard_params['imgsz']}px")
    print(f"- Device: {'CPU' if standard_params['device'] == 'cpu' else 'GPU'}")
    print(f"- Workers: {standard_params['workers']}")
    
    # Train with automatic fallback on OOM errors
    standard_results, final_params = train_with_fallback(model, standard_params)
    
    if standard_results:
        # Save final hyperparameters actually used
        training_config["standard_model"] = {
            "train_results": {
                "best_epoch": standard_results.best_epoch,
                "maps": standard_results.maps,
                "fitness": standard_results.fitness
            },
            "final_hyperparams": final_params
        }
        
        # Update training config with results
        with open(training_config_path, 'w') as f:
            json.dump(training_config, f, indent=2)
        
        print("\nStandard model training results:")
        print(f"- Best mAP50-95: {standard_results.maps[0]:.4f}")
        print(f"- Best mAP50: {standard_results.maps[1]:.4f}")
        print(f"- Best epoch: {standard_results.best_epoch}")
        print(f"- Model saved to: {standard_model_path}")
        
        # Create a training summary report
        summary_path = os.path.join(reports_dir, f"standard_model_summary_{timestamp_now}.md")
        os.makedirs(os.path.dirname(summary_path), exist_ok=True)
        
        with open(summary_path, 'w') as f:
            f.write(f"# Standard Wildlife Detection Model Training Summary\n\n")
            f.write(f"## Training Metadata\n")
            f.write(f"- **Date and Time**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"- **Model**: {base_model_path}\n")
            f.write(f"- **Dataset**: {standard_export_path}\n")
            f.write(f"- **Classes**: {len(class_names)}\n\n")
            
            f.write(f"## Training Configuration\n")
            for param in ['epochs', 'batch', 'image_size', 'device', 'optimizer', 'lr0']:
                f.write(f"- **{param}**: {final_params.get(param, 'N/A')}\n")
            
            f.write(f"\n## Performance Metrics\n")
            f.write(f"- **Best mAP50-95**: {standard_results.maps[0]:.4f}\n")
            f.write(f"- **Best mAP50**: {standard_results.maps[1]:.4f}\n")
            f.write(f"- **Best epoch**: {standard_results.best_epoch}\n")

        print(f"Training summary saved to: {summary_path}")
        
        # Save model path for future notebooks
        standard_best_model_path = os.path.join(standard_model_path, "weights", "best.pt")
        training_config["standard_best_model_path"] = standard_best_model_path
        
        # Update training config with paths to result files
        with open(training_config_path, 'w') as f:
            json.dump(training_config, f, indent=2)
    else:
        print("\nStandard model training failed.")

except Exception as e:
    print(f"Error during standard model training: {e}")
    import traceback
    traceback.print_exc()

Starting standard model training on all 30 classes
Using base model: yolov8s.pt
Dataset path: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_export_test_01_20250510_0038
Model will be saved to: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_0114

Starting training with the following settings:
- Model: yolov8s.pt
- Epochs: 100
- Batch size: 4
- Image size: 416px
- Device: GPU
- Workers: 2

Training attempt 1/4
New https://pypi.org/project/ultralytics/8.3.130 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.106 🚀 Python-3.12.3 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 5898MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_export_test_01_20250510_0038/data.yaml, epochs=100, time=None, patience=25, batch=4, imgsz=416, save=True, save_period=10, cache=ram, device=0, workers=2, project=/ho

[34m[1mtrain: [0mScanning /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_export_test_01_20250510_0038/labels/train.cache... 356 images, 0 backgrounds, 6 corrupt: 100%|██████████| 356/356 [00:00<?, ?it/s]








[34m[1mtrain: [0mCaching images (0.1GB RAM): 100%|██████████| 350/350 [00:13<00:00, 26.48it/s]


[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_export_test_01_20250510_0038/labels/val.cache... 89 images, 0 backgrounds, 3 corrupt: 100%|██████████| 89/89 [00:00<?, ?it/s]








[34m[1mval: [0mCaching images (0.0GB RAM): 100%|██████████| 86/86 [00:03<00:00, 23.53it/s]


Plotting labels to /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_01142/labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.937) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 416 train, 416 val
Using 2 dataloader workers
Logging results to [1m/home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_01142[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100     0.619G      1.978      22.17      1.776          5        416: 100%|██████████| 88/88 [00:11<00:00,  7.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  4.95it/s]


                   all         86         88       0.18      0.151     0.0825      0.037

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      2/100     0.705G      1.896      15.45      1.712          5        416: 100%|██████████| 88/88 [00:10<00:00,  8.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:01<00:00,  6.30it/s]

                   all         86         88      0.564     0.0885     0.0757     0.0341






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      3/100     0.744G      1.886      14.58      1.724          3        416: 100%|██████████| 88/88 [00:10<00:00,  8.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  4.93it/s]

                   all         86         88      0.702      0.107      0.185      0.103






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      4/100     0.762G       1.93      13.94      1.787          5        416: 100%|██████████| 88/88 [00:13<00:00,  6.33it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  4.21it/s]

                   all         86         88      0.707      0.145      0.167     0.0829






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      5/100      0.82G      1.899      13.49      1.737          4        416: 100%|██████████| 88/88 [00:09<00:00,  8.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.28it/s]

                   all         86         88      0.528      0.176      0.174     0.0635






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      6/100      0.82G      1.839      13.06      1.723          3        416: 100%|██████████| 88/88 [00:08<00:00,  9.85it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:01<00:00,  6.88it/s]

                   all         86         88      0.563      0.305      0.211     0.0876






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      7/100      0.82G      1.848      12.15      1.706          2        416: 100%|██████████| 88/88 [00:08<00:00, 10.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.42it/s]

                   all         86         88      0.321      0.215      0.237      0.102






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      8/100      0.82G      1.774       11.5      1.683          4        416: 100%|██████████| 88/88 [00:09<00:00,  9.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.44it/s]

                   all         86         88      0.545      0.249      0.214     0.0976






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      9/100      0.82G      1.781      11.87      1.661          4        416: 100%|██████████| 88/88 [00:10<00:00,  8.63it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.46it/s]


                   all         86         88      0.746      0.274      0.393      0.192

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     10/100      0.82G      1.741      11.51      1.639          1        416: 100%|██████████| 88/88 [00:10<00:00,  8.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.34it/s]

                   all         86         88      0.527      0.337      0.312      0.156






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     11/100      0.82G      1.701      10.78      1.638          5        416: 100%|██████████| 88/88 [00:11<00:00,  7.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  4.74it/s]

                   all         86         88      0.634      0.303      0.383      0.193






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     12/100      0.82G      1.668      9.882      1.594          8        416: 100%|██████████| 88/88 [00:10<00:00,  8.12it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.15it/s]

                   all         86         88      0.685      0.228      0.269      0.139






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     13/100      0.82G      1.645      9.771      1.589          5        416: 100%|██████████| 88/88 [00:09<00:00,  9.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.00it/s]

                   all         86         88      0.632      0.365      0.312      0.169






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     14/100      0.82G      1.635      9.793       1.58          4        416: 100%|██████████| 88/88 [00:06<00:00, 13.82it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 11.46it/s]

                   all         86         88      0.652      0.388      0.345      0.185






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     15/100      0.82G      1.702      10.17      1.668          3        416: 100%|██████████| 88/88 [00:05<00:00, 15.01it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 12.57it/s]

                   all         86         88      0.664      0.254      0.288      0.143






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     16/100      0.82G      1.596      9.741       1.57          3        416: 100%|██████████| 88/88 [00:05<00:00, 14.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 11.44it/s]

                   all         86         88       0.68      0.246      0.316      0.172






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     17/100      0.82G      1.635      9.388      1.609          3        416: 100%|██████████| 88/88 [00:05<00:00, 14.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 11.61it/s]

                   all         86         88      0.731      0.349       0.36      0.197






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     18/100      0.82G      1.571      9.395      1.551          1        416: 100%|██████████| 88/88 [00:06<00:00, 14.56it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 12.19it/s]

                   all         86         88       0.52      0.399      0.324      0.176






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     19/100      0.82G      1.521      8.964      1.495          6        416: 100%|██████████| 88/88 [00:05<00:00, 14.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 11.44it/s]

                   all         86         88      0.526      0.396      0.339      0.179






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     20/100      0.82G      1.498      8.931      1.498          3        416: 100%|██████████| 88/88 [00:06<00:00, 14.61it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 13.31it/s]

                   all         86         88      0.654      0.339      0.344      0.185






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     21/100      0.82G      1.561      8.935      1.539          5        416: 100%|██████████| 88/88 [00:06<00:00, 14.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 15.61it/s]

                   all         86         88      0.536      0.385      0.354      0.189






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     22/100      0.82G      1.435      8.336      1.444          1        416: 100%|██████████| 88/88 [00:06<00:00, 14.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 29.12it/s]

                   all         86         88      0.675      0.387      0.372      0.194






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     23/100      0.82G       1.51      8.759      1.511          5        416: 100%|██████████| 88/88 [00:06<00:00, 13.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 21.84it/s]

                   all         86         88      0.705      0.402      0.417      0.216






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     24/100      0.82G      1.414      8.208      1.456          3        416: 100%|██████████| 88/88 [00:06<00:00, 13.95it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 26.12it/s]

                   all         86         88      0.753      0.419      0.447      0.244






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     25/100      0.82G      1.439      8.059      1.458          3        416: 100%|██████████| 88/88 [00:06<00:00, 13.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 22.49it/s]

                   all         86         88      0.665      0.402      0.451      0.266






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     26/100      0.82G      1.454      8.133      1.489          6        416: 100%|██████████| 88/88 [00:06<00:00, 13.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 21.44it/s]

                   all         86         88      0.538      0.429      0.392       0.21






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     27/100      0.82G      1.423      7.687      1.442          3        416: 100%|██████████| 88/88 [00:06<00:00, 12.99it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 19.67it/s]

                   all         86         88      0.586      0.487      0.423      0.209






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     28/100      0.82G      1.449      8.237      1.485          5        416: 100%|██████████| 88/88 [00:06<00:00, 13.57it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.77it/s]

                   all         86         88      0.692      0.388      0.389      0.215






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     29/100      0.82G      1.461      7.768      1.467          6        416: 100%|██████████| 88/88 [00:03<00:00, 23.11it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 37.89it/s]

                   all         86         88      0.759      0.358      0.595      0.337






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     30/100      0.82G       1.45      7.799      1.452          2        416: 100%|██████████| 88/88 [00:04<00:00, 20.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 40.58it/s]

                   all         86         88      0.652      0.452      0.466       0.26






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     31/100      0.82G      1.362      7.444       1.42          2        416: 100%|██████████| 88/88 [00:03<00:00, 22.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 25.39it/s]

                   all         86         88       0.61      0.396      0.372       0.21






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     32/100      0.82G      1.337      7.201      1.377          6        416: 100%|██████████| 88/88 [00:05<00:00, 15.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 16.11it/s]

                   all         86         88      0.508      0.523      0.392      0.222






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     33/100      0.82G       1.38      7.484      1.422          1        416: 100%|██████████| 88/88 [00:05<00:00, 17.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 15.97it/s]

                   all         86         88       0.65      0.424      0.393      0.232






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     34/100      0.82G      1.324      7.299      1.395          3        416: 100%|██████████| 88/88 [00:05<00:00, 16.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 16.65it/s]

                   all         86         88      0.695      0.405      0.383      0.228






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     35/100      0.82G      1.371      7.326      1.424          4        416: 100%|██████████| 88/88 [00:05<00:00, 17.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 16.75it/s]

                   all         86         88      0.516      0.498      0.392      0.224






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     36/100      0.82G      1.351      7.331      1.406          7        416: 100%|██████████| 88/88 [00:05<00:00, 16.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.61it/s]

                   all         86         88       0.55      0.411      0.364      0.194






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     37/100      0.82G      1.342       7.23      1.389          8        416: 100%|██████████| 88/88 [00:04<00:00, 17.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.78it/s]

                   all         86         88      0.671      0.339      0.411      0.234






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     38/100      0.82G      1.317      6.622      1.363          2        416: 100%|██████████| 88/88 [00:05<00:00, 16.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.68it/s]

                   all         86         88      0.589      0.502      0.411      0.222






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     39/100      0.82G      1.294      6.617       1.36          4        416: 100%|██████████| 88/88 [00:04<00:00, 17.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.87it/s]

                   all         86         88      0.582      0.506      0.394      0.205






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     40/100      0.82G      1.373       7.21       1.39          1        416: 100%|██████████| 88/88 [00:05<00:00, 16.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 19.15it/s]

                   all         86         88      0.634      0.473      0.449      0.282






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     41/100      0.82G      1.315      7.054      1.388          4        416: 100%|██████████| 88/88 [00:05<00:00, 16.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.28it/s]

                   all         86         88      0.595      0.518      0.477      0.292






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     42/100      0.82G      1.293      6.853      1.381          5        416: 100%|██████████| 88/88 [00:05<00:00, 17.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 32.01it/s]

                   all         86         88      0.507      0.477      0.425       0.23






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     43/100      0.82G      1.295      6.526       1.36          5        416: 100%|██████████| 88/88 [00:05<00:00, 17.18it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.08it/s]

                   all         86         88      0.618      0.496      0.463      0.254






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     44/100      0.82G       1.29      6.341      1.342          4        416: 100%|██████████| 88/88 [00:05<00:00, 17.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 32.30it/s]

                   all         86         88      0.561      0.504      0.436       0.23






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     45/100      0.82G      1.275      6.678      1.361          9        416: 100%|██████████| 88/88 [00:05<00:00, 16.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.69it/s]

                   all         86         88      0.785      0.339      0.408      0.227






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     46/100      0.82G        1.3      6.649      1.366          5        416: 100%|██████████| 88/88 [00:04<00:00, 17.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 16.97it/s]

                   all         86         88      0.657      0.391      0.402      0.218






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     47/100      0.82G      1.223      6.286      1.331          3        416: 100%|██████████| 88/88 [00:05<00:00, 16.86it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.50it/s]

                   all         86         88       0.49      0.473      0.392       0.22






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     48/100      0.82G      1.261      6.443      1.342          5        416: 100%|██████████| 88/88 [00:05<00:00, 17.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 18.45it/s]

                   all         86         88      0.505      0.463      0.394      0.231






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     49/100      0.82G      1.187      5.872      1.302          5        416: 100%|██████████| 88/88 [00:05<00:00, 16.83it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.66it/s]

                   all         86         88      0.768      0.351      0.392      0.232






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     50/100      0.82G      1.278      6.414      1.364          5        416: 100%|██████████| 88/88 [00:05<00:00, 17.23it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.63it/s]

                   all         86         88      0.751      0.392      0.447      0.229






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     51/100      0.82G      1.257      6.285      1.365          2        416: 100%|██████████| 88/88 [00:05<00:00, 16.84it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.55it/s]

                   all         86         88      0.625      0.461      0.506      0.302






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     52/100      0.82G      1.197      5.999      1.305          4        416: 100%|██████████| 88/88 [00:04<00:00, 17.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 16.94it/s]

                   all         86         88      0.539      0.599      0.414      0.233






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     53/100      0.82G      1.208      6.111      1.313          2        416: 100%|██████████| 88/88 [00:05<00:00, 16.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 19.98it/s]

                   all         86         88      0.475      0.502      0.391       0.21






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


     54/100      0.82G      1.231      6.128      1.331          2        416: 100%|██████████| 88/88 [00:05<00:00, 17.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:00<00:00, 17.10it/s]

                   all         86         88      0.758      0.385      0.422      0.239
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 25 epochs. Best results observed at epoch 29, best model saved as best.pt.
To update EarlyStopping(patience=25) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






54 epochs completed in 0.120 hours.
Optimizer stripped from /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_01142/weights/last.pt, 22.5MB
Optimizer stripped from /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_01142/weights/best.pt, 22.5MB

Validating /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_01142/weights/best.pt...
Ultralytics 8.3.106 🚀 Python-3.12.3 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 5898MiB)
Model summary (fused): 72 layers, 11,137,194 parameters, 0 gradients, 28.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 11/11 [00:02<00:00,  5.25it/s]


                   all         86         88      0.751      0.358      0.595      0.337
         Male Roe Deer         23         23      0.738      0.435      0.796      0.412
       Female Roe Deer         14         14      0.446       0.46      0.366      0.227
                   Fox          8          8      0.451        0.5      0.426      0.255
                Jackal          4          4          1          0      0.364      0.193
                Weasel          1          1          1          0      0.249      0.174
               Wildcat          1          1          1          0      0.995      0.597
                Rabbit         26         27      0.696      0.667      0.732      0.324
                 Human          9         10      0.673        0.8      0.836      0.512
Speed: 0.1ms preprocess, 15.4ms inference, 0.0ms loss, 4.6ms postprocess per image
Results saved to [1m/home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250510_01

Traceback (most recent call last):
  File "/tmp/ipykernel_10671/3399800566.py", line 94, in <module>
    "best_epoch": standard_results.best_epoch,
                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/peter/Desktop/TU PHD/WildlifeDetectionSystem/api/venv/lib/python3.12/site-packages/ultralytics/utils/__init__.py", line 240, in __getattr__
    raise AttributeError(f"'{name}' object has no attribute '{attr}'. See valid attributes below.\n{self.__doc__}")
AttributeError: 'DetMetrics' object has no attribute 'best_epoch'. See valid attributes below.

    Utility class for computing detection metrics such as precision, recall, and mean average precision (mAP).

    Attributes:
        save_dir (Path): A path to the directory where the output plots will be saved.
        plot (bool): A flag that indicates whether to plot precision-recall curves for each class.
        names (dict): A dictionary of class names.
        box (Metric): An instance of the Metric class for storing detection

In [8]:
# Cell 6: Hierarchical Model Training
# Train the hierarchical model with taxonomic groups as classes

# Create model output directories
os.makedirs(hierarchical_model_path, exist_ok=True)

print(f"\nStarting hierarchical model training on {len(taxonomic_groups)} taxonomic groups")
print(f"Using base model: {base_model_path}")
print(f"Dataset path: {hierarchical_export_path}")
print(f"Model will be saved to: {hierarchical_model_path}")

try:
    # Initialize YOLOv8 model
    hierarchical_model = YOLO(base_model_path)
    
    # Set training parameters
    hierarchical_params = {
        **hierarchical_hyperparams,
        'data': os.path.join(hierarchical_export_path, 'data.yaml'),
        'project': model_save_dir,
        'name': os.path.basename(hierarchical_model_path)
    }
    
    print("\nStarting hierarchical training with the following settings:")
    print(f"- Model: {base_model_path}")
    print(f"- Epochs: {hierarchical_params['epochs']}")
    print(f"- Batch size: {hierarchical_params['batch']}")
    print(f"- Image size: {hierarchical_params['image_size']}")
    print(f"- Device: {'CPU' if hierarchical_params['device'] == 'cpu' else 'GPU'}")
    print(f"- Workers: {hierarchical_params['workers']}")
    
    # Train with automatic fallback on OOM errors
    hierarchical_results, final_hierarchical_params = train_with_fallback(hierarchical_model, hierarchical_params)
    
    if hierarchical_results:
        # Save final hyperparameters actually used
        training_config["hierarchical_model"] = {
            "train_results": {
                "best_epoch": hierarchical_results.best_epoch,
                "maps": hierarchical_results.maps,
                "fitness": hierarchical_results.fitness
            },
            "final_hyperparams": final_hierarchical_params
        }
        
        # Update training config with results
        with open(training_config_path, 'w') as f:
            json.dump(training_config, f, indent=2)
        
        print("\nHierarchical model training results:")
        print(f"- Best mAP50-95: {hierarchical_results.maps[0]:.4f}")
        print(f"- Best mAP50: {hierarchical_results.maps[1]:.4f}")
        print(f"- Best epoch: {hierarchical_results.best_epoch}")
        print(f"- Model saved to: {hierarchical_model_path}")
        
        # Create a training summary report
        hierarchical_summary_path = os.path.join(reports_dir, f"hierarchical_model_summary_{timestamp_now}.md")
        os.makedirs(os.path.dirname(hierarchical_summary_path), exist_ok=True)
        
        with open(hierarchical_summary_path, 'w') as f:
            f.write(f"# Hierarchical Wildlife Detection Model Training Summary\n\n")
            f.write(f"## Training Metadata\n")
            f.write(f"- **Date and Time**: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            f.write(f"- **Model**: {base_model_path}\n")
            f.write(f"- **Dataset**: {hierarchical_export_path}\n")
            f.write(f"- **Taxonomic Groups**: {len(taxonomic_groups)}\n\n")
            
            f.write(f"## Training Configuration\n")
            for param in ['epochs', 'batch', 'image_size', 'device', 'optimizer', 'lr0']:
                f.write(f"- **{param}**: {final_hierarchical_params.get(param, 'N/A')}\n")
            
            f.write(f"\n## Performance Metrics\n")
            f.write(f"- **Best mAP50-95**: {hierarchical_results.maps[0]:.4f}\n")
            f.write(f"- **Best mAP50**: {hierarchical_results.maps[1]:.4f}\n")
            f.write(f"- **Best epoch**: {hierarchical_results.best_epoch}\n")
            
            f.write(f"\n## Taxonomic Groups\n")
            for group, class_ids in taxonomic_groups.items():
                species = [class_names[idx] for idx in class_ids if idx < len(class_names)]
                f.write(f"- **{group}**: {', '.join(species[:5])}")
                if len(species) > 5:
                    f.write(f" and {len(species)-5} more")
                f.write(f" ({len(species)} species)\n")

        print(f"Hierarchical training summary saved to: {hierarchical_summary_path}")
        
        # Save model path for future notebooks
        hierarchical_best_model_path = os.path.join(hierarchical_model_path, "weights", "best.pt")
        training_config["hierarchical_best_model_path"] = hierarchical_best_model_path
        
        # Update training config with paths to result files
        with open(training_config_path, 'w') as f:
            json.dump(training_config, f, indent=2)
    else:
        print("\nHierarchical model training failed.")

except Exception as e:
    print(f"Error during hierarchical model training: {e}")
    import traceback
    traceback.print_exc()


Starting hierarchical model training on 5 taxonomic groups
Using base model: yolov8s.pt
Dataset path: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_hierarchical_test_01_20250510_0038
Model will be saved to: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_hierarchical_20250510_0114

Starting hierarchical training with the following settings:
- Model: yolov8s.pt
- Epochs: 50
- Batch size: 4
Error during hierarchical model training: 'image_size'


Traceback (most recent call last):
  File "/tmp/ipykernel_10671/2745908388.py", line 28, in <module>
    print(f"- Image size: {hierarchical_params['image_size']}")
                           ~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^
KeyError: 'image_size'


In [9]:
# Cell 7: Training Visualization
# Visualize and compare training results

def load_results_csv(model_path):
    """Load and parse the results.csv file from model training"""
    results_path = os.path.join(model_path, 'results.csv')
    if os.path.exists(results_path):
        try:
            return pd.read_csv(results_path)
        except Exception as e:
            print(f"Error loading results from {results_path}: {e}")
    return None

def plot_training_metrics(standard_df, hierarchical_df=None, metrics=None):
    """Plot training metrics from results dataframes"""
    if metrics is None:
        # Default metrics to plot (handle different column names in YOLOv8)
        metrics = []
        if standard_df is not None:
            cols = standard_df.columns
            for metric, patterns in {
                'loss': ['train/box_loss', 'box_loss', 'loss'],
                'precision': ['metrics/precision(B)', 'precision', 'val/precision'],
                'recall': ['metrics/recall(B)', 'recall', 'val/recall'],
                'mAP50': ['metrics/mAP50(B)', 'mAP50', 'val/mAP50'],
                'mAP50-95': ['metrics/mAP50-95(B)', 'mAP50-95', 'val/mAP50-95'] 
            }.items():
                # Find the first matching column pattern
                matching_cols = [col for col in cols if any(pattern in col for pattern in patterns)]
                if matching_cols:
                    metrics.append((metric, matching_cols[0]))
    
    # Create subplots for each metric
    if not metrics:
        print("No metrics found in results files")
        return
    
    n_metrics = len(metrics)
    fig, axs = plt.subplots(n_metrics, 1, figsize=(12, 4 * n_metrics))
    if n_metrics == 1:
        axs = [axs]
    
    # Plot each metric
    for i, (metric_name, col_name) in enumerate(metrics):
        ax = axs[i]
        
        # Plot standard model
        if standard_df is not None and col_name in standard_df.columns:
            ax.plot(standard_df['epoch'], standard_df[col_name], 'b-', label='Standard')
            # Mark best epoch
            if 'best_epoch' in training_config.get('standard_model', {}).get('train_results', {}):
                best_epoch = training_config['standard_model']['train_results']['best_epoch']
                if best_epoch < len(standard_df):
                    best_value = standard_df.iloc[best_epoch][col_name]
                    ax.plot(best_epoch, best_value, 'bo', markersize=8)
                    ax.axvline(x=best_epoch, color='b', linestyle='--', alpha=0.3)
        
        # Plot hierarchical model if available
        if hierarchical_df is not None and col_name in hierarchical_df.columns:
            ax.plot(hierarchical_df['epoch'], hierarchical_df[col_name], 'r-', label='Hierarchical')
            # Mark best epoch
            if 'best_epoch' in training_config.get('hierarchical_model', {}).get('train_results', {}):
                best_epoch = training_config['hierarchical_model']['train_results']['best_epoch']
                if best_epoch < len(hierarchical_df):
                    best_value = hierarchical_df.iloc[best_epoch][col_name]
                    ax.plot(best_epoch, best_value, 'ro', markersize=8)
                    ax.axvline(x=best_epoch, color='r', linestyle='--', alpha=0.3)
        
        ax.set_xlabel('Epoch')
        ax.set_ylabel(metric_name)
        ax.set_title(f'{metric_name.upper()} vs. Epoch')
        ax.grid(True, alpha=0.3)
        if standard_df is not None and hierarchical_df is not None:
            ax.legend()
    
    plt.tight_layout()
    
    # Save the plot
    plots_dir = os.path.join(reports_dir, "plots")
    os.makedirs(plots_dir, exist_ok=True)
    
    plot_path = os.path.join(plots_dir, f"training_metrics_{timestamp_now}.png")
    plt.savefig(plot_path)
    print(f"Saved training metrics plot to: {plot_path}")
    
    # Add plot path to training config
    training_config["plots"] = {"training_metrics": plot_path}
    with open(training_config_path, 'w') as f:
        json.dump(training_config, f, indent=2)
    
    return plot_path

# Load results
print("Loading training results...")
standard_results_df = load_results_csv(standard_model_path)
hierarchical_results_df = load_results_csv(hierarchical_model_path)

if standard_results_df is not None:
    print(f"Loaded standard model results: {len(standard_results_df)} epochs")
else:
    print("Standard model results not found")

if hierarchical_results_df is not None:
    print(f"Loaded hierarchical model results: {len(hierarchical_results_df)} epochs")
else:
    print("Hierarchical model results not found")

# Plot training metrics
if standard_results_df is not None or hierarchical_results_df is not None:
    print("\nGenerating training metrics visualization...")
    plot_path = plot_training_metrics(standard_results_df, hierarchical_results_df)
    print(f"Training visualization complete: {plot_path}")
else:
    print("Cannot generate visualizations: No results data available")

Loading training results...
Standard model results not found
Hierarchical model results not found
Cannot generate visualizations: No results data available


In [10]:
# Cell 8: Output Tracking for Long-Term Project Management
# Create detailed tracking of all generated files and models

# Create tracking directory if it doesn't exist
tracking_dir = os.path.join(project_root, "tracking")
os.makedirs(tracking_dir, exist_ok=True)

# Collect all generated files and their purposes
generated_files = {
    "configuration": {
        "training_config": training_config_path
    },
    "standard_model": {
        "base_path": standard_model_path,
        "best_weights": os.path.join(standard_model_path, "weights", "best.pt"),
        "last_weights": os.path.join(standard_model_path, "weights", "last.pt"),
        "results_csv": os.path.join(standard_model_path, "results.csv"),
        "summary_report": os.path.join(reports_dir, f"standard_model_summary_{timestamp_now}.md")
    },
    "hierarchical_model": {
        "base_path": hierarchical_model_path,
        "best_weights": os.path.join(hierarchical_model_path, "weights", "best.pt"),
        "last_weights": os.path.join(hierarchical_model_path, "weights", "last.pt"),
        "results_csv": os.path.join(hierarchical_model_path, "results.csv"),
        "summary_report": os.path.join(reports_dir, f"hierarchical_model_summary_{timestamp_now}.md")
    },
    "visualizations": {
        "training_metrics": os.path.join(reports_dir, "plots", f"training_metrics_{timestamp_now}.png")
    }
}

# Create a comprehensive output tracking file
tracking_file = os.path.join(tracking_dir, f"notebook2_outputs_{timestamp_now}.json")
with open(tracking_file, 'w') as f:
    json.dump({
        "notebook": "02_model_training",
        "execution_timestamp": timestamp_now,
        "description": "Model training for wildlife detection",
        "generated_files": generated_files,
        "training_config": training_config,
        "next_steps": {
            "notebook": "03_model_evaluation.ipynb",
            "required_inputs": [training_config_path]
        }
    }, f, indent=2)

# Create a simple Markdown summary for human-readable reference
summary_file = os.path.join(tracking_dir, f"notebook2_summary_{timestamp_now}.md")
with open(summary_file, 'w') as f:
    f.write(f"# Model Training Notebook Outputs\n\n")
    f.write(f"**Execution Date:** {datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
    
    f.write(f"## Configuration\n\n")
    f.write(f"- Training Config: `{training_config_path}`\n")
    f.write(f"- Model Size: {training_config['model']['name']} ({training_config['model']['description']})\n")
    f.write(f"- Hardware: {training_config['hardware']['device']} | Batch Size: {training_config['hardware']['batch_size']} | Image Size: {training_config['hardware']['image_size']}px\n\n")
    
    # Standard model results
    if 'standard_model' in training_config:
        std_results = training_config['standard_model']['train_results'] 
        f.write(f"## Standard Model Results\n\n")
        f.write(f"- Path: `{standard_model_path}`\n")
        f.write(f"- Best Weights: `weights/best.pt`\n")
        f.write(f"- Classes: {len(class_names)}\n")
        f.write(f"- Best Epoch: {std_results.get('best_epoch', 'N/A')}\n")
        f.write(f"- mAP50: {std_results.get('maps', [0, 0])[1]:.4f}\n")
        f.write(f"- mAP50-95: {std_results.get('maps', [0])[0]:.4f}\n\n")
    
    # Hierarchical model results
    if 'hierarchical_model' in training_config:
        hier_results = training_config['hierarchical_model']['train_results']
        f.write(f"## Hierarchical Model Results\n\n")
        f.write(f"- Path: `{hierarchical_model_path}`\n")
        f.write(f"- Best Weights: `weights/best.pt`\n")
        f.write(f"- Taxonomic Groups: {len(taxonomic_groups)}\n")
        f.write(f"- Best Epoch: {hier_results.get('best_epoch', 'N/A')}\n")
        f.write(f"- mAP50: {hier_results.get('maps', [0, 0])[1]:.4f}\n")
        f.write(f"- mAP50-95: {hier_results.get('maps', [0])[0]:.4f}\n\n")
    
    f.write(f"## Next Steps\n\n")
    f.write(f"Proceed to notebook 3 (Model Evaluation) using the training config: `{os.path.basename(training_config_path)}`")

print(f"\nOutput tracking files created:")
print(f"- JSON tracking: {tracking_file}")
print(f"- Markdown summary: {summary_file}")
print(f"\nThese files document all outputs from this notebook for long-term project organization.")

print("\nModel training complete!")
print("The trained models are ready for evaluation.")
print("Please proceed to the model evaluation notebook (03_model_evaluation.ipynb).")


Output tracking files created:
- JSON tracking: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/tracking/notebook2_outputs_20250510_0114.json
- Markdown summary: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/tracking/notebook2_summary_20250510_0114.md

These files document all outputs from this notebook for long-term project organization.

Model training complete!
The trained models are ready for evaluation.
Please proceed to the model evaluation notebook (03_model_evaluation.ipynb).
