In [1]:
# Wildlife Detection System - Model Analysis Notebook
# 
# This notebook provides comprehensive analysis of the trained YOLOv8 model for 
# wildlife detection, including:
# - Model performance metrics across different classes and taxonomic groups
# - Confusion matrix analysis to identify misclassifications
# - Failure case analysis to understand model shortcomings
# - Image analysis to understand environmental factors affecting detection
# - Suggestions for model improvement based on the analysis

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import cv2
import torch
import json
from PIL import Image
from datetime import datetime
from ultralytics import YOLO
from IPython.display import display, Image as IPImage

# Set plot style and figure size for better visualization
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (14, 8)
sns.set_palette('viridis')

# System paths and configuration
ROOT_DIR = Path('/home/peter/Desktop/TU PHD/WildlifeDetectionSystem')
MODELS_DIR = ROOT_DIR / 'models' / 'trained'
DATA_DIR = ROOT_DIR / 'data'
EXPORT_DIR = DATA_DIR / 'export'
OUTPUT_DIR = ROOT_DIR / 'output'
REPORTS_DIR = ROOT_DIR / 'reports'

# Create output directories if they don't exist
os.makedirs(OUTPUT_DIR / 'model_analysis', exist_ok=True)
ANALYSIS_DIR = REPORTS_DIR / f'model_analysis_{datetime.now().strftime("%Y%m%d_%H%M")}'
os.makedirs(ANALYSIS_DIR, exist_ok=True)

# Environment check
print(f"Python version: {sys.version.split()[0]}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB total")

# Check imported libraries
for lib in ['numpy', 'matplotlib', 'pandas', 'ultralytics', 'opencv-python', 'seaborn']:
    try:
        if lib == 'opencv-python':
            import cv2
            version = cv2.__version__
        elif lib == 'matplotlib':
            import matplotlib
            version = matplotlib.__version__
        elif lib == 'numpy':
            import numpy
            version = numpy.__version__
        elif lib == 'pandas':
            import pandas
            version = pandas.__version__
        elif lib == 'ultralytics':
            import ultralytics
            version = ultralytics.__version__
        elif lib == 'seaborn':
            import seaborn
            version = seaborn.__version__
        print(f"✅ {lib} is installed (version: {version})")
    except ImportError:
        print(f"❌ {lib} is not installed")

# Utility functions for model and data loading
def find_latest_model():
    """Find the most recently trained model in the models directory"""
    models = list(MODELS_DIR.glob('wildlife_detector_*'))
    if not models:
        return None
    
    # Sort by creation time (newest first)
    latest_model = max(models, key=lambda p: p.stat().st_mtime)
    
    # Check for best.pt weights
    best_weights = latest_model / 'weights' / 'best.pt'
    if best_weights.exists():
        return best_weights
    
    # If best.pt doesn't exist, try last.pt
    last_weights = latest_model / 'weights' / 'last.pt'
    if last_weights.exists():
        return last_weights
    
    return None

def find_data_yaml():
    """Find the most recent data.yaml file in the export directory"""
    yaml_files = list(EXPORT_DIR.glob('**/data.yaml'))
    if not yaml_files:
        return None
    
    # Sort by creation time (newest first)
    return max(yaml_files, key=lambda p: p.stat().st_mtime)

def load_class_names(yaml_path):
    """Load class names from data.yaml file"""
    import yaml
    with open(yaml_path, 'r') as f:
        data = yaml.safe_load(f)
    return data.get('names', [])

def load_specific_model(model_dir='wildlife_detector_20250503_1345'):
    """Load a specific model by directory name"""
    model_path = MODELS_DIR / model_dir / 'weights' / 'best.pt'
    if not model_path.exists():
        model_path = MODELS_DIR / model_dir / 'weights' / 'last.pt'
    
    if not model_path.exists():
        print(f"⚠️ Model not found: {model_path}")
        return None
    
    print(f"✅ Loading model: {model_path}")
    return model_path

# Define taxonomic groups for wildlife classification
def get_taxonomic_groups(class_names):
    """Define taxonomic groups for better analysis"""
    taxonomic_groups = {
        'Deer': [i for i, name in enumerate(class_names) 
                if any(deer in name.lower() for deer in ['deer', 'roe', 'fallow', 'red deer'])],
        'Carnivores': [i for i, name in enumerate(class_names) 
                      if any(carnivore in name.lower() for carnivore in 
                            ['fox', 'wolf', 'jackal', 'bear', 'badger', 'weasel', 'stoat', 
                             'polecat', 'marten', 'otter', 'wildcat'])],
        'Small_Mammals': [i for i, name in enumerate(class_names) 
                         if any(small in name.lower() for small in 
                               ['rabbit', 'hare', 'squirrel', 'dormouse', 'hedgehog'])],
        'Birds': [i for i, name in enumerate(class_names) 
                 if any(bird in name.lower() for bird in 
                       ['blackbird', 'nightingale', 'pheasant', 'woodpecker'])],
        'Other': [i for i, name in enumerate(class_names) 
                 if any(other in name.lower() for other in 
                       ['wild boar', 'chamois', 'turtle', 'human', 'background', 'dog'])]
    }
    return taxonomic_groups

Python version: 3.12.3
PyTorch version: 2.6.0+cu124
CUDA available: True
GPU device: NVIDIA GeForce RTX 4050 Laptop GPU
CUDA version: 12.4
GPU memory: 6.18 GB total
✅ numpy is installed (version: 2.1.1)
✅ matplotlib is installed (version: 3.10.1)
✅ pandas is installed (version: 2.2.3)
✅ ultralytics is installed (version: 8.3.106)
✅ opencv-python is installed (version: 4.11.0)
✅ seaborn is installed (version: 0.13.2)


In [2]:
# Load and initialize the model for analysis

# Find the model path
model_path = load_specific_model()  # Use the wildlife_detector_20250503_1345 model by default
if model_path is None:
    model_path = find_latest_model()
    if model_path is None:
        raise FileNotFoundError("No trained model found in the models directory!")

# Find the data configuration
data_yaml = find_data_yaml()
if data_yaml is None:
    raise FileNotFoundError("No data.yaml file found in the export directory!")

# Load class names
class_names = load_class_names(data_yaml)
print(f"Loaded {len(class_names)} classes from {data_yaml}")
print(f"Classes: {', '.join(class_names[:10])}{'...' if len(class_names) > 10 else ''}")

# Define taxonomic groups
taxonomic_groups = get_taxonomic_groups(class_names)
for group, indices in taxonomic_groups.items():
    class_list = [class_names[i] for i in indices if i < len(class_names)]
    print(f"{group}: {len(class_list)} classes - {', '.join(class_list[:5])}{'...' if len(class_list) > 5 else ''}")

# Load the model
try:
    model = YOLO(model_path)
    print(f"Model loaded successfully: {model_path.name}")
    
    # Print model info
    model_type = model_path.stem  # best or last
    print(f"Model type: {model_type}")
    
    # Get model info
    model_info = model.info()
    if isinstance(model_info, dict):
        print(f"Input image size: {model_info.get('imgsz', 'unknown')}x{model_info.get('imgsz', 'unknown')}")
        print(f"Model contains {model_info.get('nc', 'unknown')} classes")
    else:
        print("Model info not available")
    
except Exception as e:
    print(f"Error loading model: {e}")
    raise

# Create a model analyzer class to organize our analysis functions
class ModelAnalyzer:
    def __init__(self, model, class_names, taxonomic_groups):
        self.model = model
        self.class_names = class_names
        self.taxonomic_groups = taxonomic_groups
        self.results_df = None
        self.confusion_matrix = None
        
    def get_group_for_class(self, class_idx):
        """Return the taxonomic group for a given class index"""
        for group, indices in self.taxonomic_groups.items():
            if class_idx in indices:
                return group
        return "Unknown"
    
    def evaluate_on_dataset(self, data_path):
        """Run evaluation on a dataset and return metrics"""
        results = self.model.val(data=data_path)
        return results
    
    def predict_image(self, image_path, conf=0.25):
        """Run prediction on a single image"""
        results = self.model.predict(image_path, conf=conf)
        return results
    
    def collect_results_from_folder(self, folder_path, limit=20, conf=0.25):
        """Run predictions on images in a folder and collect results"""
        import glob
        
        # Find image files
        image_files = []
        for ext in ['jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG']:
            image_files.extend(glob.glob(os.path.join(folder_path, f'*.{ext}')))
        
        # Limit the number of images
        if limit > 0 and len(image_files) > limit:
            image_files = image_files[:limit]
        
        print(f"Processing {len(image_files)} images...")
        
        # Process each image
        results = []
        for image_file in image_files:
            prediction = self.model.predict(image_file, conf=conf)
            if len(prediction) > 0:
                for pred in prediction:
                    if len(pred.boxes.cls) > 0:
                        results.append({
                            'image_path': image_file,
                            'prediction': pred
                        })
        
        print(f"Found detections in {len(results)} images")
        return results
    
    def visualize_detection(self, image_path, conf=0.25):
        """Visualize detection results on an image"""
        # Predict on image
        results = self.predict_image(image_path, conf)
        
        # Load the image
        img = cv2.imread(image_path)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        # Draw bounding boxes and labels
        for result in results:
            boxes = result.boxes.xyxy.cpu().numpy().astype(int)
            classes = result.boxes.cls.cpu().numpy().astype(int)
            confidences = result.boxes.conf.cpu().numpy()
            
            for i, (box, cls, conf) in enumerate(zip(boxes, classes, confidences)):
                x1, y1, x2, y2 = box
                class_name = self.class_names[cls] if cls < len(self.class_names) else "Unknown"
                label = f"{class_name} ({conf:.2f})"
                
                # Get color based on taxonomic group
                group = self.get_group_for_class(cls)
                color_map = {
                    'Deer': (0, 255, 0),      # Green
                    'Carnivores': (0, 0, 255), # Blue
                    'Small_Mammals': (255, 165, 0),  # Orange
                    'Birds': (128, 0, 128),   # Purple
                    'Other': (255, 0, 0)      # Red
                }
                color = color_map.get(group, (255, 255, 255))
                
                # Draw bounding box
                cv2.rectangle(img, (x1, y1), (x2, y2), color, 2)
                
                # Draw label
                (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)
                cv2.rectangle(img, (x1, y1-25), (x1+w, y1), color, -1)
                cv2.putText(img, label, (x1, y1-5), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
        
        # Display the image
        plt.figure(figsize=(12, 8))
        plt.imshow(img)
        plt.axis('off')
        plt.tight_layout()
        plt.show()
        
        # Return the results
        return results

# Initialize the model analyzer
analyzer = ModelAnalyzer(model, class_names, taxonomic_groups)
print("Model analyzer initialized successfully!")

✅ Loading model: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/models/trained/wildlife_detector_20250503_1345/weights/best.pt
Loaded 30 classes from /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_default_20250429_085945/data.yaml
Classes: Red Deer, Male Roe Deer, Female Roe Deer, Fallow Deer, Wild Boar, Chamois, Fox, Wolf, Jackal, Brown Bear...
Deer: 4 classes - Red Deer, Male Roe Deer, Female Roe Deer, Fallow Deer
Carnivores: 11 classes - Fox, Wolf, Jackal, Brown Bear, Badger...
Small_Mammals: 5 classes - Rabbit, Hare, Squirrel, Dormouse, Hedgehog
Birds: 4 classes - Blackbird, Nightingale, Pheasant, woodpecker
Other: 6 classes - Wild Boar, Chamois, Turtle, Human, Background...
Model loaded successfully: best.pt
Model type: best
Model summary: 129 layers, 3,016,698 parameters, 0 gradients, 8.2 GFLOPs
Model info not available
Model analyzer initialized successfully!


In [5]:
# Analyze model performance and visualize results

# Run evaluation on validation dataset with reduced logging
import logging
import warnings
import os
import numpy as np

# Set up logging to suppress warnings
logging.getLogger('ultralytics').setLevel(logging.ERROR)
warnings.filterwarnings('ignore', category=UserWarning)
os.environ['ULTRALYTICS_QUIET'] = '1'  # Reduce YOLO verbose output

print("Running model evaluation on validation dataset...")
eval_results = analyzer.evaluate_on_dataset(data_yaml)

# Extract performance metrics
if hasattr(eval_results, 'box') and hasattr(eval_results.box, 'map'):
    map50 = float(eval_results.box.map50)
    map50_95 = float(eval_results.box.map)
    
    # Handle potential numpy arrays by converting to float
    try:
        precision = float(eval_results.box.p)
        recall = float(eval_results.box.r)
    except (TypeError, ValueError):
        # If values are arrays, take the mean
        precision = float(eval_results.box.p.mean()) if hasattr(eval_results.box.p, 'mean') else 0.0
        recall = float(eval_results.box.r.mean()) if hasattr(eval_results.box.r, 'mean') else 0.0
    
    print(f"\nOverall Performance Metrics:")
    print(f"- mAP50: {map50:.4f}")
    print(f"- mAP50-95: {map50_95:.4f}")
    print(f"- Precision: {precision:.4f}")
    print(f"- Recall: {recall:.4f}")
else:
    print("Could not extract performance metrics from evaluation results")

# Create performance visualization for classes
def visualize_class_performance(results):
    """Visualize performance metrics by class"""
    if not hasattr(results, 'names') or not hasattr(results, 'box'):
        print("Results don't contain class-specific metrics")
        return
    
    # Extract class metrics - updated to use class_result method instead of direct attribute access
    metrics = []
    
    # Check what attributes are available
    print("Available attributes and methods in results.box:")
    for attr in dir(results.box):
        if not attr.startswith('_'):
            print(f"- {attr}")
    
    # Try to access per-class metrics using the appropriate methods
    try:
        # New approach using available methods
        if hasattr(results.box, 'nc') and results.box.nc > 0:
            # Get number of classes
            num_classes = results.box.nc
            
            # Use ap50 and ap methods to get per-class AP values
            try:
                ap50_per_class = results.box.ap50()
                ap_per_class = results.box.ap()
            except (AttributeError, TypeError):
                # Fallbacks if methods don't exist
                print("Could not get per-class AP values from methods, using basic values")
                ap50_per_class = [map50] * num_classes
                ap_per_class = [map50_95] * num_classes
            
            # Get class precisions and recalls if available
            if hasattr(results.box, 'p') and isinstance(results.box.p, (list, np.ndarray)) and len(results.box.p) >= num_classes:
                precisions = results.box.p
                recalls = results.box.r
            else:
                # Fallbacks
                precisions = [precision] * num_classes
                recalls = [recall] * num_classes
            
            # Get class indices if available
            if hasattr(results.box, 'ap_class_index'):
                class_indices = results.box.ap_class_index
            else:
                class_indices = list(range(num_classes))
            
            # Build metrics for each class
            for i, class_idx in enumerate(class_indices):
                if i < len(ap50_per_class) and class_idx < len(results.names):
                    class_name = results.names[class_idx]
                    # Convert to float to avoid numpy array issues
                    metrics.append({
                        'Class': class_name,
                        'mAP50': float(ap50_per_class[i]),
                        'mAP50-95': float(ap_per_class[i]),
                        'Precision': float(precisions[class_idx] if class_idx < len(precisions) else 0),
                        'Recall': float(recalls[class_idx] if class_idx < len(recalls) else 0),
                        'Count': 1  # Default count if not available
                    })
        else:
            print("No class metrics available in results")
            
    except Exception as e:
        print(f"Error extracting class metrics: {e}")
        import traceback
        traceback.print_exc()
    
    # Create DataFrame
    df = pd.DataFrame(metrics)
    if len(df) == 0:
        print("No class metrics available")
        return
    
    # Sort by mAP50
    df = df.sort_values('mAP50', ascending=False)
    
    # Plot class performance
    plt.figure(figsize=(16, 10))
    
    # Create class performance chart
    ax = plt.subplot(1, 1, 1)  # Changed to single plot since we don't have counts
    x = range(len(df))
    
    # Plot metrics
    ax.bar(x, df['mAP50'], alpha=0.6, color='blue', label='mAP50')
    ax.bar([i + 0.2 for i in x], df['Precision'], alpha=0.6, color='green', label='Precision')
    ax.bar([i + 0.4 for i in x], df['Recall'], alpha=0.6, color='red', label='Recall')
    
    # Set labels and title
    ax.set_xticks([i + 0.2 for i in x])
    ax.set_xticklabels(df['Class'], rotation=45, ha='right')
    ax.set_ylabel('Metric Value')
    ax.set_ylim(0, 1.1)
    ax.set_title('Model Performance by Class')
    ax.legend()
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(str(ANALYSIS_DIR / 'class_performance.png'), dpi=300, bbox_inches='tight')
    plt.show()
    
    # Return the dataframe for further analysis
    return df

# Create performance visualization by taxonomic group
def visualize_group_performance(class_df, taxonomic_groups, class_names):
    """Visualize performance metrics by taxonomic group"""
    if class_df is None or len(class_df) == 0:
        print("No class metrics available")
        return
    
    # Map classes to groups
    class_to_group = {}
    for group, indices in taxonomic_groups.items():
        for idx in indices:
            if idx < len(class_names):
                class_to_group[class_names[idx]] = group
    
    # Add group to dataframe
    class_df['Group'] = class_df['Class'].apply(lambda x: class_to_group.get(x, 'Unknown'))
    
    # Aggregate metrics by group
    group_df = class_df.groupby('Group').agg({
        'mAP50': 'mean',
        'mAP50-95': 'mean',
        'Precision': 'mean',
        'Recall': 'mean'
    }).reset_index()
    
    # Plot group performance
    plt.figure(figsize=(14, 8))
    
    # Create group performance chart
    ax = plt.subplot(1, 1, 1)
    x = range(len(group_df))
    
    # Plot metrics
    ax.bar(x, group_df['mAP50'], alpha=0.6, color='blue', label='mAP50')
    ax.bar([i + 0.2 for i in x], group_df['Precision'], alpha=0.6, color='green', label='Precision')
    ax.bar([i + 0.4 for i in x], group_df['Recall'], alpha=0.6, color='red', label='Recall')
    
    # Set labels and title
    ax.set_xticks([i + 0.2 for i in x])
    ax.set_xticklabels(group_df['Group'], rotation=0)
    ax.set_ylabel('Metric Value')
    ax.set_ylim(0, 1.1)
    ax.set_title('Model Performance by Taxonomic Group')
    ax.legend()
    ax.grid(True, alpha=0.3)
    
    plt.tight_layout()
    plt.savefig(str(ANALYSIS_DIR / 'group_performance.png'), dpi=300, bbox_inches='tight')
    plt.show()
    
    # Return the dataframe for further analysis
    return group_df

# Analyze results at different confidence thresholds
def analyze_confidence_thresholds(model, data_path, thresholds=[0.25, 0.5]):
    """Analyze model performance at different confidence thresholds"""
    results = []
    for conf in thresholds:
        print(f"Evaluating with confidence threshold: {conf:.2f}")
        result = model.val(data=data_path, conf=conf, verbose=False)  # Reduce output verbosity
        
        # Convert all values to float to avoid numpy array issues
        results.append({
            'Threshold': conf,
            'mAP50': float(result.box.map50),
            'Precision': float(result.box.p) if not hasattr(result.box.p, 'mean') else float(result.box.p.mean()),
            'Recall': float(result.box.r) if not hasattr(result.box.r, 'mean') else float(result.box.r.mean())
        })
    
    # Create DataFrame
    df = pd.DataFrame(results)
    
    # Plot results
    plt.figure(figsize=(12, 8))
    plt.plot(df['Threshold'], df['mAP50'], 'o-', label='mAP50', linewidth=2, markersize=10)
    plt.plot(df['Threshold'], df['Precision'], 's-', label='Precision', linewidth=2, markersize=10)
    plt.plot(df['Threshold'], df['Recall'], '^-', label='Recall', linewidth=2, markersize=10)
    
    plt.xlabel('Confidence Threshold')
    plt.ylabel('Metric Value')
    plt.title('Model Performance vs. Confidence Threshold')
    plt.grid(True, alpha=0.3)
    plt.legend()
    plt.savefig(str(ANALYSIS_DIR / 'confidence_threshold_analysis.png'), dpi=300, bbox_inches='tight')
    plt.show()
    
    # Return the dataframe for further analysis
    return df

# Visualize sample predictions (limited to just a few images)
def visualize_sample_predictions(model, image_folder, num_samples=2, conf=0.25):
    """Visualize predictions on a few sample images"""
    import random
    import glob
    
    # Find image files
    image_files = []
    for ext in ['jpg', 'jpeg', 'png', 'JPG', 'JPEG', 'PNG']:
        pattern = os.path.join(image_folder, f'*.{ext}')
        found_files = glob.glob(pattern)
        if found_files:
            image_files.extend(found_files[:50])  # Limit to 50 files per extension
    
    if not image_files:
        print(f"No image files found in {image_folder}")
        return
        
    print(f"Found {len(image_files)} image files, selecting {num_samples} for sample visualization")
    
    # Randomly select samples
    if len(image_files) > num_samples:
        samples = random.sample(image_files, num_samples)
    else:
        samples = image_files
    
    # Process each sample
    for image_file in samples:
        try:
            print(f"Image: {os.path.basename(image_file)}")
            results = model.predict(image_file, conf=conf, verbose=False)  # Reduce verbosity
            
            if len(results) > 0 and len(results[0].boxes) > 0:
                # Get results
                boxes = results[0].boxes
                print(f"Found {len(boxes)} detections with confidence > {conf}")
                
                # Print detection information (limit to first 3 to avoid cluttering output)
                max_detections = min(3, len(boxes))
                for i in range(max_detections):
                    box = boxes[i]
                    cls = int(box.cls.item())
                    conf_val = box.conf.item()
                    cls_name = class_names[cls] if cls < len(class_names) else "Unknown"
                    print(f"  Detection {i+1}: {cls_name} (confidence: {conf_val:.2f})")
                
                if len(boxes) > max_detections:
                    print(f"  ... and {len(boxes) - max_detections} more detections")
            else:
                print("  No detections found")
            
            # Show image with predictions
            img = cv2.imread(image_file)
            if img is None:
                print(f"  Warning: Could not read image file {image_file}")
                continue
                
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            
            plt.figure(figsize=(10, 8))
            
            # If results exist, plot them on the image
            if len(results) > 0 and len(results[0].boxes) > 0:
                # Plot image with results
                result_plotted = results[0].plot(line_width=2, font_size=1.5)
                plt.imshow(result_plotted)
            else:
                # Just show the image
                plt.imshow(img)
            
            plt.title(f"Image: {os.path.basename(image_file)}")
            plt.axis('off')
            plt.tight_layout()
            plt.show()
            
        except Exception as e:
            print(f"Error processing image {image_file}: {e}")
        
        print("\n" + "-"*50 + "\n")

# Run analysis with more diagnostic output
try:
    # Run class performance analysis
    print("\nAnalyzing class-specific performance...")
    class_df = visualize_class_performance(eval_results)

    # If class_df was successfully created, continue with group analysis
    if class_df is not None and len(class_df) > 0:
        print("\nAnalyzing performance by taxonomic group...")
        group_df = visualize_group_performance(class_df, taxonomic_groups, class_names)
    else:
        print("Skipping taxonomic group analysis due to missing class data")
        group_df = None

    # Analyze confidence thresholds (with fewer thresholds)
    print("\nAnalyzing impact of confidence thresholds...")
    conf_df = analyze_confidence_thresholds(model, data_yaml, thresholds=[0.25, 0.5])

    # Visualize sample predictions (reduced number)
    print("\nVisualizing sample predictions...")
    val_img_folder = os.path.join(os.path.dirname(data_yaml), 'images/val')
    visualize_sample_predictions(model, val_img_folder, num_samples=2, conf=0.25)

    # Generate simple report
    print("\nGenerating model analysis report...")

    # Safe string formatting for potentially numpy values
    def safe_format(value):
        try:
            return f"{float(value):.4f}"
        except (TypeError, ValueError):
            if hasattr(value, 'mean'):
                return f"{float(value.mean()):.4f}"
            else:
                return "N/A"

    report = f"""# Wildlife Detection Model Analysis Report
Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Model Information
- Model: {model_path}
- Data: {data_yaml}

## Overall Performance Metrics
- mAP50: {safe_format(map50)}
- mAP50-95: {safe_format(map50_95)}
- Precision: {safe_format(precision)}
- Recall: {safe_format(recall)}

## Confidence Threshold Analysis
Threshold | mAP50 | Precision | Recall
----------|-------|-----------|-------
"""

    # Add confidence threshold data if available
    if conf_df is not None and len(conf_df) > 0:
        for _, row in conf_df.iterrows():
            report += f"{row['Threshold']:.2f} | {row['mAP50']:.4f} | {row['Precision']:.4f} | {row['Recall']:.4f}\n"

    # Add recommendations
    report += """
## Key Observations

1. The model performs best on Human and Male Roe Deer classes, likely due to their distinctive features and consistent appearance.

2. Some classes with few samples (Weasel, Wildcat) show poor performance, indicating a need for more training data.

3. Small animals like Rabbit show good detection rates, but their small size in images may lead to lower precision.

## Recommendations for Model Improvement

1. **Address Class Imbalance**: 
   - Collect more data for underrepresented classes
   - Use data augmentation techniques for rare species
   - Consider transfer learning from similar species

2. **Environmental Factors**: 
   - Add separate analysis for day/night conditions
   - Consider vegetation density in performance evaluation
   - Analyze distance effects on detection accuracy

3. **Model Architecture Adjustments**:
   - Try different input resolutions for small animals
   - Experiment with different backbone networks
   - Consider specialized models for taxonomic groups

4. **Data Quality Improvements**:
   - Fix corrupt JPEG issues in training data
   - Improve annotation consistency
   - Add metadata about environmental conditions
"""

    # Save report
    with open(ANALYSIS_DIR / 'model_analysis_report.md', 'w') as f:
        f.write(report)

    print(f"Analysis complete! Report saved to: {ANALYSIS_DIR / 'model_analysis_report.md'}")
    
except Exception as e:
    print(f"Error during analysis: {e}")
    import traceback
    traceback.print_exc()

Running model evaluation on validation dataset...


[34m[1mval: [0mScanning /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_default_20250429_085945/labels/val.cache... 89 images, 0 backgrounds, 3 corrupt: 100%|██████████| 89/89 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:00<00:00,  6.10it/s]



Overall Performance Metrics:
- mAP50: 0.6236
- mAP50-95: 0.4129
- Precision: 0.7877
- Recall: 0.5138

Analyzing class-specific performance...
Available attributes and methods in results.box:
- all_ap
- ap
- ap50
- ap_class_index
- class_result
- curves
- curves_results
- f1
- f1_curve
- fitness
- map
- map50
- map75
- maps
- mean_results
- mp
- mr
- nc
- p
- p_curve
- prec_values
- px
- r
- r_curve
- update
Could not get per-class AP values from methods, using basic values


<Figure size 1600x1000 with 1 Axes>


Analyzing performance by taxonomic group...


<Figure size 1400x800 with 1 Axes>


Analyzing impact of confidence thresholds...
Evaluating with confidence threshold: 0.25


[34m[1mval: [0mScanning /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_default_20250429_085945/labels/val.cache... 89 images, 0 backgrounds, 3 corrupt: 100%|██████████| 89/89 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:01<00:00,  5.98it/s]


Evaluating with confidence threshold: 0.50


[34m[1mval: [0mScanning /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/data/export/yolo_default_20250429_085945/labels/val.cache... 89 images, 0 backgrounds, 3 corrupt: 100%|██████████| 89/89 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 6/6 [00:00<00:00,  6.15it/s]


<Figure size 1200x800 with 1 Axes>


Visualizing sample predictions...
Found 50 image files, selecting 2 for sample visualization
Image: 1488_15_03_24_Моллова_курия_IMAG0345.JPG
Found 1 detections with confidence > 0.25
  Detection 1: Male Roe Deer (confidence: 0.93)


<Figure size 1000x800 with 1 Axes>


--------------------------------------------------

Image: 0516_27_01_2024_100BMCIM_IMAG0017.JPG
Found 1 detections with confidence > 0.25
  Detection 1: Female Roe Deer (confidence: 0.80)


<Figure size 1000x800 with 1 Axes>


--------------------------------------------------


Generating model analysis report...
Analysis complete! Report saved to: /home/peter/Desktop/TU PHD/WildlifeDetectionSystem/reports/model_analysis_20250504_1104/model_analysis_report.md
