In [None]:
# 1. Logging Setup (run first)

import logging

class CustomFormatter(logging.Formatter):
    """Log formatter that wraps each record in an ANSI colour chosen by level.

    The colour for a record is looked up in ``self.FORMATS`` (level number ->
    ANSI escape prefix); levels without an entry fall back to ``grey``.
    """
    grey = "\x1b[38;21m"
    blue = "\x1b[38;5;39m"
    yellow = "\x1b[38;5;226m"
    red = "\x1b[38;5;196m"
    bold_red = "\x1b[31;1m"
    reset = "\x1b[0m"

    # Shared layout so the base class and the per-colour formatters agree.
    _BASE_FMT = "%(asctime)s - %(levelname)s - %(message)s"
    _DATE_FMT = "%Y-%m-%d %H:%M:%S"

    def __init__(self):
        super().__init__(fmt=self._BASE_FMT, datefmt=self._DATE_FMT)
        self.FORMATS = {
            logging.DEBUG: self.grey,
            logging.INFO: self.blue,
            logging.WARNING: self.yellow,
            logging.ERROR: self.red,
            # CRITICAL previously had no entry and was rendered in the DEBUG grey.
            logging.CRITICAL: self.bold_red,
        }
        # Cache of colour prefix -> Formatter, so a Formatter is not rebuilt per record.
        self._formatters = {}

    def format(self, record):
        """Return ``record`` formatted with the colour registered for its level."""
        color = self.FORMATS.get(record.levelno, self.grey)
        formatter = self._formatters.get(color)
        if formatter is None:
            # Keyed by colour (not level) so edits to self.FORMATS take effect immediately.
            formatter = logging.Formatter(
                f"{color}{self._BASE_FMT}{self.reset}", datefmt=self._DATE_FMT
            )
            self._formatters[color] = formatter
        return formatter.format(record)

# Setup logger
logger = logging.getLogger("YOLOComparison")
logger.setLevel(logging.INFO)
# Do not forward records to the root logger: when the root logger has its own
# handler, every message would otherwise be printed a second time in the
# plain "INFO:YOLOComparison:..." format.
logger.propagate = False
# Replace any handlers left over from a previous run of this cell so that
# re-running it neither stacks handlers nor keeps a stale formatter.
logger.handlers.clear()
console_handler = logging.StreamHandler()
console_handler.setFormatter(CustomFormatter())
logger.addHandler(console_handler)
logger.info("Logger initialized")

[38;5;39m2025-06-07 11:19:56 - INFO - Logger initialized[0m
INFO:YOLOComparison:Logger initialized


In [None]:
# Install all required packages (run this first if you get ModuleNotFoundError)
!pip install ultralytics sahi pycocotools pandas seaborn --quiet

In [None]:
# 2. Imports & Config (run after logging setup)

import os
import yaml
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import matplotlib.patches as patches
from ultralytics import YOLO, __version__ as yolo_version
from sklearn.metrics import confusion_matrix

# Google Drive mounting (only available in Google Colab)
try:
    from google.colab import drive
except ImportError:
    # Outside Colab there is no google.colab package; skip the mount so the
    # path checks below can report what is missing instead of failing here.
    logger.warning("google.colab is not available; skipping Google Drive mount")
else:
    drive.mount('/content/drive')

# Locations on the mounted drive -- edit these to match your own layout
BASE_DIR = '/content/drive/MyDrive/new scope model'
DATA_YAML = f'{BASE_DIR}/data.yaml'
DATASET_DIR = '/content/drive/MyDrive/Samplesmall_dataset'

logger.info(f"Using Ultralytics YOLO version: {yolo_version}")

# Report (without aborting) every configured location that does not exist
missing_paths = [p for p in (BASE_DIR, DATA_YAML, DATASET_DIR) if not os.path.exists(p)]
for missing in missing_paths:
    logger.error(f"Path not found: {missing}")

[38;5;39m2025-06-07 11:20:18 - INFO - Using Ultralytics YOLO version: 8.3.151[0m
INFO:YOLOComparison:Using Ultralytics YOLO version: 8.3.151


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches

def plot_comparison_grid(images, detections_list, class_names, method_names, save_path):
    """Save a grid of images with detections drawn on top (rows = images, columns = methods).

    Args:
        images: list of images accepted by ``Axes.imshow``, one per row.
        detections_list: nested sequence indexed as ``[method][image]``, each
            entry an iterable of dicts with keys ``'bbox'``, ``'class_id'``
            and ``'confidence'``. ``bbox`` is used as ``(x, y, width, height)``.
        class_names: container indexed by ``det['class_id']`` to get a label.
        method_names: list of str, one per column, e.g.
            ``['YOLOv8', 'Enhanced YOLO', 'SAHI']``.
        save_path: where to save the output image.

    Raises:
        ValueError: if ``images`` or ``method_names`` is empty.
    """
    n_images = len(images)
    n_methods = len(method_names)
    if n_images == 0 or n_methods == 0:
        raise ValueError("plot_comparison_grid needs at least one image and one method")

    # squeeze=False always yields a 2-D array of Axes, so axes[i, j] is valid
    # even for a single row and/or a single column.
    fig, axes = plt.subplots(n_images, n_methods,
                             figsize=(5 * n_methods, 5 * n_images),
                             squeeze=False)

    for i in range(n_images):
        for j in range(n_methods):
            ax = axes[i, j]
            ax.imshow(images[i])
            # Draw detections for this method/image
            for det in detections_list[j][i]:
                bbox = det['bbox']
                label = class_names[det['class_id']]
                conf = det['confidence']
                rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                                         linewidth=2, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
                ax.text(bbox[0], bbox[1]-5, f'{label}: {conf:.2f}',
                        color='white', bbox=dict(facecolor='red', alpha=0.5))
            # Column headers only on the first row
            if i == 0:
                ax.set_title(method_names[j])
            ax.axis('off')
    fig.tight_layout()
    fig.savefig(save_path, dpi=300, bbox_inches='tight')
    # Close this specific figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print(f"[INFO] Saved comparison grid to {save_path}")

In [None]:
# 3. ResultsVisualizer Class (data visualization)

class ResultsVisualizer:
    """Creates a timestamped results directory and writes plots and reports into it.

    The directory ``<base_dir>/comparison_results_<timestamp>`` is created on
    construction with ``plots``, ``metrics``, ``detection_examples`` and
    ``logs`` sub-directories (paths available via ``self.dirs``).
    """
    def __init__(self, base_dir):
        self.base_dir = base_dir
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        self.results_dir = os.path.join(base_dir, f'comparison_results_{self.timestamp}')
        self.dirs = {
            'plots': os.path.join(self.results_dir, 'plots'),
            'metrics': os.path.join(self.results_dir, 'metrics'),
            'detections': os.path.join(self.results_dir, 'detection_examples'),
            'logs': os.path.join(self.results_dir, 'logs')
        }
        for dir_path in self.dirs.values():
            os.makedirs(dir_path, exist_ok=True)
        logger.info(f"Created results directory at {self.results_dir}")

    @staticmethod
    def _class_ids_and_names(class_names):
        """Return ``(ids, names)`` for a list of names or an ``{id: name}`` mapping."""
        if isinstance(class_names, dict):
            ids = sorted(class_names)
            return ids, [class_names[i] for i in ids]
        names = list(class_names)
        return list(range(len(names))), names

    def plot_metrics_comparison(self, metrics_dict):
        """Bar-plot ``{model: {metric: value}}`` and save it as PNG, CSV and Markdown.

        Returns:
            The DataFrame that was plotted (one row per model).
        """
        df = pd.DataFrame(metrics_dict).T
        # Draw on an explicit Axes: DataFrame.plot() without ax= opens its own
        # figure, which ignored the requested size and leaked the first figure.
        fig, ax = plt.subplots(figsize=(12, 8))
        df.plot(kind='bar', width=0.8, ax=ax)
        ax.set_title('Performance Comparison Across Models')
        ax.set_xlabel('Model Type')
        ax.set_ylabel('Score')
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f')
        fig.tight_layout()
        save_path = os.path.join(self.dirs['plots'], 'metrics_comparison.png')
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)
        csv_path = os.path.join(self.dirs['metrics'], 'metrics_comparison.csv')
        df.to_csv(csv_path)
        markdown_path = os.path.join(self.dirs['metrics'], 'metrics_summary.md')
        with open(markdown_path, 'w', encoding='utf-8') as f:
            f.write("# Model Performance Comparison\n\n")
            f.write(df.to_markdown())
        return df

    def plot_confusion_matrix(self, true_labels, pred_labels, class_names, model_name):
        """Save a confusion-matrix heatmap for ``model_name`` under ``plots``."""
        class_ids, names = self._class_ids_and_names(class_names)
        # Pass labels explicitly so the matrix always has one row/column per
        # class and lines up with the tick labels, even if some classes are
        # absent from true_labels/pred_labels.
        cm = confusion_matrix(true_labels, pred_labels, labels=class_ids)
        fig, ax = plt.subplots(figsize=(12, 10))
        sns.heatmap(cm, annot=True, fmt='d',
                    xticklabels=names,
                    yticklabels=names,
                    ax=ax)
        ax.set_title(f'Confusion Matrix - {model_name}')
        ax.set_ylabel('True Label')
        ax.set_xlabel('Predicted Label')
        save_path = os.path.join(self.dirs['plots'], f'confusion_matrix_{model_name}.png')
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

    def plot_precision_recall_curves(self, precisions, recalls, model_names):
        """Plot ``precisions[i]`` against ``recalls[i]`` for each model and save the figure."""
        fig, ax = plt.subplots(figsize=(10, 8))
        for i, model_name in enumerate(model_names):
            ax.plot(recalls[i], precisions[i], label=model_name)
        ax.set_title('Precision-Recall Curves')
        ax.set_xlabel('Recall')
        ax.set_ylabel('Precision')
        ax.legend()
        ax.grid(True)
        save_path = os.path.join(self.dirs['plots'], 'precision_recall_curves.png')
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

    def plot_per_class_map(self, class_maps, class_names, model_names):
        """Bar-plot per-class mAP (rows = models, columns = classes) and save the figure."""
        _, names = self._class_ids_and_names(class_names)
        df = pd.DataFrame(class_maps, index=model_names, columns=names)
        fig, ax = plt.subplots(figsize=(15, 8))
        df.plot(kind='bar', width=0.8, ax=ax)
        ax.set_title('Per-Class mAP Comparison')
        ax.set_xlabel('Model')
        ax.set_ylabel('mAP')
        ax.legend(title='Classes', bbox_to_anchor=(1.05, 1), loc='upper left')
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f', rotation=90)
        fig.tight_layout()
        save_path = os.path.join(self.dirs['plots'], 'per_class_map.png')
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

    def create_detection_grid(self, images, detections, class_names, model_names):
        """Save a grid of images with detections drawn on top (rows = images, columns = models).

        ``detections`` is indexed as ``[model][image]``; each detection is a
        dict with ``'bbox'`` (used as x, y, width, height), ``'class_id'`` and
        ``'confidence'``.
        """
        n_images = len(images)
        n_models = len(model_names)
        # squeeze=False keeps axes 2-D so axes[i, j] also works for one row or one column.
        fig, axes = plt.subplots(n_images, n_models,
                                 figsize=(5*n_models, 5*n_images),
                                 squeeze=False)
        for i in range(n_images):
            for j in range(n_models):
                ax = axes[i, j]
                ax.imshow(images[i])
                for det in detections[j][i]:
                    bbox = det['bbox']
                    label = class_names[det['class_id']]
                    conf = det['confidence']
                    rect = patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3],
                                             linewidth=2, edgecolor='r', facecolor='none')
                    ax.add_patch(rect)
                    ax.text(bbox[0], bbox[1]-5, f'{label}: {conf:.2f}',
                            color='white', bbox=dict(facecolor='red', alpha=0.5))
                if i == 0:
                    ax.set_title(model_names[j])
                ax.axis('off')
        fig.tight_layout()
        save_path = os.path.join(self.dirs['detections'], 'detection_grid.png')
        fig.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close(fig)

    def save_markdown_report(self, metrics_df, additional_notes=None):
        """Write ``complete_report.md`` (metrics table, directory listing, optional notes)."""
        report_path = os.path.join(self.dirs['metrics'], 'complete_report.md')
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("# Model Comparison Report\n\n")
            f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write("## Summary Metrics\n")
            f.write(metrics_df.to_markdown())
            f.write("\n\n")
            f.write("## Visualization Directory Structure\n")
            for dir_name, dir_path in self.dirs.items():
                f.write(f"- {dir_name}: {dir_path}\n")
            f.write("\n")
            if additional_notes:
                f.write("## Additional Notes\n")
                f.write(additional_notes)
                f.write("\n")
            f.write("\n## Plots Generated\n")
            f.write("1. Metrics Comparison (Bar Plot)\n")
            f.write("2. Confusion Matrices\n")
            f.write("3. Precision-Recall Curves\n")
            f.write("4. Per-Class mAP Comparison\n")
            f.write("5. Detection Examples Grid\n")

    def generate_all_visualizations(self, results_dict, class_names):
        """Produce every plot/report for which ``results_dict`` carries data.

        ``results_dict['summary_metrics']`` is required. The keys
        ``'confusion_matrices'``, ``'precision'`` + ``'recall'``,
        ``'per_class_map'``, ``'example_images'`` + ``'example_detections'``
        and ``'notes'`` are optional; the matching output is skipped when
        they are absent.

        Returns:
            Path of the results directory.
        """
        model_names = list(results_dict['summary_metrics'].keys())
        # 1. Overall metrics comparison
        metrics_df = self.plot_metrics_comparison(results_dict['summary_metrics'])
        # 2. Confusion matrices
        for model_name, cm_data in results_dict.get('confusion_matrices', {}).items():
            self.plot_confusion_matrix(
                cm_data['true'],
                cm_data['pred'],
                class_names,
                model_name
            )
        # 3. Precision-recall curves (optional, like the sections below)
        if 'precision' in results_dict and 'recall' in results_dict:
            self.plot_precision_recall_curves(
                results_dict['precision'],
                results_dict['recall'],
                model_names
            )
        else:
            logger.warning("No 'precision'/'recall' data supplied; skipping precision-recall curves")
        # 4. Per-class mAP
        if 'per_class_map' in results_dict:
            self.plot_per_class_map(
                results_dict['per_class_map'],
                class_names,
                model_names
            )
        # 5. Detection grid
        if 'example_images' in results_dict and 'example_detections' in results_dict:
            self.create_detection_grid(
                results_dict['example_images'],
                results_dict['example_detections'],
                class_names,
                model_names
            )
        # 6. Markdown report
        self.save_markdown_report(
            metrics_df,
            additional_notes=results_dict.get('notes', None)
        )
        logger.info(f"All visualizations saved in {self.results_dir}")
        return self.results_dir

In [None]:
class ModelEvaluator:
    """Trains baseline and enhanced YOLOv8 models, runs SAHI inference and plots the results.

    NOTE: a later cell in this notebook defines ``ModelEvaluator`` again; when
    the cells are run in order, that later definition replaces this one.
    """

    # File extensions (lower-case) treated as images when scanning the test directory.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, base_dir, data_yaml, dataset_dir, epochs=1):
        """
        Args:
            base_dir: directory used as the Ultralytics ``project`` and as the
                parent of the results directory.
            data_yaml: path of the dataset YAML; its ``names`` entry supplies the class names.
            dataset_dir: dataset root; SAHI reads images from ``<dataset_dir>/test/images``.
            epochs: number of training epochs for both models (small default for quick tests).
        """
        self.base_dir = base_dir
        self.data_yaml = data_yaml
        self.dataset_dir = dataset_dir
        self.epochs = epochs
        self.visualizer = ResultsVisualizer(base_dir)

        # Load class names from yaml
        with open(data_yaml, 'r') as f:
            self.data_config = yaml.safe_load(f)
        self.class_names = self.data_config['names']

        logger.info(f"Initialized evaluator with {len(self.class_names)} classes")

    @staticmethod
    def _safe_get_metrics(results):
        """Extract scalar box metrics from validation results, falling back to 0.

        Recall/precision are read from ``box.mr`` / ``box.mp`` (the means);
        ``box.r`` / ``box.p`` hold one value per class and cannot be used as a
        single score in the summary table.
        """
        box = getattr(results, 'box', None)
        if box is None:
            logger.error("Validation results missing 'box' attribute")
            return {
                'mAP50': 0,
                'mAP50-95': 0,
                'recall': 0,
                'precision': 0
            }
        return {
            'mAP50': float(getattr(box, 'map50', 0)),
            'mAP50-95': float(getattr(box, 'map', 0)),
            'recall': float(getattr(box, 'mr', 0)),
            'precision': float(getattr(box, 'mp', 0))
        }

    def _best_weights_path(self, model, run_name):
        """Return the best-checkpoint path of the run that ``model`` was just trained in.

        Ultralytics appends a counter to ``run_name`` when the directory
        already exists, so the trainer's own ``best`` path is preferred; the
        un-suffixed ``<base_dir>/<run_name>/weights/best.pt`` is only a fallback.
        """
        best = getattr(getattr(model, 'trainer', None), 'best', None)
        if best is not None:
            return str(best)
        return f'{self.base_dir}/{run_name}/weights/best.pt'

    def train_and_evaluate_baseline(self):
        """Train YOLOv8n at 640 px and validate it. Returns ``(model, val_results)``."""
        logger.info("Starting baseline model training...")

        model = YOLO('yolov8n.pt')
        model.train(
            data=self.data_yaml,
            epochs=self.epochs,
            imgsz=640,
            project=self.base_dir,
            name='baseline_model'
        )

        # Evaluate
        val_results = model.val(data=self.data_yaml)
        return model, val_results

    def train_and_evaluate_enhanced(self):
        """Train YOLOv8n at 1024 px and validate with ``augment=True`` (TTA).

        Returns ``(model, val_results)``.
        """
        logger.info("Starting enhanced model training...")

        model = YOLO('yolov8n.pt')
        model.train(
            data=self.data_yaml,
            epochs=self.epochs,
            imgsz=1024,
            project=self.base_dir,
            name='enhanced_model'
        )

        # Evaluate with TTA
        val_results = model.val(
            data=self.data_yaml,
            imgsz=1024,
            augment=True
        )
        return model, val_results

    def evaluate_with_sahi(self, model_path):
        """Run SAHI sliced prediction over ``<dataset_dir>/test/images``.

        Returns:
            List of per-image SAHI prediction results. Images that fail are
            logged and skipped; any setup failure is logged and yields ``[]``.
        """
        try:
            from sahi import AutoDetectionModel
            from sahi.predict import get_sliced_prediction

            logger.info(f"Loading model from {model_path}")
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"Model file not found: {model_path}")

            detection_model = AutoDetectionModel.from_pretrained(
                model_type='ultralytics',
                model_path=model_path,
                confidence_threshold=0.3,
                device='cuda'
            )

            test_images_dir = os.path.join(self.dataset_dir, 'test/images')
            if not os.path.exists(test_images_dir):
                raise FileNotFoundError(f"Test images dir not found: {test_images_dir}")

            results = []
            # sorted() gives a stable processing order; lower() also accepts .JPG/.PNG.
            for image_name in sorted(os.listdir(test_images_dir)):
                if not image_name.lower().endswith(self._IMAGE_EXTENSIONS):
                    continue
                image_path = os.path.join(test_images_dir, image_name)
                try:
                    result = get_sliced_prediction(
                        image=image_path,
                        detection_model=detection_model,
                        slice_height=512,
                        slice_width=512,
                        overlap_height_ratio=0.2,
                        overlap_width_ratio=0.2
                    )
                    results.append(result)
                    logger.debug(f"Processed {image_name} successfully")
                except Exception as e:
                    logger.warning(f"Failed to process {image_name}: {str(e)}")

            return results

        except Exception as e:
            # Best-effort step: report with traceback and let the pipeline continue.
            logger.error(f"SAHI evaluation failed: {str(e)}", exc_info=True)
            return []

    def run_complete_evaluation(self):
        """Run training, validation, SAHI inference and visualization.

        Returns:
            The ``results_dict`` handed to the visualizer.

        Raises:
            Exception: any error from training/validation/plotting is logged and re-raised.
        """
        try:
            # 1. Baseline evaluation
            baseline_model, baseline_results = self.train_and_evaluate_baseline()

            # 2. Enhanced evaluation
            enhanced_model, enhanced_results = self.train_and_evaluate_enhanced()

            # 3. SAHI evaluation on the weights of the run that was just trained
            sahi_results = self.evaluate_with_sahi(
                self._best_weights_path(enhanced_model, 'enhanced_model')
            )
            logger.info(f"SAHI produced predictions for {len(sahi_results)} test images")

            # 4. Collect all results
            baseline_metrics = self._safe_get_metrics(baseline_results)
            enhanced_metrics = self._safe_get_metrics(enhanced_results)

            n_classes = len(self.class_names)
            results_dict = {
                'summary_metrics': {
                    'Baseline': {
                        'mAP50': baseline_metrics['mAP50'],
                        'mAP50-95': baseline_metrics['mAP50-95'],
                        'recall': baseline_metrics['recall']
                    },
                    'Enhanced+TTA': {
                        'mAP50': enhanced_metrics['mAP50'],
                        'mAP50-95': enhanced_metrics['mAP50-95'],
                        'recall': enhanced_metrics['recall']
                    }
                },
                # Dummy confusion matrices
                'confusion_matrices': {
                    'Baseline': {
                        'true': [0]*n_classes,
                        'pred': [0]*n_classes
                    },
                    'Enhanced+TTA': {
                        'true': [0]*n_classes,
                        'pred': [0]*n_classes
                    }
                },
                # Dummy precision/recall curves (all zeros), one list per model;
                # generate_all_visualizations reads these keys for the PR plot.
                'precision': [[0]*n_classes, [0]*n_classes],
                'recall': [[0]*n_classes, [0]*n_classes]
            }

            # 5. Generate visualizations
            self.visualizer.generate_all_visualizations(
                results_dict,
                self.class_names
            )

            logger.info("Evaluation completed successfully!")
            return results_dict

        except Exception as e:
            logger.error(f"Error during evaluation: {str(e)}", exc_info=True)
            raise

    def test_metrics_extraction(self):
        """Self-check of ``_safe_get_metrics`` using stand-in result objects."""
        from types import SimpleNamespace

        # Stand-in for validation results: scalar means plus per-class arrays
        # that must NOT be picked up as the summary recall/precision.
        mock_results = SimpleNamespace(
            box=SimpleNamespace(map50=0.5, map=0.4, mr=0.3, mp=0.6,
                                r=[0.1, 0.5], p=[0.2, 1.0]),
            confusion_matrix=None
        )

        metrics = self._safe_get_metrics(mock_results)
        assert metrics['mAP50'] == 0.5
        assert metrics['recall'] == 0.3
        assert metrics['precision'] == 0.6
        logger.info("✅ Metrics extraction test passed!")

        # Test missing attribute handling (box set to None, and box absent)
        mock_results.box = None
        metrics = self._safe_get_metrics(mock_results)
        assert metrics['mAP50'] == 0
        assert self._safe_get_metrics(SimpleNamespace())['mAP50'] == 0
        logger.info("✅ Error handling test passed!")

In [None]:
class ModelEvaluator:
    """Trains baseline and enhanced YOLOv8 models, runs SAHI inference and plots the results.

    NOTE: an earlier cell in this notebook also defines ``ModelEvaluator``;
    when the cells are run in order, this definition is the one in effect.
    """

    # File extensions (lower-case) treated as images when scanning the test directory.
    _IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, base_dir, data_yaml, dataset_dir, epochs=2):
        """
        Args:
            base_dir: directory used as the Ultralytics ``project`` and as the
                parent of the results directory.
            data_yaml: path of the dataset YAML; its ``names`` entry supplies the class names.
            dataset_dir: dataset root; SAHI reads images from ``<dataset_dir>/test/images``.
            epochs: number of training epochs for both models (2 is for quick
                testing, increase as needed).
        """
        self.base_dir = base_dir
        self.data_yaml = data_yaml
        self.dataset_dir = dataset_dir
        self.epochs = epochs
        self.visualizer = ResultsVisualizer(base_dir)

        # Load class names from yaml
        with open(data_yaml, 'r') as f:
            self.data_config = yaml.safe_load(f)
        self.class_names = self.data_config['names']

        logger.info(f"Initialized evaluator with {len(self.class_names)} classes")

    @staticmethod
    def _safe_get_metrics(results):
        """Extract scalar box metrics from validation results, falling back to 0.

        Recall/precision are read from ``box.mr`` / ``box.mp`` (the means);
        ``box.r`` / ``box.p`` hold one value per class and cannot be used as a
        single score in the summary table.
        """
        box = getattr(results, 'box', None)
        if box is None:
            logger.error("Validation results missing 'box' attribute")
            return {
                'mAP50': 0,
                'mAP50-95': 0,
                'recall': 0,
                'precision': 0
            }
        return {
            'mAP50': float(getattr(box, 'map50', 0)),
            'mAP50-95': float(getattr(box, 'map', 0)),
            'recall': float(getattr(box, 'mr', 0)),
            'precision': float(getattr(box, 'mp', 0))
        }

    def _best_weights_path(self, model, run_name):
        """Return the best-checkpoint path of the run that ``model`` was just trained in.

        Ultralytics appends a counter to ``run_name`` when the directory
        already exists, so the trainer's own ``best`` path is preferred; the
        un-suffixed ``<base_dir>/<run_name>/weights/best.pt`` is only a fallback.
        """
        best = getattr(getattr(model, 'trainer', None), 'best', None)
        if best is not None:
            return str(best)
        return f'{self.base_dir}/{run_name}/weights/best.pt'

    def train_and_evaluate_baseline(self):
        """Train YOLOv8n at 640 px and validate it. Returns ``(model, val_results)``."""
        logger.info("Starting baseline model training...")

        model = YOLO('yolov8n.pt')
        model.train(
            data=self.data_yaml,
            epochs=self.epochs,
            imgsz=640,
            project=self.base_dir,
            name='baseline_model'
        )

        # Evaluate
        val_results = model.val(data=self.data_yaml)
        return model, val_results

    def train_and_evaluate_enhanced(self):
        """Train YOLOv8n at 1024 px and validate with ``augment=True`` (TTA).

        Returns ``(model, val_results)``.
        """
        logger.info("Starting enhanced model training...")

        model = YOLO('yolov8n.pt')
        model.train(
            data=self.data_yaml,
            epochs=self.epochs,
            imgsz=1024,
            project=self.base_dir,
            name='enhanced_model'
        )

        # Evaluate with TTA
        val_results = model.val(
            data=self.data_yaml,
            imgsz=1024,
            augment=True
        )
        return model, val_results

    def evaluate_with_sahi(self, model_path):
        """Run SAHI sliced prediction over ``<dataset_dir>/test/images``.

        Returns:
            List of per-image SAHI prediction results. Images that fail are
            logged and skipped; any setup failure is logged and yields ``[]``.
        """
        try:
            from sahi import AutoDetectionModel
            from sahi.predict import get_sliced_prediction

            logger.info(f"Loading model from {model_path}")
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"Model file not found: {model_path}")

            detection_model = AutoDetectionModel.from_pretrained(
                model_type='ultralytics',
                model_path=model_path,
                confidence_threshold=0.3,
                device='cuda'
            )

            test_images_dir = os.path.join(self.dataset_dir, 'test/images')
            if not os.path.exists(test_images_dir):
                raise FileNotFoundError(f"Test images dir not found: {test_images_dir}")

            results = []
            # sorted() gives a stable processing order; lower() also accepts .JPG/.PNG.
            for image_name in sorted(os.listdir(test_images_dir)):
                if not image_name.lower().endswith(self._IMAGE_EXTENSIONS):
                    continue
                image_path = os.path.join(test_images_dir, image_name)
                try:
                    result = get_sliced_prediction(
                        image=image_path,
                        detection_model=detection_model,
                        slice_height=512,
                        slice_width=512,
                        overlap_height_ratio=0.2,
                        overlap_width_ratio=0.2
                    )
                    results.append(result)
                    logger.debug(f"Processed {image_name} successfully")
                except Exception as e:
                    logger.warning(f"Failed to process {image_name}: {str(e)}")

            return results

        except Exception as e:
            # Best-effort step: report with traceback and let the pipeline continue.
            logger.error(f"SAHI evaluation failed: {str(e)}", exc_info=True)
            return []

    def run_complete_evaluation(self):
        """Run training, validation, SAHI inference and visualization.

        Returns:
            The ``results_dict`` handed to the visualizer.

        Raises:
            Exception: any error from training/validation/plotting is logged and re-raised.
        """
        try:
            # 1. Baseline evaluation
            baseline_model, baseline_results = self.train_and_evaluate_baseline()

            # 2. Enhanced evaluation
            enhanced_model, enhanced_results = self.train_and_evaluate_enhanced()

            # 3. SAHI evaluation on the weights of the run that was just trained
            sahi_results = self.evaluate_with_sahi(
                self._best_weights_path(enhanced_model, 'enhanced_model')
            )
            logger.info(f"SAHI produced predictions for {len(sahi_results)} test images")

            # 4. Collect all results
            baseline_metrics = self._safe_get_metrics(baseline_results)
            enhanced_metrics = self._safe_get_metrics(enhanced_results)

            n_classes = len(self.class_names)
            # Dummy per-class precision and recall, for each model, for plotting (set to zeros for now)
            precision_dummy = [[0]*n_classes, [0]*n_classes]
            recall_dummy = [[0]*n_classes, [0]*n_classes]

            results_dict = {
                'summary_metrics': {
                    'Baseline': {
                        'mAP50': baseline_metrics['mAP50'],
                        'mAP50-95': baseline_metrics['mAP50-95'],
                        'recall': baseline_metrics['recall']
                    },
                    'Enhanced+TTA': {
                        'mAP50': enhanced_metrics['mAP50'],
                        'mAP50-95': enhanced_metrics['mAP50-95'],
                        'recall': enhanced_metrics['recall']
                    }
                },
                # Dummy confusion matrices
                'confusion_matrices': {
                    'Baseline': {
                        'true': [0]*n_classes,
                        'pred': [0]*n_classes
                    },
                    'Enhanced+TTA': {
                        'true': [0]*n_classes,
                        'pred': [0]*n_classes
                    }
                },
                # Add dummy precision and recall for plotting
                'precision': precision_dummy,
                'recall': recall_dummy
            }

            # 5. Generate visualizations
            self.visualizer.generate_all_visualizations(
                results_dict,
                self.class_names
            )

            logger.info("Evaluation completed successfully!")
            return results_dict

        except Exception as e:
            logger.error(f"Error during evaluation: {str(e)}", exc_info=True)
            raise

    def test_metrics_extraction(self):
        """Self-check of ``_safe_get_metrics`` using stand-in result objects."""
        from types import SimpleNamespace

        # Stand-in for validation results: scalar means plus per-class arrays
        # that must NOT be picked up as the summary recall/precision.
        mock_results = SimpleNamespace(
            box=SimpleNamespace(map50=0.5, map=0.4, mr=0.3, mp=0.6,
                                r=[0.1, 0.5], p=[0.2, 1.0]),
            confusion_matrix=None
        )

        metrics = self._safe_get_metrics(mock_results)
        assert metrics['mAP50'] == 0.5
        assert metrics['recall'] == 0.3
        assert metrics['precision'] == 0.6
        logger.info("✅ Metrics extraction test passed!")

        # Test missing attribute handling (box set to None, and box absent)
        mock_results.box = None
        metrics = self._safe_get_metrics(mock_results)
        assert metrics['mAP50'] == 0
        assert self._safe_get_metrics(SimpleNamespace())['mAP50'] == 0
        logger.info("✅ Error handling test passed!")

In [None]:
# 5. Main Pipeline
# Requires the earlier cells to have run: logging setup (logger), config
# (BASE_DIR, DATA_YAML, DATASET_DIR) and the class definitions.

# Initialize evaluator (also creates a new timestamped results directory under BASE_DIR)
evaluator = ModelEvaluator(BASE_DIR, DATA_YAML, DATASET_DIR)
# Quick self-check of metric extraction before the long-running training starts
evaluator.test_metrics_extraction()

# Run evaluation: trains both models, runs SAHI inference, then writes plots and reports
results = evaluator.run_complete_evaluation()

logger.info(f"Results saved in: {evaluator.visualizer.results_dir}")

[38;5;39m2025-06-07 11:21:02 - INFO - Created results directory at /content/drive/MyDrive/new scope model/comparison_results_20250607_112102[0m
INFO:YOLOComparison:Created results directory at /content/drive/MyDrive/new scope model/comparison_results_20250607_112102
[38;5;39m2025-06-07 11:21:02 - INFO - Initialized evaluator with 8 classes[0m
INFO:YOLOComparison:Initialized evaluator with 8 classes
[38;5;39m2025-06-07 11:21:02 - INFO - ✅ Metrics extraction test passed![0m
INFO:YOLOComparison:✅ Metrics extraction test passed!
[38;5;39m2025-06-07 11:21:02 - INFO - ✅ Error handling test passed![0m
INFO:YOLOComparison:✅ Error handling test passed!
[38;5;39m2025-06-07 11:21:02 - INFO - Starting baseline model training...[0m
INFO:YOLOComparison:Starting baseline model training...


Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/new scope model/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=baseline_model12, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/train/labels.cache... 800 images, 0 backgrounds, 0 corrupt: 100%|██████████| 800/800 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))







[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]


Plotting labels to /content/drive/MyDrive/new scope model/baseline_model12/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000833, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1m/content/drive/MyDrive/new scope model/baseline_model12[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2      2.24G      1.859      2.989      1.262        156        640: 100%|██████████| 50/50 [00:05<00:00,  8.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00,  7.76it/s]


                   all        200       1235     0.0148      0.365      0.106      0.058

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/2      2.26G      1.662      1.823      1.186        205        640: 100%|██████████| 50/50 [00:04<00:00, 10.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:00<00:00,  7.40it/s]


                   all        200       1235       0.82      0.118      0.243      0.133

2 epochs completed in 0.004 hours.
Optimizer stripped from /content/drive/MyDrive/new scope model/baseline_model12/weights/last.pt, 6.2MB
Optimizer stripped from /content/drive/MyDrive/new scope model/baseline_model12/weights/best.pt, 6.2MB

Validating /content/drive/MyDrive/new scope model/baseline_model12/weights/best.pt...
Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
Model summary (fused): 72 layers, 3,007,208 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:01<00:00,  4.89it/s]


                   all        200       1235      0.821      0.118      0.242      0.134
                   Car        104        491      0.923      0.387      0.688      0.424
            Pedestrian        200        530      0.842      0.303       0.54      0.236
                   Van         60         75      0.719       0.16      0.229      0.153
               Cyclist         53         90       0.66     0.0218      0.166       0.07
        Person_sitting          7         19          1          0      0.115     0.0655
                  Misc          6          6          1          0    0.00147   0.000441
                 Truck         13         14      0.422     0.0714      0.177      0.115
                  Tram          3         10          1          0     0.0217     0.0101
Speed: 0.1ms preprocess, 0.4ms inference, 0.0ms loss, 1.3ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/baseline_model12[0m
Ultralytics 8.3.151 🚀 Python-3.11.13

[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:01<00:00,  6.58it/s]


                   all        200       1235      0.819      0.118      0.242      0.134
                   Car        104        491      0.926      0.389      0.688      0.424
            Pedestrian        200        530      0.833      0.298      0.538      0.237
                   Van         60         75      0.718       0.16      0.229      0.153
               Cyclist         53         90      0.659     0.0217      0.166     0.0715
        Person_sitting          7         19          1          0      0.115     0.0654
                  Misc          6          6          1          0    0.00148   0.000482
                 Truck         13         14      0.413     0.0714      0.177      0.115
                  Tram          3         10          1          0     0.0217     0.0101
Speed: 0.2ms preprocess, 0.7ms inference, 0.0ms loss, 3.6ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/baseline_model122[0m


[38;5;39m2025-06-07 11:21:35 - INFO - Starting enhanced model training...[0m
INFO:YOLOComparison:Starting enhanced model training...


Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/new scope model/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=enhanced_model6, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/train/labels.cache... 800 images, 0 backgrounds, 0 corrupt: 100%|██████████| 800/800 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.5±0.2 ms, read: 219.9±113.2 MB/s, size: 800.0 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]


Plotting labels to /content/drive/MyDrive/new scope model/enhanced_model6/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000833, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 8 dataloader workers
Logging results to [1m/content/drive/MyDrive/new scope model/enhanced_model6[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2      5.57G      1.763       3.09      1.313        158       1024: 100%|██████████| 50/50 [00:07<00:00,  6.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:01<00:00,  6.17it/s]

                   all        200       1235      0.021      0.518      0.134     0.0727






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/2      5.59G      1.534      2.023      1.222        205       1024: 100%|██████████| 50/50 [00:07<00:00,  6.70it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:01<00:00,  5.77it/s]

                   all        200       1235      0.871      0.137      0.283      0.166






2 epochs completed in 0.005 hours.
Optimizer stripped from /content/drive/MyDrive/new scope model/enhanced_model6/weights/last.pt, 6.2MB
Optimizer stripped from /content/drive/MyDrive/new scope model/enhanced_model6/weights/best.pt, 6.2MB

Validating /content/drive/MyDrive/new scope model/enhanced_model6/weights/best.pt...
Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (NVIDIA A100-SXM4-40GB, 40507MiB)
Model summary (fused): 72 layers, 3,007,208 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 7/7 [00:01<00:00,  4.45it/s]


                   all        200       1235       0.87      0.137      0.283      0.166
                   Car        104        491      0.913        0.3      0.713       0.46
            Pedestrian        200        530      0.955      0.198      0.562      0.257
                   Van         60         75      0.603      0.182       0.26      0.197
               Cyclist         53         90      0.816      0.133      0.398      0.185
        Person_sitting          7         19          1          0     0.0137    0.00624
                  Misc          6          6          1          0   0.000116   1.16e-05
                 Truck         13         14      0.673      0.286      0.289      0.209
                  Tram          3         10          1          0     0.0257     0.0104
Speed: 0.1ms preprocess, 1.4ms inference, 0.0ms loss, 3.4ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/enhanced_model6[0m
Ultralytics 8.3.151 🚀 Python-3.11.13 

[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 200 images, 0 backgrounds, 0 corrupt: 100%|██████████| 200/200 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 13/13 [00:03<00:00,  4.31it/s]


                   all        200       1235      0.846      0.123      0.257      0.146
                   Car        104        491      0.934       0.32      0.694      0.445
            Pedestrian        200        530      0.956      0.123      0.541      0.252
                   Van         60         75      0.451       0.11      0.194      0.135
               Cyclist         53         90      0.871       0.15      0.287      0.132
        Person_sitting          7         19          1          0     0.0364     0.0179
                  Misc          6          6          1          0   0.000559   5.59e-05
                 Truck         13         14      0.552      0.286      0.243      0.158
                  Tram          3         10          1          0     0.0575      0.027
Speed: 1.1ms preprocess, 3.5ms inference, 0.0ms loss, 3.6ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/enhanced_model62[0m


[38;5;39m2025-06-07 11:22:12 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
INFO:YOLOComparison:Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt


Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing predictio

[38;5;39m2025-06-07 11:22:50 - INFO - All visualizations saved in /content/drive/MyDrive/new scope model/comparison_results_20250607_112102[0m
INFO:YOLOComparison:All visualizations saved in /content/drive/MyDrive/new scope model/comparison_results_20250607_112102
[38;5;39m2025-06-07 11:22:50 - INFO - Evaluation completed successfully![0m
INFO:YOLOComparison:Evaluation completed successfully!
[38;5;39m2025-06-07 11:22:50 - INFO - Results saved in: /content/drive/MyDrive/new scope model/comparison_results_20250607_112102[0m
INFO:YOLOComparison:Results saved in: /content/drive/MyDrive/new scope model/comparison_results_20250607_112102


<Figure size 1200x800 with 0 Axes>