In [2]:
# 1. Initial Setup and Imports
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [31]:
# Install required packages
!pip install ultralytics sahi pycocotools pandas seaborn --quiet

import os
import yaml
import json
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from ultralytics import YOLO
from sklearn.metrics import confusion_matrix
import matplotlib.patches as patches
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from ultralytics import __version__ as yolo_version

# First setup the logger
class CustomFormatter(logging.Formatter):
    """Custom formatter with colors"""
    grey = "\x1b[38;21m"
    blue = "\x1b[38;5;39m"
    yellow = "\x1b[38;5;226m"
    red = "\x1b[38;5;196m"
    reset = "\x1b[0m"

    def __init__(self):
        super().__init__(fmt="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")
        self.FORMATS = {
            logging.DEBUG: self.grey,
            logging.INFO: self.blue,
            logging.WARNING: self.yellow,
            logging.ERROR: self.red
        }

    def format(self, record):
        color = self.FORMATS.get(record.levelno, self.grey)
        log_fmt = f"{color}%(asctime)s - %(levelname)s - %(message)s{self.reset}"
        formatter = logging.Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S")
        return formatter.format(record)

# Setup logger (must come before any logger.info() calls)
logger = logging.getLogger("YOLOComparison")
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler()
console_handler.setFormatter(CustomFormatter())
logger.addHandler(console_handler)

# Now you can use the logger
logger.info(f"Using Ultralytics YOLO version: {yolo_version}")

# Version compatibility check
if yolo_version < '8.0.0':
    logger.warning("This code was tested with YOLOv8.0.0+, older versions may need adjustments")

[38;5;39m2025-06-06 18:37:19 - INFO - Using Ultralytics YOLO version: 8.3.151[0m
[38;5;39m2025-06-06 18:37:19 - INFO - Using Ultralytics YOLO version: 8.3.151[0m
[38;5;39m2025-06-06 18:37:19 - INFO - Using Ultralytics YOLO version: 8.3.151[0m
[38;5;39m2025-06-06 18:37:19 - INFO - Using Ultralytics YOLO version: 8.3.151[0m
[38;5;39m2025-06-06 18:37:19 - INFO - Using Ultralytics YOLO version: 8.3.151[0m
INFO:YOLOComparison:Using Ultralytics YOLO version: 8.3.151


In [32]:
# 2. Configuration
BASE_DIR = '/content/drive/MyDrive/new scope model'
DATA_YAML = f'{BASE_DIR}/data.yaml'
DATASET_DIR = '/content/drive/MyDrive/Samplesmall_dataset'

In [33]:
# 3. Setup Logging
class CustomFormatter(logging.Formatter):
    """Custom formatter with colors"""
    grey = "\x1b[38;21m"
    blue = "\x1b[38;5;39m"
    yellow = "\x1b[38;5;226m"
    red = "\x1b[38;5;196m"
    reset = "\x1b[0m"

    def __init__(self):
        super().__init__(fmt="%(asctime)s - %(levelname)s - %(message)s",
                        datefmt="%Y-%m-%d %H:%M:%S")
        self.FORMATS = {
            logging.DEBUG: self.grey,
            logging.INFO: self.blue,
            logging.WARNING: self.yellow,
            logging.ERROR: self.red
        }

    def format(self, record):
        color = self.FORMATS.get(record.levelno, self.grey)
        log_fmt = f"{color}%(asctime)s - %(levelname)s - %(message)s{self.reset}"
        formatter = logging.Formatter(log_fmt, datefmt="%Y-%m-%d %H:%M:%S")
        return formatter.format(record)

# Setup logger
logger = logging.getLogger("YOLOComparison")
logger.setLevel(logging.INFO)
console_handler = logging.StreamHandler()
console_handler.setFormatter(CustomFormatter())
logger.addHandler(console_handler)


In [34]:
# 4. Visualization Class
class ResultsVisualizer:
    """Handles all visualization tasks"""

    def __init__(self, base_dir):
        self.base_dir = base_dir
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Create organized directory structure
        self.results_dir = os.path.join(base_dir, f'comparison_results_{self.timestamp}')
        self.dirs = {
            'plots': os.path.join(self.results_dir, 'plots'),
            'metrics': os.path.join(self.results_dir, 'metrics'),
            'detections': os.path.join(self.results_dir, 'detection_examples'),
            'logs': os.path.join(self.results_dir, 'logs')
        }

        # Create all directories
        for dir_path in self.dirs.values():
            os.makedirs(dir_path, exist_ok=True)

        # Setup file logging
        file_handler = logging.FileHandler(os.path.join(self.dirs['logs'], 'evaluation.log'))
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logger.addHandler(file_handler)

        logger.info(f"Created results directory at {self.results_dir}")

In [35]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import matplotlib.patches as patches
from sklearn.metrics import confusion_matrix

In [36]:
class ModelEvaluator:
    def __init__(self, base_dir, data_yaml):
        self.base_dir = base_dir
        self.data_yaml = data_yaml
        self.visualizer = ResultsVisualizer(base_dir)

        # Load class names from yaml
        with open(data_yaml, 'r') as f:
            self.data_config = yaml.safe_load(f)
        self.class_names = self.data_config['names']

    def collect_evaluation_data(self, baseline_results, enhanced_results, sahi_results):
        """Collect all evaluation data for visualization"""

        # Collect metrics for each model
        results_dict = {
            'summary_metrics': {
                'Baseline': {
                    'mAP50': baseline_results.box.map50,
                    'mAP50-95': baseline_results.box.map,
                    'Mean Precision': baseline_results.box.mp(),  # Using mp() method
                    'Mean Recall': baseline_results.box.mr()  # Using mr() method
                },
                'Enhanced+TTA': {
                    'mAP50': enhanced_results.box.map50,
                    'mAP50-95': enhanced_results.box.map,
                    'Mean Precision': enhanced_results.box.mp(),  # Using mp() method
                    'Mean Recall': enhanced_results.box.mr()  # Using mr() method
                }
            },
            'confusion_matrices': {
                'Baseline': {
                    'true': baseline_results.box.confusion_matrix.matrix.T,  # Transpose for correct orientation
                    'pred': range(len(self.class_names))
                },
                'Enhanced+TTA': {
                    'true': enhanced_results.box.confusion_matrix.matrix.T,  # Transpose for correct orientation
                    'pred': range(len(self.class_names))
                }
            },
            'precision': [
                baseline_results.box.p,  # Using precision attribute 'p'
                enhanced_results.box.p
            ],
            'recall': [
                baseline_results.box.r,  # Using recall attribute 'r'
                enhanced_results.box.r
            ]
        }

        def get_metrics(results):
            """Safe metric extraction with fallbacks"""
            metrics = {
                'mAP50': getattr(results.box, 'map50', 0),
                'mAP50-95': getattr(results.box, 'map', 0),
                'recall': getattr(results.box, 'r', getattr(results.box, 'recall', 0)),
                'precision': getattr(results.box, 'p', 0)
            }
            return metrics

        # Then use it like this:
        baseline_metrics = get_metrics(baseline_results)
        enhanced_metrics = get_metrics(enhanced_results)


In [37]:
 def __init__(self, base_dir):
        self.base_dir = base_dir
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Create organized directory structure
        self.results_dir = os.path.join(base_dir, f'comparison_results_{self.timestamp}')
        self.dirs = {
            'plots': os.path.join(self.results_dir, 'plots'),
            'metrics': os.path.join(self.results_dir, 'metrics'),
            'detections': os.path.join(self.results_dir, 'detection_examples'),
            'logs': os.path.join(self.results_dir, 'logs')
        }

        # Create all directories
        for dir_path in self.dirs.values():
            os.makedirs(dir_path, exist_ok=True)

        # Setup file logging
        file_handler = logging.FileHandler(os.path.join(self.dirs['logs'], 'evaluation.log'))
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logger.addHandler(file_handler)

        logger.info(f"Created results directory at {self.results_dir}")

In [38]:
class ResultsVisualizer:
    """Handles all visualization tasks"""

    def __init__(self, base_dir):
        self.base_dir = base_dir
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Create organized directory structure
        self.results_dir = os.path.join(base_dir, f'comparison_results_{self.timestamp}')
        self.dirs = {
            'plots': os.path.join(self.results_dir, 'plots'),
            'metrics': os.path.join(self.results_dir, 'metrics'),
            'detections': os.path.join(self.results_dir, 'detection_examples'),
            'logs': os.path.join(self.results_dir, 'logs')
        }

        # Create all directories
        for dir_path in self.dirs.values():
            os.makedirs(dir_path, exist_ok=True)

        logger.info(f"Created results directory structure at {self.results_dir}")

    def plot_metrics_comparison(self, metrics_dict):
        """Create grouped bar plot for metrics comparison"""
        plt.figure(figsize=(12, 8))
        df = pd.DataFrame(metrics_dict).T

        # Create grouped bar plot
        ax = df.plot(kind='bar', width=0.8)
        plt.title('Performance Comparison Across Models')
        plt.xlabel('Model Type')
        plt.ylabel('Score')

        # Add value labels
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f')

        plt.tight_layout()
        save_path = os.path.join(self.dirs['plots'], 'metrics_comparison.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

        # Save metrics to CSV
        csv_path = os.path.join(self.dirs['metrics'], 'metrics_comparison.csv')
        df.to_csv(csv_path)

        # Create markdown table
        markdown_path = os.path.join(self.dirs['metrics'], 'metrics_summary.md')
        with open(markdown_path, 'w') as f:
            f.write("# Model Performance Comparison\n\n")
            f.write(df.to_markdown())

        return df

    def plot_confusion_matrix(self, true_labels, pred_labels, class_names, model_name):
        """Plot confusion matrix for each model"""
        plt.figure(figsize=(12, 10))
        cm = confusion_matrix(true_labels, pred_labels)
        sns.heatmap(cm, annot=True, fmt='d',
                   xticklabels=class_names,
                   yticklabels=class_names)
        plt.title(f'Confusion Matrix - {model_name}')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')

        save_path = os.path.join(self.dirs['plots'], f'confusion_matrix_{model_name}.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def plot_precision_recall_curves(self, precisions, recalls, model_names):
        """Plot precision-recall curves for all models"""
        plt.figure(figsize=(10, 8))
        for i, model_name in enumerate(model_names):
            plt.plot(recalls[i], precisions[i], label=model_name)

        plt.title('Precision-Recall Curves')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.legend()
        plt.grid(True)

        save_path = os.path.join(self.dirs['plots'], 'precision_recall_curves.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def plot_per_class_map(self, class_maps, class_names, model_names):
        """Plot per-class mAP comparison"""
        df = pd.DataFrame(class_maps, index=model_names, columns=class_names)

        plt.figure(figsize=(15, 8))
        ax = df.plot(kind='bar', width=0.8)
        plt.title('Per-Class mAP Comparison')
        plt.xlabel('Model')
        plt.ylabel('mAP')
        plt.legend(title='Classes', bbox_to_anchor=(1.05, 1), loc='upper left')

        # Add value labels
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f', rotation=90)

        plt.tight_layout()
        save_path = os.path.join(self.dirs['plots'], 'per_class_map.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def create_detection_grid(self, images, detections, class_names, model_names):
        """Create a grid of detection examples"""
        n_images = len(images)
        n_models = len(model_names)

        fig, axes = plt.subplots(n_images, n_models,
                                figsize=(5*n_models, 5*n_images))

        for i in range(n_images):
            for j in range(n_models):
                if n_images == 1:
                    ax = axes[j]
                else:
                    ax = axes[i, j]

                ax.imshow(images[i])

                # Plot detections
                for det in detections[j][i]:
                    bbox = det['bbox']
                    label = class_names[det['class_id']]
                    conf = det['confidence']

                    rect = patches.Rectangle(
                        (bbox[0], bbox[1]), bbox[2], bbox[3],
                        linewidth=2, edgecolor='r', facecolor='none'
                    )
                    ax.add_patch(rect)
                    ax.text(bbox[0], bbox[1]-5,
                           f'{label}: {conf:.2f}',
                           color='white', bbox=dict(facecolor='red', alpha=0.5))

                if i == 0:
                    ax.set_title(model_names[j])
                ax.axis('off')

        plt.tight_layout()
        save_path = os.path.join(self.dirs['detections'], 'detection_grid.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def save_markdown_report(self, metrics_df, additional_notes=None):
        """Create a comprehensive markdown report"""
        report_path = os.path.join(self.dirs['metrics'], 'complete_report.md')

        with open(report_path, 'w') as f:
            f.write("# Model Comparison Report\n\n")
            f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            f.write("## Summary Metrics\n")
            f.write(metrics_df.to_markdown())
            f.write("\n\n")

            f.write("## Visualization Directory Structure\n")
            for dir_name, dir_path in self.dirs.items():
                f.write(f"- {dir_name}: {dir_path}\n")
            f.write("\n")

            if additional_notes:
                f.write("## Additional Notes\n")
                f.write(additional_notes)
                f.write("\n")

            f.write("\n## Plots Generated\n")
            f.write("1. Metrics Comparison (Bar Plot)\n")
            f.write("2. Confusion Matrices\n")
            f.write("3. Precision-Recall Curves\n")
            f.write("4. Per-Class mAP Comparison\n")
            f.write("5. Detection Examples Grid\n")

def generate_all_visualizations(results_dict, class_names, base_dir):
    """Main function to generate all visualizations"""
    visualizer = ResultsVisualizer(base_dir)

    # 1. Overall metrics comparison
    metrics_df = visualizer.plot_metrics_comparison(results_dict['summary_metrics'])

    # 2. Confusion matrices
    for model_name in results_dict['confusion_matrices']:
        visualizer.plot_confusion_matrix(
            results_dict['confusion_matrices'][model_name]['true'],
            results_dict['confusion_matrices'][model_name]['pred'],
            class_names,
            model_name
        )

    # 3. Precision-recall curves
    visualizer.plot_precision_recall_curves(
        results_dict['precision'],
        results_dict['recall'],
        list(results_dict['summary_metrics'].keys())
    )

    # 4. Per-class mAP
    visualizer.plot_per_class_map(
        results_dict['per_class_map'],
        class_names,
        list(results_dict['summary_metrics'].keys())
    )

    # 5. Detection grid
    visualizer.create_detection_grid(
        results_dict['example_images'],
        results_dict['example_detections'],
        class_names,
        list(results_dict['summary_metrics'].keys())
    )

    # 6. Generate markdown report
    visualizer.save_markdown_report(
        metrics_df,
        additional_notes=results_dict.get('notes', None)
    )

    logger.info(f"All visualizations saved in {visualizer.results_dir}")
    return visualizer.results_dir

In [39]:
class ModelEvaluator:
    def __init__(self, base_dir, data_yaml):
        self.base_dir = base_dir
        self.data_yaml = data_yaml
        self.visualizer = ResultsVisualizer(base_dir)

        # Load class names from yaml
        with open(data_yaml, 'r') as f:
            self.data_config = yaml.safe_load(f)
        self.class_names = self.data_config['names']

        logger.info(f"Initialized evaluator with {len(self.class_names)} classes")

    def train_and_evaluate_baseline(self):
        """Train and evaluate baseline YOLOv8 model"""
        logger.info("Starting baseline model training...")

        model = YOLO('yolov8n.pt')
        results = model.train(
            data=self.data_yaml,
            epochs=2,
            imgsz=640,
            project=self.base_dir,
            name='baseline_model'
        )

        # Evaluate
        val_results = model.val(data=self.data_yaml)
        return model, val_results

    def train_and_evaluate_enhanced(self):
        """Train and evaluate enhanced model (larger size + TTA)"""
        logger.info("Starting enhanced model training...")

        model = YOLO('yolov8n.pt')
        results = model.train(
            data=self.data_yaml,
            epochs=2,
            imgsz=1024,
            project=self.base_dir,
            name='enhanced_model'
        )

        # Evaluate with TTA
        val_results = model.val(
            data=self.data_yaml,
            imgsz=1024,
            augment=True
        )
        return model, val_results

    def evaluate_with_sahi(self, model_path):
        """Evaluate using SAHI with better error handling"""
        try:
            from sahi import AutoDetectionModel
            from sahi.predict import get_sliced_prediction

            logger.info(f"Loading model from {model_path}")
            if not os.path.exists(model_path):
                raise FileNotFoundError(f"Model file not found: {model_path}")

            detection_model = AutoDetectionModel.from_pretrained(
                model_type='ultralytics',
                model_path=model_path,
                confidence_threshold=0.3,
                device='cuda'
            )

            test_images_dir = os.path.join(DATASET_DIR, 'test/images')
            if not os.path.exists(test_images_dir):
                raise FileNotFoundError(f"Test images dir not found: {test_images_dir}")

            results = []
            for image_name in os.listdir(test_images_dir):
                if image_name.endswith(('.jpg', '.png')):
                    image_path = os.path.join(test_images_dir, image_name)
                    try:
                        result = get_sliced_prediction(
                            image=image_path,
                            detection_model=detection_model,
                            slice_height=512,
                            slice_width=512,
                            overlap_height_ratio=0.2,
                            overlap_width_ratio=0.2
                        )
                        results.append(result)
                        logger.debug(f"Processed {image_name} successfully")
                    except Exception as e:
                        logger.warning(f"Failed to process {image_name}: {str(e)}")
                        continue

            return results

        except Exception as e:
            logger.error(f"SAHI evaluation failed: {str(e)}")
            return []

    def run_complete_evaluation(self):
        """Run complete evaluation pipeline"""
        try:
            # 1. Baseline evaluation
            baseline_model, baseline_results = self.train_and_evaluate_baseline()

            # 2. Enhanced evaluation
            enhanced_model, enhanced_results = self.train_and_evaluate_enhanced()

            # 3. SAHI evaluation
            sahi_results = self.evaluate_with_sahi(
                f'{self.base_dir}/enhanced_model/weights/best.pt'
            )

            # Function for safe metric extraction
            def safe_get_metrics(results):
                """Safe metric extraction with fallbacks"""
                if not hasattr(results, 'box'):
                    logger.error("Validation results missing 'box' attribute")
                    return {
                        'mAP50': 0,
                        'mAP50-95': 0,
                        'recall': 0,
                        'precision': 0
                    }

                box = results.box
                return {
                    'mAP50': getattr(box, 'map50', 0),
                    'mAP50-95': getattr(box, 'map', 0),
                    'recall': getattr(box, 'r', 0),
                    'precision': getattr(box, 'p', 0)
                }

            # 4. Collect all results
            baseline_metrics = safe_get_metrics(baseline_results)
            enhanced_metrics = safe_get_metrics(enhanced_results)

            results_dict = {
                'summary_metrics': {
                    'Baseline': {
                        'mAP50': baseline_metrics['mAP50'],
                        'mAP50-95': baseline_metrics['mAP50-95'],
                        'recall': baseline_metrics['recall']
                    },
                    'Enhanced+TTA': {
                        'mAP50': enhanced_metrics['mAP50'],
                        'mAP50-95': enhanced_metrics['mAP50-95'],
                        'recall': enhanced_metrics['recall']
                    }
                },
                'confusion_matrices': {
                    'Baseline': baseline_results.confusion_matrix,
                    'Enhanced+TTA': enhanced_results.confusion_matrix
                }
            }

            # 5. Generate visualizations
            self.visualizer.generate_all_visualizations(
                results_dict,
                self.class_names
            )

            logger.info("Evaluation completed successfully!")
            return results_dict

        except Exception as e:
            logger.error(f"Error during evaluation: {str(e)}", exc_info=True)
            raise

    def test_metrics_extraction(self):
        """Test metrics extraction works with current YOLO version"""
        from unittest.mock import MagicMock

        # Create mock results object
        mock_results = MagicMock()
        mock_results.box = MagicMock()
        mock_results.box.map50 = 0.5
        mock_results.box.map = 0.4
        mock_results.box.r = 0.3
        mock_results.box.p = 0.6
        mock_results.confusion_matrix = None

        # Since safe_get_metrics is inside run_complete_evaluation, redefining it here
        def safe_get_metrics(results):
            if not hasattr(results, 'box'):
                return {
                    'mAP50': 0,
                    'mAP50-95': 0,
                    'recall': 0,
                    'precision': 0
                }

            box = results.box
            return {
                'mAP50': getattr(box, 'map50', 0),
                'mAP50-95': getattr(box, 'map', 0),
                'recall': getattr(box, 'r', 0),
                'precision': getattr(box, 'p', 0)
            }

        metrics = safe_get_metrics(mock_results)
        assert metrics['mAP50'] == 0.5
        assert metrics['recall'] == 0.3
        logger.info("✅ Metrics extraction test passed!")

        # Test missing attribute handling
        mock_results.box = None
        metrics = safe_get_metrics(mock_results)
        assert metrics['mAP50'] == 0
        logger.info("✅ Error handling test passed!")


In [27]:
def main():
    logger.info("Starting YOLOv8 comparison pipeline...")

    # Verify paths
    for path in [BASE_DIR, DATA_YAML, DATASET_DIR]:
        if not os.path.exists(path):
            raise FileNotFoundError(f"Path not found: {path}")

    # Initialize evaluator
    evaluator = ModelEvaluator(BASE_DIR, DATA_YAML)
    evaluator.test_metrics_extraction()

    # Run evaluation
    results = evaluator.run_complete_evaluation()

    logger.info(f"Results saved in: {evaluator.visualizer.results_dir}")
    return results

if __name__ == "__main__":
    # Mount Google Drive
    drive.mount('/content/drive')

    # Run the evaluation
    results = main()

KeyboardInterrupt: 

In [30]:
import os
import logging
from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.metrics import confusion_matrix
import matplotlib.patches as patches

logger = logging.getLogger()
logger.setLevel(logging.INFO)

class ResultsVisualizer:
    """Handles all visualization tasks"""

    def __init__(self, base_dir):
        self.base_dir = base_dir
        self.timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')

        # Create organized directory structure
        self.results_dir = os.path.join(base_dir, f'comparison_results_{self.timestamp}')
        self.dirs = {
            'plots': os.path.join(self.results_dir, 'plots'),
            'metrics': os.path.join(self.results_dir, 'metrics'),
            'detections': os.path.join(self.results_dir, 'detection_examples'),
            'logs': os.path.join(self.results_dir, 'logs')
        }

        # Create all directories
        for dir_path in self.dirs.values():
            os.makedirs(dir_path, exist_ok=True)

        # Setup file logging
        file_handler = logging.FileHandler(os.path.join(self.dirs['logs'], 'evaluation.log'))
        file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
        logger.addHandler(file_handler)

        logger.info(f"Created results directory at {self.results_dir}")

    def plot_metrics_comparison(self, metrics_dict):
        """Create grouped bar plot for metrics comparison"""
        plt.figure(figsize=(12, 8))
        df = pd.DataFrame(metrics_dict).T

        ax = df.plot(kind='bar', width=0.8)
        plt.title('Performance Comparison Across Models')
        plt.xlabel('Model Type')
        plt.ylabel('Score')

        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f')

        plt.tight_layout()
        save_path = os.path.join(self.dirs['plots'], 'metrics_comparison.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

        csv_path = os.path.join(self.dirs['metrics'], 'metrics_comparison.csv')
        df.to_csv(csv_path)

        markdown_path = os.path.join(self.dirs['metrics'], 'metrics_summary.md')
        with open(markdown_path, 'w') as f:
            f.write("# Model Performance Comparison\n\n")
            f.write(df.to_markdown())

        return df

    def plot_confusion_matrix(self, true_labels, pred_labels, class_names, model_name):
        """Plot confusion matrix for each model"""
        plt.figure(figsize=(12, 10))
        cm = confusion_matrix(true_labels, pred_labels)
        sns.heatmap(cm, annot=True, fmt='d',
                    xticklabels=class_names,
                    yticklabels=class_names)
        plt.title(f'Confusion Matrix - {model_name}')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')

        save_path = os.path.join(self.dirs['plots'], f'confusion_matrix_{model_name}.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def plot_precision_recall_curves(self, precisions, recalls, model_names):
        """Plot precision-recall curves for all models"""
        plt.figure(figsize=(10, 8))
        for i, model_name in enumerate(model_names):
            plt.plot(recalls[i], precisions[i], label=model_name)

        plt.title('Precision-Recall Curves')
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.legend()
        plt.grid(True)

        save_path = os.path.join(self.dirs['plots'], 'precision_recall_curves.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def plot_per_class_map(self, class_maps, class_names, model_names):
        """Plot per-class mAP comparison"""
        df = pd.DataFrame(class_maps, index=model_names, columns=class_names)

        plt.figure(figsize=(15, 8))
        ax = df.plot(kind='bar', width=0.8)
        plt.title('Per-Class mAP Comparison')
        plt.xlabel('Model')
        plt.ylabel('mAP')
        plt.legend(title='Classes', bbox_to_anchor=(1.05, 1), loc='upper left')

        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f', rotation=90)

        plt.tight_layout()
        save_path = os.path.join(self.dirs['plots'], 'per_class_map.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def create_detection_grid(self, images, detections, class_names, model_names):
        """Create a grid of detection examples"""
        n_images = len(images)
        n_models = len(model_names)

        fig, axes = plt.subplots(n_images, n_models, figsize=(5*n_models, 5*n_images))

        for i in range(n_images):
            for j in range(n_models):
                if n_images == 1:
                    ax = axes[j]
                else:
                    ax = axes[i, j]

                ax.imshow(images[i])

                for det in detections[j][i]:
                    bbox = det['bbox']
                    label = class_names[det['class_id']]
                    conf = det['confidence']

                    rect = patches.Rectangle(
                        (bbox[0], bbox[1]), bbox[2], bbox[3],
                        linewidth=2, edgecolor='r', facecolor='none'
                    )
                    ax.add_patch(rect)
                    ax.text(bbox[0], bbox[1]-5,
                            f'{label}: {conf:.2f}',
                            color='white', bbox=dict(facecolor='red', alpha=0.5))

                if i == 0:
                    ax.set_title(model_names[j])
                ax.axis('off')

        plt.tight_layout()
        save_path = os.path.join(self.dirs['detections'], 'detection_grid.png')
        plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.close()

    def save_markdown_report(self, metrics_df, additional_notes=None):
        """Create a comprehensive markdown report"""
        report_path = os.path.join(self.dirs['metrics'], 'complete_report.md')

        with open(report_path, 'w') as f:
            f.write("# Model Comparison Report\n\n")
            f.write(f"Generated on: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            f.write("## Summary Metrics\n")
            f.write(metrics_df.to_markdown())
            f.write("\n\n")

            f.write("## Visualization Directory Structure\n")
            for dir_name, dir_path in self.dirs.items():
                f.write(f"- {dir_name}: {dir_path}\n")
            f.write("\n")

            if additional_notes:
                f.write("## Additional Notes\n")
                f.write(additional_notes)
                f.write("\n")

            f.write("\n## Plots Generated\n")
            f.write("1. Metrics Comparison (Bar Plot)\n")
            f.write("2. Confusion Matrices\n")
            f.write("3. Precision-Recall Curves\n")
            f.write("4. Per-Class mAP Comparison\n")
            f.write("5. Detection Examples Grid\n")

    def generate_all_visualizations(self, results_dict, class_names):
        """Main function to generate all visualizations"""

        # 1. Overall metrics comparison
        metrics_df = self.plot_metrics_comparison(results_dict['summary_metrics'])

        # 2. Confusion matrices
        for model_name, cm_object in results_dict['confusion_matrices'].items():
            self.plot_confusion_matrix(
                cm_object.y_true,
                cm_object.y_pred,
                class_names,
                model_name
    )




        # 3. Precision-recall curves
        self.plot_precision_recall_curves(
            results_dict['precision'],
            results_dict['recall'],
            list(results_dict['summary_metrics'].keys())
        )

        # 4. Per-class mAP
        self.plot_per_class_map(
            results_dict['per_class_map'],
            class_names,
            list(results_dict['summary_metrics'].keys())
        )

        # 5. Detection grid
        self.create_detection_grid(
            results_dict['example_images'],
            results_dict['example_detections'],
            class_names,
            list(results_dict['summary_metrics'].keys())
        )

        # 6. Generate markdown report
        self.save_markdown_report(
            metrics_df,
            additional_notes=results_dict.get('notes', None)
        )

        logger.info(f"All visualizations saved in {self.results_dir}")
        return self.results_dir


In [40]:
def main():
    logger.info("Starting YOLOv8 comparison pipeline...")

    # Verify paths
    for path in [BASE_DIR, DATA_YAML, DATASET_DIR]:
        if not os.path.exists(path):
            raise FileNotFoundError(f"Path not found: {path}")

    # Initialize evaluator
    evaluator = ModelEvaluator(BASE_DIR, DATA_YAML)
    evaluator.test_metrics_extraction()

    # Run evaluation
    results = evaluator.run_complete_evaluation()

    logger.info(f"Results saved in: {evaluator.visualizer.results_dir}")
    return results

if __name__ == "__main__":
    # Mount Google Drive
    drive.mount('/content/drive')

    # Run the evaluation
    results = main()

[38;5;39m2025-06-06 18:38:12 - INFO - Starting YOLOv8 comparison pipeline...[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Starting YOLOv8 comparison pipeline...[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Starting YOLOv8 comparison pipeline...[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Starting YOLOv8 comparison pipeline...[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Starting YOLOv8 comparison pipeline...[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Starting YOLOv8 comparison pipeline...[0m
INFO:YOLOComparison:Starting YOLOv8 comparison pipeline...
[38;5;39m2025-06-06 18:38:12 - INFO - Created results directory structure at /content/drive/MyDrive/new scope model/comparison_results_20250606_183812[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Created results directory structure at /content/drive/MyDrive/new scope model/comparison_results_20250606_183812[0m
[38;5;39m2025-06-06 18:38:12 - INFO - Created results directory structure at /content/drive/MyDrive/new scope model/comparison_results_2

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/new scope model/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=Fals

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/train/labels.cache... 138 images, 54 backgrounds, 0 corrupt: 100%|██████████| 192/192 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.4±0.1 ms, read: 104.5±29.7 MB/s, size: 811.7 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 37 images, 113 backgrounds, 0 corrupt: 100%|██████████| 150/150 [00:00<?, ?it/s]


Plotting labels to /content/drive/MyDrive/new scope model/baseline_model17/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000833, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1m/content/drive/MyDrive/new scope model/baseline_model17[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2      2.19G      1.753      4.417       1.24         82        640: 100%|██████████| 12/12 [00:05<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:01<00:00,  3.81it/s]

                   all        150        254   0.000402      0.215   0.000956   0.000253






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/2      2.22G      1.827      3.515      1.218        119        640: 100%|██████████| 12/12 [00:05<00:00,  2.17it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:01<00:00,  2.86it/s]

                   all        150        254   0.000715      0.214    0.00115   0.000337






2 epochs completed in 0.004 hours.
Optimizer stripped from /content/drive/MyDrive/new scope model/baseline_model17/weights/last.pt, 6.2MB
Optimizer stripped from /content/drive/MyDrive/new scope model/baseline_model17/weights/best.pt, 6.2MB

Validating /content/drive/MyDrive/new scope model/baseline_model17/weights/best.pt...
Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,007,208 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:01<00:00,  2.64it/s]


                   all        150        254   0.000715      0.214    0.00115   0.000338
                   Car         24        122    0.00509      0.189    0.00636    0.00209
            Pedestrian         37         93    0.00043     0.0215    0.00031   6.38e-05
                   Van          8         11          0          0          0          0
               Cyclist          8         14          0          0          0          0
        Person_sitting          1          1   9.94e-05          1    0.00126   0.000377
                  Misc          2          7          0          0          0          0
                 Truck          3          4          0          0          0          0
                  Tram          1          2   0.000101        0.5    0.00131   0.000176
Speed: 0.2ms preprocess, 2.1ms inference, 0.0ms loss, 5.5ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/baseline_model17[0m
Ultralytics 8.3.151 🚀 Python-3.11.13

[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 37 images, 113 backgrounds, 0 corrupt: 100%|██████████| 150/150 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:03<00:00,  2.98it/s]


                   all        150        254   0.000766      0.216    0.00118   0.000341
                   Car         24        122    0.00528      0.197    0.00639    0.00208
            Pedestrian         37         93    0.00064     0.0323   0.000418   8.54e-05
                   Van          8         11          0          0          0          0
               Cyclist          8         14          0          0          0          0
        Person_sitting          1          1   9.93e-05          1    0.00131   0.000394
                  Misc          2          7          0          0          0          0
                 Truck          3          4          0          0          0          0
                  Tram          1          2   0.000101        0.5    0.00129   0.000173
Speed: 2.0ms preprocess, 2.1ms inference, 0.0ms loss, 3.1ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/baseline_model172[0m


[38;5;39m2025-06-06 18:38:58 - INFO - Starting enhanced model training...[0m
[38;5;39m2025-06-06 18:38:58 - INFO - Starting enhanced model training...[0m
[38;5;39m2025-06-06 18:38:58 - INFO - Starting enhanced model training...[0m
[38;5;39m2025-06-06 18:38:58 - INFO - Starting enhanced model training...[0m
[38;5;39m2025-06-06 18:38:58 - INFO - Starting enhanced model training...[0m
[38;5;39m2025-06-06 18:38:58 - INFO - Starting enhanced model training...[0m
INFO:YOLOComparison:Starting enhanced model training...


Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=/content/drive/MyDrive/new scope model/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=2, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1024, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=enhanced_model11, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspe

[34m[1mtrain: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/train/labels.cache... 138 images, 54 backgrounds, 0 corrupt: 100%|██████████| 192/192 [00:00<?, ?it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.8±0.6 ms, read: 186.8±64.2 MB/s, size: 811.7 KB)


[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 37 images, 113 backgrounds, 0 corrupt: 100%|██████████| 150/150 [00:00<?, ?it/s]


Plotting labels to /content/drive/MyDrive/new scope model/enhanced_model11/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000833, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 1024 train, 1024 val
Using 2 dataloader workers
Logging results to [1m/content/drive/MyDrive/new scope model/enhanced_model11[0m
Starting training for 2 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/2      5.19G      1.664      4.569      1.296         82       1024: 100%|██████████| 12/12 [00:12<00:00,  1.06s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:02<00:00,  2.28it/s]

                   all        150        254    0.00104     0.0874    0.00101    0.00029






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/2      5.21G        1.7      3.641      1.283        120       1024: 100%|██████████| 12/12 [00:06<00:00,  1.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:03<00:00,  1.48it/s]

                   all        150        254    0.00142      0.156    0.00114   0.000302






2 epochs completed in 0.008 hours.
Optimizer stripped from /content/drive/MyDrive/new scope model/enhanced_model11/weights/last.pt, 6.2MB
Optimizer stripped from /content/drive/MyDrive/new scope model/enhanced_model11/weights/best.pt, 6.2MB

Validating /content/drive/MyDrive/new scope model/enhanced_model11/weights/best.pt...
Ultralytics 8.3.151 🚀 Python-3.11.13 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,007,208 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:02<00:00,  1.72it/s]


                   all        150        254    0.00141     0.0937     0.0011   0.000285
                   Car         24        122     0.0112      0.107    0.00845    0.00218
            Pedestrian         37         93          0          0          0          0
                   Van          8         11          0          0          0          0
               Cyclist          8         14          0          0          0          0
        Person_sitting          1          1          0          0          0          0
                  Misc          2          7   4.86e-05      0.143   4.58e-05   2.75e-05
                 Truck          3          4          0          0          0          0
                  Tram          1          2   9.75e-05        0.5   0.000298   7.48e-05
Speed: 0.3ms preprocess, 2.6ms inference, 0.0ms loss, 10.2ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/enhanced_model11[0m
Ultralytics 8.3.151 🚀 Python-3.11.1

[34m[1mval: [0mScanning /content/drive/MyDrive/Samplesmall_dataset/val/labels.cache... 37 images, 113 backgrounds, 0 corrupt: 100%|██████████| 150/150 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 10/10 [00:04<00:00,  2.39it/s]


                   all        150        254   0.000915     0.0738    0.00087   0.000306
                   Car         24        122    0.00723     0.0902    0.00681    0.00239
            Pedestrian         37         93          0          0          0          0
                   Van          8         11          0          0          0          0
               Cyclist          8         14          0          0          0          0
        Person_sitting          1          1          0          0          0          0
                  Misc          2          7          0          0          0          0
                 Truck          3          4          0          0          0          0
                  Tram          1          2   9.01e-05        0.5   0.000154   6.18e-05
Speed: 2.9ms preprocess, 8.7ms inference, 0.0ms loss, 4.6ms postprocess per image
Results saved to [1m/content/drive/MyDrive/new scope model/enhanced_model112[0m


[38;5;39m2025-06-06 18:39:59 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
[38;5;39m2025-06-06 18:39:59 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
[38;5;39m2025-06-06 18:39:59 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
[38;5;39m2025-06-06 18:39:59 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
[38;5;39m2025-06-06 18:39:59 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
[38;5;39m2025-06-06 18:39:59 - INFO - Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt[0m
INFO:YOLOComparison:Loading model from /content/drive/MyDrive/new scope model/enhanced_model/weights/best.pt


Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.
Performing prediction on 3 slices.


[38;5;196m2025-06-06 18:40:03 - ERROR - Error during evaluation: 'ResultsVisualizer' object has no attribute 'generate_all_visualizations'[0m
Traceback (most recent call last):
  File "<ipython-input-39-4a600c3ee298>", line 156, in run_complete_evaluation
    self.visualizer.generate_all_visualizations(
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'ResultsVisualizer' object has no attribute 'generate_all_visualizations'
[38;5;196m2025-06-06 18:40:03 - ERROR - Error during evaluation: 'ResultsVisualizer' object has no attribute 'generate_all_visualizations'[0m
Traceback (most recent call last):
  File "<ipython-input-39-4a600c3ee298>", line 156, in run_complete_evaluation
    self.visualizer.generate_all_visualizations(
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'ResultsVisualizer' object has no attribute 'generate_all_visualizations'
[38;5;196m2025-06-06 18:40:03 - ERROR - Error during evaluation: 'ResultsVisualizer' object has no attribute 

Performing prediction on 3 slices.


AttributeError: 'ResultsVisualizer' object has no attribute 'generate_all_visualizations'