In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import numpy as np
import json
import os
from pathlib import Path
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import warnings

# Silence every warning category for the remainder of the kernel session.
# NOTE(review): this is a blanket filter, so deprecation notices and NumPy
# runtime warnings (e.g. divide-by-zero) are hidden as well -- confirm that
# is intended before relying on the metrics computed below.
warnings.filterwarnings("ignore")

ModuleNotFoundError: No module named 'sklearn'

In [None]:
class SegmentationMetrics:
    """Pixel-level evaluation metrics for a pair of binary segmentation masks.

    Every public method takes a predicted mask and a ground-truth mask and
    binarises both with ``> 0.5``, so 0/1 masks, 0/255 masks and probability
    maps are all accepted. The overlap metrics (IoU, Dice, pixel accuracy,
    precision/recall) require both masks to have the same shape.
    """

    def __init__(self):
        # Retained for backward compatibility with code that reads the
        # attribute. The ratio metrics guard zero denominators explicitly
        # instead of adding epsilon, so a perfect prediction scores exactly 1.0.
        self.epsilon = 1e-7

    # ------------------------------------------------------------------ helpers
    @staticmethod
    def _binarize(mask):
        """Return ``mask`` as a uint8 array of 0s and 1s (threshold 0.5)."""
        return (np.asarray(mask) > 0.5).astype(np.uint8)

    @classmethod
    def _binarize_pair(cls, pred_mask, true_mask):
        """Binarise both masks and reject shape mismatches.

        Raises:
            ValueError: if the two masks do not have the same shape. Without
                this check NumPy would silently broadcast e.g. (1, W) against
                (H, W) and produce a meaningless score.
        """
        pred = cls._binarize(pred_mask)
        true = cls._binarize(true_mask)
        if pred.shape != true.shape:
            raise ValueError(
                f"Mask shape mismatch: pred {pred.shape} vs true {true.shape}"
            )
        return pred, true

    @staticmethod
    def _ratio(numerator, denominator, default=0.0):
        """Return numerator / denominator as a float, or ``default`` if the
        denominator is zero."""
        if not denominator:
            return float(default)
        return float(numerator) / float(denominator)

    @staticmethod
    def _directed_hausdorff(source_points, target_points, chunk_size=1024):
        """Directed Hausdorff distance computed with NumPy only.

        For every point in ``source_points`` the distance to its nearest
        neighbour in ``target_points`` is computed; the largest of those
        distances is returned. Source points are processed in chunks so the
        pairwise distance matrix stays bounded in memory.
        """
        targets = np.asarray(target_points, dtype=np.float64)
        sources = np.asarray(source_points, dtype=np.float64)
        worst = 0.0
        for start in range(0, len(sources), chunk_size):
            chunk = sources[start : start + chunk_size]
            pairwise = np.linalg.norm(
                chunk[:, None, :] - targets[None, :, :], axis=2
            )
            worst = max(worst, float(pairwise.min(axis=1).max()))
        return worst

    # ------------------------------------------------------------------ metrics
    def iou_score(self, pred_mask, true_mask):
        """
        Calculate Intersection over Union (IoU) score
        Args:
            pred_mask: Predicted mask (binarised with > 0.5)
            true_mask: Ground truth mask (binarised with > 0.5)
        Returns:
            IoU score (float). Two empty masks score 1.0.
        Raises:
            ValueError: if the masks have different shapes.
        """
        pred, true = self._binarize_pair(pred_mask, true_mask)
        intersection = int(np.logical_and(pred, true).sum())
        union = int(np.logical_or(pred, true).sum())
        # union == 0 means both masks are empty, which counts as a perfect match.
        return self._ratio(intersection, union, default=1.0)

    def dice_score(self, pred_mask, true_mask):
        """
        Calculate Dice coefficient (F1-score for segmentation)
        Args:
            pred_mask: Predicted mask (binarised with > 0.5)
            true_mask: Ground truth mask (binarised with > 0.5)
        Returns:
            Dice score (float). Two empty masks score 1.0.
        Raises:
            ValueError: if the masks have different shapes.
        """
        pred, true = self._binarize_pair(pred_mask, true_mask)
        intersection = int(np.logical_and(pred, true).sum())
        total_pixels = int(pred.sum()) + int(true.sum())
        return self._ratio(2 * intersection, total_pixels, default=1.0)

    def pixel_accuracy(self, pred_mask, true_mask):
        """
        Calculate pixel-wise accuracy
        Args:
            pred_mask: Predicted mask (binarised with > 0.5)
            true_mask: Ground truth mask (binarised with > 0.5)
        Returns:
            Pixel accuracy (float). Zero-sized masks score 1.0 because there
            is no pixel that could be wrong.
        Raises:
            ValueError: if the masks have different shapes.
        """
        pred, true = self._binarize_pair(pred_mask, true_mask)
        correct_pixels = int(np.count_nonzero(pred == true))
        return self._ratio(correct_pixels, pred.size, default=1.0)

    def precision_recall_f1(self, pred_mask, true_mask):
        """
        Calculate precision, recall, and F1-score at pixel level
        Args:
            pred_mask: Predicted mask (binarised with > 0.5)
            true_mask: Ground truth mask (binarised with > 0.5)
        Returns:
            Dictionary with precision, recall, f1_score, specificity and the
            four confusion-matrix counts. A ratio whose denominator is zero
            is reported as 0.0.
        Raises:
            ValueError: if the masks have different shapes.
        """
        pred, true = self._binarize_pair(pred_mask, true_mask)
        pred_positive = pred.ravel().astype(bool)
        true_positive = true.ravel().astype(bool)

        # Confusion matrix elements
        tp = int(np.count_nonzero(pred_positive & true_positive))
        fp = int(np.count_nonzero(pred_positive & ~true_positive))
        fn = int(np.count_nonzero(~pred_positive & true_positive))
        tn = int(np.count_nonzero(~pred_positive & ~true_positive))

        return {
            "precision": self._ratio(tp, tp + fp),
            "recall": self._ratio(tp, tp + fn),
            # 2TP / (2TP + FP + FN) is the harmonic mean of precision/recall.
            "f1_score": self._ratio(2 * tp, 2 * tp + fp + fn),
            "specificity": self._ratio(tn, tn + fp),
            "true_positives": tp,
            "false_positives": fp,
            "false_negatives": fn,
            "true_negatives": tn,
        }

    def hausdorff_distance(self, pred_mask, true_mask):
        """
        Calculate the symmetric Hausdorff distance between mask boundaries
        Args:
            pred_mask: Predicted mask (binarised with > 0.5)
            true_mask: Ground truth mask (binarised with > 0.5)
        Returns:
            Hausdorff distance in pixels (float); ``inf`` when either mask has
            no contour. SciPy is used when available, otherwise the same
            quantity is computed with NumPy.
        """
        pred = self._binarize(pred_mask)
        true = self._binarize(true_mask)

        # [-2] selects the contour list on both OpenCV 3 (3 return values)
        # and OpenCV 4 (2 return values).
        pred_contours = cv2.findContours(
            pred, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]
        true_contours = cv2.findContours(
            true, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
        )[-2]

        if len(pred_contours) == 0 or len(true_contours) == 0:
            return float("inf")

        # Stack all contour vertices into (N, 2) point arrays
        pred_points = np.vstack([contour.reshape(-1, 2) for contour in pred_contours])
        true_points = np.vstack([contour.reshape(-1, 2) for contour in true_contours])

        # Only the SciPy import can raise ImportError, so only it is guarded.
        try:
            from scipy.spatial.distance import directed_hausdorff
        except ImportError:
            forward = self._directed_hausdorff(pred_points, true_points)
            backward = self._directed_hausdorff(true_points, pred_points)
        else:
            forward = directed_hausdorff(pred_points, true_points)[0]
            backward = directed_hausdorff(true_points, pred_points)[0]

        return float(max(forward, backward))

    def simple_boundary_distance(self, pred_mask, true_mask):
        """Mean distance from predicted boundary pixels to the true boundary.

        Boundaries are extracted with a 3x3 morphological gradient. For each
        predicted boundary pixel the Euclidean distance to the nearest true
        boundary pixel is taken, and the mean of those distances is returned.
        This is one-directional and is not a Hausdorff distance. Returns
        ``inf`` when either boundary is empty.
        """
        pred = self._binarize(pred_mask)
        true = self._binarize(true_mask)

        # Calculate boundaries using morphological operations
        kernel = np.ones((3, 3), np.uint8)
        pred_boundary = cv2.morphologyEx(pred, cv2.MORPH_GRADIENT, kernel)
        true_boundary = cv2.morphologyEx(true, cv2.MORPH_GRADIENT, kernel)

        if pred_boundary.sum() == 0 or true_boundary.sum() == 0:
            return float("inf")

        pred_coords = np.column_stack(np.where(pred_boundary))
        true_coords = np.column_stack(np.where(true_boundary))

        nearest = [
            np.min(np.linalg.norm(true_coords - point, axis=1))
            for point in pred_coords
        ]
        return float(np.mean(nearest)) if nearest else float("inf")


class YOLOSegmentationEvaluator:
    """Evaluator for YOLO segmentation model using IoU/Dice metrics.

    Ground truth is read from YOLO polygon annotation files, rasterised to
    masks, and compared against per-image predictions. Predictions currently
    come from ``create_dummy_predictions``; replace that method with real
    model inference to obtain meaningful numbers.
    """

    NUM_TOOL_CLASSES = 12  # class ids 0-11
    NUM_TISSUE_CLASSES = 9  # class ids 12-20
    DEFAULT_IMAGE_SIZE = (640, 640)  # (width, height) used when an image cannot be read
    IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png", ".bmp")

    def __init__(self, model_path=None, device=None):
        """
        Args:
            model_path: Path to model weights. Stored but not used yet, since
                predictions are dummies.
            device: Device to report/use. When omitted, a module-level
                ``DEFAULT_DEVICE`` is honoured if one exists; otherwise CUDA
                is selected when available, else CPU.
        """
        if device is None:
            try:
                device = DEFAULT_DEVICE
            except NameError:
                # DEFAULT_DEVICE is not defined in this notebook; derive a
                # sensible default so a fresh kernel does not crash here.
                device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.device = device
        print(f"Using device: {self.device}")

        self.model_path = model_path
        self.metrics_calculator = SegmentationMetrics()
        self.results = []

        # Class mappings from documentation
        self.tool_classes = list(range(self.NUM_TOOL_CLASSES))  # Classes 0-11
        self.tissue_classes = list(
            range(self.NUM_TOOL_CLASSES, self.NUM_TOOL_CLASSES + self.NUM_TISSUE_CLASSES)
        )  # Classes 12-20
        self.class_names = self.get_class_names()

    def get_class_names(self):
        """Return placeholder class names, indexed by class id.

        These need to be replaced with the actual class names of the dataset.
        """
        tool_names = [f"Tool_{i}" for i in range(self.NUM_TOOL_CLASSES)]
        tissue_names = [f"Tissue_{i}" for i in range(self.NUM_TISSUE_CLASSES)]
        return tool_names + tissue_names

    def load_ground_truth_annotations(self, annotation_path):
        """
        Load ground truth annotations
        Expected format: YOLO segmentation format with polygon coordinates

        Args:
            annotation_path: A single ``.txt`` file or a directory of them.
        Returns:
            Dict mapping file stem -> list of raw annotation lines. Empty when
            the path does not exist.
        """
        annotation_path = Path(annotation_path)

        if annotation_path.is_file():
            annotation_files = [annotation_path]
        elif annotation_path.is_dir():
            # Sorted so the result order does not depend on the filesystem.
            annotation_files = sorted(annotation_path.glob("*.txt"))
        else:
            annotation_files = []

        annotations = {}
        for ann_file in annotation_files:
            with open(ann_file, "r", encoding="utf-8") as f:
                annotations[ann_file.stem] = f.readlines()
        return annotations

    def parse_yolo_annotation(self, annotation_line, img_width, img_height):
        """
        Parse YOLO segmentation annotation line
        Format: class_id x1 y1 x2 y2 ... xn yn (normalized coordinates)

        Returns:
            ``(class_id, mask)`` where mask is a uint8 (img_height, img_width)
            array with the polygon filled with 255, or ``(None, None)`` when
            the line is too short, has an odd number of coordinates, or
            contains non-numeric tokens.
        """
        parts = annotation_line.strip().split()
        if len(parts) < 7:  # Need at least class + 3 points (6 coordinates)
            return None, None

        try:
            class_id = int(parts[0])
            coords = [float(value) for value in parts[1:]]
        except ValueError:
            # Malformed token: skip the line instead of aborting the image.
            return None, None

        if len(coords) % 2 != 0:
            # A dangling x without its y cannot form a vertex.
            return None, None

        # Convert normalized coordinates to pixel coordinates (truncating)
        scale = np.array([img_width, img_height], dtype=np.float64)
        points = (np.array(coords, dtype=np.float64).reshape(-1, 2) * scale).astype(
            np.int32
        )

        # Create mask from polygon
        mask = np.zeros((img_height, img_width), dtype=np.uint8)
        cv2.fillPoly(mask, [points], 255)

        return class_id, mask

    def create_dummy_predictions(self, img_width, img_height, num_objects=3):
        """
        Create dummy predictions for demonstration
        Replace this with actual model inference

        Each prediction is a filled circle of random centre and radius with a
        random class id and confidence. Uses the global NumPy RNG, so call
        ``np.random.seed`` beforehand for reproducible output.
        """
        num_classes = self.NUM_TOOL_CLASSES + self.NUM_TISSUE_CLASSES

        # Bounds are clamped so np.random.randint never receives low >= high
        # on very small images (the unclamped form needs min side >= 84 px).
        x_low = img_width // 4
        x_high = max(3 * img_width // 4, x_low + 1)
        y_low = img_height // 4
        y_high = max(3 * img_height // 4, y_low + 1)
        radius_high = max(2, min(img_width, img_height) // 4)
        radius_low = min(20, radius_high - 1)

        predictions = []
        for _ in range(num_objects):
            # Plain int so the id is JSON-serialisable
            class_id = int(np.random.choice(num_classes))

            center_x = int(np.random.randint(x_low, x_high))
            center_y = int(np.random.randint(y_low, y_high))
            radius = int(np.random.randint(radius_low, radius_high))

            # Create circular mask
            mask = np.zeros((img_height, img_width), dtype=np.uint8)
            cv2.circle(mask, (center_x, center_y), radius, 255, -1)

            confidence = float(np.random.uniform(0.5, 0.95))

            predictions.append(
                {"class_id": class_id, "mask": mask, "confidence": confidence}
            )

        return predictions

    def _best_match(self, prediction, candidate_masks):
        """Score ``prediction`` against its best-overlapping ground-truth mask.

        The candidate with the highest IoU is selected (first one wins ties).
        Returns the full metrics dict for that pairing, or ``None`` when no
        candidate has IoU > 0. The expensive metrics are computed only for
        the winner.
        """
        calculator = self.metrics_calculator
        pred_mask = prediction["mask"]

        best_iou = 0
        best_gt = None
        for gt_mask in candidate_masks:
            iou = calculator.iou_score(pred_mask, gt_mask)
            if iou > best_iou:
                best_iou, best_gt = iou, gt_mask

        if best_gt is None:
            return None

        pred_class = prediction["class_id"]
        return {
            "class_id": pred_class,
            "class_name": self.class_names[pred_class],
            "confidence": prediction["confidence"],
            "iou": best_iou,
            "dice": calculator.dice_score(pred_mask, best_gt),
            "pixel_accuracy": calculator.pixel_accuracy(pred_mask, best_gt),
            "hausdorff_distance": calculator.hausdorff_distance(pred_mask, best_gt),
            **calculator.precision_recall_f1(pred_mask, best_gt),
        }

    def evaluate_image(
        self, image_path, annotation_lines, img_width=None, img_height=None
    ):
        """
        Evaluate predictions vs ground truth for a single image

        Args:
            image_path: Path of the image (used for its name and, when
                dimensions are not supplied, for its size).
            annotation_lines: Raw YOLO polygon annotation lines.
            img_width, img_height: Optional image size. If either is missing
                the image is read; if it is absent or unreadable the default
                640x640 is used.
        Returns:
            Dict with image name/size, ``class_results`` (class id -> list of
            per-prediction metric dicts) and ``overall_metrics`` (empty when
            no prediction matched any ground truth).
        """
        image_path = Path(image_path)

        # Load image to get dimensions if not provided
        if img_width is None or img_height is None:
            # cv2.imread returns None (no exception) for unreadable files.
            img = cv2.imread(str(image_path)) if image_path.exists() else None
            if img is not None:
                img_height, img_width = img.shape[:2]
            else:
                img_width, img_height = self.DEFAULT_IMAGE_SIZE

        # Parse ground truth annotations: class_id -> list of masks
        gt_masks = {}
        for line in annotation_lines:
            class_id, mask = self.parse_yolo_annotation(line, img_width, img_height)
            if class_id is not None:
                gt_masks.setdefault(class_id, []).append(mask)

        # Get predictions (replace with actual model inference)
        predictions = self.create_dummy_predictions(img_width, img_height)

        image_results = {
            "image_name": image_path.name,
            "image_width": img_width,
            "image_height": img_height,
            "class_results": {},
            "overall_metrics": {},
        }

        matched = []
        for pred in predictions:
            best_metrics = self._best_match(pred, gt_masks.get(pred["class_id"], []))
            if best_metrics is None:
                continue
            image_results["class_results"].setdefault(pred["class_id"], []).append(
                best_metrics
            )
            matched.append(best_metrics)

        # Calculate overall image metrics
        if matched:
            image_results["overall_metrics"] = {
                "mean_iou": float(np.mean([m["iou"] for m in matched])),
                "mean_dice": float(np.mean([m["dice"] for m in matched])),
                "mean_pixel_accuracy": float(
                    np.mean([m["pixel_accuracy"] for m in matched])
                ),
                "num_predictions": len(predictions),
                "num_matched": len(matched),
                "match_rate": len(matched) / len(predictions) if predictions else 0,
            }

        return image_results

    def evaluate_dataset(
        self, images_dir, annotations_dir, output_dir="evaluation_results"
    ):
        """
        Evaluate entire dataset

        Images are paired with annotation files by file stem. Images without
        an annotation are reported and skipped; an image whose evaluation
        raises is reported and skipped so one bad file does not abort the run.

        Returns:
            List of per-image result dicts (also stored on ``self.results``).
        """
        images_dir = Path(images_dir)
        annotations_dir = Path(annotations_dir)
        output_dir = Path(output_dir)
        # parents=True so nested output paths work on a fresh checkout.
        output_dir.mkdir(parents=True, exist_ok=True)

        print("Starting segmentation evaluation...")
        print(f"Images directory: {images_dir}")
        print(f"Annotations directory: {annotations_dir}")

        # Find image files: case-insensitive extension match, sorted for a
        # deterministic processing order.
        if images_dir.is_dir():
            image_files = sorted(
                path
                for path in images_dir.iterdir()
                if path.is_file() and path.suffix.lower() in self.IMAGE_EXTENSIONS
            )
        else:
            image_files = []

        print(f"Found {len(image_files)} images")

        # Load annotations
        annotations = self.load_ground_truth_annotations(annotations_dir)
        print(f"Loaded annotations for {len(annotations)} files")

        # Evaluate each image
        all_results = []

        for image_file in image_files:
            image_stem = image_file.stem

            if image_stem not in annotations:
                print(f"No annotation found for: {image_file.name}")
                continue

            print(f"Evaluating: {image_file.name}")
            try:
                all_results.append(
                    self.evaluate_image(image_file, annotations[image_stem])
                )
            except Exception as e:
                # Deliberate best-effort: report and continue with the rest.
                print(f"Error evaluating {image_file.name}: {e}")

        self.results = all_results

        # Generate comprehensive report
        if all_results:
            self.generate_evaluation_report(all_results, output_dir)

        return all_results

    def generate_evaluation_report(self, all_results, output_dir):
        """Generate comprehensive evaluation report.

        Writes ``evaluation_summary.csv``, ``detailed_evaluation.csv`` and
        ``class_performance.csv`` (each only when it has rows), then the text
        report and the plots, and prints a short summary.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Compile statistics
        overall_stats = {
            "total_images": len(all_results),
            "mean_iou": [],
            "mean_dice": [],
            "mean_pixel_accuracy": [],
            "class_performance": {},
        }

        summary_data = []  # one row per image that had matches
        detailed_data = []  # one row per matched prediction

        for result in all_results:
            metrics = result.get("overall_metrics")
            if metrics:
                overall_stats["mean_iou"].append(metrics["mean_iou"])
                overall_stats["mean_dice"].append(metrics["mean_dice"])
                overall_stats["mean_pixel_accuracy"].append(
                    metrics["mean_pixel_accuracy"]
                )

                summary_data.append(
                    {
                        "Image": result["image_name"],
                        "Width": result["image_width"],
                        "Height": result["image_height"],
                        "Mean IoU": metrics["mean_iou"],
                        "Mean Dice": metrics["mean_dice"],
                        "Mean Pixel Accuracy": metrics["mean_pixel_accuracy"],
                        "Predictions": metrics["num_predictions"],
                        "Matched": metrics["num_matched"],
                        "Match Rate": metrics["match_rate"],
                    }
                )

            # Collect detailed per-class results
            for class_id, class_results in result["class_results"].items():
                for class_result in class_results:
                    detailed_data.append(
                        {
                            "Image": result["image_name"],
                            "Class ID": class_id,
                            "Class Name": class_result["class_name"],
                            "Confidence": class_result["confidence"],
                            "IoU": class_result["iou"],
                            "Dice": class_result["dice"],
                            "Pixel Accuracy": class_result["pixel_accuracy"],
                            "Precision": class_result["precision"],
                            "Recall": class_result["recall"],
                            "F1 Score": class_result["f1_score"],
                            "Specificity": class_result["specificity"],
                            "Hausdorff Distance": class_result["hausdorff_distance"],
                        }
                    )

        # Save CSV files
        if summary_data:
            summary_df = pd.DataFrame(summary_data)
            summary_df.to_csv(output_dir / "evaluation_summary.csv", index=False)

        if detailed_data:
            detailed_df = pd.DataFrame(detailed_data)
            detailed_df.to_csv(output_dir / "detailed_evaluation.csv", index=False)

            # Class-wise performance
            class_stats = (
                detailed_df.groupby(["Class ID", "Class Name"])
                .agg(
                    {
                        "IoU": ["mean", "std", "min", "max", "count"],
                        "Dice": ["mean", "std", "min", "max"],
                        "Pixel Accuracy": ["mean", "std", "min", "max"],
                        "Precision": ["mean", "std"],
                        "Recall": ["mean", "std"],
                        "F1 Score": ["mean", "std"],
                    }
                )
                .round(4)
            )

            class_stats.to_csv(output_dir / "class_performance.csv")

        # Generate text report
        self.generate_text_report(
            overall_stats, summary_data, detailed_data, output_dir
        )

        # Generate visualizations
        self.generate_visualizations(summary_data, detailed_data, output_dir)

        print("\n✅ Evaluation complete!")
        print(f"📊 Results saved to: {output_dir}")
        print(f"📈 Summary: {len(all_results)} images evaluated")

        if overall_stats["mean_iou"]:
            print(f"📊 Overall Mean IoU: {np.mean(overall_stats['mean_iou']):.4f}")
            print(f"📊 Overall Mean Dice: {np.mean(overall_stats['mean_dice']):.4f}")

    def generate_text_report(
        self, overall_stats, summary_data, detailed_data, output_dir
    ):
        """Generate detailed text report.

        Writes ``evaluation_report.txt`` into ``output_dir`` with dataset-wide
        statistics, the first ten per-image rows and a per-class summary.
        """
        report_path = Path(output_dir) / "evaluation_report.txt"

        # Explicit UTF-8: the report contains "±", which is not encodable in
        # every platform default encoding.
        with open(report_path, "w", encoding="utf-8") as f:
            f.write("SEGMENTATION EVALUATION REPORT\n")
            f.write("=" * 50 + "\n\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            # Overall Statistics
            f.write("OVERALL STATISTICS\n")
            f.write("-" * 20 + "\n")
            f.write(f"Total Images Evaluated: {overall_stats['total_images']}\n")

            if overall_stats["mean_iou"]:
                ious = overall_stats["mean_iou"]
                dices = overall_stats["mean_dice"]
                accuracies = overall_stats["mean_pixel_accuracy"]
                f.write(
                    f"Dataset Mean IoU: {np.mean(ious):.4f} ± {np.std(ious):.4f}\n"
                )
                f.write(
                    f"Dataset Mean Dice: {np.mean(dices):.4f} ± {np.std(dices):.4f}\n"
                )
                f.write(
                    f"Dataset Mean Pixel Accuracy: {np.mean(accuracies):.4f} ± {np.std(accuracies):.4f}\n"
                )
                f.write(f"IoU Range: {np.min(ious):.4f} - {np.max(ious):.4f}\n")
                f.write(f"Dice Range: {np.min(dices):.4f} - {np.max(dices):.4f}\n")

            f.write("\n")

            # Per-image results
            if summary_data:
                f.write("PER-IMAGE RESULTS\n")
                f.write("-" * 17 + "\n")
                for i, data in enumerate(summary_data[:10], 1):  # Show first 10
                    f.write(f"{i}. {data['Image']}\n")
                    f.write(f"   Resolution: {data['Width']}x{data['Height']}\n")
                    f.write(f"   Mean IoU: {data['Mean IoU']:.4f}\n")
                    f.write(f"   Mean Dice: {data['Mean Dice']:.4f}\n")
                    f.write(
                        f"   Predictions: {data['Predictions']} (Matched: {data['Matched']})\n\n"
                    )

                if len(summary_data) > 10:
                    f.write(f"... and {len(summary_data) - 10} more images\n\n")

            # Class performance summary
            if detailed_data:
                f.write("CLASS PERFORMANCE SUMMARY\n")
                f.write("-" * 25 + "\n")
                class_summary = (
                    pd.DataFrame(detailed_data)
                    .groupby("Class Name")
                    .agg(
                        {
                            "IoU": "mean",
                            "Dice": "mean",
                            "Precision": "mean",
                            "Recall": "mean",
                            "F1 Score": "mean",
                        }
                    )
                    .round(4)
                )

                for class_name, metrics in class_summary.iterrows():
                    f.write(f"{class_name}:\n")
                    f.write(f"  IoU: {metrics['IoU']:.4f}\n")
                    f.write(f"  Dice: {metrics['Dice']:.4f}\n")
                    f.write(f"  Precision: {metrics['Precision']:.4f}\n")
                    f.write(f"  Recall: {metrics['Recall']:.4f}\n")
                    f.write(f"  F1: {metrics['F1 Score']:.4f}\n\n")

    def generate_visualizations(self, summary_data, detailed_data, output_dir):
        """Generate evaluation visualizations.

        Saves ``metrics_distribution.png`` (when there is summary data) and
        ``class_iou_comparison.png`` / ``metrics_correlation.png`` (when there
        is detailed data) into ``output_dir``. Does nothing when both inputs
        are empty.
        """
        if not summary_data and not detailed_data:
            return

        output_dir = Path(output_dir)
        plt.style.use("default")

        # 1. Overall metrics distribution
        if summary_data:
            summary_df = pd.DataFrame(summary_data)
            fig, axes = plt.subplots(2, 2, figsize=(15, 12))

            # (axes position, column, colour, title)
            panels = [
                ((0, 0), "Mean IoU", "blue", "IoU Score Distribution"),
                ((0, 1), "Mean Dice", "green", "Dice Score Distribution"),
                ((1, 0), "Mean Pixel Accuracy", "orange", "Pixel Accuracy Distribution"),
                ((1, 1), "Match Rate", "red", "Prediction Match Rate Distribution"),
            ]
            for position, column, color, title in panels:
                ax = axes[position]
                ax.hist(summary_df[column], bins=20, alpha=0.7, color=color)
                ax.set_title(title)
                ax.set_xlabel(column)
                ax.set_ylabel("Frequency")

            fig.tight_layout()
            fig.savefig(
                output_dir / "metrics_distribution.png", dpi=300, bbox_inches="tight"
            )
            plt.close(fig)

        # 2. Class-wise performance
        if detailed_data:
            detailed_df = pd.DataFrame(detailed_data)

            # Class-wise IoU comparison
            fig, ax = plt.subplots(figsize=(12, 8))
            class_iou = (
                detailed_df.groupby("Class Name")["IoU"]
                .mean()
                .sort_values(ascending=True)
            )
            class_iou.plot(kind="barh", color="skyblue", ax=ax)
            ax.set_title("Mean IoU Score by Class")
            ax.set_xlabel("Mean IoU")
            fig.tight_layout()
            fig.savefig(
                output_dir / "class_iou_comparison.png", dpi=300, bbox_inches="tight"
            )
            plt.close(fig)

            # Metrics correlation heatmap
            numeric_cols = [
                "IoU",
                "Dice",
                "Pixel Accuracy",
                "Precision",
                "Recall",
                "F1 Score",
            ]
            correlation_matrix = detailed_df[numeric_cols].corr()

            fig, ax = plt.subplots(figsize=(10, 8))
            sns.heatmap(
                correlation_matrix, annot=True, cmap="coolwarm", center=0, ax=ax
            )
            ax.set_title("Metrics Correlation Matrix")
            fig.tight_layout()
            fig.savefig(
                output_dir / "metrics_correlation.png", dpi=300, bbox_inches="tight"
            )
            plt.close(fig)


def run_yolo_model_evaluation(model_path, test_images_dir, annotations_dir, output_dir):
    """
    Placeholder for evaluation with a real YOLO segmentation model.

    None of the arguments are used yet: the function only prints two notices
    explaining that dummy predictions are in use, then returns None.
    """
    notices = (
        "🔄 This function would integrate with your actual YOLO segmentation model",
        "🔄 For now, using dummy predictions for demonstration",
    )
    for notice in notices:
        print(notice)

    # Real integration would go here, e.g.:
    # model = YOLO(model_path)  # YOLOv11 loading
    # results = model.predict(test_images_dir)

    return None


def evaluate_tti_classifier_pixels(
    classifier_model_path, test_rois_dir, ground_truth_dir, output_dir
):
    """
    Evaluate TTI classifier at pixel level (placeholder).

    The real implementation would compare predicted ROI classifications with
    ground truth masks. For now the function creates ``output_dir`` (including
    missing parent directories), prints a description of the planned metrics
    and returns an empty result skeleton.

    Args:
        classifier_model_path: Not used yet.
        test_rois_dir: Not used yet.
        ground_truth_dir: Not used yet.
        output_dir: Directory to create for future results.
    Returns:
        Dict with keys ``total_rois``, ``correct_classifications``,
        ``pixel_level_metrics`` and ``class_wise_performance``.
    """
    output_dir = Path(output_dir)
    # parents=True so a nested results path does not raise FileNotFoundError.
    output_dir.mkdir(parents=True, exist_ok=True)

    print("\n🔄 TTI Classifier Pixel-Level Evaluation")
    print("=" * 40)

    # This would need your actual TTI classifier and ROI data
    # For now, creating demonstration structure

    results = {
        "total_rois": 0,
        "correct_classifications": 0,
        "pixel_level_metrics": {},
        "class_wise_performance": {},
    }

    # Placeholder for actual implementation
    print("🔄 This would evaluate your TTI classifier predictions against ground truth")
    print("🔄 Metrics would include:")
    print("   - Pixel-wise accuracy for ROI classification")
    print("   - Precision/Recall for TTI vs No-TTI regions")
    print("   - Boundary accuracy for interaction regions")

    return results

In [None]:
# Driver cell: evaluate every image in `images_dir` against the YOLO polygon
# annotations in `annotations_dir` and write the reports to `output_dir`.
images_dir = "./images"
annotations_dir = "./annotations"
output_dir = "./iou_dice_evaluation_results"

print("IoU/Dice Segmentation Evaluation")
print("=" * 40)

# Initialize evaluator
evaluator = YOLOSegmentationEvaluator()

# Check if directories exist. exit() is deliberately not used here: inside a
# notebook it shuts down the kernel instead of just ending the cell.
missing_dirs = [
    (label, path)
    for label, path in (("Images", images_dir), ("Annotations", annotations_dir))
    if not Path(path).exists()
]

if missing_dirs:
    for label, path in missing_dirs:
        print(f"⚠️  {label} directory not found: {path}")
    results = []
else:
    # Run evaluation
    results = evaluator.evaluate_dataset(images_dir, annotations_dir, output_dir)

    if results:
        print("\n✅ Evaluation completed successfully!")
        print(f"📊 Evaluated {len(results)} images")
        print(f"📁 Results saved to: {output_dir}")
    else:
        print("❌ No results generated. Check your image and annotation paths.")