In [None]:
# Final Model Pipeline - Melanoma Detection

This notebook provides a complete, production-ready pipeline for melanoma detection using the **final trained model** with improved architecture.

## Features:
1. **Model Loading**: Load the best trained model from `final_model/` folder
2. **Single Image Prediction**: Predict and visualize results for individual images
3. **Batch Prediction**: Process multiple images efficiently
4. **Export Results**: Save predictions to CSV for further analysis
5. **Visualization**: Display predictions with confidence scores and confusion matrix
6. **Model Evaluation**: Comprehensive evaluation on test set with clinical metrics

## Model Information:
- **Architecture**: Optimized through Optuna hyperparameter search
- **Training**: Based on research best practices for medical imaging
- **Weights**: `final_model/melanoma_model_improved_weights.pth`
- **Focus**: High melanoma recall (sensitivity) for clinical safety

## Usage:
Run all cells and use the `MelanomaDetectionPipeline` class for predictions.

## 1. Import Dependencies

In [None]:
import os
import json
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
from pathlib import Path
import warnings

warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, recall_score, precision_score
import seaborn as sns

# Seed NumPy and PyTorch with the same fixed value so repeated runs match.
np.random.seed(42)
torch.manual_seed(42)

# Prefer the GPU whenever CUDA reports one; otherwise stay on the CPU.
cuda_ready = torch.cuda.is_available()
device = torch.device("cuda") if cuda_ready else torch.device("cpu")
print(f"Using device: {device}")
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")

## 2. Define Model Configuration

Load the model configuration from training.

In [None]:
# Default pipeline settings; edit these to match the final trained model.
MODEL_CONFIG = dict(
    weights_path="final_model/melanoma_model_improved_weights.pth",
    model_name="resnet18",  # replaced below when the training results file exists
    input_size=224,
    num_classes=3,
    class_names=["benign", "suspicious", "melanoma"],
    # Maps each original diagnosis label onto one of the three class_names.
    group_map={
        "melanoma": "melanoma",
        "basal cell carcinoma": "suspicious",
        "actinic keratoses": "suspicious",
        "melanocytic nevi": "benign",
        "benign keratosis-like lesions": "benign",
        "dermatofibroma": "benign",
        "vascular lesions": "benign",
    },
)

# Prefer the architecture and input size recorded by the training run, if that file is present.
RESULTS_FILE = "model_results_improved.json"
if not os.path.exists(RESULTS_FILE):
    print("‚ö† Using default configuration (model_results_improved.json not found)")
else:
    with open(RESULTS_FILE, "r") as f:
        results = json.load(f)
    # Missing keys fall back to the same defaults used above.
    MODEL_CONFIG["model_name"] = results.get("model_architecture", "resnet18")
    MODEL_CONFIG["input_size"] = results.get("input_size", 224)
    print("‚úì Loaded model configuration from training results")
    print(f"  Model: {MODEL_CONFIG['model_name']}")
    print(f"  Input size: {MODEL_CONFIG['input_size']}")

print(f"\nModel weights: {MODEL_CONFIG['weights_path']}")
print(f"Classes: {MODEL_CONFIG['class_names']}")

## 3. Helper Function to Create Model

In [None]:
def create_model(model_name="resnet18", num_classes=3):
    """
    Build an untrained network whose final layer emits ``num_classes`` logits.

    Supports: resnet18, resnet50 and densenet121 (torchvision), plus any name
    starting with ``efficientnet`` (e.g. efficientnet_b0) through the optional
    ``timm`` package.

    Args:
        model_name: Architecture identifier.
        num_classes: Size of the replacement classification layer.

    Returns:
        The model with randomly initialised weights. Trained weights are
        loaded separately by the caller.

    Raises:
        ValueError: If ``model_name`` is not one of the supported architectures.
        ImportError: If an EfficientNet is requested and ``timm`` is not installed.
    """
    if model_name.startswith("resnet"):
        # The constructors are called without `pretrained=`/`weights=`: the
        # default is "no pretrained weights" in both the legacy and the current
        # torchvision API, which avoids the deprecated `pretrained` keyword.
        if model_name == "resnet18":
            model = models.resnet18()
        elif model_name == "resnet50":
            model = models.resnet50()
        else:
            raise ValueError(f"Unsupported ResNet variant: {model_name}")

        # Swap the ImageNet head for one sized to this task.
        num_ftrs = model.fc.in_features
        model.fc = nn.Linear(num_ftrs, num_classes)

    elif model_name.startswith("efficientnet"):
        # Only the import is guarded, so errors raised while building the
        # model are not misreported as a missing dependency.
        try:
            import timm
        except ImportError as exc:
            raise ImportError("timm library required for EfficientNet. Install with: pip install timm") from exc

        model = timm.create_model(model_name, pretrained=False, num_classes=num_classes)

    elif model_name.startswith("densenet"):
        if model_name == "densenet121":
            model = models.densenet121()
        else:
            raise ValueError(f"Unsupported DenseNet variant: {model_name}")

        num_ftrs = model.classifier.in_features
        model.classifier = nn.Linear(num_ftrs, num_classes)

    else:
        raise ValueError(f"Unsupported model: {model_name}")

    return model


print("‚úì Model creation function defined")

## 4. MelanomaDetectionPipeline Class

In [None]:
class MelanomaDetectionPipeline:
    """
    Complete pipeline for melanoma detection.

    Features:
    - Load model from weights
    - Single image prediction with visualization
    - Batch prediction (images are forwarded through the model in batches)
    - Export results to CSV
    - Model evaluation with metrics
    """

    def __init__(self, config, device=None):
        """
        Initialize the pipeline with model configuration.

        Args:
            config: Dict providing "model_name", "num_classes", "weights_path",
                "input_size", "class_names" and "group_map".
            device: torch.device to run on. Defaults to CUDA when available,
                otherwise CPU.

        Raises:
            FileNotFoundError: If config["weights_path"] does not exist.
        """
        self.config = config
        self.device = device if device else torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Load model
        print(f"Loading model: {config['model_name']}")
        self.model = create_model(config["model_name"], config["num_classes"])

        if os.path.exists(config["weights_path"]):
            # NOTE(review): torch.load unpickles the file, so only trusted
            # checkpoints should be loaded here. Consider weights_only=True on
            # PyTorch versions that support it.
            self.model.load_state_dict(torch.load(config["weights_path"], map_location=self.device))
            print(f"‚úì Model weights loaded from: {config['weights_path']}")
        else:
            raise FileNotFoundError(f"Model weights not found: {config['weights_path']}")

        self.model = self.model.to(self.device)
        self.model.eval()

        # Setup transforms: fixed-size resize, tensor conversion, per-channel normalisation
        self.transform = transforms.Compose(
            [
                transforms.Resize((config["input_size"], config["input_size"])),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
            ]
        )

        # Class mappings
        self.class_names = config["class_names"]
        self.group_map = config["group_map"]
        self.idx_to_group = {i: name for i, name in enumerate(self.class_names)}
        self.group_to_idx = {name: i for i, name in enumerate(self.class_names)}

        print(f"‚úì Pipeline initialized on device: {self.device}")

    @staticmethod
    def _load_image(image_path):
        """Read an image file as an RGB PIL image and close the file handle."""
        with Image.open(image_path) as handle:
            # convert() loads the pixel data into a new image, so the result
            # stays usable after the file is closed.
            return handle.convert("RGB")

    @staticmethod
    def _annotation_path(image_path):
        """Return the annotation JSON path that corresponds to an image path."""
        path_str = os.fspath(image_path)
        parent_dir, file_name = os.path.split(path_str)
        grandparent_dir, parent_name = os.path.split(parent_dir)
        if parent_name == "img":
            # Splitting with os.path handles mixed separators on Windows and
            # relative paths that start with "img".
            return os.path.join(grandparent_dir, "ann", file_name + ".json")
        # Otherwise keep the original substring replacement. When nothing
        # matches, this looks for "<image>.json" beside the image.
        return path_str.replace("/img/", "/ann/").replace("\\img\\", "\\ann\\") + ".json"

    def _predict_probs(self, batch):
        """Run the model on a stacked image tensor and return softmax rows as a NumPy array."""
        with torch.no_grad():
            outputs = self.model(batch.to(self.device))
            return F.softmax(outputs, dim=1).cpu().numpy()

    def _build_result(self, image_path, class_probs):
        """Build the prediction dict for one image from its per-class probabilities."""
        pred_idx = int(np.argmax(class_probs))
        return {
            "image_path": image_path,
            "prediction": self.idx_to_group[pred_idx],
            "probability": float(class_probs[pred_idx]),
            "all_probabilities": {name: float(class_probs[i]) for i, name in enumerate(self.class_names)},
        }

    def _attach_annotation(self, result):
        """Add ground-truth fields to ``result`` in place when an annotation file exists."""
        try:
            ann_path = self._annotation_path(result["image_path"])
            if os.path.exists(ann_path):
                with open(ann_path, "r") as f:
                    ann = json.load(f)
                true_label = ann["objects"][0]["classTitle"]
                true_group = self.group_map[true_label]

                result["true_label"] = true_label
                result["true_group"] = true_group
                result["correct"] = true_group == result["prediction"]
        except Exception as e:
            # Best effort: an annotation problem must not discard the
            # prediction, so the error is recorded on the result instead.
            result["annotation_error"] = str(e)

    def predict_single(self, image_path, show_image=True, check_annotation=True):
        """
        Predict the class of a single image.

        Args:
            image_path: Path to the image file
            show_image: Whether to display the image with prediction
            check_annotation: Whether to check ground truth annotation

        Returns:
            dict with "image_path", "prediction", "probability" and
            "all_probabilities". When an annotation is found it also has
            "true_label", "true_group" and "correct"; when reading the
            annotation fails it has "annotation_error".
        """
        # Load and transform image
        image = self._load_image(image_path)
        input_tensor = self.transform(image).unsqueeze(0)

        # Predict
        class_probs = self._predict_probs(input_tensor)[0]
        result = self._build_result(image_path, class_probs)

        # Check annotation if available
        if check_annotation:
            self._attach_annotation(result)

        # Visualize if requested
        if show_image:
            self._visualize_prediction(image, result)

        return result

    def _visualize_prediction(self, image, result):
        """Show the image next to a horizontal bar chart of the class probabilities."""
        plt.figure(figsize=(10, 6))

        # Image
        plt.subplot(1, 2, 1)
        plt.imshow(image)
        plt.axis("off")

        # Title with prediction
        title = f"Prediction: {result['prediction']}\nConfidence: {result['probability']:.2%}"
        if "true_group" in result:
            title += f"\nTrue: {result['true_group']}"
            if result["correct"]:
                title += " ‚úì"
            else:
                title += " ‚úó"
        plt.title(title, fontsize=12, fontweight="bold")

        # Probability bars
        plt.subplot(1, 2, 2)
        probs = result["all_probabilities"]
        colors = ["green", "orange", "red"]
        bars = plt.barh(list(probs.keys()), list(probs.values()), color=colors)
        plt.xlabel("Probability")
        plt.title("Class Probabilities")
        plt.xlim(0, 1)

        # Highlight predicted class
        pred_idx = self.group_to_idx[result["prediction"]]
        bars[pred_idx].set_edgecolor("black")
        bars[pred_idx].set_linewidth(3)

        plt.tight_layout()
        plt.show()

    def predict_batch(self, image_paths, batch_size=32, show_progress=True):
        """
        Predict classes for multiple images.

        Images are loaded one by one but sent through the model in groups of
        ``batch_size``. An image that cannot be loaded or transformed produces
        an error entry instead of aborting the run.

        Args:
            image_paths: List of image file paths
            batch_size: Batch size for processing (values below 1 are treated as 1)
            show_progress: Whether to show progress bar

        Returns:
            List of dicts in the same order as ``image_paths``. Each is either
            a prediction result as returned by ``predict_single`` or
            ``{"image_path": ..., "error": ...}``.
        """
        image_paths = list(image_paths)
        step = max(1, int(batch_size))
        results = []

        progress = tqdm(total=len(image_paths), desc="Processing images") if show_progress else None
        try:
            for start in range(0, len(image_paths), step):
                chunk = image_paths[start : start + step]
                chunk_results = [None] * len(chunk)
                tensors = []
                positions = []  # index within `chunk` of every successfully loaded image

                for pos, img_path in enumerate(chunk):
                    try:
                        tensors.append(self.transform(self._load_image(img_path)))
                        positions.append(pos)
                    except Exception as e:
                        print(f"Error processing {img_path}: {e}")
                        chunk_results[pos] = {"image_path": img_path, "error": str(e)}

                if tensors:
                    try:
                        batch_probs = self._predict_probs(torch.stack(tensors))
                    except Exception as e:
                        # A failed forward pass affects every image in the batch.
                        for pos in positions:
                            print(f"Error processing {chunk[pos]}: {e}")
                            chunk_results[pos] = {"image_path": chunk[pos], "error": str(e)}
                    else:
                        for pos, class_probs in zip(positions, batch_probs):
                            result = self._build_result(chunk[pos], class_probs)
                            self._attach_annotation(result)
                            chunk_results[pos] = result

                results.extend(chunk_results)
                if progress is not None:
                    progress.update(len(chunk))
        finally:
            if progress is not None:
                progress.close()

        return results

    def export_results(self, results, output_path="predictions.csv"):
        """
        Export prediction results to CSV.

        Entries containing an "error" key are skipped. One ``prob_<class>``
        column is written per class found in each result's "all_probabilities".

        Args:
            results: List of prediction dictionaries
            output_path: Path for output CSV file

        Returns:
            The exported pandas DataFrame. It keeps the base columns even when
            no prediction succeeded.
        """
        # Convert to DataFrame
        df_data = []
        for r in results:
            if "error" in r:
                continue
            row = {
                "image_path": r["image_path"],
                "prediction": r["prediction"],
                "probability": r["probability"],
            }
            for class_name, class_prob in r["all_probabilities"].items():
                row[f"prob_{class_name}"] = class_prob
            if "true_group" in r:
                row["true_label"] = r["true_label"]
                row["true_group"] = r["true_group"]
                row["correct"] = r["correct"]
            df_data.append(row)

        if df_data:
            df = pd.DataFrame(df_data)
        else:
            # Keep the expected columns so downstream code can index them even
            # when every image failed.
            base_cols = ["image_path", "prediction", "probability"]
            df = pd.DataFrame(columns=base_cols + [f"prob_{name}" for name in self.class_names])

        df.to_csv(output_path, index=False)
        print(f"‚úì Results exported to: {output_path}")
        print(f"  Total predictions: {len(df)}")

        return df

    def evaluate(self, test_loader):
        """
        Comprehensive evaluation on a test dataset.

        Args:
            test_loader: DataLoader for test set, yielding (images, labels) batches

        Returns:
            Tuple ``(metrics, all_preds, all_labels, all_probs)``. ``metrics``
            is a dict with accuracy, macro precision/recall/F1, the confusion
            matrix, the text classification report and a "per_class" dict.
            The other three are NumPy arrays of predicted class indices, true
            class indices and softmax probabilities.
        """
        self.model.eval()

        all_preds = []
        all_labels = []
        all_probs = []

        with torch.no_grad():
            for images, labels in tqdm(test_loader, desc="Evaluating"):
                images = images.to(self.device)
                outputs = self.model(images)
                probs = F.softmax(outputs, dim=1)
                _, predicted = torch.max(outputs, 1)

                all_preds.extend(predicted.cpu().numpy())
                all_labels.extend(labels.cpu().numpy())
                all_probs.extend(probs.cpu().numpy())

        all_preds = np.array(all_preds)
        all_labels = np.array(all_labels)
        all_probs = np.array(all_probs)

        # Calculate metrics (imported here because f1_score is not among the
        # notebook's top-level imports)
        from sklearn.metrics import (
            accuracy_score,
            precision_score,
            recall_score,
            f1_score,
            confusion_matrix,
            classification_report,
        )

        # Pin the label set so the matrix and report always have one row per
        # class, even if a class never appears in this test set. Without it,
        # classification_report rejects target_names of a different length.
        label_ids = list(range(len(self.class_names)))

        metrics = {
            "accuracy": accuracy_score(all_labels, all_preds),
            "precision_macro": precision_score(all_labels, all_preds, average="macro", zero_division=0),
            "recall_macro": recall_score(all_labels, all_preds, average="macro", zero_division=0),
            "f1_macro": f1_score(all_labels, all_preds, average="macro", zero_division=0),
            "confusion_matrix": confusion_matrix(all_labels, all_preds, labels=label_ids),
            "classification_report": classification_report(
                all_labels,
                all_preds,
                labels=label_ids,
                target_names=self.class_names,
                digits=4,
                zero_division=0,
            ),
        }

        # Per-class metrics (especially melanoma recall), computed one-vs-rest
        metrics["per_class"] = {}
        for i, class_name in enumerate(self.class_names):
            class_preds_binary = (all_preds == i).astype(int)
            class_labels_binary = (all_labels == i).astype(int)

            metrics["per_class"][class_name] = {
                "precision": precision_score(class_labels_binary, class_preds_binary, zero_division=0),
                "recall": recall_score(class_labels_binary, class_preds_binary, zero_division=0),
                "f1": f1_score(class_labels_binary, class_preds_binary, zero_division=0),
                "support": int(np.sum(all_labels == i)),
            }

        return metrics, all_preds, all_labels, all_probs

    def plot_confusion_matrix(self, confusion_matrix, save_path=None):
        """
        Plot confusion matrix.

        Args:
            confusion_matrix: Square array of integer counts, rows = true class,
                columns = predicted class, ordered like ``self.class_names``.
            save_path: Optional file path; when given the figure is also saved there.
        """
        plt.figure(figsize=(10, 8))
        sns.heatmap(
            confusion_matrix,
            annot=True,
            fmt="d",
            cmap="Blues",
            xticklabels=self.class_names,
            yticklabels=self.class_names,
        )
        plt.ylabel("True Label")
        plt.xlabel("Predicted Label")
        plt.title("Confusion Matrix - Final Model", fontsize=14, fontweight="bold")

        if save_path:
            # Save before plt.show() so the figure is written while still current.
            plt.savefig(save_path, dpi=300, bbox_inches="tight")
            print(f"‚úì Confusion matrix saved to: {save_path}")

        plt.show()

    def print_clinical_metrics(self, metrics):
        """
        Print clinical-focused metrics.

        Args:
            metrics: The metrics dict returned by ``evaluate``. It must contain
                "per_class" (with a "melanoma" entry), "accuracy" and
                "recall_macro".
        """
        print("\n" + "=" * 70)
        print("CLINICAL METRICS - RECALL FOCUSED")
        print("=" * 70)

        for class_name in self.class_names:
            class_metrics = metrics["per_class"][class_name]
            print(f"\n{class_name.upper()}:")
            print(f"  Recall (Sensitivity): {class_metrics['recall']:.4f}", end="")
            if class_name == "melanoma":
                print(" ‚≠ê CRITICAL METRIC!", end="")
            print()
            print(f"  Precision:            {class_metrics['precision']:.4f}")
            print(f"  F1-Score:             {class_metrics['f1']:.4f}")
            print(f"  Support:              {class_metrics['support']}")

        melanoma_recall = metrics["per_class"]["melanoma"]["recall"]
        print("\n" + "=" * 70)
        print(f"Overall Accuracy:         {metrics['accuracy']:.4f}")
        print(f"Macro Recall (avg):       {metrics['recall_macro']:.4f}")
        print(f"üî¥ MELANOMA RECALL:        {melanoma_recall:.4f} ‚Üê PRIMARY METRIC")
        print("=" * 70)

        # Clinical interpretation (thresholds checked from highest to lowest)
        if melanoma_recall >= 0.95:
            print("\n‚úÖ EXCELLENT: Melanoma recall ‚â•95% - Clinically acceptable")
        elif melanoma_recall >= 0.90:
            print("\n‚úì GOOD: Melanoma recall ‚â•90% - Acceptable with monitoring")
        elif melanoma_recall >= 0.85:
            print("\n‚ö† WARNING: Melanoma recall <90% - Needs improvement")
        else:
            print("\n‚ùå CRITICAL: Melanoma recall <85% - NOT clinically safe")


print("‚úì MelanomaDetectionPipeline class defined")

## 5. Initialize Pipeline

Load the final model and create the pipeline instance.

In [None]:
# Build the pipeline from the notebook-level configuration and the selected device.
pipeline = MelanomaDetectionPipeline(MODEL_CONFIG, device=device)

# The model takes square inputs, so one side length describes both dimensions.
input_side = MODEL_CONFIG["input_size"]
print("\n‚úì Pipeline ready for predictions!")
print(f"  Model: {MODEL_CONFIG['model_name']}")
print(f"  Device: {device}")
print(f"  Input size: {input_side}x{input_side}")

## 6. Example: Single Image Prediction

Test the pipeline with a single image.

In [None]:
# Example: run the pipeline on one image from the dataset.
example_image = "data/ham10000/ds/img/ISIC_0024353.jpg"

if not os.path.exists(example_image):
    # Nothing to predict; tell the reader how to proceed.
    print(f"Example image not found: {example_image}")
    print("Please update the path to an existing image in your dataset.")
else:
    result = pipeline.predict_single(example_image, show_image=True, check_annotation=True)

    print("\nDetailed Results:")
    print(f"  Image: {result['image_path']}")
    print(f"  Prediction: {result['prediction']}")
    print(f"  Confidence: {result['probability']:.2%}")
    print("\n  All Probabilities:")
    for class_name in result["all_probabilities"]:
        prob = result["all_probabilities"][class_name]
        print(f"    {class_name}: {prob:.2%}")

    # Ground truth is only present when an annotation file was found.
    if "true_group" in result:
        verdict_mark = "‚úì" if result["correct"] else "‚úó"
        print(f"\n  Ground Truth: {result['true_label']} ({result['true_group']})")
        print(f"  Correct: {verdict_mark}")

## 7. Batch Prediction on Multiple Images

Process multiple images and export results.

In [None]:
# Get a sample of images for batch prediction
image_dir = "data/ham10000/ds/img"

if os.path.exists(image_dir):
    # Get first 50 images as a sample. os.listdir returns names in arbitrary
    # order, so sort them to make the sample the same on every run and machine.
    all_images = [os.path.join(image_dir, f) for f in sorted(os.listdir(image_dir)) if f.endswith(".jpg")]
    sample_images = all_images[:50]

    print(f"Processing {len(sample_images)} sample images...")

    # Batch prediction
    batch_results = pipeline.predict_batch(sample_images, show_progress=True)

    # Export results
    results_df = pipeline.export_results(batch_results, "sample_predictions.csv")

    # Display summary
    print("\n" + "=" * 70)
    print("BATCH PREDICTION SUMMARY")
    print("=" * 70)

    if results_df.empty:
        # No images, or every image failed: the columns used below may not
        # exist, so stop here instead of raising KeyError.
        print("\nNo successful predictions to summarize.")
    else:
        # Count predictions by class
        pred_counts = results_df["prediction"].value_counts()
        print("\nPredictions by Class:")
        for class_name, count in pred_counts.items():
            print(f"  {class_name}: {count} ({count/len(results_df)*100:.1f}%)")

        # Accuracy if ground truth available
        if "correct" in results_df.columns:
            accuracy = results_df["correct"].mean()
            print(f"\nAccuracy on sample: {accuracy:.2%}")

            # Per-class accuracy
            print("\nPer-Class Performance:")
            for class_name in MODEL_CONFIG["class_names"]:
                class_df = results_df[results_df["true_group"] == class_name]
                if len(class_df) > 0:
                    class_acc = class_df["correct"].mean()
                    print(f"  {class_name}: {class_acc:.2%} ({len(class_df)} samples)")

        # Display first few results (only the columns that were actually exported)
        print("\nFirst 10 predictions:")
        display_cols = ["image_path", "prediction", "probability", "true_group", "correct"]
        display_cols = [col for col in display_cols if col in results_df.columns]
        print(results_df[display_cols].head(10).to_string(index=False))
else:
    print(f"Image directory not found: {image_dir}")

## 8. Full Test Set Evaluation

Evaluate the model on the complete test set with comprehensive metrics.

In [None]:
# Load test dataset
from torch.utils.data import Dataset, DataLoader


class HAM10000Dataset(Dataset):
    """
    Dataset class for HAM10000.

    Each item is ``(image, label)``, where ``label`` is the index of the
    image's grouped class in ``MODEL_CONFIG["class_names"]``.

    Labels are resolved once in ``__init__``. Files whose annotation cannot be
    read still default to label 0, but they are listed in
    ``self.unlabeled_files`` and a warning is printed, so the fallback no
    longer distorts evaluation metrics silently.
    """

    def __init__(self, image_dir, ann_dir, image_files, transform=None):
        """
        Args:
            image_dir: Directory containing the image files.
            ann_dir: Directory containing "<image file name>.json" annotations.
            image_files: Sequence of image file names (not full paths).
            transform: Optional callable applied to each loaded PIL image.
        """
        self.image_dir = image_dir
        self.ann_dir = ann_dir
        self.image_files = image_files
        self.transform = transform

        self.group_map = MODEL_CONFIG["group_map"]
        self.group_to_idx = {name: i for i, name in enumerate(MODEL_CONFIG["class_names"])}

        # Resolve labels in the main process so annotation problems are visible
        # here rather than hidden inside DataLoader worker processes.
        self.unlabeled_files = []
        self.labels = [self._resolve_label(img_name) for img_name in self.image_files]
        if self.unlabeled_files:
            print(
                f"WARNING: {len(self.unlabeled_files)} of {len(self.labels)} annotations could not be read; "
                "those samples default to label 0 (benign) and will skew evaluation metrics. "
                "See the dataset's `unlabeled_files` attribute."
            )

    def _resolve_label(self, img_name):
        """Return the class index for one image file, or 0 when its annotation is unusable."""
        ann_path = os.path.join(self.ann_dir, img_name + ".json")
        try:
            with open(ann_path, "r") as f:
                ann = json.load(f)
            original_label = ann["objects"][0]["classTitle"]
            group_label = self.group_map[original_label]
            return self.group_to_idx[group_label]
        except (OSError, ValueError, KeyError, IndexError, TypeError):
            # Missing file, invalid JSON, or a label outside the group map.
            self.unlabeled_files.append(img_name)
            return 0  # Default to benign

    def __len__(self):
        """Number of image files in the dataset."""
        return len(self.image_files)

    def __getitem__(self, idx):
        """Load image ``idx``, apply the transform if any, and return ``(image, label)``."""
        img_name = self.image_files[idx]
        img_path = os.path.join(self.image_dir, img_name)

        # Load image; convert() copies the pixels so the file can be closed right away
        with Image.open(img_path) as handle:
            image = handle.convert("RGB")

        label = self.labels[idx]

        if self.transform:
            image = self.transform(image)

        return image, label


# Dataset locations. They are defined before first use so this cell does not
# depend on `image_dir` having been set by the batch-prediction cell.
image_dir = "data/ham10000/ds/img"
ann_dir = "data/ham10000/ds/ann"

# Load test files
if os.path.exists("test.csv"):
    test_df = pd.read_csv("test.csv")
    test_files = [os.path.basename(p) for p in test_df["image_path"].values]
    print(f"Loaded {len(test_files)} test files from test.csv")
else:
    # Fallback: use last 15% of images as test set
    print("test.csv not found, using fallback test set")
    # Sort first: os.listdir order is arbitrary, so "the last 15%" would
    # otherwise differ between runs and machines.
    all_files = sorted(f for f in os.listdir(image_dir) if f.endswith(".jpg"))
    n_test = int(len(all_files) * 0.15)
    # Slice from an explicit start index: `all_files[-0:]` would return the
    # whole list when 15% rounds down to zero.
    test_files = all_files[len(all_files) - n_test :]
    print(f"Using {len(test_files)} images as test set")

# Create test dataset and dataloader. shuffle=False keeps predictions aligned
# with the order of `test_files`, which later cells rely on.
# NOTE(review): num_workers > 0 with a Dataset class defined inside a notebook
# may fail on platforms that spawn worker processes - confirm on Windows/macOS.
test_dataset = HAM10000Dataset(image_dir, ann_dir, test_files, transform=pipeline.transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"‚úì Test dataset created with {len(test_dataset)} samples")

In [None]:
# Run the model over the whole test loader; keep the raw arrays for later cells.
print("Evaluating model on test set...")
metrics, preds, labels, probs = pipeline.evaluate(test_loader)

# Text report from scikit-learn, framed by a 70-character rule.
report_rule = "=" * 70
print("\n" + report_rule)
print("CLASSIFICATION REPORT")
print(report_rule)
print(metrics["classification_report"])

# Recall-focused per-class summary.
pipeline.print_clinical_metrics(metrics)

In [None]:
# Draw the test-set confusion matrix and also save it as a PNG.
pipeline.plot_confusion_matrix(
    metrics["confusion_matrix"],
    save_path="final_model_confusion_matrix.png",
)

## 9. Visualize Sample Predictions

Display predictions for a few test images to qualitatively assess model performance.

In [None]:
# Visualize predictions for sample images from each class
import random

# Get samples from each class
samples_per_class = 2

# Scan the candidate files once and bucket image paths by grouped label,
# rather than re-reading every annotation for each class.
class_examples = {class_name: [] for class_name in MODEL_CONFIG["class_names"]}
for img_file in test_files[:100]:  # Check first 100 for efficiency
    if all(len(found) >= samples_per_class for found in class_examples.values()):
        break  # every class already has enough examples

    ann_path = os.path.join(ann_dir, img_file + ".json")
    try:
        with open(ann_path, "r") as f:
            ann = json.load(f)
        original_label = ann["objects"][0]["classTitle"]
        group_label = MODEL_CONFIG["group_map"][original_label]
    except (OSError, ValueError, KeyError, IndexError, TypeError):
        # Missing or malformed annotation: skip the file. Only these expected
        # errors are caught, so KeyboardInterrupt and real bugs still surface.
        continue

    found = class_examples.get(group_label)
    if found is not None and len(found) < samples_per_class:
        found.append(os.path.join(image_dir, img_file))

for class_name in MODEL_CONFIG["class_names"]:
    print(f"\n{'='*70}")
    print(f"Examples of {class_name.upper()} lesions:")
    print("=" * 70)

    # Predict and visualize
    for img_path in class_examples[class_name]:
        result = pipeline.predict_single(img_path, show_image=True, check_annotation=True)

## 10. Error Analysis

Analyze misclassified samples to understand model limitations.

In [None]:
# Positions in the test set where the predicted class differs from the true class.
misclassified_indices = np.where(preds != labels)[0]
n_wrong = len(misclassified_indices)
n_total = len(labels)

print(f"Total misclassifications: {n_wrong} out of {n_total} ({n_wrong/n_total*100:.1f}%)")

# Break the errors down by (true class -> predicted class) pair.
section_rule = "=" * 70
print("\n" + section_rule)
print("MISCLASSIFICATION PATTERNS")
print(section_rule)

for true_idx, true_name in enumerate(MODEL_CONFIG["class_names"]):
    is_true_class = labels == true_idx
    for pred_idx, pred_name in enumerate(MODEL_CONFIG["class_names"]):
        if pred_idx == true_idx:
            continue  # the diagonal holds correct predictions

        # Count this type of error
        error_count = np.sum(is_true_class & (preds == pred_idx))
        if error_count > 0:
            total_true = np.sum(is_true_class)
            print(
                f"\n{true_name} ‚Üí {pred_name}: {error_count} errors ({error_count/total_true*100:.1f}% of {true_name})"
            )

# Missed melanomas are the most serious error, so show a few in detail.
print("\n" + section_rule)
print("CRITICAL MISCLASSIFICATIONS (Melanoma Cases)")
print(section_rule)

melanoma_idx = MODEL_CONFIG["class_names"].index("melanoma")
melanoma_misclassified = np.where((labels == melanoma_idx) & (preds != melanoma_idx))[0]

if len(melanoma_misclassified) == 0:
    print("\n‚úÖ No melanoma cases were misclassified!")
else:
    print(f"\nFound {len(melanoma_misclassified)} melanoma cases that were misclassified")
    print("Showing first 3 examples:\n")

    for i, idx in enumerate(melanoma_misclassified[:3]):
        # The loader was built with shuffle=False, so index idx of the
        # predictions corresponds to test_files[idx].
        img_file = test_files[idx]
        img_path = os.path.join(image_dir, img_file)
        predicted_idx = preds[idx]

        print(f"\nExample {i+1}:")
        print("  True: melanoma")
        print(f"  Predicted: {MODEL_CONFIG['class_names'][predicted_idx]}")
        print(f"  Confidence: {probs[idx][predicted_idx]:.2%}")
        print(f"  Melanoma probability: {probs[idx][melanoma_idx]:.2%}")

        # Visualize
        result = pipeline.predict_single(img_path, show_image=True, check_annotation=True)

## 11. Export Full Test Results

Save all test predictions to a CSV file for further analysis.

In [None]:
# Create comprehensive results dataframe: one row per test image, combining the
# evaluation outputs with the original (ungrouped) annotation label.
test_results = []

for img_file, pred, label, prob in zip(test_files, preds, labels, probs):
    img_path = os.path.join(image_dir, img_file)

    # Get annotation
    ann_path = os.path.join(ann_dir, img_file + ".json")
    try:
        with open(ann_path, "r") as f:
            ann = json.load(f)
        true_label = ann["objects"][0]["classTitle"]
    except (OSError, ValueError, KeyError, IndexError, TypeError):
        # Missing or malformed annotation. Only these expected errors are
        # caught, so interrupts and real bugs are not hidden.
        true_label = "unknown"

    result = {
        "image_file": img_file,
        "image_path": img_path,
        "true_label": true_label,
        "true_group": MODEL_CONFIG["class_names"][label],
        "predicted_group": MODEL_CONFIG["class_names"][pred],
        "correct": (pred == label),
        "confidence": float(prob[pred]),
        "prob_benign": float(prob[0]),
        "prob_suspicious": float(prob[1]),
        "prob_melanoma": float(prob[2]),
    }
    test_results.append(result)

# Convert to DataFrame
test_results_df = pd.DataFrame(test_results)

# Save to CSV
output_path = "final_model_test_results.csv"
test_results_df.to_csv(output_path, index=False)

print(f"‚úì Test results saved to: {output_path}")
print(f"  Total samples: {len(test_results_df)}")
print(f"  Columns: {', '.join(test_results_df.columns)}")

# Display summary statistics
print("\n" + "=" * 70)
print("TEST RESULTS SUMMARY")
print("=" * 70)

print(f"\nOverall Accuracy: {test_results_df['correct'].mean():.2%}")

print("\nPrediction Distribution:")
print(test_results_df["predicted_group"].value_counts())

print("\nTrue Label Distribution:")
print(test_results_df["true_group"].value_counts())

# Mean confidence over the samples the model assigned to each class
print("\nAverage Confidence by Class:")
for class_name in MODEL_CONFIG["class_names"]:
    class_df = test_results_df[test_results_df["predicted_group"] == class_name]
    if len(class_df) > 0:
        avg_conf = class_df["confidence"].mean()
        print(f"  {class_name}: {avg_conf:.2%}")

# Display first few rows
print("\nFirst 10 test results:")
display_cols = ["image_file", "true_group", "predicted_group", "confidence", "correct"]
print(test_results_df[display_cols].head(10).to_string(index=False))

## 12. Pipeline Summary and Usage Guide

Complete guide for using this pipeline in production.

### Quick Reference Guide

#### 1. Initialize Pipeline
```python
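# Run the notebook cells above first so that `MelanomaDetectionPipeline`,
# `create_model` and `MODEL_CONFIG` are defined in the kernel.

config = {
    "weights_path": "final_model/melanoma_model_improved_weights.pth",
    "model_name": "resnet18",  # or your model architecture
    "input_size": 224,
    "num_classes": 3,
    "class_names": ["benign", "suspicious", "melanoma"],
    "group_map": MODEL_CONFIG["group_map"],  # required: maps annotation labels to class_names
}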

pipeline = MelanomaDetectionPipeline(config)
```

#### 2. Single Image Prediction
```python
result = pipeline.predict_single("path/to/image.jpg", show_image=True)
print(f"Prediction: {result['prediction']}")
print(f"Confidence: {result['probability']:.2%}")
```

#### 3. Batch Prediction
```python
image_paths = ["image1.jpg", "image2.jpg", "image3.jpg"]
results = pipeline.predict_batch(image_paths)
df = pipeline.export_results(results, "predictions.csv")
```

#### 4. Model Evaluation
```python
metrics, preds, labels, probs = pipeline.evaluate(test_loader)
pipeline.print_clinical_metrics(metrics)
pipeline.plot_confusion_matrix(metrics['confusion_matrix'])
```

### Key Features

✅ **Production Ready**: Robust error handling and logging  
✅ **Clinical Focus**: Emphasis on melanoma recall (sensitivity)  
✅ **Visualization**: Built-in plotting for predictions and metrics  
✅ **Export Capability**: Save results to CSV for analysis  
✅ **Batch Processing**: Efficient processing of multiple images  
✅ **Comprehensive Metrics**: Accuracy, precision, recall, F1, confusion matrix

### Model Performance Summary

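*The placeholders below are templates: fill them in with the values reported by the evaluation and final summary cells.*

**Model**: {model_name}  
**Architecture**: {architecture_details}  
**Test Accuracy**: {test_accuracy}  
**Melanoma Recall**: {melanoma_recall} ⭐ (Most Important)  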

### Clinical Interpretation

- **Benign**: Low-risk lesions, no immediate action needed
- **Suspicious**: Moderate-risk, recommend clinical examination
- **Melanoma**: High-risk, urgent medical attention required

**Note**: This model is designed to prioritize sensitivity for melanoma detection, meaning it may flag suspicious cases for further examination rather than miss dangerous lesions. This is the clinically appropriate approach.

### Next Steps for Deployment

1. **API Development**: Wrap pipeline in FastAPI/Flask REST API
2. **Model Monitoring**: Track prediction distributions in production
3. **ONNX Export**: Convert model for cross-platform deployment
4. **Mobile Optimization**: Quantize model for mobile devices
5. **Regulatory Compliance**: Prepare documentation for FDA/CE marking

### Files Generated

- `final_model_test_results.csv`: Complete test set predictions
- `final_model_confusion_matrix.png`: Confusion matrix visualization
- `sample_predictions.csv`: Sample batch predictions

### Support and Documentation

For questions or issues:
- Check model training notebook: `6_model_improvement.ipynb`
- Review hyperparameter optimization: `optuna_trials_report.txt`
- Consult model configuration: `model_results_improved.json`

In [None]:
# Closing recap of the configuration and, when evaluation ran, the headline metrics.
summary_rule = "=" * 70
print(summary_rule, "MELANOMA DETECTION PIPELINE - FINAL SUMMARY", summary_rule, sep="\n")

class_list = ", ".join(MODEL_CONFIG["class_names"])
print(f"\n‚úì Model: {MODEL_CONFIG['model_name']}")
print(f"‚úì Weights: {MODEL_CONFIG['weights_path']}")
print(f"‚úì Device: {device}")
print(f"‚úì Classes: {class_list}")

# `metrics` only exists if the evaluation cell was executed in this session.
if "metrics" in locals():
    melanoma_recall_value = metrics["per_class"]["melanoma"]["recall"]
    print(f"\n‚úì Test Accuracy: {metrics['accuracy']:.2%}")
    print(f"‚úì Melanoma Recall: {melanoma_recall_value:.2%} ‚≠ê")
    print(f"‚úì Test Samples: {len(test_dataset)}")

print("\n‚úì Pipeline is ready for production use!")
print("\n" + summary_rule)