# MVTec-AD Unified Evaluation Notebook

This notebook provides a unified approach to evaluate multiple anomaly detection models (GLASS, DDAD, DiffusionAD, Dinomaly) on the MVTec-AD dataset.

## Features:
- Automatic dataset download
- GPU-enabled training and evaluation
- Unified metrics computation
- Results visualization
- Statistical analysis

## 1. Setup Environment

In [None]:
# Check if running in Google Colab
# IN_COLAB is True when the `google.colab` module is already loaded in this
# interpreter (it is looked up in sys.modules, not imported here). Later
# cells read this flag to pick paths and worker counts.
import sys
IN_COLAB = 'google.colab' in sys.modules

if IN_COLAB:
    print("Running in Google Colab")
    # IPython shell escape: runs `nvidia-smi` in a subshell and shows its output.
    !nvidia-smi
else:
    print("Running in local environment")

In [None]:
# Clone the repository if in Colab
# Only runs on Colab; in a local environment nothing is cloned and the
# working directory is left unchanged.
# NOTE(review): re-running this cell after a successful run would issue the
# clone again from inside the checkout — consider guarding on the directory
# already existing. TODO confirm desired behavior.
if IN_COLAB:
    !git clone https://github.com/Christian-Beddawi/Deep-Learning-for-Industrial-Image-Anomaly-Detection.git
    # Subsequent relative paths (./scripts, configs/...) are resolved from here.
    %cd Deep-Learning-for-Industrial-Image-Anomaly-Detection
    !ls -la

## 2. Install Dependencies

In [None]:
# Install required packages
# Each line is an IPython shell escape invoking pip with -q (quiet output).
# The PyTorch packages are fetched from the cu118 wheel index given in the URL.
# NOTE(review): `%pip` is the IPython magic intended to install into the
# running kernel's environment; `!pip` uses whatever pip is first on PATH —
# confirm they are the same in the target environment.
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
!pip install -q scikit-learn scikit-image opencv-python-headless
!pip install -q tqdm pandas numpy matplotlib seaborn
!pip install -q pyyaml click

In [None]:
# Import necessary libraries
import os
import sys
import json
import yaml
import time
from pathlib import Path
from datetime import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torchvision
from torch.utils.data import DataLoader
from tqdm.notebook import tqdm

# Set style for plots
# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*";
# older releases only know the unprefixed name. Try both rather than
# raising on whichever one the installed version does not provide.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
else:
    # Neither name exists: keep Matplotlib's defaults instead of aborting.
    print("Warning: seaborn darkgrid style not available; using Matplotlib defaults")
sns.set_palette("husl")

# Check PyTorch and CUDA
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    # These details are only printed when a CUDA device is available.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU: {torch.cuda.get_device_name(0)}")

## 3. Download MVTec-AD Dataset

In [None]:
# Choose where the dataset lives, depending on the environment.
if not IN_COLAB:
    # Local runs keep the data next to the notebook.
    DATA_DIR = "./datasets"
else:
    # On Colab, mount Google Drive and store the data inside it.
    from google.colab import drive
    drive.mount('/content/drive')
    DATA_DIR = "/content/drive/MyDrive/mvtec_data"

print(f"Data directory: {DATA_DIR}")

In [None]:
# Download MVTec-AD dataset
# Make the repository's helper scripts importable. The membership guard keeps
# sys.path from accumulating duplicate entries when this cell is re-run.
if './scripts' not in sys.path:
    sys.path.append('./scripts')
from download_mvtec import download_mvtec_ad

# Fetch the dataset into DATA_DIR. force_download=False is expected to reuse
# an existing copy — see scripts/download_mvtec.py for the exact behavior.
dataset_path = download_mvtec_ad(data_dir=DATA_DIR, force_download=False)
print(f"\nDataset ready at: {dataset_path}")

## 4. Configure Evaluation Settings

In [None]:
# Read the shared YAML configuration from the repository.
config_path = "configs/unified_config.yaml"
with open(config_path, 'r') as f:
    config = yaml.safe_load(f)

# Point the configuration at the dataset location returned by the download step.
config['dataset']['path'] = str(dataset_path)

# Per-model switches: set a value to False to skip that model.
EVALUATE_MODELS = {
    'glass': True,
    'ddad': True,
    'diffusion_ad': True,
    'dinomaly': True
}

# Copy the switches into the config; models absent from the config are ignored.
for model_name, enabled in EVALUATE_MODELS.items():
    if model_name not in config['models']:
        continue
    config['models'][model_name]['enabled'] = enabled

# Evaluation settings: larger batches with CUDA, fewer workers on Colab.
eval_settings = config['evaluation']
eval_settings['batch_size'] = 16 if torch.cuda.is_available() else 4
eval_settings['num_workers'] = 2 if IN_COLAB else 4

enabled_models = [name for name, flag in EVALUATE_MODELS.items() if flag]

print("Configuration updated:")
print(f"  Dataset path: {config['dataset']['path']}")
print(f"  Batch size: {eval_settings['batch_size']}")
print(f"  Models to evaluate: {enabled_models}")

## 5. Run Unified Evaluation

In [None]:
# Standard-library helper for the temporary config file.
import tempfile

# Evaluation entry point provided by the repository.
from unified_evaluation import UnifiedEvaluator

# Write the in-memory config to a temporary YAML file. delete=False keeps the
# file on disk after the `with` block closes it, so the evaluator can be given
# its path; a later cell removes it.
temp_config_file = tempfile.NamedTemporaryFile(mode='w', suffix='.yaml', delete=False)
with temp_config_file as f:
    temp_config_path = f.name
    yaml.dump(config, stream=f)

# Build the evaluator from the temporary config file.
evaluator = UnifiedEvaluator(temp_config_path)

print("\nStarting evaluation...")
print("This may take a while depending on the number of models and GPU availability.")

In [None]:
# Run evaluation for all models and report the wall-clock time.
start_time = time.time()
try:
    results = evaluator.evaluate_all_models()
    total_time = time.time() - start_time
finally:
    # Clean up the temp config whether or not evaluation succeeded, so a
    # failed run does not leave the file behind. Tolerate a missing file so
    # re-running this cell does not fail on the cleanup step.
    # NOTE(review): if the evaluator re-reads the config file on each call,
    # re-run the previous cell before retrying — TODO confirm.
    try:
        os.unlink(temp_config_path)
    except FileNotFoundError:
        pass

print(f"\nTotal evaluation time: {total_time/60:.2f} minutes")

## 6. Analyze Results

In [None]:
# Build one summary row per model from its 'overall' metrics.
summary_data = []
for model_name, model_results in results.items():
    # Models without an 'overall' entry contribute no row.
    if 'overall' not in model_results:
        continue
    overall = model_results['overall']
    row = {'Model': model_name.upper()}
    # Column names drop the '_mean' suffix and are title-cased,
    # e.g. 'image_auroc_mean' -> 'Image Auroc'. Missing metrics are skipped.
    row.update({
        metric.replace('_mean', '').replace('_', ' ').title(): overall[metric]
        for metric in ('image_auroc_mean', 'pixel_auroc_mean', 'pro_score_mean')
        if metric in overall
    })
    summary_data.append(row)

df_summary = pd.DataFrame(summary_data).round(4)

print("\nEVALUATION SUMMARY")
print("="*50)
print(df_summary.to_string(index=False))

In [None]:
# Build one row per (model, category) pair from the per-category results.
detailed_data = []
for model_name, model_results in results.items():
    for category, cat_results in model_results.items():
        # Skip the aggregate entry, non-dict values, and categories that
        # recorded an error.
        if category == 'overall' or not isinstance(cat_results, dict) or 'error' in cat_results:
            continue
        row = {'Model': model_name.upper(), 'Category': category}
        # Title-cased column names, e.g. 'image_auroc' -> 'Image Auroc';
        # metrics absent for this category are left out of the row.
        row.update({
            metric.replace('_', ' ').title(): cat_results[metric]
            for metric in ('image_auroc', 'pixel_auroc', 'pro_score')
            if metric in cat_results
        })
        detailed_data.append(row)

df_detailed = pd.DataFrame(detailed_data)

# Show sample of detailed results
print("\nDETAILED RESULTS (first 10 rows)")
print("="*50)
print(df_detailed.head(10).to_string(index=False))

## 7. Visualize Results

In [None]:
# One bar chart per summary metric, comparing the models side by side.
if not df_summary.empty:
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))

    metrics_to_plot = ['Image Auroc', 'Pixel Auroc', 'Pro Score']

    # Each metric gets the panel at its own position; a metric that is not in
    # the summary leaves its panel untouched.
    for ax, metric in zip(axes, metrics_to_plot):
        if metric not in df_summary.columns:
            continue

        df_summary.plot(x='Model', y=metric, kind='bar', ax=ax, legend=False)
        ax.set_title(f'{metric} Comparison', fontsize=14, fontweight='bold')
        ax.set_xlabel('Model', fontsize=12)
        ax.set_ylabel(metric, fontsize=12)

        # AUROC panels are zoomed to [0.8, 1.0]; other metrics to [0.6, 1.0].
        if 'auroc' in metric.lower():
            y_limits = [0.8, 1.0]
        else:
            y_limits = [0.6, 1.0]
        ax.set_ylim(y_limits)
        ax.grid(axis='y', alpha=0.3)

        # Print each bar's value above it.
        for container in ax.containers:
            ax.bar_label(container, fmt='%.3f')

    plt.suptitle('Model Performance Comparison on MVTec-AD', fontsize=16, fontweight='bold', y=1.02)
    plt.tight_layout()
    plt.show()

In [None]:
# Plot performance by category as a Category x Model heatmap.
metric_to_show = 'Image Auroc'

# Check for the metric column before creating anything, so a missing metric
# does not leave an empty figure behind.
if not df_detailed.empty and metric_to_show in df_detailed.columns:
    # Pivot data for heatmap: rows are categories, columns are models.
    # Done before the figure is created so a pivot failure leaves no blank figure.
    pivot_data = df_detailed.pivot(index='Category', columns='Model', values=metric_to_show)

    fig, ax = plt.subplots(figsize=(15, 8))

    # Draw on the axes created above rather than on the implicit current axes.
    sns.heatmap(pivot_data, annot=True, fmt='.3f', cmap='YlOrRd',
                vmin=0.8, vmax=1.0, cbar_kws={'label': metric_to_show}, ax=ax)
    ax.set_title(f'{metric_to_show} Performance Across Categories', fontsize=14, fontweight='bold')
    ax.set_xlabel('Model', fontsize=12)
    ax.set_ylabel('Category', fontsize=12)
    ax.tick_params(axis='x', rotation=45)
    fig.tight_layout()
    plt.show()

In [None]:
# Plot category-wise performance distribution: one bar chart per model.
if not df_detailed.empty:
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    axes = axes.ravel()

    models = df_detailed['Model'].unique()
    # The 2x2 grid has room for four models; any further models are not plotted.
    shown_models = models[:len(axes)]

    # Row filtering never changes the columns, so this check is done once.
    has_metric = 'Image Auroc' in df_detailed.columns

    for idx, ax in enumerate(axes):
        # Hide panels with nothing to draw instead of leaving empty axes.
        if idx >= len(shown_models) or not has_metric:
            ax.set_visible(False)
            continue

        model = shown_models[idx]
        model_data = df_detailed[df_detailed['Model'] == model]

        model_data.plot(x='Category', y='Image Auroc', kind='bar', ax=ax, legend=False)
        ax.set_title(f'{model} Performance by Category', fontsize=12, fontweight='bold')
        ax.set_xlabel('Category', fontsize=10)
        ax.set_ylabel('Image AUROC', fontsize=10)
        ax.set_ylim([0.7, 1.0])
        ax.tick_params(axis='x', rotation=45, labelsize=8)
        ax.grid(axis='y', alpha=0.3)

    plt.suptitle('Category-wise Performance Distribution', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

## 8. Statistical Analysis

In [None]:
# Report the mean, standard deviation and an approximate 95% confidence
# interval for each overall metric of each model.
print("\nSTATISTICAL ANALYSIS")
print("="*50)

# Sample size used when no per-category entries can be counted.
DEFAULT_NUM_CATEGORIES = 15

for model_name, model_results in results.items():
    if 'overall' not in model_results:
        continue
    overall = model_results['overall']

    print(f"\n{model_name.upper()}:")
    for metric in ['image_auroc', 'pixel_auroc', 'pro_score']:
        mean_key = f"{metric}_mean"
        std_key = f"{metric}_std"

        if mean_key not in overall or std_key not in overall:
            continue

        mean_val = overall[mean_key]
        std_val = overall[std_key]

        # Count the categories that actually report this metric, using the
        # same filter as the detailed-results table (no 'overall' entry, no
        # errored categories), instead of always assuming 15.
        # NOTE(review): assumes the overall mean/std are taken across these
        # per-category values — confirm against UnifiedEvaluator.
        n_categories = sum(
            1
            for category, cat_results in model_results.items()
            if category != 'overall'
            and isinstance(cat_results, dict)
            and 'error' not in cat_results
            and metric in cat_results
        )
        if n_categories == 0:
            n_categories = DEFAULT_NUM_CATEGORIES

        # 95% confidence interval (normal approximation, z = 1.96)
        margin = 1.96 * std_val / np.sqrt(n_categories)
        ci_lower = mean_val - margin
        ci_upper = mean_val + margin

        print(f"  {metric}:")
        print(f"    Mean: {mean_val:.4f} ± {std_val:.4f}")
        print(f"    95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")

In [None]:
# Identify best performing model for each category by Image AUROC.
if not df_detailed.empty and 'Image Auroc' in df_detailed.columns:
    print("\nBEST MODEL PER CATEGORY")
    print("="*50)

    categories = df_detailed['Category'].unique()

    for category in categories:
        cat_data = df_detailed[df_detailed['Category'] == category]

        # Rows built without an 'image_auroc' value hold NaN in this column.
        # Drop them first: idxmax on an all-NaN column does not yield a
        # usable row label for the lookups below.
        scores = cat_data['Image Auroc'].dropna()
        if scores.empty:
            print(f"{category:15} -> {'N/A':10} (no valid AUROC)")
            continue

        best_idx = scores.idxmax()
        best_model = cat_data.loc[best_idx, 'Model']
        best_score = cat_data.loc[best_idx, 'Image Auroc']
        print(f"{category:15} -> {best_model:10} (AUROC: {best_score:.4f})")

## 9. Save Results

In [None]:
# Write the raw results and both tables under ./results, with a shared
# timestamp in every file name.
results_dir = Path("results")
results_dir.mkdir(exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Raw results as JSON; values json cannot encode natively go through float().
json_path = results_dir / f"mvtec_results_{timestamp}.json"
with json_path.open('w') as f:
    json.dump(results, f, indent=2, default=float)
print(f"Results saved to {json_path}")

# Per-model summary table (skipped when empty).
if not df_summary.empty:
    csv_path = results_dir / f"mvtec_summary_{timestamp}.csv"
    df_summary.to_csv(csv_path, index=False)
    print(f"Summary saved to {csv_path}")

# Per-category table (skipped when empty).
if not df_detailed.empty:
    csv_path = results_dir / f"mvtec_detailed_{timestamp}.csv"
    df_detailed.to_csv(csv_path, index=False)
    print(f"Detailed results saved to {csv_path}")

## 10. Generate LaTeX Table for Paper

In [None]:
# Render the summary table as LaTeX, print it, and save it next to the
# other result files.
if not df_summary.empty:
    print("\nLaTeX Table for Paper:")
    print("="*50)

    # Work on a copy so the summary frame itself is left untouched.
    latex_df = df_summary.copy()

    # First column (model name) left-aligned, every metric column centred.
    n_metric_columns = len(latex_df.columns) - 1
    latex_options = {
        'index': False,
        'column_format': 'l' + 'c' * n_metric_columns,
        'float_format': "%.3f",
        'caption': "Comparison of SOTA IAD models on MVTec-AD dataset",
        'label': "tab:mvtec_results",
    }
    latex_str = latex_df.to_latex(**latex_options)

    print(latex_str)

    # Save to file
    latex_path = results_dir / f"mvtec_table_{timestamp}.tex"
    with latex_path.open('w') as f:
        f.write(latex_str)
    print(f"\nLaTeX table saved to {latex_path}")

## Summary

When every cell above has run without errors, this notebook has:
1. Downloaded the MVTec-AD dataset
2. Configured the evaluation environment
3. Evaluated multiple anomaly detection models
4. Computed comprehensive metrics (Image AUROC, Pixel AUROC, PRO Score)
5. Performed statistical analysis
6. Generated visualizations
7. Saved results in multiple formats

The results are ready to be included in your paper revision!