## 1. Setup & Load Model

In [None]:
import os
import torch
import cv2
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, roc_auc_score, confusion_matrix
from anomalib.models import Patchcore
import warnings
warnings.filterwarnings('ignore')

# Run on the GPU when torch reports one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Device: {DEVICE}")

# Input and output locations, all relative to the working directory.
CHECKPOINT_DIR = Path("checkpoints")
TEST_GOOD = Path("dataset", "test", "good")
TEST_DEFECT = Path("dataset", "test", "defect")
RESULTS_DIR = Path("evaluation_results")
RESULTS_DIR.mkdir(exist_ok=True)  # re-running the cell must not fail

# Restore the trained checkpoint, move it to DEVICE and switch to eval mode.
checkpoint_path = CHECKPOINT_DIR / "patchcore_trained.ckpt"
model = Patchcore.load_from_checkpoint(checkpoint_path).to(DEVICE)
model.eval()

print("‚úì Model loaded successfully")

## 2. Prepare Test Data

In [None]:
from torchvision import transforms
from PIL import Image

# Evaluation-time preprocessing: resize to 224x224, convert to a tensor, then
# normalise each channel with the mean/std below.
# NOTE(review): intended to be the same transform as training - confirm
# against the training notebook.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
]
test_transform = transforms.Compose(preprocessing_steps)

def load_test_images(folder_path):
    """
    Load and preprocess every PNG/JPEG image found directly inside a folder.

    Files are visited in sorted path order so the returned lists (and any
    scores later aligned with them) are reproducible between runs. Extension
    matching is case-insensitive, so ``.PNG`` / ``.JPG`` files are included.
    Each image is converted to RGB and passed through the module-level
    ``test_transform``.

    Args:
        folder_path: Directory to scan (str or Path). Sub-directories are not
            searched.

    Returns:
        A tuple ``(images, paths)`` where ``images[i]`` is the transformed
        image loaded from ``paths[i]``. Both lists are empty when the folder
        contains no matching files.

    Files that fail to open or transform are reported with a printed message
    and skipped; they do not abort the load.
    """
    allowed_suffixes = {'.png', '.jpg', '.jpeg'}
    candidates = sorted(
        entry for entry in Path(folder_path).glob('*')
        if entry.suffix.lower() in allowed_suffixes
    )

    images = []
    paths = []
    for img_path in candidates:
        try:
            # The context manager releases the file handle once convert()
            # has produced an independent in-memory copy of the pixels.
            with Image.open(img_path) as img:
                img_tensor = test_transform(img.convert('RGB'))
        except Exception as e:
            # Best effort: one unreadable file should not stop the evaluation.
            print(f"Error loading {img_path}: {e}")
        else:
            images.append(img_tensor)
            paths.append(img_path)

    return images, paths

# Read both test splits from disk and report how many images each one holds.
print("Loading test images...")
good_images, good_paths = load_test_images(TEST_GOOD)
defect_images, defect_paths = load_test_images(TEST_DEFECT)

num_good, num_defect = len(good_images), len(defect_images)
print(f"‚úì Good images: {num_good}")
print(f"‚úì Defect images: {num_defect}")
print(f"‚úì Total test images: {num_good + num_defect}")

## 3. Generate Anomaly Scores

In [None]:
def _extract_anomaly_score(output):
    """Reduce a single model output to a plain Python float.

    Accepted shapes of ``output``:
      * a tensor holding exactly one element,
      * a dict containing one of the known score keys,
      * an object exposing one of the known score names as an attribute,
      * anything ``float()`` can convert.

    Raises:
        ValueError: if ``output`` is a dict without any known score key.
            Defaulting to 0.0 here would silently corrupt every metric
            computed from the scores.
    """
    # NOTE(review): which of these names the installed anomalib version
    # actually emits is not visible from this notebook - confirm.
    score_names = ('anomaly_score', 'score', 'pred_score', 'pred_scores')

    if isinstance(output, torch.Tensor):
        value = output
    elif isinstance(output, dict):
        for name in score_names:
            if name in output:
                value = output[name]
                break
        else:
            raise ValueError(
                f"Model output has none of the score keys {score_names}; "
                f"available keys: {sorted(map(str, output))}"
            )
    else:
        value = output
        for name in score_names:
            candidate = getattr(output, name, None)
            if candidate is not None:
                value = candidate
                break

    if isinstance(value, torch.Tensor):
        # .item() raises when the tensor holds more than one element, which
        # is preferable to guessing a reduction.
        return float(value.item())
    return float(value)


def predict_anomaly_score(model, images, device):
    """
    Generate one anomaly score per image (higher = more anomalous).

    Each image is scored individually as a batch of size one. If ``model``
    has a callable ``predict`` attribute it is used; otherwise ``model``
    itself is called with the batch.

    Args:
        model: Object used for inference (see above).
        images: Iterable of image tensors; each gets a leading batch
            dimension added before inference.
        device: Device the batch is moved to before inference.

    Returns:
        1-D float ``np.ndarray`` with one score per input image (empty when
        ``images`` is empty).

    Raises:
        ValueError: if the model returns a dict without a recognised score key.
    """
    infer = getattr(model, 'predict', None)
    if not callable(infer):
        infer = model

    scores = []
    with torch.no_grad():
        for img in images:
            img_batch = img.unsqueeze(0).to(device)
            scores.append(_extract_anomaly_score(infer(img_batch)))

    return np.array(scores, dtype=float)

# Score both test splits, then print summary statistics for each.
print("Generating anomaly scores...")
good_scores = predict_anomaly_score(model, good_images, DEVICE)
defect_scores = predict_anomaly_score(model, defect_images, DEVICE)

for group_name, group_scores in (("Good", good_scores), ("Defect", defect_scores)):
    print(f"‚úì {group_name} images anomaly scores generated")
    print(f"  Mean: {group_scores.mean():.4f}, Std: {group_scores.std():.4f}")
    print(f"  Min: {group_scores.min():.4f}, Max: {group_scores.max():.4f}")

## 4. ROC Curve & Metrics

In [None]:
# Combine labels: 0 = good, 1 = defect
y_true = np.concatenate([np.zeros(len(good_scores)), np.ones(len(defect_scores))])
y_scores = np.concatenate([good_scores, defect_scores])

# ROC analysis needs both classes. Warnings are silenced globally in this
# notebook, so a missing class would otherwise go unnoticed.
if len(good_scores) == 0 or len(defect_scores) == 0:
    raise ValueError(
        "ROC evaluation needs at least one good and one defect image "
        f"(got {len(good_scores)} good, {len(defect_scores)} defect)"
    )

# Calculate ROC curve
fpr, tpr, thresholds = roc_curve(y_true, y_scores)
roc_auc = auc(fpr, tpr)

# Find optimal threshold (Youden's index).
# Index 0 of roc_curve's output is a sentinel point at which nothing is
# predicted positive; its threshold is not a real score (infinite in recent
# scikit-learn releases), so it is excluded from the search to keep the chosen
# threshold usable for prediction, plotting and JSON export.
youden_index = tpr - fpr
optimal_idx = int(np.argmax(youden_index[1:])) + 1
optimal_threshold = thresholds[optimal_idx]

print("="*50)
print("EVALUATION METRICS")
print("="*50)
print(f"\nROC-AUC Score: {roc_auc:.4f}")
print(f"Optimal Threshold: {optimal_threshold:.4f}")
print(f"Youden Index: {youden_index[optimal_idx]:.4f}")
print(f"\nAt optimal threshold:")
print(f"  True Positive Rate: {tpr[optimal_idx]:.4f}")
print(f"  False Positive Rate: {fpr[optimal_idx]:.4f}")

# Confusion matrix. labels=[0, 1] pins the matrix to 2x2 so the four-way
# unpacking below cannot fail when a class is absent from the predictions.
y_pred = (y_scores >= optimal_threshold).astype(int)
tn, fp, fn, tp = confusion_matrix(y_true, y_pred, labels=[0, 1]).ravel()

print(f"\nConfusion Matrix:")
print(f"  True Negatives (Good detected as good): {tn}")
print(f"  False Positives (Good detected as defect): {fp}")
print(f"  False Negatives (Defect detected as good): {fn}")
print(f"  True Positives (Defect detected as defect): {tp}")

# Guard the ratios against an empty denominator.
sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0
specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
print(f"\nSensitivity (Recall): {sensitivity:.4f}")
print(f"Specificity: {specificity:.4f}")

## 5. ROC Curve Plot

In [None]:
# Draw the ROC curve with the chance diagonal and the selected operating point.
roc_fig, roc_ax = plt.subplots(figsize=(10, 7))
roc_ax.plot(
    fpr, tpr,
    color='darkorange', lw=2,
    label=f'ROC curve (AUC = {roc_auc:.4f})',
)
roc_ax.plot(
    [0, 1], [0, 1],
    color='navy', lw=2, linestyle='--',
    label='Random Classifier',
)
roc_ax.scatter(
    fpr[optimal_idx], tpr[optimal_idx],
    color='red', s=100, marker='o',
    label=f'Optimal Threshold = {optimal_threshold:.4f}',
)

roc_ax.set_xlim([0.0, 1.0])
roc_ax.set_ylim([0.0, 1.05])
roc_ax.set_xlabel('False Positive Rate')
roc_ax.set_ylabel('True Positive Rate')
roc_ax.set_title('ROC Curve - Patchcore Anomaly Detection')
roc_ax.legend(loc="lower right")
roc_ax.grid(alpha=0.3)
roc_fig.tight_layout()

# Write the figure to disk before displaying it.
roc_path = RESULTS_DIR / "roc_curve.png"
roc_fig.savefig(roc_path, dpi=150)
print(f"‚úì ROC curve saved to: {roc_path}")
plt.show()

## 6. Score Distribution Plot

In [None]:
# Plot score distributions: overlaid histograms (left) and boxplots (right),
# each marked with the selected decision threshold.
dist_fig, (hist_ax, box_ax) = plt.subplots(1, 2, figsize=(12, 5))

# Both histograms share one set of bin edges so their bar heights are directly
# comparable; separate automatic binning would give each class its own widths.
shared_bins = np.histogram_bin_edges(
    np.concatenate([good_scores, defect_scores]), bins=20
)
hist_ax.hist(good_scores, bins=shared_bins, alpha=0.7, label='Good', color='green', edgecolor='black')
hist_ax.hist(defect_scores, bins=shared_bins, alpha=0.7, label='Defect', color='red', edgecolor='black')
hist_ax.axvline(optimal_threshold, color='blue', linestyle='--', linewidth=2, label=f'Threshold = {optimal_threshold:.4f}')
hist_ax.set_xlabel('Anomaly Score')
hist_ax.set_ylabel('Frequency')
hist_ax.set_title('Anomaly Score Distribution')
hist_ax.legend()
hist_ax.grid(alpha=0.3)

# Tick labels are set separately rather than through boxplot's `labels`
# keyword, which is deprecated in newer Matplotlib releases.
box_ax.boxplot([good_scores, defect_scores])
box_ax.set_xticklabels(['Good', 'Defect'])
box_ax.axhline(optimal_threshold, color='blue', linestyle='--', linewidth=2)
box_ax.set_ylabel('Anomaly Score')
box_ax.set_title('Score Distribution (Boxplot)')
box_ax.grid(alpha=0.3)

dist_fig.tight_layout()

# Save
dist_path = RESULTS_DIR / "score_distribution.png"
dist_fig.savefig(dist_path, dpi=150)
print(f"‚úì Distribution plot saved to: {dist_path}")
plt.show()

## 7. Sample Visualizations

In [None]:
# Visualize some examples with their scores: one row of good images and one
# row of defect images. The grid is sized to exactly what is drawn; the
# previous 3x4 grid indexed a non-existent fourth row when hiding spare axes.
n_samples = 3
fig, axes = plt.subplots(2, n_samples, figsize=(4 * n_samples, 8), squeeze=False)
fig.suptitle('Patchcore Anomaly Detection Examples', fontsize=14, fontweight='bold')

# Start with every axis hidden so a row stays blank if its class has no images.
for ax in axes.ravel():
    ax.axis('off')

sample_rows = [
    ('Good', 'green', good_paths, good_scores),
    ('Defect', 'red', defect_paths, defect_scores),
]
for row, (label, colour, sample_paths, sample_scores) in enumerate(sample_rows):
    if len(sample_paths) == 0:
        # Nothing to show; also avoids a modulo-by-zero below.
        continue
    for col in range(n_samples):
        # Take every second image, wrapping around for small test sets.
        idx = (col * 2) % len(sample_paths)
        # Close the file handle as soon as the RGB copy exists.
        with Image.open(sample_paths[idx]) as handle:
            img = handle.convert('RGB')
        score = sample_scores[idx]

        axes[row, col].imshow(img)
        axes[row, col].set_title(f'{label} (Score: {score:.3f})', color=colour, fontweight='bold')

plt.tight_layout()

# Save
samples_path = RESULTS_DIR / "sample_detections.png"
plt.savefig(samples_path, dpi=150)
print(f"‚úì Samples saved to: {samples_path}")
plt.show()

## 8. Save Evaluation Results

In [None]:
import json

# Assemble the summary section by section; values are cast to built-in
# int/float so the json module can serialise them.
metrics_summary = {
    "roc_auc": float(roc_auc),
    "optimal_threshold": float(optimal_threshold),
    "sensitivity": float(sensitivity),
    "specificity": float(specificity),
    "true_positives": int(tp),
    "true_negatives": int(tn),
    "false_positives": int(fp),
    "false_negatives": int(fn),
}
score_summary = {
    "good_mean": float(good_scores.mean()),
    "good_std": float(good_scores.std()),
    "defect_mean": float(defect_scores.mean()),
    "defect_std": float(defect_scores.std()),
}
size_summary = {
    "good_images": len(good_scores),
    "defect_images": len(defect_scores),
}
results = {
    "model": "Patchcore (wide_resnet50_2)",
    "metrics": metrics_summary,
    "score_statistics": score_summary,
    "test_set_sizes": size_summary,
}

# Write the summary next to the saved figures.
results_path = RESULTS_DIR / "evaluation_results.json"
results_path.write_text(json.dumps(results, indent=2))

print(f"‚úì Results saved to: {results_path}")

## 9. Summary

In [None]:
# Final report: headline metrics followed by the location of every saved file.
banner = "=" * 60
summary_lines = [
    "\n" + banner,
    "EVALUATION COMPLETE",
    banner,
    "\nüìä Results:",
    f"  ROC-AUC: {roc_auc:.4f}",
    f"  Optimal Threshold: {optimal_threshold:.4f}",
    f"  Sensitivity: {sensitivity:.4f}",
    f"  Specificity: {specificity:.4f}",
    "\nüìÅ Saved outputs:",
    f"  - ROC curve: {roc_path}",
    f"  - Score distribution: {dist_path}",
    f"  - Sample detections: {samples_path}",
    f"  - Metrics JSON: {results_path}",
    "\nüöÄ Next: Run 04_anomalib_inference.ipynb for inference on new images",
    banner,
]
print("\n".join(summary_lines))