Description: This notebook implements a defect detection system using a pre-trained CNN (ResNet50) on the MVTec AD dataset.

## Step 1: Setup and Imports

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from torchvision import transforms, datasets, models
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


## Step 2: Data Preparation
Assume you have preprocessed the MVTec AD dataset into `train/normal`, `train/defect`, `test/normal`, `test/defect`

In [3]:
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406],
                         [0.229, 0.224, 0.225])
])

In [4]:
from torch.utils.data import Dataset
from glob import glob
import os

class MVTecBinaryClassificationDataset(Dataset):
    def __init__(self, root_dir, item_name, split='train', transform=None):
        self.transform = transform
        self.samples = []
        self.labels = []

        base_path = os.path.join(root_dir, item_name, split)
        if split == 'train':
            good_images = glob(os.path.join(base_path, 'good', '*.png'))
            self.samples += good_images
            self.labels += [0] * len(good_images)
        elif split == 'test':
            for defect_type in os.listdir(base_path):
                defect_path = os.path.join(base_path, defect_type)
                images = glob(os.path.join(defect_path, '*.png'))
                label = 0 if defect_type == 'good' else 1
                self.samples += images
                self.labels += [label] * len(images)

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path = self.samples[idx]
        label = self.labels[idx]
        image = Image.open(img_path).convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image, label


In [5]:
item_name = 'bottle'  # Change this to the item you are evaluating

train_dataset = MVTecBinaryClassificationDataset(
    root_dir='mvtec_anomaly_detection',
    item_name=item_name,
    split='train',
    transform=transform
)

test_dataset = MVTecBinaryClassificationDataset(
    root_dir='mvtec_anomaly_detection',
    item_name=item_name,
    split='test',
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


## Step 3: Load and Fine-tune the Pre-trained Model

In [6]:
model = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\ankim/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth
100.0%


In [7]:
for param in model.parameters():
    param.requires_grad = False  # freeze base layers

In [8]:
# Replace final layer for binary classification
num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 2)

In [9]:
model = model.to(device)

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=0.001)

## Step 4: Training Loop

In [11]:
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f"Epoch {epoch+1}, Loss: {running_loss/len(dataloader):.4f}, Accuracy: {correct/total:.4f}")

In [12]:
train_model(model, train_loader, criterion, optimizer, epochs=5)

Epoch 1, Loss: 0.1127, Accuracy: 0.9665
Epoch 2, Loss: 0.0001, Accuracy: 1.0000
Epoch 3, Loss: 0.0000, Accuracy: 1.0000
Epoch 4, Loss: 0.0000, Accuracy: 1.0000
Epoch 5, Loss: 0.0000, Accuracy: 1.0000


## Step 5: Evaluation

In [13]:
def evaluate_model(model, dataloader):
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.numpy())

    print("Classification Report:")
    print(classification_report(all_labels, all_preds, target_names=test_dataset.classes))

    cm = confusion_matrix(all_labels, all_preds)
    sns.heatmap(cm, annot=True, fmt='d', xticklabels=test_dataset.classes, yticklabels=test_dataset.classes)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.show()

In [14]:
evaluate_model(model, test_loader)

Classification Report:


AttributeError: 'MVTecBinaryClassificationDataset' object has no attribute 'classes'

Step 6: Visualizing with Grad-CAM (Optional Advanced)