In [None]:
!pip install -q scikit-learn tqdm

import os
import torch
from torch.utils.data import DataLoader, Dataset
import numpy as np
from torchvision import transforms
from PIL import Image

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f' Using device: {device}')

 Using device: cuda


### Question1


In [None]:
# !unzip /content/drive/MyDrive/bottle-20250407T063408Z-001.zip
!rm -r /content/bottle/ground_truth/

rm: cannot remove '/content/bottle/ground_truth/': No such file or directory


In [None]:
# Number of images used

def count_images_in_folder(folder_path):
    return len([f for f in os.listdir(folder_path) if f.endswith(('.png', '.jpg', '.jpeg'))])

base_dir = 'bottle'
train_dir = os.path.join(base_dir, 'train', 'good')
test_dir = os.path.join(base_dir, 'test')
test_classes = ['broken_large', 'broken_small', 'contamination', 'good']

total_images = 0
train_count = count_images_in_folder(train_dir)
total_images += train_count

test_counts = {}
for cls in test_classes:
    cls_path = os.path.join(test_dir, cls)
    test_counts[cls] = count_images_in_folder(cls_path)
    total_images += test_counts[cls]

print(f"訓練集圖片數量: {train_count}")
print("測試集各類別圖片數量:")
for k, v in test_counts.items():
    print(f"  - {k}: {v} 張")
print(f"總圖片數量: {total_images}")

訓練集圖片數量: 209
測試集各類別圖片數量:
  - broken_large: 20 張
  - broken_small: 22 張
  - contamination: 21 張
  - good: 20 張
總圖片數量: 292


In [None]:
# Image dimensions
sample_img_path = os.path.join(train_dir, os.listdir(train_dir)[0])
img = Image.open(sample_img_path)
print(f"影像尺寸: {img.size} (width x height)")

影像尺寸: (900, 900) (width x height)


### Question2

Method 1: Baseline Deep SVDD（Simple CNN）


In [None]:
# training
import os
import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torch.utils.data import DataLoader
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report, roc_auc_score

# 1. 超參數
BATCH_SIZE = 32
IMAGE_SIZE = 128
EPOCHS = 20
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# 2. 資料增強 + 前處理
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5])
])

# 3. 載入訓練資料（只有 good）
train_dir = 'bottle/train/good'
train_dataset = datasets.ImageFolder(root=os.path.dirname(train_dir), transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# 4. CNN 特徵萃取模型
class SimpleCNN(nn.Module):
    def __init__(self, out_dim=128):
        super(SimpleCNN, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.AdaptiveAvgPool2d(1)
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128, out_dim)
        )
    def forward(self, x):
        x = self.features(x)
        x = self.fc(x)
        return x

model = SimpleCNN().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
criterion = nn.MSELoss()

# 5. 初始化中心點 c
def get_init_center(model, loader):
    model.eval()
    outputs = []
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(DEVICE)
            out = model(x)
            outputs.append(out)
    all_feats = torch.cat(outputs)
    return torch.mean(all_feats, dim=0)

center_c = get_init_center(model, train_loader).detach()

# 6. 訓練 Deep SVDD：最小化與中心 c 的距離
for epoch in range(EPOCHS):
    model.train()
    losses = []
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        loss = torch.mean(torch.sum((feat - center_c) ** 2, dim=1))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {np.mean(losses)}")


Epoch 1/20 - Loss: 0.00035729595141934363
Epoch 2/20 - Loss: 0.00013535481931674958
Epoch 3/20 - Loss: 7.424688711970313e-05
Epoch 4/20 - Loss: 4.010630722664895e-05
Epoch 5/20 - Loss: 2.2183275827306454e-05
Epoch 6/20 - Loss: 1.5340047996557716e-05
Epoch 7/20 - Loss: 1.106273781000969e-05
Epoch 8/20 - Loss: 9.03044747246895e-06
Epoch 9/20 - Loss: 8.225047232761945e-06
Epoch 10/20 - Loss: 7.1645557519722e-06
Epoch 11/20 - Loss: 6.6832679164820415e-06
Epoch 12/20 - Loss: 6.4873692541108796e-06
Epoch 13/20 - Loss: 6.498223813520911e-06
Epoch 14/20 - Loss: 7.000891595712996e-06
Epoch 15/20 - Loss: 7.488056066254753e-06
Epoch 16/20 - Loss: 6.67018931770664e-06
Epoch 17/20 - Loss: 5.9951876210107e-06
Epoch 18/20 - Loss: 6.028211893343334e-06
Epoch 19/20 - Loss: 6.1449779553056165e-06
Epoch 20/20 - Loss: 6.2518991269046505e-06


In [None]:
# validation
from PIL import Image
from sklearn.metrics import roc_auc_score, classification_report

# 1. 建立測試集資料
test_dir = 'bottle/test'
test_classes = ['good', 'broken_large', 'broken_small', 'contamination']

test_images = []
test_labels = []

for cls in test_classes:
    folder = os.path.join(test_dir, cls)
    for img_name in os.listdir(folder):
        if img_name.endswith(('.jpg', '.png', '.jpeg')):
            img_path = os.path.join(folder, img_name)
            test_images.append(img_path)
            test_labels.append(0 if cls == 'good' else 1)  # good = 0, abnormal = 1

# 2. 定義圖像前處理
def preprocess_image(path):
    image = Image.open(path).convert('RGB')
    image = transform(image)
    return image

# 3. 計算每張圖片的距離分數
model.eval()
scores = []

with torch.no_grad():
    for path in tqdm(test_images):
        img = preprocess_image(path).unsqueeze(0).to(DEVICE)
        feat = model(img)
        score = torch.sum((feat - center_c) ** 2, dim=1)  # 歐式距離平方
        scores.append(score.item())

# 4. 設定閾值（95 percentile of train set distance）
train_distances = []

with torch.no_grad():
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        dist = torch.sum((feat - center_c) ** 2, dim=1)
        train_distances.extend(dist.cpu().numpy())

threshold = np.percentile(train_distances, 95)  # 調整這個也可以做 sensitivity 分析
print(f"使用的距離閾值：{threshold}")

# 5. 預測結果 & 評估
preds = [1 if score > threshold else 0 for score in scores]

print("\n📊 Classification Report:")
print(classification_report(test_labels, preds, target_names=['good', 'anomaly']))

auc = roc_auc_score(test_labels, scores)
print(f"🔍 ROC AUC Score: {auc:.4f}")


100%|██████████| 83/83 [00:03<00:00, 25.09it/s]


使用的距離閾值：1.6692018107278273e-05

📊 Classification Report:
              precision    recall  f1-score   support

        good       0.25      0.75      0.37        20
     anomaly       0.77      0.27      0.40        63

    accuracy                           0.39        83
   macro avg       0.51      0.51      0.39        83
weighted avg       0.65      0.39      0.39        83

🔍 ROC AUC Score: 0.5714


Method 2: ResNet18取代CNN

In [None]:
import os
import torch
import torch.nn as nn
from torchvision import models, transforms, datasets
from torch.utils.data import DataLoader
from PIL import Image
import numpy as np
from tqdm import tqdm
from sklearn.metrics import classification_report, roc_auc_score

# ==== 參數 ====
BATCH_SIZE = 32
IMAGE_SIZE = 224
EPOCHS = 20
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# ==== 資料前處理 ====
transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet 的均值與標準差
                         std=[0.229, 0.224, 0.225])
])

# ==== 載入 train/good 圖片 ====
train_dir = 'bottle/train/good'
train_dataset = datasets.ImageFolder(root=os.path.dirname(train_dir), transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# ==== 修改 ResNet18 模型 ====
class ResNet18FeatureExtractor(nn.Module):
    def __init__(self, output_dim=128):
        super().__init__()
        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]  # 去掉最後分類層 fc
        self.backbone = nn.Sequential(*modules)
        self.fc = nn.Linear(512, output_dim)  # 加上轉換層

    def forward(self, x):
        x = self.backbone(x)
        x = x.view(x.size(0), -1)  # flatten (B, 512)
        x = self.fc(x)
        return x

model = ResNet18FeatureExtractor().to(DEVICE)

# ==== SVDD 損失與優化器 ====
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# ==== 初始化中心點 c ====
def get_init_center(model, loader):
    model.eval()
    outputs = []
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(DEVICE)
            feat = model(x)
            outputs.append(feat)
    all_feats = torch.cat(outputs)
    return torch.mean(all_feats, dim=0)

center_c = get_init_center(model, train_loader).detach()

# ==== 訓練 Deep SVDD ====
for epoch in range(EPOCHS):
    model.train()
    losses = []
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        loss = torch.mean(torch.sum((feat - center_c) ** 2, dim=1))  # L2 距離平方
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {np.mean(losses):.4f}")

# ==== 計算訓練資料距離以設定閾值 ====
train_distances = []
with torch.no_grad():
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        dist = torch.sum((feat - center_c) ** 2, dim=1)
        train_distances.extend(dist.cpu().numpy())

threshold = np.percentile(train_distances, 95)
print(f"\n✅ 使用的距離閾值：{threshold}")

# ==== 測試 ====
test_dir = 'bottle/test'
test_classes = ['good', 'broken_large', 'broken_small', 'contamination']
test_images = []
test_labels = []

for cls in test_classes:
    folder = os.path.join(test_dir, cls)
    for img_name in os.listdir(folder):
        if img_name.endswith(('.jpg', '.png', '.jpeg')):
            img_path = os.path.join(folder, img_name)
            test_images.append(img_path)
            test_labels.append(0 if cls == 'good' else 1)

def preprocess_image(path):
    image = Image.open(path).convert('RGB')
    image = transform(image)
    return image

model.eval()
scores = []
with torch.no_grad():
    for path in tqdm(test_images):
        img = preprocess_image(path).unsqueeze(0).to(DEVICE)
        feat = model(img)
        score = torch.sum((feat - center_c) ** 2, dim=1)
        scores.append(score.item())

# ==== 評估 ====
preds = [1 if score > threshold else 0 for score in scores]

print("\n📊 Classification Report:")
print(classification_report(test_labels, preds, target_names=['good', 'anomaly']))

auc = roc_auc_score(test_labels, scores)
print(f"🔍 ROC AUC Score: {auc:.4f}")




Epoch 1/20 - Loss: 29.6427
Epoch 2/20 - Loss: 4.3604
Epoch 3/20 - Loss: 0.8684
Epoch 4/20 - Loss: 0.4549
Epoch 5/20 - Loss: 0.1662
Epoch 6/20 - Loss: 0.0784
Epoch 7/20 - Loss: 0.0510
Epoch 8/20 - Loss: 0.0326
Epoch 9/20 - Loss: 0.0248
Epoch 10/20 - Loss: 0.0194
Epoch 11/20 - Loss: 0.0167
Epoch 12/20 - Loss: 0.0148
Epoch 13/20 - Loss: 0.0134
Epoch 14/20 - Loss: 0.0124
Epoch 15/20 - Loss: 0.0116
Epoch 16/20 - Loss: 0.0110
Epoch 17/20 - Loss: 0.0104
Epoch 18/20 - Loss: 0.0098
Epoch 19/20 - Loss: 0.0094
Epoch 20/20 - Loss: 0.0089

✅ 使用的距離閾值：0.012546464800834656


100%|██████████| 83/83 [00:03<00:00, 24.30it/s]


📊 Classification Report:
              precision    recall  f1-score   support

        good       1.00      0.25      0.40        20
     anomaly       0.81      1.00      0.89        63

    accuracy                           0.82        83
   macro avg       0.90      0.62      0.65        83
weighted avg       0.85      0.82      0.77        83

🔍 ROC AUC Score: 0.9960





Method 3: 加上data augmentation

In [None]:
# 相比於方法二，只需要調整transform。
train_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(degrees=10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.05),
    transforms.RandomResizedCrop(IMAGE_SIZE, scale=(0.9, 1.0), ratio=(0.9, 1.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],  # ImageNet normalization
                         std=[0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

In [None]:
# ==== 參數 ====
BATCH_SIZE = 32
IMAGE_SIZE = 224
EPOCHS = 20
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# ==== 資料前處理 ====
transform = train_transform

# ==== 載入 train/good 圖片 ====
train_dir = 'bottle/train/good'
train_dataset = datasets.ImageFolder(root=os.path.dirname(train_dir), transform=transform)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# ==== 修改 ResNet18 模型 ====
class ResNet18FeatureExtractor(nn.Module):
    def __init__(self, output_dim=128):
        super().__init__()
        resnet = models.resnet18(pretrained=True)
        modules = list(resnet.children())[:-1]  # 去掉最後分類層 fc
        self.backbone = nn.Sequential(*modules)
        self.fc = nn.Linear(512, output_dim)  # 加上轉換層

    def forward(self, x):
        x = self.backbone(x)
        x = x.view(x.size(0), -1)  # flatten (B, 512)
        x = self.fc(x)
        return x

model = ResNet18FeatureExtractor().to(DEVICE)

# ==== SVDD 損失與優化器 ====
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

# ==== 初始化中心點 c ====
def get_init_center(model, loader):
    model.eval()
    outputs = []
    with torch.no_grad():
        for x, _ in loader:
            x = x.to(DEVICE)
            feat = model(x)
            outputs.append(feat)
    all_feats = torch.cat(outputs)
    return torch.mean(all_feats, dim=0)

center_c = get_init_center(model, train_loader).detach()

# ==== 訓練 Deep SVDD ====
for epoch in range(EPOCHS):
    model.train()
    losses = []
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        loss = torch.mean(torch.sum((feat - center_c) ** 2, dim=1))  # L2 距離平方
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {np.mean(losses):.4f}")

# ==== 計算訓練資料距離以設定閾值 ====
train_distances = []
with torch.no_grad():
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        dist = torch.sum((feat - center_c) ** 2, dim=1)
        train_distances.extend(dist.cpu().numpy())

threshold = np.percentile(train_distances, 95)
print(f"\n✅ 使用的距離閾值：{threshold}")

# ==== 測試 ====
test_dir = 'bottle/test'
test_classes = ['good', 'broken_large', 'broken_small', 'contamination']
test_images = []
test_labels = []
transform = test_transform
for cls in test_classes:
    folder = os.path.join(test_dir, cls)
    for img_name in os.listdir(folder):
        if img_name.endswith(('.jpg', '.png', '.jpeg')):
            img_path = os.path.join(folder, img_name)
            test_images.append(img_path)
            test_labels.append(0 if cls == 'good' else 1)

def preprocess_image(path):
    image = Image.open(path).convert('RGB')
    image = transform(image)
    return image

model.eval()
scores = []
with torch.no_grad():
    for path in tqdm(test_images):
        img = preprocess_image(path).unsqueeze(0).to(DEVICE)
        feat = model(img)
        score = torch.sum((feat - center_c) ** 2, dim=1)
        scores.append(score.item())

# ==== 評估 ====
preds = [1 if score > threshold else 0 for score in scores]

print("\n📊 Classification Report:")
print(classification_report(test_labels, preds, target_names=['good', 'anomaly']))

auc = roc_auc_score(test_labels, scores)
print(f"🔍 ROC AUC Score: {auc:.4f}")




Epoch 1/20 - Loss: 30.4516
Epoch 2/20 - Loss: 7.4997
Epoch 3/20 - Loss: 2.2265
Epoch 4/20 - Loss: 0.9296
Epoch 5/20 - Loss: 0.5097
Epoch 6/20 - Loss: 0.2991
Epoch 7/20 - Loss: 0.1697
Epoch 8/20 - Loss: 0.1187
Epoch 9/20 - Loss: 0.0904
Epoch 10/20 - Loss: 0.0767
Epoch 11/20 - Loss: 0.0620
Epoch 12/20 - Loss: 0.0580
Epoch 13/20 - Loss: 0.0520
Epoch 14/20 - Loss: 0.0475
Epoch 15/20 - Loss: 0.0447
Epoch 16/20 - Loss: 0.0422
Epoch 17/20 - Loss: 0.0393
Epoch 18/20 - Loss: 0.0359
Epoch 19/20 - Loss: 0.0349
Epoch 20/20 - Loss: 0.0335

✅ 使用的距離閾值：0.048705387860536575


100%|██████████| 83/83 [00:03<00:00, 22.33it/s]


📊 Classification Report:
              precision    recall  f1-score   support

        good       0.83      1.00      0.91        20
     anomaly       1.00      0.94      0.97        63

    accuracy                           0.95        83
   macro avg       0.92      0.97      0.94        83
weighted avg       0.96      0.95      0.95        83

🔍 ROC AUC Score: 0.9889





Method 4: 把 歐式距離平方 改成用 Mahalanobis 距離

In [None]:
import numpy as np
from numpy.linalg import inv
from tqdm import tqdm
from sklearn.metrics import classification_report, roc_auc_score
from PIL import Image

# 🔹 1. 建立測試資料列表與標籤
test_dir = 'bottle/test'
test_classes = ['good', 'broken_large', 'broken_small', 'contamination']
test_images = []
test_labels = []

for cls in test_classes:
    folder = os.path.join(test_dir, cls)
    for img_name in os.listdir(folder):
        if img_name.endswith(('.jpg', '.png', '.jpeg')):
            img_path = os.path.join(folder, img_name)
            test_images.append(img_path)
            test_labels.append(0 if cls == 'good' else 1)

# 🔹 2. 使用與訓練相同的 transform
test_transform = transforms.Compose([
    transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

def preprocess_image(path):
    image = Image.open(path).convert('RGB')
    image = test_transform(image)
    return image

# 🔹 3. 取得訓練特徵向量 → 計算 mean & covariance
model.eval()
train_features = []

with torch.no_grad():
    for x, _ in train_loader:
        x = x.to(DEVICE)
        feat = model(x)
        train_features.append(feat.cpu())

train_features = torch.cat(train_features, dim=0).numpy()
mean_vec = np.mean(train_features, axis=0)
cov_mat = np.cov(train_features, rowvar=False)

# 正則化避免奇異矩陣
eps = 1e-6
cov_mat += eps * np.eye(cov_mat.shape[0])
inv_cov = inv(cov_mat)

# 🔹 4. 定義 Mahalanobis 距離計算
def mahalanobis_distance(x, mean_vec, inv_cov):
    diff = x - mean_vec
    return np.sqrt(np.dot(np.dot(diff, inv_cov), diff.T))

# 🔹 5. 測試所有 test 圖像，計算 Mahalanobis 距離
mahalanobis_scores = []

with torch.no_grad():
    for path in tqdm(test_images):
        img = preprocess_image(path).unsqueeze(0).to(DEVICE)
        feat = model(img).cpu().numpy().squeeze()
        score = mahalanobis_distance(feat, mean_vec, inv_cov)
        mahalanobis_scores.append(score)

# 🔹 6. 設定閾值（95% 訓練資料 Mahalanobis 距離）
train_distances = [mahalanobis_distance(f, mean_vec, inv_cov) for f in train_features]
threshold = np.percentile(train_distances, 95)
print(f"\n✅ Mahalanobis 閾值：{threshold:.4f}")

# 🔹 7. 預測 & 評估
preds = [1 if s > threshold else 0 for s in mahalanobis_scores]

print("\n📊 Classification Report:")
print(classification_report(test_labels, preds, target_names=['good', 'anomaly']))

auc = roc_auc_score(test_labels, mahalanobis_scores)
print(f"🔍 ROC AUC Score: {auc:.4f}")


100%|██████████| 83/83 [00:03<00:00, 24.37it/s]


✅ Mahalanobis 閾值：12.1587

📊 Classification Report:
              precision    recall  f1-score   support

        good       0.00      0.00      0.00        20
     anomaly       0.76      1.00      0.86        63

    accuracy                           0.76        83
   macro avg       0.38      0.50      0.43        83
weighted avg       0.58      0.76      0.66        83

🔍 ROC AUC Score: 0.9881



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
