In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import Dataset, DataLoader

import torch.nn as nn
import torch.optim as optim
import os
from PIL import Image
from sklearn.metrics import accuracy_score
# Define transformations for the training and test sets
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    #使用ImageNet的均值和标准差进行归一化
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

batch_size = 64

In [2]:
class CustomCatDogDataset(Dataset):
    def __init__(self, root, transform=None, limit=None):
        self.transform = transform
        self.samples = []
        # Assuming each subfolder in root is a class
        classes = sorted(os.listdir(root))
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} #{'cats': 0, 'dogs': 1}
        for cls in classes:
            class_dir = os.path.join(root, cls)
            if os.path.isdir(class_dir):
                # Only use the first 'limit' images in each class folder
                if limit:
                    image_files = sorted(os.listdir(class_dir))[:limit+1] #+1是因为每个文件夹下有奇怪的檔案
                else:
                    image_files = sorted(os.listdir(class_dir))
                for img_file in image_files:
                    if  img_file.endswith('.jpg') or img_file.endswith('.jpeg') or img_file.endswith('.png'):
                        full_path = os.path.join(class_dir, img_file)
                        self.samples.append((full_path, self.class_to_idx[cls]))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        path, label = self.samples[index]
        image = Image.open(path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# Create custom datasets for training and test sets using the existing transform from CELL 0
custom_train_set = CustomCatDogDataset(root='data/CatsAndDogs/training_set', transform=transform, limit=100)
custom_test_set = CustomCatDogDataset(root='data/CatsAndDogs/test_set', transform=transform)

# Create dataloaders for the custom datasets
custom_train_loader = DataLoader(custom_train_set, batch_size=batch_size, shuffle=True)
custom_test_loader = DataLoader(custom_test_set, batch_size=batch_size, shuffle=False)

In [3]:
len(custom_train_set)

200

In [4]:
import torchvision.models as models

# 讀取VGG16的pretrain model
model = models.vgg16(weights='VGG16_Weights.DEFAULT')
# 將model的parameters設定成不需要梯度
for param in model.parameters():
    param.requires_grad = False

In [5]:
from torchsummary import summary

# Print the summary of the vgg16 model
summary(model, (3, 224, 224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [6]:
model.avgpool = nn.AdaptiveAvgPool2d(output_size=(1,1))
model.classifier = nn.Sequential(nn.Flatten(),
                                 nn.Linear(512, 128),
                                 nn.ReLU(),
                                 nn.Dropout(0.2),
                                 nn.Linear(128, 1),
                                 nn.Sigmoid())
model

VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [7]:
summary(model, input_size=(3,224,224), device='cpu')

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
         MaxPool2d-5         [-1, 64, 112, 112]               0
            Conv2d-6        [-1, 128, 112, 112]          73,856
              ReLU-7        [-1, 128, 112, 112]               0
            Conv2d-8        [-1, 128, 112, 112]         147,584
              ReLU-9        [-1, 128, 112, 112]               0
        MaxPool2d-10          [-1, 128, 56, 56]               0
           Conv2d-11          [-1, 256, 56, 56]         295,168
             ReLU-12          [-1, 256, 56, 56]               0
           Conv2d-13          [-1, 256, 56, 56]         590,080
             ReLU-14          [-1, 256,

In [8]:
device='cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
loss_fn = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(),lr= 1e-3)

# 先試跑看看準確率

In [9]:
#使用scikit-learn的accuracy_score函數計算準確率

model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in custom_test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = torch.round(outputs.data)
        total += labels.size(0)
        correct += accuracy_score(labels.cpu(), predicted.cpu()) * labels.size(0)
print(f'Accuracy: {100 * correct / total}%')

Accuracy: 58.42807711319822%


In [10]:
# Training the model
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for inputs, labels in custom_train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        #unsqueeze(1)是因為labels的shape是[32]，但是loss function需要的shape是[32,1]
        labels = labels.unsqueeze(1).float()
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        #計算accuray，因為是binary classification，所以只要看predicted和labels是否相等即可
        predicted = torch.round(outputs.data) #四捨五入
        total += labels.size(0)
        correct += accuracy_score(labels.cpu(), predicted.cpu()) * labels.size(0)
    print(f'train-Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(custom_train_loader)}')
    print(f'train-Accuracy: {100 * correct / total}%')

# Evaluating the model
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in custom_test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        predicted = torch.round(outputs.data)
        total += labels.size(0)
        correct += accuracy_score(labels.cpu(), predicted.cpu()) * labels.size(0)

print(f'Eval-Accuracy: {100 * correct / total}%')

train-Epoch 1/5, Loss: 0.5872324854135513
train-Accuracy: 77.5%
train-Epoch 2/5, Loss: 0.3601258024573326
train-Accuracy: 98.5%
train-Epoch 3/5, Loss: 0.21657095476984978
train-Accuracy: 98.0%
train-Epoch 4/5, Loss: 0.12634022161364555
train-Accuracy: 99.0%
train-Epoch 5/5, Loss: 0.08992180693894625
train-Accuracy: 98.5%
Eval-Accuracy: 98.26989619377163%
