In [1]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

import torch.nn as nn
import torch.optim as optim

# Preprocessing shared by the training and test sets: resize every image to
# 224x224, convert it to a tensor, then normalize each channel with the
# ImageNet mean and standard deviation.
preprocessing_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
]
transform = transforms.Compose(preprocessing_steps)

# Load the datasets.
# ImageFolder scans the subdirectories of the given root and treats the images
# inside each subdirectory as one class. For example, with this layout:
# training_set
# ├── cats
# │   ├── cat1.jpg
# │   ├── cat2.jpg
# │   └── ...
# └── dogs
#     ├── dog1.jpg
#     ├── dog2.jpg
#     └── ...
# ImageFolder assigns class 0 to cats and class 1 to dogs. Each image is
# labelled by the subdirectory that contains it (not by its file name), so
# cat1.jpg gets label 0 and dog1.jpg gets label 1.
# The resulting datasets are then wrapped in DataLoader objects for batching.
train_set = datasets.ImageFolder(
    root='data/CatsAndDogs/training_set',
    transform=transform,
)
test_set = datasets.ImageFolder(
    root='data/CatsAndDogs/test_set',
    transform=transform,
)

batch_size = 32
# Only the training data is shuffled; the test order stays fixed.
train_loader = DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False)

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(device)
# Define the CNN model
class SimpleCNN(nn.Module):
    """Small two-stage convolutional classifier for fixed-size RGB images.

    Architecture: two rounds of (3x3 convolution -> ReLU -> 2x2 max-pool),
    followed by a 512-unit fully connected layer with ReLU and a final linear
    layer that produces one logit per class.

    Args:
        num_classes: Number of output logits. Defaults to 2.
        input_size: Spatial size the network is built for, either a single int
            (square images) or a ``(height, width)`` pair. Defaults to
            ``(224, 224)``.

    Raises:
        ValueError: If either side of ``input_size`` is smaller than 4, since
            the two pooling stages would leave no features.
    """

    def __init__(self, num_classes=2, input_size=(224, 224)):
        super().__init__()
        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size
        if height < 4 or width < 4:
            raise ValueError(
                f'input_size must be at least 4x4, got {height}x{width}'
            )

        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        # The 3x3 / padding-1 convolutions keep height and width unchanged and
        # each 2x2 pool halves them (rounding down), so the two stages leave
        # 64 channels of (height // 4) x (width // 4) features.
        self.flat_features = 64 * (height // 4) * (width // 4)
        self.fc1 = nn.Linear(self.flat_features, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``.

        ``x`` is a batch of images ``(batch, 3, H, W)``; a single unbatched
        image ``(3, H, W)`` is treated as a batch of one.
        """
        if x.dim() == 3:
            x = x.unsqueeze(0)
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        # Flatten everything except the batch dimension. Unlike
        # view(-1, features), this never folds surplus spatial positions into
        # the batch dimension, so an input of the wrong size fails in fc1 with
        # a shape error instead of silently producing extra output rows.
        x = x.flatten(start_dim=1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Build the network on the selected device, then pair it with a cross-entropy
# loss and an Adam optimizer over all of its parameters.
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Train for a fixed number of epochs and report the mean per-batch loss of
# each epoch.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimization step: clear old gradients, run the forward pass,
        # back-propagate the loss, then update the weights.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    mean_batch_loss = running_loss / len(train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {mean_batch_loss}')

# Evaluate the model: count how many test images are classified correctly.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The predicted class is the index of the largest logit in each row.
        # argmax replaces `_, predicted = torch.max(outputs.data, 1)`: the
        # legacy `.data` accessor is unnecessary under no_grad, and assigning
        # to `_` at notebook top level shadows IPython's last-output variable.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')

cuda
Epoch 1/10, Loss: 1.0920734109869041
Epoch 2/10, Loss: 0.5631721707216772
Epoch 3/10, Loss: 0.4079714103405219
Epoch 4/10, Loss: 0.1879150915252735
Epoch 5/10, Loss: 0.04831936940053604
Epoch 6/10, Loss: 0.03293175690930775
Epoch 7/10, Loss: 0.005447876316028999
Epoch 8/10, Loss: 0.001172953256216541
Epoch 9/10, Loss: 0.00021828066243880997
Epoch 10/10, Loss: 0.00010518740631835451
Accuracy: 70.24221453287197%


In [2]:
from torchsummary import summary
# Make sure the model is on the selected device, then report whether its first
# parameter tensor lives on CUDA.
model.to(device)
first_param = next(model.parameters())
print(first_param.is_cuda)
# Print the per-layer output shapes and parameter counts for one 3x224x224 input.
summary(model, input_size=(3, 224, 224), batch_size=1)

True
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [1, 32, 224, 224]             896
              ReLU-2          [1, 32, 224, 224]               0
         MaxPool2d-3          [1, 32, 112, 112]               0
            Conv2d-4          [1, 64, 112, 112]          18,496
              ReLU-5          [1, 64, 112, 112]               0
         MaxPool2d-6            [1, 64, 56, 56]               0
            Linear-7                   [1, 512]     102,760,960
              ReLU-8                   [1, 512]               0
            Linear-9                     [1, 2]           1,026
Total params: 102,781,378
Trainable params: 102,781,378
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.57
Forward/backward pass size (MB): 41.35
Params size (MB): 392.08
Estimated Total Size (MB): 434.01
-------------------------

In [3]:
import os
from PIL import Image
from torch.utils.data import Dataset, DataLoader

class CustomCatDogDataset(Dataset):
    """Folder-per-class image dataset with an optional per-class image cap.

    Every subdirectory of ``root`` is one class; class indices follow the
    sorted subdirectory names. Files directly inside ``root`` are ignored.

    Args:
        root: Directory that contains one subdirectory per class.
        transform: Optional callable applied to each loaded RGB image.
        limit: Maximum number of images to keep per class, taken in sorted
            file-name order. ``None`` (or 0) keeps every image.

    Attributes:
        samples: List of ``(image_path, class_index)`` pairs.
        class_to_idx: Mapping from class (subdirectory) name to class index.

    Raises:
        ValueError: If ``limit`` is negative.
    """

    # Accepted image file extensions, compared case-insensitively.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, root, transform=None, limit=None):
        if limit is not None and limit < 0:
            raise ValueError(f'limit must be non-negative, got {limit}')
        self.transform = transform
        self.samples = []
        # Only directories count as classes. A stray file in `root` must not
        # consume a class index, otherwise every later label is shifted.
        classes = sorted(
            entry for entry in os.listdir(root)
            if os.path.isdir(os.path.join(root, entry))
        )
        self.class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)}
        for cls in classes:
            class_dir = os.path.join(root, cls)
            # Filter by extension first and apply the cap afterwards, so that
            # non-image files in the folder never count towards `limit`.
            image_files = sorted(
                name for name in os.listdir(class_dir)
                if name.lower().endswith(self.IMAGE_EXTENSIONS)
            )
            if limit:
                image_files = image_files[:limit]
            label = self.class_to_idx[cls]
            self.samples.extend(
                (os.path.join(class_dir, name), label) for name in image_files
            )

    def __len__(self):
        """Return the number of collected samples."""
        return len(self.samples)

    def __getitem__(self, index):
        """Load the image at ``index`` as RGB and return ``(image, label)``."""
        path, label = self.samples[index]
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the with-block ends.
        with Image.open(path) as img:
            image = img.convert('RGB')
        if self.transform:
            image = self.transform(image)
        return image, label

# Build the custom datasets with the `transform` defined earlier in the
# notebook. The training set passes limit=300 to restrict how many files per
# class folder are considered; the test set is used in full.
custom_train_set = CustomCatDogDataset(
    root='data/CatsAndDogs/training_set',
    transform=transform,
    limit=300,
)
custom_test_set = CustomCatDogDataset(
    root='data/CatsAndDogs/test_set',
    transform=transform,
)

# Wrap both datasets in loaders; only the training loader shuffles.
custom_train_loader = DataLoader(dataset=custom_train_set, batch_size=32, shuffle=True)
custom_test_loader = DataLoader(dataset=custom_test_set, batch_size=32, shuffle=False)

In [4]:
# Start from a freshly initialized network on the selected device, with a new
# cross-entropy loss and Adam optimizer for this run.
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)

# Train on the custom training loader for a fixed number of epochs and report
# the mean per-batch loss of each epoch.
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in custom_train_loader:
        inputs = inputs.to(device)
        labels = labels.to(device)

        # One optimization step: clear old gradients, run the forward pass,
        # back-propagate the loss, then update the weights.
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    mean_batch_loss = running_loss / len(custom_train_loader)
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {mean_batch_loss}')

# Evaluate the model on the custom test loader: count how many images are
# classified correctly.
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in custom_test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        # The predicted class is the index of the largest logit in each row.
        # argmax replaces `_, predicted = torch.max(outputs.data, 1)`: the
        # legacy `.data` accessor is unnecessary under no_grad, and assigning
        # to `_` at notebook top level shadows IPython's last-output variable.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy: {100 * correct / total}%')

Epoch 1/10, Loss: 3.1490386097054732
Epoch 2/10, Loss: 0.6619077638575905
Epoch 3/10, Loss: 0.6255062598931161
Epoch 4/10, Loss: 0.45962070791344894
Epoch 5/10, Loss: 0.20601213527353188
Epoch 6/10, Loss: 0.06679893137985154
Epoch 7/10, Loss: 0.013411815351757565
Epoch 8/10, Loss: 0.0038303664891588454
Epoch 9/10, Loss: 0.003263291339144895
Epoch 10/10, Loss: 0.0009143601798170589
Accuracy: 64.75531389026199%
