<a href="https://colab.research.google.com/github/kilosonc/kaggle/blob/master/dog_and_cat/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [21]:
!mkdir -p ~/.kaggle
!touch ~/.kaggle/kaggle.json


In [22]:
# from google.colab import userdata
# with open("/root/.kaggle/kaggle.json", "w") as f:
#   f.write(userdata.get('kaggle'))


In [23]:
# !kaggle competitions download -c dog-vs-cat-classification
# !yes | unzip dog-vs-cat-classification.zip -d ./dataset > /dev/null
# !rm -f dog-vs-cat-classification.zip

In [24]:
import pandas as pd

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.conv1 = nn.Sequential(
            nn.Conv2d(
                3, 64, 3, padding = 1,
            ),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                64, 64, 3, padding = 1,
            ),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2), stride=2),

            nn.Conv2d(
                64, 128, 3, padding = 1,
            ),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                128, 128, 3, padding = 1,
            ),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2), stride=2),

            nn.Conv2d(
                128, 256, 3, padding = 1,
            ),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                256, 256, 3, padding = 1,
            ),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                256, 256, 3, padding = 1,
            ),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2), stride=2),

            nn.Conv2d(
                256, 512, 3, padding = 1,
            ),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                512, 512, 3, padding = 1,
            ),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                512, 512, 3, padding = 1,
            ),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2), stride=2),

            nn.Conv2d(
                512, 512, 3, padding = 1,
            ),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                512, 512, 3, padding = 1,
            ),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.Conv2d(
                512, 512, 3, padding = 1,
            ),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d((2,2), stride=2),
        )
        self.output = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 2),
            nn.Softmax(dim=1),
        )

    def forward(self, input):
        x = self.conv1(input)
        x = x.view(x.size(0), -1)
        return self.output(x)

In [26]:
# 检查 MPS 可用性
if torch.backends.mps.is_available():
    device = torch.device("mps")  # Apple GPU
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")  # 回退到 CPU
print(f"Using device: {device}")

Using device: mps


In [27]:
cnn = Net()
cnn.to(device=device)
print(cnn)

Net(
  (conv1): Sequential(
    (0): Conv2d(3, 52, kernel_size=(40, 40), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(5, 5), stride=(5, 5), padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(52, 100, kernel_size=(20, 20), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(5, 5), stride=(5, 5), padding=0, dilation=1, ceil_mode=False)
  )
  (output): Sequential(
    (0): Linear(in_features=1600, out_features=1000, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1000, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=200, bias=True)
    (5): ReLU()
    (6): Linear(in_features=200, out_features=50, bias=True)
    (7): ReLU()
    (8): Linear(in_features=50, out_features=10, bias=True)
    (9): ReLU()
    (10): Linear(in_features=10, out_features=2, bias=True)
  )
)


In [28]:
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
import os
os.listdir

class DogsAndCatsDataset(Dataset):
    def __init__(self, root):
        data = []
        for (path, _, files) in os.walk(root):
            for f in files:
                file_path = f"{path}/{f}"
                if path.endswith("dogs"):
                    data.append([file_path, "dog"])
                elif path.endswith("cats"):
                    data.append([file_path, "cat"])
                elif path.endswith("test"):
                    data.append([file_path, ""])
        self.train_set = pd.DataFrame(data, columns=["path", "type"])
        self.transform = transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.ToTensor(),
            transforms.Normalize(0.5, 0.5)
        ])

    def __len__(self):
        # return len(self.train_set)
        return 10

    def __getitem__(self, index):
        item = self.train_set.loc[index, ["path","type"]]
        path = item.loc["path"]
        label = [0.0 if item.loc["type"] == "dog" else 1.0, 1.0 if item.loc["type"] == "dog" else 0.0]
        image = Image.open(path).convert("RGB")
        return self.transform(image), torch.tensor(label)

data = DogsAndCatsDataset("./dataset/train")
img, _ = data[1]
print(img.size())

torch.Size([3, 256, 256])


In [29]:
from torch.utils.data import DataLoader, random_split

train_set_size = int(len(data) * 0.8)
validate_set_size = len(data) - train_set_size

[train_set, validate_set] = random_split(data, [train_set_size, validate_set_size])
test_set = DogsAndCatsDataset("./dataset/test")

train_set_loader = DataLoader(dataset=train_set,batch_size=10,shuffle=True)
validate_set_loader = DataLoader(dataset=validate_set,batch_size=10,shuffle=True)
test_set_loader = DataLoader(dataset=test_set,batch_size=10,shuffle=True)

In [30]:
from tqdm import tqdm
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

for epoch in range(5):
  loss = ""
  for imgs , labels in tqdm(train_set_loader):
      imgs, labels = imgs.to(device), labels.to(device)
      optimizer.zero_grad()
      output = cnn(imgs.view(-1, 3, 256, 256).to(device))
      loss = loss_func(output, labels)
      loss.backward()
      optimizer.step()
  print(loss)

100%|██████████| 1/1 [00:04<00:00,  4.62s/it]


tensor(0.5735, device='mps:0', grad_fn=<DivBackward1>)


100%|██████████| 1/1 [00:00<00:00,  7.05it/s]


tensor(0.6216, device='mps:0', grad_fn=<DivBackward1>)


100%|██████████| 1/1 [00:00<00:00,  6.72it/s]


tensor(0.2865, device='mps:0', grad_fn=<DivBackward1>)


100%|██████████| 1/1 [00:00<00:00, 23.70it/s]


tensor(0.0022, device='mps:0', grad_fn=<DivBackward1>)


100%|██████████| 1/1 [00:00<00:00, 19.50it/s]


tensor(4.4703e-08, device='mps:0', grad_fn=<DivBackward1>)


In [31]:
print(loss)

tensor(4.4703e-08, device='mps:0', grad_fn=<DivBackward1>)


In [32]:
cnn.eval()
correct = 0
total = 0
with torch.no_grad():
    for imgs, labels in tqdm(validate_set_loader):
        imgs, labels = imgs.to(device), labels.to(device)
        output = cnn(imgs.view(-1, 3, 256, 256).to(device))
        result = torch.argmax(output, dim=1)

        correct += (result == labels).sum().item()  # 统计预测正确的样本数
        total += labels.size(0)  # 统计总样本数

accuracy = correct / total
print(f"Accuracy: {accuracy:.4f}")

100%|██████████| 1/1 [00:01<00:00,  1.03s/it]

Accuracy: 1.0000





In [None]:
cnn.eval()
with torch.no_grad():
    for imgs, _ in tqdm(test_set_loader):
        imgs = imgs.to(device)
        output = cnn(imgs.view(-1, 3, 256, 256).to(device))
        result = torch.argmax(output, dim=1)
        for i, r in enumerate(result):
            r.int()

100%|██████████| 1/1 [00:00<00:00,  2.00it/s]
