<a href="https://colab.research.google.com/github/kilosonc/kaggle/blob/master/dog_and_cat/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [11]:
!mkdir -p ~/.kaggle
!touch ~/.kaggle/kaggle.json


In [12]:
from google.colab import userdata
from pathlib import Path

# Store the Kaggle API token held in the Colab secret named 'kaggle' in
# ~/.kaggle/kaggle.json. Resolving the path from the home directory keeps this
# cell consistent with the `~/.kaggle` directory used by the shell commands.
kaggle_json = Path.home() / ".kaggle" / "kaggle.json"
kaggle_json.parent.mkdir(parents=True, exist_ok=True)  # do not rely on an earlier cell
kaggle_json.write_text(userdata.get('kaggle'))
kaggle_json.chmod(0o600)  # keep the API key private to the current user


In [13]:
!kaggle competitions download -c dog-vs-cat-classification
!yes | unzip dog-vs-cat-classification.zip -d ./dataset > /dev/null
!rm -f dog-vs-cat-classification.zip

Downloading dog-vs-cat-classification.zip to /content
 98% 703M/718M [00:07<00:00, 276MB/s]
100% 718M/718M [00:08<00:00, 93.6MB/s]
replace ./dataset/sample_submission.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000000.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000001.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000002.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000003.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000004.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000005.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000006.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000007.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000008.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./dataset/test/test/000009.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: replace ./

In [14]:
import pandas as pd

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Convolutional classifier that emits two raw scores per image.

    Two stages of (5x5 convolution, ReLU, 10x10 max-pooling) are followed by a
    stack of fully connected layers narrowing from 4096 features down to 2
    outputs. The first linear layer takes 4096 inputs, so the flattened output
    of the second stage must contain exactly that many values per sample.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = self._conv_stage(3, 512)
        self.conv2 = self._conv_stage(512, 1024)

        # Consecutive pairs of widths define the linear layers of the head.
        widths = (4096, 1024, 512, 200, 50, 10, 2)
        head = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            head.append(nn.Linear(fan_in, fan_out))
            head.append(nn.ReLU())
        head.pop()  # the final linear layer is not followed by an activation
        self.output = nn.Sequential(*head)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Return one convolution -> ReLU -> max-pool stage."""
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 5),
            nn.ReLU(),
            nn.MaxPool2d((10, 10)),
        )

    def forward(self, input):
        """Run both convolution stages, flatten per sample, apply the head."""
        features = self.conv2(self.conv1(input))
        flat = features.view(features.size(0), -1)
        return self.output(flat)

In [16]:
# Choose the compute device: Apple MPS if available, otherwise CUDA,
# otherwise fall back to the CPU.
device = torch.device(
    "mps" if torch.backends.mps.is_available()
    else "cuda" if torch.cuda.is_available()
    else "cpu"
)
print(f"Using device: {device}")

Using device: cuda


In [17]:
# Build the network, move its parameters to the selected device, and list its layers.
cnn = Net().to(device)
print(cnn)

Net(
  (conv1): Sequential(
    (0): Conv2d(3, 512, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(10, 10), stride=(10, 10), padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(512, 1024, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(10, 10), stride=(10, 10), padding=0, dilation=1, ceil_mode=False)
  )
  (output): Sequential(
    (0): Linear(in_features=4096, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=200, bias=True)
    (5): ReLU()
    (6): Linear(in_features=200, out_features=50, bias=True)
    (7): ReLU()
    (8): Linear(in_features=50, out_features=10, bias=True)
    (9): ReLU()
    (10): Linear(in_features=10, out_features=2, bias=True)
  )
)


In [None]:
from torchvision import transforms
from torch.utils.data import Dataset
from PIL import Image
import os

class DogsAndCatsDataset(Dataset):
    """Labelled dog/cat images discovered under a training directory.

    Files are expected to be named ``<prefix>.<integer id>.<ext>`` and to live
    in a directory whose path ends with ``dogs`` or ``cats``; the directory
    decides the label. Files located elsewhere, or whose name carries no
    integer id, are skipped instead of aborting the scan.

    Args:
        root: Directory that is walked recursively for images.
        transform: Callable applied to each RGB ``PIL.Image``. When ``None``,
            images are resized to 256x256, converted to a tensor and
            normalized with mean 0.5 and std 0.5.

    Attributes:
        train_set: ``pandas.DataFrame`` with columns ``id``, ``path``, ``type``.
        transform: The callable applied in ``__getitem__``.
    """

    def __init__(self, root="./dataset/train", transform=None):
        records = []
        for path, dirs, files in os.walk(root):
            # os.walk yields entries in arbitrary filesystem order; sorting
            # makes the row order (and any seeded split on it) reproducible.
            dirs.sort()
            if path.endswith("dogs"):
                kind = "dog"
            elif path.endswith("cats"):
                kind = "cat"
            else:
                continue  # files outside the class folders carry no label
            for name in sorted(files):
                try:
                    image_id = int(name.split(".")[1])
                except (IndexError, ValueError):
                    continue  # stray file without an integer id in its name
                records.append([image_id, os.path.join(path, name), kind])
        self.train_set = pd.DataFrame(records, columns=["id", "path", "type"])

        if transform is None:
            transform = transforms.Compose([
                transforms.Resize((256, 256)),
                transforms.ToTensor(),
                transforms.Normalize(0.5, 0.5)
            ])
        self.transform = transform

    def __len__(self):
        """Return the number of labelled images found."""
        return len(self.train_set)

    def __getitem__(self, index):
        """Return ``(transformed image, label)`` with label 1 for dog, 0 for cat.

        Raises:
            IndexError: If ``index`` is outside the dataset.
        """
        # Positional lookup raises IndexError past the end, which plain
        # ``for sample in dataset`` iteration relies on to stop.
        row = self.train_set.iloc[index]
        label = 1 if row["type"] == "dog" else 0
        with Image.open(row["path"]) as handle:
            image = handle.convert("RGB")
        return self.transform(image), label

# Build the dataset and sanity-check the tensor shape of one transformed sample.
data = DogsAndCatsDataset()
img, _ = data[1]
print(img.shape)

torch.Size([3, 256, 256])


In [None]:
from torch.utils.data import DataLoader, random_split

# Hold out 20% of the samples for validation.
SPLIT_SEED = 42
BATCH_SIZE = 10

train_set_size = int(len(data) * 0.8)
validate_set_size = len(data) - train_set_size

# A seeded generator makes the index permutation used for the split
# reproducible instead of depending on the global RNG state.
train_set, validate_set = random_split(
    data,
    [train_set_size, validate_set_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# Only the training batches need reshuffling each epoch; the order of the
# validation samples does not influence the metrics.
train_set_loader = DataLoader(dataset=train_set, batch_size=BATCH_SIZE, shuffle=True)
validate_set_loader = DataLoader(dataset=validate_set, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
from tqdm import tqdm
# Train every parameter of the network with Adam on the cross-entropy between
# the two output scores and the integer labels (cat=0, dog=1).
optimizer = torch.optim.Adam(cnn.parameters(), lr=0.001)
loss_func = nn.CrossEntropyLoss()

NUM_EPOCHS = 5
loss = None  # stays None only if the loader yields no batches
for epoch in range(NUM_EPOCHS):
    cnn.train()  # undo a previous cnn.eval() when this cell is re-run after validation
    running_loss = 0.0
    seen = 0
    for imgs, labels in tqdm(train_set_loader):
        # The batches already have shape (batch, 3, 256, 256), so a single
        # device transfer is all that is needed before the forward pass.
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = cnn(imgs)
        loss = loss_func(output, labels)
        loss.backward()
        optimizer.step()
        # Weight by batch size so a smaller final batch does not skew the mean.
        running_loss += loss.item() * labels.size(0)
        seen += labels.size(0)
    # Report the mean over the epoch; the last batch alone is a noisy signal.
    print(f"epoch {epoch + 1}/{NUM_EPOCHS}: mean training loss {running_loss / max(seen, 1):.4f}")

100%|██████████| 2500/2500 [06:50<00:00,  6.08it/s]
100%|██████████| 2500/2500 [06:50<00:00,  6.09it/s]
100%|██████████| 2500/2500 [06:50<00:00,  6.09it/s]
100%|██████████| 2500/2500 [06:50<00:00,  6.10it/s]
100%|██████████| 2500/2500 [06:48<00:00,  6.11it/s]


In [21]:
# Show the loss left behind by the training cell: the value computed for the
# final batch, still a tensor attached to its autograd graph.
print(loss)

tensor(0.6966, device='cuda:0', grad_fn=<NllLossBackward0>)


In [None]:
# Measure classification accuracy on the held-out validation split.
correct = 0
total = 0
cnn.eval()  # inference mode for the layers
with torch.no_grad():  # no gradients are needed for evaluation
    for imgs, labels in tqdm(validate_set_loader):
        imgs, labels = imgs.to(device), labels.to(device)
        output = cnn(imgs)
        # The predicted class is the index of the larger of the two scores.
        predictions = output.argmax(dim=1)
        correct += (predictions == labels).sum().item()
        total += labels.size(0)

# max(total, 1) keeps the report working when the validation split is empty.
print(f"Validation accuracy: {correct / max(total, 1):.4f} ({correct}/{total})")