In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv
/kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip
/kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip


In [None]:
!unzip /kaggle/input/dogs-vs-cats-redux-kernels-edition/train.zip
!unzip /kaggle/input/dogs-vs-cats-redux-kernels-edition/test.zip

In [3]:
import os
import numpy as np
import pandas as pd
from PIL import Image
from time import time as timer
import matplotlib.pyplot as plt

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision.transforms as transforms
from torch.utils.data import random_split, Dataset, DataLoader

In [4]:
train_dir = "/kaggle/working/train"

train_imgs = sorted(os.listdir(train_dir))
print(train_imgs[:10])

img1 = Image.open(train_dir + "/" + train_imgs[0])
img1.size

['cat.0.jpg', 'cat.1.jpg', 'cat.10.jpg', 'cat.100.jpg', 'cat.1000.jpg', 'cat.10000.jpg', 'cat.10001.jpg', 'cat.10002.jpg', 'cat.10003.jpg', 'cat.10004.jpg']


(500, 374)

In [5]:
class CustomDataset(Dataset):
    def __init__(self, images_dir, transform=None):
        self.transforms = transform
        self.imgs_dir = images_dir
        self.arr = sorted(os.listdir(self.imgs_dir))

    def __len__(self):
        return len(os.listdir(self.imgs_dir))
    
    def __getitem__(self, idx):
        img_name = self.arr[idx]
        label = self.arr[idx][:3]
        if label == 'cat':
            label = 0
        else:
            label = 1

        img_path = os.path.join(self.imgs_dir, img_name)

        img = Image.open(img_path).convert("RGB")

        if self.transforms:
            img = self.transforms(img)
        
        return img, label

In [13]:
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

test_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

In [14]:
train_dataset = CustomDataset(
    images_dir = "/kaggle/working/train", transform = train_transform
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

In [None]:
from tqdm import tqdm

import torch.nn as nn
import torch.nn.functional as F

class CNNModel(nn.Module):
    def __init__(self, num_classes=10):
        super(CNNModel, self).__init__()

        self.conv_block = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),  # [B, 32, 128, 128]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # [B, 32, 64, 64]

            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),  # [B, 64, 64, 64]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # [B, 64, 32, 32]

            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),  # [B, 128, 32, 32]
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),  # [B, 128, 16, 16]
        )

        self.fc_block = nn.Sequential(
            nn.Flatten(),  # [B, 128 * 16 * 16]
            nn.Linear(128 * 16 * 16, 256),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv_block(x)
        x = self.fc_block(x)
        return x


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = CNNModel(num_classes=10).to(device)

cr = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

corrects = 0
epochs = 10

for i in range(epochs):
    cost = 0
    for image, answer in tqdm(train_loader):
        image = image.to(device)
        answer = answer.to(device)

        optimizer.zero_grad()
        predictions = model(image)
        error = cr(predictions, answer)
        error.backward()
        optimizer.step()
        cost += error.item()

        if i == epochs - 1:
            corrects += (predictions.argmax(1) == answer).sum().item()

    print(f"Epoch {i+1}, Loss: {cost / len(train_loader):.4f}")

total_samples = len(train_loader.dataset)
print("Accuracy:", corrects, "/", total_samples, "=", corrects / total_samples)

100%|██████████| 782/782 [01:12<00:00, 10.85it/s]


Epoch 2, Loss: 0.6954


100%|██████████| 782/782 [01:12<00:00, 10.85it/s]


Epoch 3, Loss: 0.6945


 78%|███████▊  | 612/782 [00:55<00:15, 11.04it/s]

In [18]:
test_dir = "/kaggle/working/test"

test_imgs = sorted(test_imgs, key=lambda x: int(x.split('.')[0]))
print(test_imgs[:10])

img1 = Image.open(test_dir + "/" + test_imgs[5])
img1.size

['1.jpg', '2.jpg', '3.jpg', '4.jpg', '5.jpg', '6.jpg', '7.jpg', '8.jpg', '9.jpg', '10.jpg']


(499, 375)

In [23]:
class CustomTestDataset(Dataset):
    def __init__(self, images_dir, transform=None):
        self.transforms = transform
        self.imgs_dir = images_dir
        self.arr = (os.listdir(self.imgs_dir))

    def __len__(self):
        return len(os.listdir(self.imgs_dir))
    
    def __getitem__(self, idx):
        img_name = self.arr[idx]

        img_path = os.path.join(self.imgs_dir, img_name)

        img = Image.open(img_path).convert("RGB")

        if self.transforms:
            img = self.transforms(img)
        
        return img, idx

In [24]:
test_dataset = CustomTestDataset("/kaggle/working/test", test_transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [25]:
model.eval()

CNNModel(
  (conv_block): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_block): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=32768, out_features=256, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=256, out_features=10, bias=True)
  )
)

In [26]:
all_preds = []
img_idxs = []

with torch.no_grad():
    for images, idxs in tqdm(test_loader):
        images = images.to(device)

        outputs = model(images)

        _, preds = torch.max(outputs, 1)

        all_preds.extend(preds.cpu().numpy())
        img_idxs.extend(idxs)

 13%|█▎        | 52/391 [00:04<00:27, 12.55it/s]


KeyboardInterrupt: 

In [10]:
subm = pd.read_csv('/kaggle/input/dogs-vs-cats-redux-kernels-edition/sample_submission.csv')
subm

Unnamed: 0,id,label
0,1,0.5
1,2,0.5
2,3,0.5
3,4,0.5
4,5,0.5
...,...,...
12495,12496,0.5
12496,12497,0.5
12497,12498,0.5
12498,12499,0.5
