Kaggle link: https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Import everything needed

In [None]:
import zipfile
import glob
from PIL import Image
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
# Fix the seeds of every random number generator used in this notebook
# so that repeated runs draw the same random numbers.
SEED = 0
np.random.seed(SEED)          # NumPy global generator
torch.manual_seed(SEED)       # PyTorch default generator
torch.cuda.manual_seed(SEED)  # PyTorch generator of the current CUDA device


In [None]:
import wandb
from kaggle_secrets import UserSecretsClient
# Read the Weights & Biases API key from the Kaggle secret store (never
# hard-code it in the notebook), log in, and start a run for this project.
wandb_api_key = UserSecretsClient().get_secret("WANDB_KEY")
wandb.login(key=wandb_api_key)
wandb.init(project='Cats_vs_dogs', save_code=True)

## Unzip datasets

In [None]:
# Set UNZIP to False when the archives have already been extracted into the
# working directory (e.g. when re-running this cell in the same session).
UNZIP = True

DATA_ROOT = '/kaggle/input/dogs-vs-cats-redux-kernels-edition'

# These names are used by later cells, so they are defined unconditionally
# rather than only when UNZIP is True.
train_dir = 'train'
test_dir = 'test'

if UNZIP:
    for archive_name in ('train.zip', 'test.zip'):
        with zipfile.ZipFile(os.path.join(DATA_ROOT, archive_name)) as archive:
            archive.extractall()  # extract into the current working directory

# glob returns files in a filesystem-dependent order; sorting makes the
# file lists (and therefore the seeded train/validation split) reproducible.
train_list = sorted(glob.glob(os.path.join(train_dir, '*.jpg')))
test_list = sorted(glob.glob(os.path.join(test_dir, '*.jpg')))
print(f"Train Data: {len(train_list)}")
print(f"Test Data: {len(test_list)}")

In [None]:
# The class name is the part of the file name before the first dot.
# os.path.basename is used instead of splitting on '/' so the parsing does
# not depend on the platform's path separator.
labels = [os.path.basename(path).split('.')[0] for path in train_list]

## Plot random images with their labels

In [None]:
fig, axes = plt.subplots(3, 3, figsize=(16, 12))

# Draw distinct random positions in train_list, one per subplot (every index,
# including 0, can be picked).
random_idx = np.random.choice(len(train_list),
                              size=min(axes.size, len(train_list)),
                              replace=False)

for idx, ax in zip(random_idx, axes.ravel()):
    img_path = train_list[idx]
    # The title is derived from the file name itself so that the cell stays
    # correct even if train_list is later replaced by a subset.
    ax.set_title(os.path.basename(img_path).split('.')[0])
    with Image.open(img_path) as img:
        ax.imshow(img)

## Use scikit-learn to split the data into training and validation sets

In [None]:
# Hold out 20% of the labelled images for validation. The split is stratified
# on `labels` and uses a fixed random_state.
# NOTE: train_list is overwritten here while `labels` is not, so this cell is
# meant to be run exactly once per kernel session.
split_parts = train_test_split(
    train_list,
    test_size=0.2,
    stratify=labels,
    random_state=0,
)
train_list, valid_list = split_parts

for subset_name, subset in (("Train", train_list),
                            ("Validation", valid_list),
                            ("Test", test_list)):
    print(f"{subset_name} Data: {len(subset)}")

Define the image transforms (resizing, cropping and data augmentation) applied to each dataset. We will discuss these in more detail in the near future...

In [None]:
RESIZE_TO = 128  # images are shrunk first, which makes it easier for the GPU
CROP_TO = 112    # side length of the square crop that is fed to the network

# Training pipeline: random crop and random horizontal flip as augmentation.
train_transforms = transforms.Compose([
    transforms.Resize(RESIZE_TO),
    transforms.RandomResizedCrop(CROP_TO),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
])

# Validation and test pipelines contain no random step: resize, centre crop,
# convert to tensor. Two separate Compose objects are built, one per name.
val_transforms, test_transforms = (
    transforms.Compose([
        transforms.Resize(RESIZE_TO),
        transforms.CenterCrop(CROP_TO),
        transforms.ToTensor(),
    ])
    for _ in range(2)
)

Define the dataset, using PIL to read the images

In [None]:
class CatsDogsDataset(Dataset):
    """Dataset of cat/dog images whose label is encoded in the file name.

    Args:
        file_list: sequence of image paths. Files whose name starts with
            ``dog.`` get label 1; every other file (``cat.*`` and the
            unlabelled test files) gets label 0.
        transform: optional callable applied to the loaded PIL image. When it
            is ``None`` the RGB PIL image is returned unchanged.
    """

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform
        self.filelength = len(file_list)

    def __len__(self):
        """Return the number of images in the dataset."""
        return self.filelength

    @staticmethod
    def _label_from_path(img_path):
        """Return 1 if the file name starts with ``dog.``, otherwise 0."""
        # basename instead of split("/") so the path separator does not matter
        prefix = os.path.basename(img_path).split(".")[0]
        return 1 if prefix == "dog" else 0

    def __getitem__(self, idx):
        """Load image ``idx`` and return ``(image, label)``."""
        img_path = self.file_list[idx]
        # The context manager closes the file handle; convert() returns an
        # in-memory copy and guarantees three channels for the network.
        with Image.open(img_path) as img:
            img = img.convert("RGB")
        # transform defaults to None, so only apply it when one was given
        if self.transform is not None:
            img = self.transform(img)
        return img, self._label_from_path(img_path)

In [None]:
# One dataset per split, each paired with its own transform pipeline
# (the validation set uses val_transforms, which was previously unused).
train_data = CatsDogsDataset(train_list, transform=train_transforms)
valid_data = CatsDogsDataset(valid_list, transform=val_transforms)
test_data = CatsDogsDataset(test_list, transform=test_transforms)

Create the dataloaders; you can modify the batch size if needed

In [None]:
batch_size = 32

# Only the training loader shuffles; validation and test keep the order of
# their file lists.
train_loader, valid_loader, test_loader = (
    DataLoader(dataset=split_data, batch_size=batch_size, shuffle=do_shuffle)
    for split_data, do_shuffle in (
        (train_data, True),
        (valid_data, False),
        (test_data, False),
    )
)

# AlexNet (modified)

In [None]:
if False:  # disabled: switch to True to make AlexNet available for model selection
    def init_my_layer(m):
        """Xavier-initialise the weight of layer ``m``, zero its bias, return ``m``."""
        # NOTE(review): the gain is computed for 'tanh' although these layers
        # are followed by ReLU in AlexNet below - confirm this is intended.
        torch.nn.init.xavier_normal_(m.weight, nn.init.calculate_gain('tanh'))
        torch.nn.init.constant_(m.bias, 0)
        return m

    class AlexNet(nn.Module):
        """AlexNet-style CNN: five convolutions followed by three linear layers.

        The first linear layer expects 1024 flattened features, which is what
        the convolutional part produces (256 x 2 x 2) for 112x112 inputs.
        ``forward`` returns a (batch, 2) tensor passed through a sigmoid.
        """

        def __init__(self):
            # Fixed: this used to call super(Net, self), which refers to a
            # different class and fails when AlexNet is instantiated.
            super().__init__()
            self.cnn_features = nn.Sequential(
                nn.Conv2d(3, 96, kernel_size=11, stride=4),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),
                nn.Conv2d(96, 256, kernel_size=5, padding=2),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),
                nn.Conv2d(256, 384, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(384, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv2d(256, 256, kernel_size=3, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(kernel_size=3, stride=2),
            )
            self.linear_layers = nn.Sequential(
                nn.Dropout(p=0.5),
                init_my_layer(nn.Linear(1024, 4096)),
                nn.ReLU(inplace=True),
                nn.Dropout(p=0.5),
                init_my_layer(nn.Linear(4096, 4096)),
                nn.ReLU(inplace=True),
                init_my_layer(nn.Linear(4096, 2)),
            )
            self.sigmoid = nn.Sigmoid()

        def forward(self, x):
            """Return sigmoid activations of shape (batch, 2) for image batch ``x``."""
            x = self.cnn_features(x)
            x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
            x = self.linear_layers(x)
            x = self.sigmoid(x)
            return x

# Small NN (3x convolution + 2 linear layers)

In [None]:

class Net(nn.Module):
    """Small CNN: three strided convolution blocks followed by two linear layers.

    For 112x112 inputs the spatial size shrinks 112 -> 55 -> 27 -> 13 (pool)
    -> 6 -> 3 (pool), so the flattened feature vector has 3*3*64 entries.
    ``forward`` returns one probability per image as a 1-D float tensor.
    """

    def __init__(self):
        super().__init__()

        self.cnn_layer1 = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, padding=0, stride=2),  # 2D convolution
            nn.BatchNorm2d(16),  # normalize by the mean and standard deviation of the batch
            nn.ReLU(),  # max(0,x)
        )

        self.cnn_layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=3, padding=0, stride=2),  # 2D convolution
            nn.BatchNorm2d(32),  # normalize by the mean and standard deviation of the batch
            nn.ReLU(),  # max(0,x)
            nn.MaxPool2d(2),  # aggregate pixel values together, no learnable weights
        )

        self.cnn_layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3, padding=0, stride=2),  # 2D convolution
            nn.BatchNorm2d(64),  # normalize by the mean and standard deviation of the batch
            nn.ReLU(),  # max(0,x)
            nn.MaxPool2d(2),  # aggregate pixel values together, no learnable weights
        )

        self.linear_layer1 = nn.Linear(3*3*64, 10)  # linear layer
        self.dropout = nn.Dropout(0.5)  # randomly deactivate neurons during training to limit overfitting
        self.linear_layer2 = nn.Linear(10, 1)  # linear layer
        self.relu = nn.ReLU()  # max(0,x)
        self.sigmoid = nn.Sigmoid()  # map between 0 and 1

    def forward(self, x):
        """Return the predicted probability for each image in batch ``x``."""
        out = self.cnn_layer1(x)
        out = self.cnn_layer2(out)
        out = self.cnn_layer3(out)
        out = torch.flatten(out, 1)  # (batch, 64, 3, 3) -> (batch, 576)
        out = self.relu(self.linear_layer1(out))
        # Fixed: the dropout layer was created in __init__ but never applied.
        # It is only active in training mode; eval-mode outputs are unchanged.
        out = self.dropout(out)
        out = self.linear_layer2(out)
        out = self.sigmoid(out)
        return out.flatten().float()

# ResNet

In [None]:
#ResNet
class Residual(nn.Module):
    """Residual block: two 3x3 convolutions plus a shortcut connection.

    Args:
        input_channels: number of channels of the incoming tensor.
        num_channels: number of channels produced by the block.
        use_1x1conv: when True the shortcut goes through a 1x1 convolution
            (with the same stride as the first convolution) so that its shape
            matches the main branch; otherwise the input is added as is.
        strides: stride of the first convolution (and of the 1x1 shortcut).
    """

    def __init__(self, input_channels, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        conv3x3 = dict(kernel_size=3, padding=1)
        self.conv1 = nn.Conv2d(input_channels, num_channels, stride=strides, **conv3x3)
        self.conv2 = nn.Conv2d(num_channels, num_channels, **conv3x3)
        self.conv3 = (
            nn.Conv2d(input_channels, num_channels, kernel_size=1, stride=strides)
            if use_1x1conv
            else None
        )
        self.bn1 = nn.BatchNorm2d(num_channels)
        self.bn2 = nn.BatchNorm2d(num_channels)

    def forward(self, X):
        """Return relu(main_branch(X) + shortcut(X))."""
        hidden = F.relu(self.bn1(self.conv1(X)))
        main_branch = self.bn2(self.conv2(hidden))
        shortcut = X if self.conv3 is None else self.conv3(X)
        return F.relu(main_branch + shortcut)

def resnet_block(input_channels, num_channels, num_residuals):
    """Return a list of ``num_residuals`` Residual modules forming one stage.

    The first module changes the channel count with a stride-2 block and a
    1x1 shortcut, but only when ``input_channels != num_channels``. All other
    modules map ``num_channels`` to ``num_channels`` with stride 1.
    """
    def _build(position):
        changes_width = position == 0 and input_channels != num_channels
        if changes_width:
            # 1x1 shortcut applied only once per stage, and only if needed
            return Residual(input_channels, num_channels,
                            use_1x1conv=True, strides=2)
        return Residual(num_channels, num_channels)

    return [_build(position) for position in range(num_residuals)]

In [None]:
# Stem: strided convolution, batch norm, ReLU and a strided max-pool.
# NOTE(review): padding=3 is paired with a 3x3 kernel here; padding=3 usually
# goes with a 7x7 kernel - confirm this is intended before changing it.
stem_layers = [
    nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
]
b1 = nn.Sequential(*stem_layers)

# Four residual stages of two blocks each, built in order b2..b5.
stage_widths = ((64, 64), (64, 128), (128, 256), (256, 512))
b2, b3, b4, b5 = (
    nn.Sequential(*resnet_block(width_in, width_out, 2))
    for width_in, width_out in stage_widths
)

# Choose model

Check the shape of one batch drawn from train_loader

In [None]:
# Pull a single batch from the training loader and print the shape of the
# image tensor followed by the shape of the label tensor.
first_batch = next(iter(train_loader))
X, y = first_batch
for batch_part in (X, y):
    print(batch_part.shape)

In [None]:
!pip install torchinfo

In [None]:
from torchinfo import summary

# Alternative architectures - uncomment one line to use it instead of the
# ResNet below (AlexNet only exists if its `if False:` guard is enabled):
# model = AlexNet()
# model = Net()

# ResNet: stem + four residual stages, global average pooling, then a single
# sigmoid output unit.
resnet_layers = [
    b1, b2, b3, b4, b5,
    nn.AdaptiveAvgPool2d((1, 1)),
    nn.Flatten(),
    nn.Linear(512, 1),
    nn.Sigmoid(),
]
model = nn.Sequential(*resnet_layers)

summary(model, input_size=(32, 3, 112, 112))

In [None]:
# Use the first CUDA GPU ("cuda:0") when one is available, otherwise the CPU,
# and move the model's parameters onto that device.
device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)
model.to(device)

# Train

In [None]:
learning_rate = 1e-3

# Binary cross-entropy on probabilities: the model already ends with a sigmoid.
criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

# Let Weights & Biases track the model; these settings are passed as is.
watch_options = dict(log="all", criterion=criterion, log_freq=1, log_graph=True)
wandb.watch(model, **watch_options)

In [None]:
def get_accuracy(y_true, y_prob, threshold=0.5):
    """Return the fraction of samples whose thresholded prediction equals the label.

    Args:
        y_true: 1-D tensor of 0/1 labels.
        y_prob: tensor of predicted probabilities with the same size as ``y_true``.
        threshold: a sample is predicted as class 1 when its probability is
            strictly greater than this value (default 0.5).

    Returns:
        Accuracy as a Python float in [0, 1].

    Raises:
        AssertionError: if ``y_true`` is not 1-D or the sizes differ.
        ValueError: if the tensors are empty.
    """
    assert y_true.ndim == 1 and y_true.size() == y_prob.size()
    if y_true.numel() == 0:
        raise ValueError("get_accuracy() needs at least one sample")
    # Cast the boolean predictions to the label dtype so the comparison does
    # not rely on implicit bool/float promotion.
    predicted = (y_prob > threshold).to(y_true.dtype)
    return (predicted == y_true).sum().item() / y_true.size(0)

In [None]:
TRAIN = True


def _run_epoch(loader, training):
    """Run one pass over ``loader`` and return ``(mean_loss, mean_accuracy)``.

    Args:
        loader: DataLoader yielding ``(images, labels)`` batches.
        training: when True the model is put in training mode and the
            optimizer updates the parameters after every batch; when False
            the model is put in evaluation mode and no gradients are computed.

    Both returned values are plain Python floats averaged over samples, so a
    smaller last batch does not get the same weight as a full batch.
    """
    model.train(training)  # train(False) is equivalent to eval()
    loss_sum = 0.0
    acc_sum = 0.0
    n_seen = 0

    with torch.set_grad_enabled(training):
        for X, y in loader:
            # put on GPU if available
            X, y = X.to(device), y.to(device)

            # Forward pass: the model ends with a sigmoid, so these are
            # probabilities (not logits), as required by BCELoss.
            y_pred = model(X)
            loss = criterion(y_pred, y.float().view(y_pred.shape))

            if training:
                # zero the gradients, backpropagate, then update the params
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # .item()/.detach() keep the running statistics free of the
            # autograd graph. The loss is assumed to be a per-batch mean
            # (the default reduction), hence the re-weighting by batch size.
            n_batch = y.size(0)
            loss_sum += loss.item() * n_batch
            acc_sum += get_accuracy(y.float(), y_pred.detach().flatten()) * n_batch
            n_seen += n_batch

    if n_seen == 0:
        raise ValueError("the data loader did not yield any sample")
    return loss_sum / n_seen, acc_sum / n_seen


if TRAIN:
    nb_epochs = 20
    for t in range(nb_epochs):
        # Training pass
        epoch_loss, epoch_accuracy = _run_epoch(train_loader, training=True)
        wandb.log({'mlp/train_loss': epoch_loss, 'mlp/train_acc': epoch_accuracy})
        print('Epoch : {}, train accuracy : {}, train loss : {}'.format(t+1, epoch_accuracy, epoch_loss))

        # Validation pass
        epoch_val_loss, epoch_val_accuracy = _run_epoch(valid_loader, training=False)
        wandb.log({'mlp/val_loss': epoch_val_loss, 'mlp/val_acc': epoch_val_accuracy})
        print('Epoch : {}, val_accuracy : {}, val_loss : {}'.format(t+1, epoch_val_accuracy, epoch_val_loss))
else:
    # map_location lets weights saved on a GPU be loaded on a CPU-only machine
    model.load_state_dict(torch.load('/kaggle/input/model-resnet/model_resnet.pt',
                                     map_location=device))

In [None]:
# Toggle: set to False to skip writing the weights to the working directory.
SAVE_MODEL = True
if SAVE_MODEL:
    model_weights = model.state_dict()
    torch.save(model_weights, 'model.pt')

# Evaluation

In [None]:
prob = []
model.eval()
with torch.no_grad():
    for X, _ in test_loader:  # the labels of the test set are placeholders
        X = X.to(device)
        # extend() avoids rebuilding the whole list for every batch
        prob.extend(model(X).flatten().tolist())

# test_loader iterates over test_list without shuffling, so prob[i] belongs to
# test_list[i]. The image id is the file name (e.g. 'test/123.jpg' -> 123);
# it must NOT be the position in the list, because the files are not listed
# in numeric order.
idx = [int(os.path.splitext(os.path.basename(path))[0]) for path in test_list]
assert len(idx) == len(prob), "one prediction per test image expected"

submission = (
    pd.DataFrame({'id': idx, 'label': prob})
    .sort_values('id')
    .reset_index(drop=True)
)
submission.to_csv('submission.csv', index=False)

In [None]:
# Display the submission table as this cell's output.
submission

Show example classifications

In [None]:
import random

fig, axes = plt.subplots(3, 6, figsize=(18, 10), facecolor='w')

# Map image id -> predicted probability for fast lookup.
predicted = submission.set_index('id')['label']

# Seeded sampling without replacement: the same, distinct images are shown
# every time the notebook is run.
sampler = random.Random(0)
n_shown = min(axes.size, len(predicted))
sampled_ids = sampler.sample(list(predicted.index), k=n_shown)

for ax, image_id in zip(axes.ravel(), sampled_ids):
    # A probability above 0.5 is read as 'dog', anything else as 'cat'.
    label = 'dog' if predicted[image_id] > 0.5 else 'cat'

    img_path = os.path.join(test_dir, '{}.jpg'.format(image_id))
    with Image.open(img_path) as img:
        ax.imshow(img)
    ax.set_title(label)