Kaggle link: https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input directory and print the full path of every file in it.
input_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
!pip install torchinfo

## Import everything needed

In [None]:
import zipfile
import glob
from PIL import Image
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
# Seed every RNG used in this notebook (NumPy, PyTorch CPU and CUDA) so runs are repeatable.
np.random.seed(0)
torch.manual_seed(0)
torch.cuda.manual_seed(0)

import wandb

from kaggle_secrets import UserSecretsClient
# The Weights & Biases API key is read from the Kaggle secret named "wandb",
# so it never has to be written into the notebook itself.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb")

# Authenticate explicitly with the fetched key. Without this call the secret is
# never used and wandb.init() has to find credentials some other way.
wandb.login(key=secret_value_0)
wandb.init(project='Cat-vs-Dog-CNN', save_code=True)

# https://github.com/TylerYep/torchinfo
from torchinfo import summary # conda install -c conda-forge torchinfo

## Unzip datasets

In [None]:
train_dir = 'train'
test_dir = 'test'
INPUT_DIR = '/kaggle/input/dogs-vs-cats-redux-kernels-edition'

# Extract each archive into the current working directory. An archive is skipped
# when its target folder already exists, so re-running this cell does not unpack
# tens of thousands of images again (delete the folder to force a fresh extraction).
for archive_name, target_dir in (('train.zip', train_dir), ('test.zip', test_dir)):
    if not os.path.isdir(target_dir):
        with zipfile.ZipFile(os.path.join(INPUT_DIR, archive_name)) as archive:
            archive.extractall('')

# glob returns paths in arbitrary (filesystem) order. Sorting makes the lists, and
# therefore the seeded train/validation split built from them, reproducible.
train_list = sorted(glob.glob(os.path.join(train_dir, '*.jpg')))
test_list = sorted(glob.glob(os.path.join(test_dir, '*.jpg')))
print(f"Train Data: {len(train_list)}")
print(f"Test Data: {len(test_list)}")

In [None]:
# The class name is the first dot-separated token of the file name ("dog.123.jpg" -> "dog").
# os.path.basename is used instead of splitting on '/' so this works with any path separator.
labels = [os.path.basename(path).split('.')[0] for path in train_list]

## Plot random images with their labels

In [None]:
# Draw 9 random positions in train_list (index 0 included) and show them in a 3x3 grid.
random_idx = np.random.randint(0, len(train_list), size=9)
fig, axes = plt.subplots(3, 3, figsize=(16, 12))

# Pair each sampled index with one subplot, so the randomly chosen images are
# the ones displayed rather than simply the first nine files.
for sample_idx, ax in zip(random_idx, axes.ravel()):
    img = Image.open(train_list[sample_idx])
    ax.set_title(labels[sample_idx])
    ax.imshow(img)

## Use scikit-learn to split the data

In [None]:
# We reserve 20% of the training set for validation and keep the remaining 80% for training.
VALIDATION_RATIO = 0.2

# Guard against stale notebook state: this cell overwrites `train_list`, and later
# cells rebind `labels`, so running it a second time (or out of order) would
# stratify on labels that no longer match the file list.
assert len(labels) == len(train_list), (
    "labels and train_list are out of sync; restart the kernel and run all cells "
    "from the top before splitting."
)

# Stratify on the class labels so both splits keep the same cat/dog proportions.
train_list, valid_list = train_test_split(train_list,
                                          test_size=VALIDATION_RATIO,
                                          stratify=labels,
                                          random_state=0)

training_size = len(train_list)
validation_size = len(valid_list)
testing_size = len(test_list)

print(f"Train Data: {training_size}")
print(f"Validation Data: {validation_size}")
print(f"Test Data: {testing_size}")

We will discuss this in more detail in the near future...

In [None]:
def _make_transform():
    """Build the preprocessing pipeline shared by all three splits.

    Steps: resize to 227x227, convert the PIL image to a tensor, then normalise
    each channel with the given mean / standard deviation.
    """
    return transforms.Compose([
        transforms.Resize((227, 227)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])


# Training, validation and test currently use the same deterministic pipeline;
# no random augmentation (random crops, horizontal flips) is applied to the training set.
train_transforms = _make_transform()

val_transforms = _make_transform()

test_transforms = _make_transform()

Define the dataset, using PIL to read the images.

In [None]:
class CatsDogsDataset(Dataset):
    """Image dataset built from a list of file paths.

    The label is derived from the file name: the first dot-separated token of
    the base name is compared with "dog" (label 1); anything else is label 0.

    Args:
        file_list: sequence of image file paths.
        transform: optional callable applied to the loaded PIL image. When it
            is ``None`` the RGB PIL image is returned unchanged.
    """

    def __init__(self, file_list, transform=None):
        self.file_list = file_list
        self.transform = transform
        self.filelength = len(file_list)

    def __len__(self):
        """Return the number of files in the dataset."""
        return self.filelength

    @staticmethod
    def _label_from_path(img_path):
        """Return 1 when the file name starts with "dog.", otherwise 0."""
        # basename handles any path separator, unlike splitting on "/".
        class_name = os.path.basename(img_path).split(".")[0]
        return 1 if class_name == "dog" else 0

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``."""
        img_path = self.file_list[idx]
        # Open inside a context manager so the file handle is released promptly,
        # and force 3-channel RGB so grayscale / CMYK / palette images do not
        # break a 3-channel Normalize further down the pipeline.
        with Image.open(img_path) as raw_img:
            img = raw_img.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, self._label_from_path(img_path)

In [None]:
# One dataset per split, each with the transform pipeline defined for that split.
# The validation set uses val_transforms, so a later change to the validation
# or test preprocessing affects only the intended split.
train_data = CatsDogsDataset(train_list, transform=train_transforms)
valid_data = CatsDogsDataset(valid_list, transform=val_transforms)
test_data = CatsDogsDataset(test_list, transform=test_transforms)

Create the dataloaders; you can modify the batch size if needed.

In [None]:
batch_size = 32

# Only the training split is reshuffled every epoch; validation and test keep
# their original order.
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_data, shuffle=False, batch_size=batch_size)
test_loader = DataLoader(test_data, shuffle=False, batch_size=batch_size)

In [None]:
# Pull a single batch from the training loader to sanity-check its output.
# NOTE: this rebinds `labels` (until now the list of class-name strings) to a tensor.
dataiter = iter(train_loader)
# Use the built-in next(): the iterator's .next() method is not available in
# current PyTorch releases.
images, labels = next(dataiter)

print(images.shape) # (batch, channels, image_height, image_width)
print(torch.unique(labels).size(dim=0)) # number of distinct classes present in this batch

In [None]:
# The batch was normalised per channel by the transform pipeline, so undo that
# (x * std + mean) before plotting. Otherwise imshow clips the out-of-range
# values and the colours look wrong.
norm_mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
norm_std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)
sample_img = (images[0].cpu() * norm_std + norm_mean).clamp(0, 1)

# imshow expects (height, width, channels), the tensor is (channels, height, width).
plt.imshow(sample_img.permute(1, 2, 0))
plt.title("Ground Truth: {}".format(labels[0]))
plt.show()

# Convolutional Neural Network

In [None]:
# If you think 224x224 is the right input size for AlexNet, please see the links below:
# @see https://cs231n.github.io/convolutional-networks/
# @see https://stackoverflow.com/questions/36733636/number-of-neurons-in-alexnet
# @see https://datascience.stackexchange.com/questions/29245/what-is-the-input-size-of-alex-net
# @see https://learnopencv.com/understanding-alexnet/
class AlexNet(nn.Module):
    """AlexNet-style CNN: five convolutional layers followed by three fully connected layers.

    With a 3x227x227 input the convolutional stack produces a 256x6x6 feature
    map, which is the size the first fully connected layer is built for.

    Args:
        num_classes: number of output logits produced by the last linear layer.
    """

    def __init__(self, num_classes: int):
        super().__init__()

        #----------------------------
        # CONVOLUTIONAL LAYERS
        #----------------------------
        self.feature_extraction = nn.Sequential(
            # 1st Convolutional Layer
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),

            # 2nd Convolutional Layer
            nn.Conv2d(in_channels=96, out_channels=192, kernel_size=5, stride=1, padding=2, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),

            # 3rd Convolutional Layer
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),

            # 4th Convolutional Layer
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),

            # 5th Convolutional Layer
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        )

        #----------------------------
        # FULLY CONNECTED LAYERS
        #----------------------------
        self.classifier = nn.Sequential(
            # 1st Fully Connected Layer
            nn.Linear(in_features=(256 * 6 * 6), out_features=4096),
            nn.ReLU(inplace=True),
            # Dropout to prevent overfitting
            nn.Dropout(p=0.5),

            # 2nd Fully Connected Layer
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            # Dropout to prevent overfitting
            nn.Dropout(p=0.5),

            # 3rd Fully Connected Layer
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x) -> torch.Tensor:
        """Run a batch of images through the network and return the raw class logits.

        Args:
            x: image batch of shape (batch, 3, height, width). The spatial size
                must lead to a 256x6x6 feature map (e.g. 227x227).

        Returns:
            Tensor of shape (batch, num_classes) with unnormalised scores.

        Raises:
            RuntimeError: if the flattened feature size does not match the
                first fully connected layer.
        """
        # Convolutional layers
        x = self.feature_extraction(x)
        # Flatten everything except the batch dimension. Unlike view(-1, 256*6*6),
        # this never merges or splits samples when the feature map has an
        # unexpected size; the linear layer raises a shape error instead.
        x = torch.flatten(x, start_dim=1)
        # Fully connected layers
        x = self.classifier(x)
        return x

In [None]:
# Two output logits: index 0 for cat, index 1 for dog.
model = AlexNet(num_classes=2)

# Layer-by-layer summary for one batch of 3x227x227 inputs.
summary(model, input_size=(batch_size, 3, 227, 227))

# Run NN on CPU or GPU?
Move the model to the GPU when one is available; otherwise keep it on the CPU.

In [None]:
# Prefer the first CUDA GPU (index 0) when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(f'Detected device: {device}')

# Move the model's parameters and buffers to the selected device.
model.to(device)
print(f'Training on {device}!')

In [None]:
# Learning rate for the parameter updates
lr_rate = 1e-3 # i.e. 0.001, you can change it if needed
# Plain SGD updates the model's weights from the gradients.
optimizer = torch.optim.SGD(model.parameters(), lr=lr_rate) # momentum=0.5 decreased the accuracy by almost 2%
criterion = nn.CrossEntropyLoss()

EPOCHS = 50

# Number of samples behind each loader, used to turn summed losses into per-sample means.
n_train_samples = len(train_loader.dataset)
n_valid_samples = len(valid_loader.dataset)

for epoch in range(EPOCHS):
    # ---------------
    # TRAINING LOOP
    # ---------------
    # Training mode: dropout layers are active.
    model.train()
    training_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        # Clear the gradients left over from the previous step so they do not accumulate.
        optimizer.zero_grad()
        # Forward pass
        output = model(images)
        # Mean loss over the batch
        loss = criterion(output, labels)
        # Calculate gradients
        loss.backward()
        # Update weights
        optimizer.step()
        # criterion returns the batch mean; weight it by the batch size so the
        # last (possibly smaller) batch is accounted for correctly.
        training_loss += loss.item() * images.size(0)
    # Per-sample mean, directly comparable with the validation loss below.
    training_loss /= n_train_samples

    # -----------------
    # VALIDATION LOOP
    # -----------------
    # Evaluation mode: dropout is disabled, so predictions are deterministic.
    model.eval()
    validation_loss = 0.0
    correct_classified = 0
    with torch.no_grad(): # we don't need gradients in the validation phase
        for images, labels in valid_loader:
            images = images.to(device)
            labels = labels.to(device)
            # Forward pass
            output = model(images)
            # torch.max over dim=1 returns (largest logit, its index) for each sample;
            # the index is the predicted class label.
            _, predicted = torch.max(output, dim=1)
            loss = criterion(output, labels)
            validation_loss += loss.item() * images.size(0)

            # Compare the predictions with the ground truth (labels)
            correct_classified += int(predicted.eq(labels).sum().item())
    validation_loss /= n_valid_samples

    validation_accuracy = correct_classified / n_valid_samples
    print('Epoch {}'.format(epoch+1),
          "\t training_loss: ", training_loss,
          '\t validation_loss: ', validation_loss,
          '\t validation_accuracy: ', validation_accuracy)
    wandb.log({'training_loss': training_loss, 'validation_loss': validation_loss, 'validation_accuracy': validation_accuracy})