<a href="https://colab.research.google.com/github/shazzad-hasan/practice-deep-learning-with-pytorch/blob/main/image_classification/cat_vs_dog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In this notebook, we will train a ConvNet to classify whether an image contains a dog or a cat, using the [Dogs vs. Cats](https://www.kaggle.com/c/dogs-vs-cats/overview) dataset. The dataset contains 25,000 images of dogs and cats (12,500 from each class).

The process will be broken down into the following steps:

    1. Load and visualize the dataset
    2. Define a pre-trained model
        1. Load in a pre-trained model
        2. Freeze all the parameters, so that the network acts as a fixed feature extractor
        3. Remove the last layer
        4. Replace the last layer with a linear classifier
    3. Define a loss function and optimizer
    4. Train the model on the training dataset
    5. Evaluate the performance of the trained model on the test dataset



In [None]:
# upload the Kaggle API key (kaggle.json) from your local machine
from google.colab import files
# files.upload() returns the contents of the uploaded files; bind the result to
# a name so the notebook does not echo the API key into the saved cell output
uploaded = files.upload()

In [None]:
# make a kaggle dir, copy the API key to it
# and make sure the file in only readable by yourself (chmod 600)
!mkdir ~/.kaggle 
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [None]:
# use the Kaggle command-line client to download the competition archive
# into the current working directory
# (the API key was copied to ~/.kaggle/ in the cell above)
!kaggle competitions download -c dogs-vs-cats

In [None]:
# uncompress the dataset
!unzip -qq dogs-vs-cats
!unzip -qq train.zip
!unzip -qq test1.zip

In [None]:
# import required libraries
import torch
import torchvision

import os
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# detect whether a CUDA device can be used
train_on_gpu = torch.cuda.is_available()

# report the outcome
print("CUDA is available!" if train_on_gpu else "CUDA is not available!")

# device that the model and the batches are moved to later in the notebook
device = torch.device("cuda" if train_on_gpu else "cpu")

In [None]:
# sanity check: number of files extracted into the train and test1 folders
# (relative paths, consistent with the unzip and copy cells, instead of a
# hard-coded absolute /content prefix)
print(len(os.listdir("train")))
print(len(os.listdir("test1")))

In [None]:
# import required libraries
import os, shutil, pathlib

# paths to the dirs where the original dataset was uncompressed
# (relative to the current working directory)
original_train_dir = pathlib.Path("train")
original_test_dir = pathlib.Path("test1")

# base dir of the smaller dataset; the subset folders are created beneath it
new_base_dir = pathlib.Path("cats_vs_dogs")

def make_train_valid_subset(subset_name, original_dir, new_base_dir, start_index, end_index):
  """Copy a slice of the labeled images into per-class subset folders.

  For each category ("cat", "dog") the files named `{category}.{i}.jpg` with
  start_index <= i < end_index are copied from `original_dir` into
  `new_base_dir / subset_name / category`.

  Args:
    subset_name: name of the subset folder to create (e.g. "train").
    original_dir: pathlib.Path of the folder holding the source images.
    new_base_dir: pathlib.Path under which the subset folder is created.
    start_index: first image index to copy (inclusive).
    end_index: last image index to copy (exclusive).

  Raises:
    FileNotFoundError: if one of the expected source images does not exist.
  """
  for category in ("cat", "dog"):
    target_dir = new_base_dir / subset_name / category
    # exist_ok keeps the cell re-runnable: an already existing folder is reused
    os.makedirs(target_dir, exist_ok=True)
    for i in range(start_index, end_index):
      fname = f"{category}.{i}.jpg"
      shutil.copyfile(src=original_dir / fname, dst=target_dir / fname)

def make_test_subset(subset_name, original_dir, new_base_dir, start_index, end_index):
  """Copy a slice of the unlabeled images into per-class subset folders.

  The files named `{i}.jpg` with start_index <= i < end_index are copied from
  `original_dir` into `new_base_dir / subset_name / category` for every
  category ("cat", "dog").

  Note: the SAME files end up in both category folders, so the folder name
  says nothing about the content of an image. A dataset that derives labels
  from folder names must not use the result to measure accuracy.

  Args:
    subset_name: name of the subset folder to create (e.g. "test").
    original_dir: pathlib.Path of the folder holding the source images.
    new_base_dir: pathlib.Path under which the subset folder is created.
    start_index: first image index to copy (inclusive).
    end_index: last image index to copy (exclusive).

  Raises:
    FileNotFoundError: if one of the expected source images does not exist.
  """
  fnames = [f"{i}.jpg" for i in range(start_index, end_index)]
  for category in ("cat", "dog"):
    target_dir = new_base_dir / subset_name / category
    # exist_ok keeps the cell re-runnable: an already existing folder is reused
    os.makedirs(target_dir, exist_ok=True)
    for fname in fnames:
      shutil.copyfile(src=original_dir / fname, dst=target_dir / fname)

# make 3 subsets: train, validation, test
make_train_valid_subset("train", original_train_dir, new_base_dir, start_index=0, end_index=10000)
make_train_valid_subset("valid", original_train_dir, new_base_dir, start_index=10000, end_index=12000)
# The test subset has to come from labeled images. make_test_subset copies each
# unlabeled test1 image into BOTH the cat and the dog folder, so a folder-based
# dataset would see every picture once per class and the measured accuracy
# would be meaningless. The labeled images 12000-12499 of each class are not
# used for training or validation, so they serve as the held-out test set.
make_train_valid_subset("test", original_train_dir, new_base_dir, start_index=12000, end_index=12500)

In [None]:
# locations of the three subset folders below new_base_dir, as plain strings
train_dir, valid_dir, test_dir = (
    os.path.join(new_base_dir, subset) for subset in ("train", "valid", "test")
)

In [None]:
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler

# preprocessing shared by all three splits: resize, convert to a tensor and
# normalize every channel with a fixed mean / std
# NOTE(review): 244 x 244 may be a typo for the more common 224 x 224 -- confirm
data_transform = transforms.Compose([
    transforms.Resize((244, 244)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# one ImageFolder dataset per split, all using the same transform
train_data, valid_data, test_data = (
    datasets.ImageFolder(split_dir, transform=data_transform)
    for split_dir in (train_dir, valid_dir, test_dir)
)

# report the size of every split
print("Number of training images: ", len(train_data))
print("Number of validation images: ", len(valid_data))
print("Number of test images: ", len(test_data))

In [None]:
# define dataloader parameters

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 32

# prepare the data loaders: only the training data is reshuffled, while the
# validation and test data are read in a fixed order so that evaluation runs
# are repeatable (the averaged metrics do not depend on the order)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=num_workers, shuffle=True)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=num_workers, shuffle=False)

# image classes in the dataset
classes = train_data.classes
print(classes)
num_classes = len(classes)

In [None]:
# visualize a batch of training data

def imshow(img):
  """Show one normalized, channels-first image array with matplotlib.

  The per-channel normalization (mean / std below) is inverted and the result
  is clipped to [0, 1] before it is handed to plt.imshow.
  """
  channel_mean = np.array([0.485, 0.456, 0.406])
  channel_std = np.array([0.229, 0.224, 0.225])
  # move the channel axis to the end, as expected by plt.imshow
  channels_last = np.transpose(img, (1, 2, 0))
  # unnormalize and keep the values inside the displayable range
  restored = np.clip(channel_std * channels_last + channel_mean, 0, 1)
  plt.imshow(restored)

# obtain one batch of training images
dataiter = iter(train_loader)
# use the built-in next(): the iterator's own .next() method is not available
# in recent PyTorch releases
images, labels = next(dataiter)
# convert images to numpy for display
images = images.numpy()

# plot the images in the batch along with the corresponding labels
fig = plt.figure(figsize=(10,4))
# display 10 images in a 2 x 5 grid
for ind in range(10):
  # the grid dimensions must be integers, hence floor division instead of 10/2
  ax = fig.add_subplot(2, 10//2, ind+1, xticks=[], yticks=[])
  imshow(images[ind])
  # .item() turns the label tensor element into a plain int list index
  ax.set_title(classes[labels[ind].item()])

In [None]:
from torchvision import models

# load a ResNet-34 with pre-trained weights
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True` -- confirm against the installed version
model = models.resnet34(pretrained=True)

# print out the model structure
print(model)

In [None]:
# input and output size of the current final layer, one value per line
print(model.fc.in_features, model.fc.out_features, sep="\n")

In [None]:
# Freeze the pre-trained weights: every parameter whose name does not contain
# "bn" stops receiving gradients, the remaining ones stay trainable.
# (Freezing every parameter instead would turn the network into a fixed
# feature extractor.)
# NOTE(review): the check is purely name-based; a normalization layer that is
# registered under a name without "bn" would be frozen as well -- confirm
# against the printed model structure.
for name, param in model.named_parameters():
  if "bn" in name:
    continue
  param.requires_grad = False

In [None]:
import torch.nn as nn

# layers of the new classification head: the features that fed the original
# final layer go through a hidden layer of 512 units and end in one output
# per class
head_layers = [
    nn.Linear(model.fc.in_features, 512),
    nn.ReLU(),
    nn.Dropout(),
    nn.Linear(512, num_classes),
]

# swap the head in for the original final classification layer
model.fc = nn.Sequential(*head_layers)

# move model to the right device
model.to(device)

In [None]:
import torch.optim as optim

# specify loss function
criterion = nn.CrossEntropyLoss() # categorical cross-entropy

# specify optimizer
# all model parameters are handed to Adam, including the ones whose
# requires_grad flag was switched off earlier
params = model.parameters()
optimizer = optim.Adam(params, lr=0.001)
# learning rate scheduler: multiplies the learning rate by gamma=0.1 after
# every 2 scheduler steps
# NOTE(review): the training loop steps the scheduler once per epoch, so the
# learning rate shrinks by a factor of 10 every two epochs -- confirm that
# such a fast decay is intended for the configured number of epochs
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)

In [None]:
# number of epochs to train the model
num_epochs = 30
# per-epoch history of the average training and validation loss
train_loss, valid_loss = [], []
# initialize tracker for min validation loss
min_valid_loss = np.inf

for epoch in range(num_epochs):
  # sums of the per-batch losses, reset at the start of every epoch
  running_train_loss = 0.0
  running_valid_loss = 0.0

  # --------- train the model -----------------
  # set model to training mode
  model.train()

  for batch_idx, data in enumerate(train_loader):
    # get the inputs, data is a list of [inputs, targets]
    inputs, targets = data
    # move tensors to the right device
    inputs, targets = inputs.to(device), targets.to(device)
    # clear the gradients of all optimized variables
    optimizer.zero_grad()
    # forward pass
    outputs = model(inputs)
    # calculate the batch loss
    loss = criterion(outputs, targets)
    # backward pass
    loss.backward()
    # update parameters
    optimizer.step()
    # update training loss
    running_train_loss += loss.item()

  # update learning rate (once per epoch, after all optimizer steps of the epoch)
  lr_scheduler.step()

  # ---------- validate the model ------------
  # set the model to evaluation mode
  model.eval()

  # since we're not training, we don't need to calculate the gradients for our outputs
  with torch.no_grad():
    for batch_idx, (inputs, targets) in enumerate(valid_loader):
      # move tensors to the right device
      inputs, targets = inputs.to(device), targets.to(device)
      # forward pass
      outputs = model(inputs)
      # calculate the batch loss
      loss = criterion(outputs, targets)
      # update validation loss
      running_valid_loss += loss.item()

  # calculate average loss over an epoch: the summed batch losses are divided
  # by the number of batches of the respective loader
  running_train_loss = running_train_loss / len(train_loader)
  running_valid_loss = running_valid_loss / len(valid_loader)

  train_loss.append(running_train_loss)
  valid_loss.append(running_valid_loss)

  print("Epoch: {} \tTraining loss: {:.6f} \tValidation loss: {:.6f}".format(epoch+1, running_train_loss, running_valid_loss))

  # save the model weights whenever the validation loss has not increased
  # compared to the best value seen so far ("model.pt" is overwritten each time)
  if running_valid_loss <= min_valid_loss:
    print("Validation loss decressed ({:.6f} --> {:.6f}). Saving model ...".format(min_valid_loss, running_valid_loss))
    torch.save(model.state_dict(), "model.pt")
    min_valid_loss = running_valid_loss

print("Finished training!")

In [None]:
# x axis: epoch numbers starting at 1
epochs = range(1, num_epochs+1)

# draw both loss curves on the current axes: dots for the training loss,
# a solid line for the validation loss
ax = plt.gca()
ax.plot(epochs, train_loss, 'bo', label='Training loss')
ax.plot(epochs, valid_loss, 'b', label='Validation loss')

# axis labels, title and legend
ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.set_title('Training and validation loss')
ax.legend(loc='upper right')
plt.show()

### Evaluate the performance of the trained model on the test dataset

In [None]:
# load the model with the lowest validation loss
# map_location places the stored tensors on the device chosen earlier in the
# notebook, so a checkpoint written on a GPU can also be restored in a
# CPU-only session
model.load_state_dict(torch.load('model.pt', map_location=device))

In [None]:
# track test loss and accuracy
test_loss = 0.0
class_correct = [0 for _ in range(len(classes))]
class_total = [0 for _ in range(len(classes))]

model.eval()

with torch.no_grad():
  for inputs, targets in test_loader:
    inputs, targets = inputs.to(device), targets.to(device)
    # forward pass
    outputs = model(inputs)
    # calculate the batch loss (the criterion averages over the batch)
    loss = criterion(outputs, targets)
    # accumulate the SUMMED loss of the batch, so that dividing by the number
    # of samples below yields a true per-sample average
    test_loss += loss.item() * inputs.size(0)
    # convert output scores to predicted class
    _, predictions = torch.max(outputs, 1)
    # compare predictions to true labels; plain Python lists keep the
    # bookkeeping below valid for any batch size, including a batch of one
    correct = predictions.eq(targets.view_as(predictions)).cpu().tolist()
    batch_labels = targets.cpu().tolist()
    # calculate test accuracy for each class
    for label, is_correct in zip(batch_labels, correct):
      class_correct[label] += int(is_correct)
      class_total[label] += 1

# average test loss per sample
test_loss = test_loss / len(test_loader.dataset)
print("Test loss (overall): {:6f}\n".format(test_loss))

# print test accuracy for each class (classes without samples are skipped)
for i in range(len(classes)):
  if class_total[i] > 0:
    accuracy = (100 * class_correct[i]) / class_total[i]
    print(f'Test accuracy of {classes[i]:10s}: {accuracy:.1f} % ({np.sum(class_correct[i])}/{np.sum(class_total[i])})')

# overall test accuracy
test_acc = 100 * np.sum(class_correct) / np.sum(class_total)
print("\nTest accuracy (overall): %2d%% (%2d/%2d)" % ( 
      test_acc, np.sum(class_correct), np.sum(class_total)))

In [None]:
# per-class counters: correct predictions and total number of samples
correct_pred = dict.fromkeys(classes, 0)
total_pred = dict.fromkeys(classes, 0)

# evaluation only: switch to eval mode and disable gradient tracking
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        # index of the highest score along dimension 1 is the predicted class
        predictions = torch.max(outputs, 1).indices
        # update the counters of the true class of every sample
        for label, prediction in zip(labels, predictions):
            classname = classes[label]
            total_pred[classname] += 1
            if label == prediction:
                correct_pred[classname] += 1


# print accuracy for each class
for classname, correct_count in correct_pred.items():
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname:5s} is {accuracy:.1f} %')