We'll build a binary classifier for images of cats and dogs, which we will later use for detection in videos. We'll use the dogs_vs_cats dataset from Kaggle, together with transfer learning on a ResNet50 model.

First, import the libraries.

In [None]:
import os
import cv2
import time
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
from torchvision.models import resnet50
from sklearn.model_selection import train_test_split
from PIL import Image

Load the images.

In [None]:
# Directory with the training images; file names start with "dog." or "cat.".
TRAIN_DIR = "/home/marija/dogs-vs-cats/train/"
# Directory with the test images.
TEST_DIR = "/home/marija/dogs-vs-cats/test1"

# File names (not full paths) found in each directory. Each directory is
# listed exactly once; os.listdir returns entries in arbitrary order.
imgs = os.listdir(TRAIN_DIR)
test_imgs = os.listdir(TEST_DIR)

Then we'll assign each image to the dog class or the cat class and map those classes to integer values.

In [None]:
# Lookup tables between class names and the integer ids used as labels.
class_to_int = {"dog": 0, "cat": 1}
int_to_class = {index: name for name, index in class_to_int.items()}

# The class of a training file is the part of its name before the first dot.
dogs_list = [name for name in imgs if name.partition(".")[0] == "dog"]
cats_list = [name for name in imgs if name.partition(".")[0] == "cat"]

Next, some data preprocessing and splitting off a validation dataset.

In [None]:
def train_transform():
    """Return the preprocessing pipeline used for training images.

    The pipeline applies random augmentation (horizontal flip, rotation,
    204-pixel crop) and then converts the image to a tensor.
    """
    augmentations = [
        T.RandomHorizontalFlip(p=0.5),
        T.RandomRotation(15),
        T.RandomCrop(204),
    ]
    # NOTE(review): mean 0 / std 1 makes Normalize a no-op; confirm whether
    # ImageNet statistics were intended for the pretrained backbone.
    to_tensor = [
        T.ToTensor(),
        T.Normalize((0, 0, 0), (1, 1, 1)),
    ]
    return T.Compose(augmentations + to_tensor)
    
def test_transform():
    """Return the preprocessing pipeline used for validation and test images.

    No augmentation is applied: the image is only converted to a tensor and
    passed through the same normalisation step as the training pipeline.
    """
    # NOTE(review): mean 0 / std 1 makes Normalize a no-op; confirm whether
    # ImageNet statistics were intended for the pretrained backbone.
    steps = [
        T.ToTensor(),
        T.Normalize((0, 0, 0), (1, 1, 1)),
    ]
    return T.Compose(steps)

In [None]:
class CatOrDog(Dataset):
    """Dataset of cat/dog image files addressed by file name.

    Each item is loaded from disk, converted to RGB and resized to 224x224
    before the optional ``transforms`` callable is applied.

    Args:
        imgs: Sequence of image file names (not full paths).
        class_to_int: Mapping from the class name (the part of the file name
            before the first dot) to its integer label.
        mode: ``"train"`` or ``"val"`` to get ``(image, label)`` pairs,
            ``"test"`` to get the image only.
        transforms: Callable applied to the PIL image, or ``None`` to return
            the resized PIL image unchanged.
        img_dir: Directory the files are read from. When ``None`` the
            module-level ``TEST_DIR`` is used in test mode and ``TRAIN_DIR``
            otherwise.
    """

    _MODES = ("train", "val", "test")

    def __init__(self, imgs, class_to_int, mode = "train", transforms = None,
                 img_dir = None):

        super().__init__()
        self.imgs = imgs
        self.class_to_int = class_to_int
        self.mode = mode
        self.transforms = transforms
        self.img_dir = img_dir

    def _image_dir(self):
        """Return the directory that holds the files for the current mode."""
        if self.img_dir is not None:
            return self.img_dir
        # Test images live in their own directory; reading them from
        # TRAIN_DIR would fail with a missing-file error.
        return TEST_DIR if self.mode == "test" else TRAIN_DIR

    def __getitem__(self, idx):
        """Return ``(image, label)`` in train/val mode, ``image`` in test mode.

        Raises:
            ValueError: If ``mode`` is not one of "train", "val" or "test".
        """
        # Fail loudly instead of silently returning None for an unknown mode.
        if self.mode not in self._MODES:
            raise ValueError(
                "mode must be one of {}, got {!r}".format(self._MODES, self.mode))

        image_name = self.imgs[idx]
        path = os.path.join(self._image_dir(), image_name)

        # The context manager closes the file handle once the pixels have been
        # copied; convert("RGB") guarantees three channels for the network.
        with Image.open(path) as handle:
            img = handle.convert("RGB").resize((224, 224))

        if self.transforms is not None:
            img = self.transforms(img)

        if self.mode == "test":
            return img

        label = self.class_to_int[image_name.split(".")[0]]
        # float32 so the label can be used directly as a BCELoss target.
        label = torch.tensor(label, dtype = torch.float32)
        return img, label

    def __len__(self):
        """Return the number of image files in the dataset."""
        return len(self.imgs)

In [None]:
# Hold out 25% of the training file names for validation; the split is random
# and not seeded, so it differs from run to run.
train_imgs, val_imgs = train_test_split(imgs, test_size=0.25)

In [None]:
# One dataset per split; only the training split gets random augmentation.
train_data = CatOrDog(train_imgs, class_to_int, mode = "train", transforms = train_transform())
val_data = CatOrDog(val_imgs, class_to_int, mode = "val", transforms = test_transform())
test_data = CatOrDog(test_imgs, class_to_int, mode = "test", transforms = test_transform())

# Shuffle only the training data so every epoch sees a different batch order.
trainloader = DataLoader(dataset = train_data, num_workers = 4, batch_size = 32, shuffle = True)

# Evaluation loaders keep a fixed order: shuffling does not help validation,
# and for the test set it would break the correspondence between the
# predictions and the file names in test_imgs.
valoader = DataLoader(dataset = val_data, num_workers = 4, batch_size = 32, shuffle = False)

testloader = DataLoader(dataset = test_data, num_workers = 4, batch_size = 32, shuffle = False)

Define a device.

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

Functions for accuracy, training and validating.

In [None]:
def accuracy(predictions, trues):
    """Return the percentage of predictions that match the true labels.

    A prediction counts as class 1 when it is ``>= 0.5`` and as class 0
    otherwise.

    Args:
        predictions: Probabilities as a torch tensor, NumPy array or sequence
            of numbers. Any shape is accepted; it is flattened first.
        trues: True labels (0 or 1) with the same number of elements as
            ``predictions``.

    Returns:
        Accuracy in percent as a float, or ``0.0`` for empty input.
    """
    def _flatten(values):
        # Tensors may live on the GPU and carry gradients; detach and copy
        # them to host memory once instead of indexing element by element.
        if isinstance(values, torch.Tensor):
            values = values.detach().cpu().numpy()
        return np.asarray(values).reshape(-1)

    predicted = (_flatten(predictions) >= 0.5).astype(int)
    expected = _flatten(trues)

    # Guard against dividing by zero on an empty batch.
    if predicted.size == 0:
        return 0.0

    return float(np.mean(predicted == expected) * 100)

In [None]:
def training(trainloader):
    """Train the global ``model`` for one pass over ``trainloader``.

    Uses the module-level ``model``, ``optimizer``, ``criterion`` and
    ``device``, and appends the epoch results to ``train_logs``.

    Args:
        trainloader: Iterable yielding ``(images, labels)`` batches.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the mean of the per-batch losses
        and the mean of the per-batch accuracies (in percent).
    """
    # Make sure layers such as BatchNorm run in training mode, even if the
    # model was switched to eval mode earlier.
    model.train()

    batch_losses = []
    batch_accs = []

    for images, labels in trainloader:

        images = images.to(device)
        labels = labels.to(device)
        # Add a trailing dimension so the targets match the model output.
        labels = labels.reshape((labels.shape[0], 1))

        optimizer.zero_grad()
        predictions = model(images)
        batch_loss = criterion(predictions, labels)
        batch_loss.backward()
        optimizer.step()

        batch_losses.append(batch_loss.item())
        # The metric does not need gradients, so detach before computing it.
        batch_accs.append(accuracy(predictions.detach(), labels))

    epoch_loss = np.mean(batch_losses)
    epoch_acc = np.mean(batch_accs)

    train_logs["loss"].append(epoch_loss)
    train_logs["accuracy"].append(epoch_acc)

    return epoch_loss, epoch_acc

In [None]:
def validating(valoader, best_acc):
    """Evaluate the global ``model`` on ``valoader`` and checkpoint the best.

    Uses the module-level ``model``, ``criterion`` and ``device``, and appends
    the epoch results to ``val_logs``. When the epoch accuracy beats
    ``best_acc`` the model weights are saved to ``tfl_model.pth``.

    Args:
        valoader: Iterable yielding ``(images, labels)`` batches.
        best_acc: Best validation accuracy (in percent) seen so far.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, best_acc)`` where ``best_acc`` is the
        updated best accuracy.
    """
    batch_losses = []
    batch_accs = []

    # Evaluate in eval mode and restore the previous mode afterwards so the
    # caller's training loop is not affected.
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for images, labels in valoader:

                images = images.to(device)
                labels = labels.to(device)
                # Add a trailing dimension so the targets match the output.
                labels = labels.reshape((labels.shape[0], 1))

                predictions = model(images)
                batch_losses.append(criterion(predictions, labels).item())
                batch_accs.append(accuracy(predictions, labels))
    finally:
        model.train(was_training)

    epoch_loss = np.mean(batch_losses)
    epoch_acc = np.mean(batch_accs)

    val_logs["loss"].append(epoch_loss)
    val_logs["accuracy"].append(epoch_acc)

    # Compare against the best_acc argument; the original referenced an
    # undefined name here and raised NameError.
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        torch.save(model.state_dict(),"tfl_model.pth")

    return epoch_loss, epoch_acc, best_acc

As we are using a pretrained ResNet50 model, only the last layer should be trained.

In [None]:
# Start from ResNet50 with pretrained weights.
model = resnet50(pretrained = True)

# Freeze the pretrained backbone so that only the new classification head,
# created below with trainable parameters, is updated during training.
model.requires_grad_(False)

# Replace the final fully connected layer with a single sigmoid output,
# giving one probability per image for the binary cat/dog decision.
model.fc = nn.Sequential(
    nn.Linear(2048, 1, bias = True),
    nn.Sigmoid())

The Adam optimizer gives good results for binary classification, as does the BCELoss() function. The learning rate should be small because we don't want to change our weights too much.

In [None]:
# Binary cross-entropy on the sigmoid output of the model.
criterion = nn.BCELoss()

# Adam with a small learning rate, plus a schedule that halves the learning
# rate after every 5 scheduler steps.
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)

# Per-epoch history filled in by training() and validating().
train_logs = {metric: [] for metric in ("loss", "accuracy")}
val_logs = {metric: [] for metric in ("loss", "accuracy")}

model.to(device)

In [None]:
# Best validation accuracy seen so far; validating() saves a checkpoint
# whenever it is exceeded.
best_acc = 0
for epoch in range(3):

    loss, acc = training(trainloader)

    print("\nTraining")
    print(f"Epoch {epoch + 1}")
    print(f"Loss : {round(loss, 4)}")
    print(f"Acc : {round(acc, 4)}")

    loss, acc, best_acc = validating(valoader, best_acc)

    print("\nValidating")
    print(f"Epoch {epoch + 1}")
    print(f"Loss : {round(loss, 4)}")
    print(f"Acc : {round(acc, 4)}")

    # Advance the learning-rate schedule once per epoch, after the optimizer
    # updates; without this call the StepLR schedule never takes effect.
    lr_scheduler.step()