In [8]:
import numpy as np
from glob import glob
import cv2
import os
import torch
from torchvision import datasets
import torchvision.models as models
from PIL import Image, ImageFile
import torch.optim as optim
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt                        
%matplotlib inline                               

Download link for dataset: [dog dataset](https://s3-us-west-1.amazonaws.com/udacity-aind/dog-project/dogImages.zip)

Unzip and place it at /dogImages.

In [7]:
# load filenames for human and dog images
human_files = np.array(glob("lfw/*/*"))
dog_files = np.array(glob("dogImages/*/*/*"))

# print number of images in each dataset
print('%d dog images.' % len(dog_files))

8351 dog images.


In [9]:
use_cuda = 1

### Now we will try different approaches/models:


## 1. Pre-trained VGG16

In [9]:
VGG16 = models.vgg16(pretrained=True)
# move model to GPU if CUDA is available
if use_cuda:
    VGG16 = VGG16.cuda()

In [11]:
ImageFile.LOAD_TRUNCATED_IMAGES = True

def VGG16_predict(img_path):
    ## Load and pre-process an image from the given img_path
    ## Return index fo predicted class
    transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    ])
    image = Image.open(img_path)
    image = transform(image)  #.cuda()
    pred = VGG16(image[None, ...])  # because batch is expected
    return torch.argmax(pred) - 1 # predicted class index

pred = VGG16_predict('dogImages/train/001.Affenpinscher/Affenpinscher_00001.jpg')
pred

tensor(251, grad_fn=<SubBackward0>)

In [12]:
### returns "True" if a dog is detected in the image stored at img_path
def dog_detector(img_path):
    pred = VGG16_predict(img_path)
    if 151 <= pred <= 268:
        return True
    else:
        return False

In [13]:
# Test the performance of the face_detector algorithm 
# on the images in human_files_short and dog_files_short.
human_count = 0
for human_file in human_files_short:
    out = dog_detector(human_file)
    if out:
        human_count += 1
print(human_count)

dog_count = 0
for dog_file in dog_files_short:
    out = dog_detector(dog_file)
    if out:
        dog_count += 1
print(dog_count)

0
92


## 2. CNN from Scratch

In [5]:
# data loaders for training, validation, and test sets
train_files = np.array(glob("dogImages/train/*/*"))
test_files = np.array(glob("dogImages/test/*/*"))
val_files = np.array(glob("dogImages/valid/*/*"))

IMG_SIZE = 224

train_transform = transforms.Compose([transforms.RandomRotation(30),
                                      transforms.Resize(IMG_SIZE),
                                      transforms.CenterCrop(IMG_SIZE),
                                      transforms.RandomVerticalFlip(), # randomly flip and rotate
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                 std=[0.229, 0.224, 0.225])
    ])

test_transform = transforms.Compose([transforms.Resize(IMG_SIZE),
                                     transforms.CenterCrop(IMG_SIZE),
                                     transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                                 std=[0.229, 0.224, 0.225])
    ])

training = datasets.ImageFolder("dogImages/train", transform=train_transform)
validation = datasets.ImageFolder("dogImages/valid", transform=test_transform)
testing = datasets.ImageFolder("dogImages/test",transform=test_transform)

NUM_WORKERS = 8
BATCH_SIZE = 32

train_loader = torch.utils.data.DataLoader(training,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True,
                                          num_workers=NUM_WORKERS)
test_loader = torch.utils.data.DataLoader(testing,
                                          batch_size=BATCH_SIZE,
                                          shuffle=True,
                                          num_workers=NUM_WORKERS)
val_loader = torch.utils.data.DataLoader(validation,
                                         batch_size=BATCH_SIZE,
                                         shuffle=True,
                                         num_workers=NUM_WORKERS)

In [6]:
# define the CNN architecture
L1_IN = 3
L1_OUT = 16
L2_OUT = L1_OUT * 2
L3_OUT = L2_OUT * 2
FLATTEN = L3_OUT*28*28
FC = 800
BREEDS = len(training.classes)

class Net(nn.Module):
    def __init__(self):
        super().__init__()
              # w*h is constant
        self.conv1 = nn.Conv2d(L1_IN,L1_OUT,3,padding=1) #3,16
        self.conv2 = nn.Conv2d(L1_OUT,L2_OUT,3,padding=1) #16,32
        self.conv3 = nn.Conv2d(L2_OUT,L3_OUT,3,padding=1) #32,64
        
        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(FLATTEN, FC)
        self.fc2 = nn.Linear(FC, BREEDS)
        
        self.dropout = nn.Dropout(0.2)
        
        self.batch_norm = nn.BatchNorm1d(FC)
    
    def forward(self, x):
        ## Define forward behavior
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        
        # flatten image
        x = x.view(x.size(0), -1)
        # dropout layer
        x = self.dropout(x)
        # activation for first layer
        x = self.dropout(F.relu(self.batch_norm(self.fc1(x))))
        x = self.fc2(x)
        return x


# instantiate the CNN
model_scratch = Net()

# move tensors to GPU if CUDA is available
if use_cuda:
    model_scratch.cuda()
model_scratch

Net(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=50176, out_features=800, bias=True)
  (fc2): Linear(in_features=800, out_features=133, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
  (batch_norm): BatchNorm1d(800, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [11]:
criterion_scratch = nn.CrossEntropyLoss()

optimizer_scratch = optim.Adam(model_scratch.parameters(), lr=0.03)

In [None]:
# Set PIL to be tolerant of image files that are truncated.
ImageFile.LOAD_TRUNCATED_IMAGES = True

epochs = 50

def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf
    
    print("Started Training...")
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # TRAINING #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            # update training loss
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
    
        ######################    
        # VALIDATION #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(loaders['valid']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update avg. validation loss
            with torch.no_grad():
                output = model(data)
                loss = criterion(output, target)
                # update avg validation loss 
                valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))
                
        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))

        ## save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss 
    
    # return trained model
    print("Finished training")
    
    return model

model = model_scratch
save_path = "model_scratch.pt"

# train the model
model_scratch = train(epochs, loaders_scratch, model_scratch, optimizer_scratch, criterion_scratch, use_cuda, save_path)

# load the model that got the best validation accuracy
model_scratch.load_state_dict(torch.load(save_path))

In [None]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# call test function    
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)

## 3. CNN using Transfer Learning

In [None]:
# data loaders same as above in 2.!
loaders_transfer = loaders_scratch

In [None]:
# I chose ResNet50, but any other deep CNN may be chosen
model_transfer = models.resnet50(pretrained=True)
classifierInputs = model_transfer.fc.in_features

print(classifierInputs)

In [None]:
# freeze layers
for param in model_transfer.parameters():
    param.requires_grad = False
    
# add output layer
model_transfer.fc = nn.Linear(in_features=classifierInputs,
                              out_features=BREEDS,
                              bias=True)

fc_parameters = model_transfer.fc.parameters()
# unfreeze last (new) layer
for param in fc_parameters:
    param.requires_grad = True
    
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model_transfer.to(device)

if use_cuda:
    model_transfer = model_transfer.cuda()

In [None]:
criterion_transfer = nn.CrossEntropyLoss()
optimizer_transfer = optimizer.Adam(model_transfer.parameters(),lr=0.001)

In [None]:
epochs = 20
save_transfer = 'model_transfer.pt'
# train model using function from above
model_transfer = train(epochs, loaders_transfer, model_transfer, optimizer_transfer, criterion_transfer, use_cuda, save_transfer)

# load  model with best validation accuracy
model_transfer.load_state_dict(torch.load(save_transfer))

In [None]:
# test model using function from above
test(loaders_transfer, model_transfer, criterion_transfer, use_cuda)