## Importing Libraries

Note: The dataset used in this notebook can be downloaded from <https://s3.amazonaws.com/content.udacity-data.com/nd089/Cat_Dog_data.zip>

In [None]:
%matplotlib inline
%config InlineBackend.figure_format = "retina"

import torch
import numpy as np
from torchvision import datasets, models
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
import matplotlib.pyplot as plt
from torch import optim
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

## Loading Data

In [None]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# fraction of the training folder held out for validation
valid_size = 0.2
# seed for the train/validation split so the split is identical on every run
split_seed = 42

# per-channel mean / std used to normalize the images
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# training pipeline: random augmentation, then tensor conversion and normalization
transform = transforms.Compose([transforms.RandomRotation(30),
                                transforms.RandomResizedCrop(224),
                                transforms.RandomHorizontalFlip(),
                                transforms.ToTensor(),
                                transforms.Normalize(norm_mean, norm_std)])

# evaluation pipeline: deterministic resize + center crop only, so validation
# and test metrics are not perturbed by random augmentation
eval_transform = transforms.Compose([transforms.Resize(255),
                                     transforms.CenterCrop(224),
                                     transforms.ToTensor(),
                                     transforms.Normalize(norm_mean, norm_std)])


# Update the data_dir variable with the location of the dataset
data_dir = r"/Users/mustafakhan/Documents/Jupyter Projects/Cat vs Dog - ML Project/Cat_Dog_data"

# The training folder is opened twice: once with augmentation (training) and
# once without (validation). Both views index the same files in the same order,
# so the index split below applies to either of them.
train_data = datasets.ImageFolder(root=data_dir + '/train', transform=transform)
valid_data = datasets.ImageFolder(root=data_dir + '/train', transform=eval_transform)
test_data = datasets.ImageFolder(root=data_dir + '/test', transform=eval_transform)


# shuffle the training indices with a private, seeded generator
# (the global NumPy random state is left untouched)
num_train = len(train_data)
indices = list(range(num_train))
np.random.RandomState(split_seed).shuffle(indices)
split = int(np.floor(valid_size * num_train))  # floor rounds the split point down

# indices[:split] -> validation (first `split` shuffled indices)
# indices[split:] -> training   (everything from `split` to the end)
train_idx, valid_idx = indices[split:], indices[:split]


# SubsetRandomSampler draws from the given index list randomly, without replacement
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)


# prepare data loaders
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=batch_size, sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=num_workers)

# print out some data stats (training / validation counts reflect the split)
print('Num training images: ', len(train_idx))
print('Num validation images: ', len(valid_idx))
print('Num test images: ', len(test_data))

## Visualize A Batch Of Training Data

In [None]:
# obtain one batch of training images
dataiter = iter(train_loader)
# the built-in next() works on every PyTorch version; the iterator's own
# .next() method is not available in current releases
images, labels = next(dataiter)
images = images.numpy()

# The labels are integer class indices (not one-hot vectors).
# This notebook treats 0 as cat and 1 as dog; map them to readable names.
readable_label = ['cat' if label == 0 else 'dog' for label in labels]

# Rescale the whole batch into [0, 1] so imshow does not clip the normalized values
images -= images.min()
images /= images.max()

# plot up to 20 images of the batch on a two-row grid, each with its label
n_show = min(len(images), 20)
n_cols = max((n_show + 1) // 2, 1)  # add_subplot needs integer grid sizes
fig = plt.figure(figsize=(25, 4))
for idx in range(n_show):
    ax = fig.add_subplot(2, n_cols, idx + 1, xticks=[], yticks=[])
    # images are stored channel-first (C, H, W); imshow expects (H, W, C).
    # A bare np.transpose would reverse all axes and draw the picture transposed.
    ax.imshow(np.transpose(images[idx], (1, 2, 0)))
    ax.set_title(readable_label[idx])

    # Note: to show the raw class index instead of the name, use
    # ax.set_title(str(labels[idx].item()))   (.item() extracts the value held by a tensor)

## Checking If GPU Is Available 

In [None]:
# Prefer the CUDA device when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report which device was selected
selected_device_name = str(device)
print("The ML code in this project will be trained on " + selected_device_name)

## Defining The ML Model

In [None]:
# The different models available can be found here: https://pytorch.org/docs/stable/torchvision/models.html.
# Evaluate `model` in a cell of its own to print the architecture of the
# pretrained network.

# Select a model whose weights will be transferred
model = models.vgg16(pretrained=True)

# The pretrained weights stay as they are; only a new classifier is trained.
# Freeze every existing parameter so no gradients are computed for them.
for param in model.parameters():
    param.requires_grad = False

# Input width of the stock classifier head (25088 for VGG16); read it from the
# model before the head is replaced instead of hard-coding it.
n_features = model.classifier[0].in_features

# New classifier head: two hidden layers with dropout and a 2-class
# log-probability output (pairs with NLLLoss below).
classifier = nn.Sequential(nn.Linear(n_features, 4096),
                           nn.ReLU(),
                           nn.Dropout(0.5),
                           nn.Linear(4096, 256),
                           nn.ReLU(),
                           nn.Dropout(0.5),
                           nn.Linear(256, 2),
                           nn.LogSoftmax(dim=1))


# Replace the classifier of the VGG16 architecture. The new layers are created
# after the freeze above, so their parameters still require gradients.
model.classifier = classifier

# Defining the loss (expects log-probabilities, which LogSoftmax provides)
criterion = nn.NLLLoss()

# Defining the optimizer: only the classifier parameters are updated
optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

# Sending model to CPU/GPU
model.to(device);

## A More Detailed Look At The Model Architecture

In [None]:
# Collect (element count, trainable?) for every parameter tensor in one pass
param_stats = [(p.numel(), p.requires_grad) for p in model.parameters()]

# Every parameter counts towards the total
total_params = sum(count for count, _ in param_stats)
print(f'{total_params:,} total parameters exist in the model.')

# Only parameters that still require gradients will be updated by training
total_trainable_params = sum(count for count, trainable in param_stats if trainable)
print(f'{total_trainable_params:,} parameters will be trained.')

# Display the model architecture as the cell output
model

## Testing Speed of GPU vs. CPU

In [None]:
import time

# Change the device list to ['cpu', 'cuda'] to compare CPU vs. GPU speed.
# With only a CPU available, the list below measures CPU training speed.

# number of training batches timed per device
n_timed_batches = 3

# The loop variable is deliberately NOT called `device`: reusing that name would
# overwrite the global torch.device that the training cells rely on.
for bench_device in ['cpu']:

    criterion = nn.NLLLoss()

    # Only train the classifier parameters, feature parameters are frozen
    optimizer = optim.Adam(model.classifier.parameters(), lr=0.001)

    model.to(bench_device)

    elapsed = 0.0      # accumulated forward/backward/step time over all timed batches
    batches_done = 0
    for inputs, labels in train_loader:

        # Move input and label tensors to the device being benchmarked
        inputs, labels = inputs.to(bench_device), labels.to(bench_device)

        tic = time.time()

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        # The classifier parameters require gradients, so the loss is already
        # attached to the graph; its requires_grad flag must not be forced.
        loss.backward()
        optimizer.step()

        # CUDA kernels run asynchronously; wait for them before reading the clock
        if str(bench_device).startswith('cuda'):
            torch.cuda.synchronize()
        elapsed += time.time() - tic

        batches_done += 1
        if batches_done == n_timed_batches:
            break

    print(f"Device = {bench_device}; Time per batch: {elapsed / max(batches_done, 1):.3f} seconds")

# Put the model back on the device selected for training
model.to(device);

## The above code tests the training speed of each listed device by training
## for n_timed_batches iterations and averaging the time it took to complete them.

## Training The ML Model

In [None]:
n_epochs = 1
steps = 0
epochs_no_improve = 0
n_epochs_stop = 5   # stop after this many consecutive epochs without improvement
log_every = 50      # print the step counter every `log_every` training batches

# initialize tracker for minimum validation loss
valid_loss_min = np.inf  # set initial "min" to infinity

for epoch in range(n_epochs):
    # running sums of the per-sample loss for this epoch
    train_loss = 0.0
    valid_loss = 0.0

    ###################
    # train the model #
    ###################
    model.train()  # enable dropout for training
    for data, target in train_loader:
        steps += 1
        if steps % log_every == 0:
            print(steps)
        # move data and target tensors to the default device
        data, target = data.to(device), target.to(device)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to the
        # trainable parameters. The loss is already attached to the graph through
        # the classifier, so its requires_grad flag must not be forced: doing so
        # either raises on a non-leaf tensor or hides a broken graph.
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update running training loss (batch mean * batch size)
        train_loss += loss.item() * data.size(0)

    ######################
    # validate the model #
    ######################
    model.eval()  # disable dropout for evaluation
    with torch.no_grad():  # no gradients needed: saves memory and time
        for data, target in valid_loader:
            # move data and target tensors to the default device
            data, target = data.to(device), target.to(device)
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the loss
            loss = criterion(output, target)
            # update running validation loss
            valid_loss += loss.item() * data.size(0)

    # calculate the average per-sample loss over the epoch
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)

    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch+1,
        train_loss,
        valid_loss
        ))

    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'model.pt')
        valid_loss_min = valid_loss
        epochs_no_improve = 0

    else:
        epochs_no_improve += 1
        # Check early stopping condition and actually leave the epoch loop
        if epochs_no_improve >= n_epochs_stop:
            print('Early stopping!')
            break

In [None]:
epochs = 1
steps = 0
running_loss = 0
print_every = 5   # evaluate on the test set and report every `print_every` training steps
for epoch in range(epochs):
    # Make sure dropout is active: an earlier cell may have left the model in eval mode
    model.train()
    for inputs, labels in train_loader:
        steps += 1
        # Move input and label tensors to the default device
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        logps = model(inputs)
        loss = criterion(logps, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        if steps % print_every == 0:
            print(steps)
            test_loss = 0
            n_correct = 0
            n_seen = 0
            model.eval()
            with torch.no_grad():
                # distinct names so the training batch above is not shadowed
                for test_inputs, test_labels in test_loader:
                    test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)
                    test_logps = model(test_inputs)
                    batch_loss = criterion(test_logps, test_labels)

                    test_loss += batch_loss.item()

                    # Calculate accuracy: the model outputs log-probabilities,
                    # so exp() gives probabilities and topk(1) the predicted class
                    ps = torch.exp(test_logps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == test_labels.view(*top_class.shape)
                    # count per sample so a smaller final batch is weighted correctly
                    n_correct += equals.sum().item()
                    n_seen += test_labels.size(0)

            accuracy = n_correct / max(n_seen, 1)
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. "
                  f"Test loss: {test_loss/len(test_loader):.3f}.. "
                  f"Test accuracy: {accuracy:.3f}")
            running_loss = 0
            model.train()