In [1]:
%matplotlib inline  
import cv2   
import time
import numpy as np
from glob import glob
import matplotlib.pyplot as plt                        
from tqdm import tqdm                       
from PIL import Image
import torchvision.transforms as transforms
import torchvision.models as models
from torchvision import datasets
from torch.autograd import Variable
import os
import socket
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True
import math
ceil = math.ceil

In [2]:
import torch
print(torch.__version__)
import torchvision
print(torchvision.__version__)

1.0.0
0.2.1


In [3]:
CUDA = torch.cuda.is_available()
if CUDA:
    print('GPU is available as:',CUDA)
else:
    print('GPU is available as:',CUDA)

GPU is available as: True


In [4]:
# Load dog files
if socket.gethostname() == 'zT5610':
    dog_files = np.array(glob("dogImages/*/*/*"))
else:
    dog_files = np.array(glob("/data/dog_images/*/*/*"))

# print number of images in each dataset
print('There are %d total dog images.' % len(dog_files))

There are 8351 total dog images.


In [5]:
dog_classes = []
for i in range(dog_files.shape[0]):
    s = dog_files[i-1]
    s = s[0:s.rfind('.jpg')]
    s = s[0:s.rfind("_")]
    s = s[0:s.rfind("/")]
    s = s[1+s.find("."):len(s)]
    dog_classes.append(s)
dog_classes = list(set(dog_classes))
print(len(dog_classes))

133


In [6]:
# Hyperparameters
batch_size = 30
num_workers = 0
num_classes = len(dog_classes)
input_size = 224       #Number of input neurons (image pixels)
num_epochs = 10        #Number of epochs
learning_rate = 0.01  #How fast the model learns

In [7]:
data_transform = transforms.Compose([transforms.Resize(224),
                                     transforms.RandomResizedCrop(224),
                                     transforms.ToTensor()
                                    ])

### TODO: Write data loaders for training, validation, and test sets
## Specify appropriate transforms, and batch_sizes
if socket.gethostname() == 'zT5610': # In my local environment
    train_dir, validation_dir, test_dir = "dogImages/train/", "dogImages/valid/", "dogImages/test/"
else: # In Udacity environment
    train_dir, validation_dir, test_dir = "/data/dog_images/train/", "/data/dog_images/valid/", "/data/dog_images/test/"

train_data = datasets.ImageFolder(train_dir, transform=data_transform)
validation_data = datasets.ImageFolder(validation_dir, transform=data_transform)
test_data = datasets.ImageFolder(test_dir, transform=data_transform)

In [8]:
train_loader = torch.utils.data.DataLoader(train_data, 
                                           batch_size=batch_size,
                                           num_workers=num_workers, 
                                           shuffle=True)
validation_loader = torch.utils.data.DataLoader(validation_data, 
                                                batch_size=batch_size,
                                                num_workers=num_workers, 
                                                shuffle=True)
test_loader = torch.utils.data.DataLoader(test_data, 
                                          batch_size=batch_size, 
                                          num_workers=num_workers, 
                                          shuffle=True)

In [9]:
print('Train Data Points:\t\t',str(len(train_data.imgs)))
print('Validation Data Points:\t\t',str(len(validation_data.imgs)))
print('Test Data Points:\t\t',str(len(test_data.imgs)))
print('Number of Classes:\t\t',str(num_classes))

Train Data Points:		 6680
Validation Data Points:		 835
Test Data Points:		 836
Number of Classes:		 133


In [10]:
#data.shape

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, bias=True)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, bias=True)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, bias=True)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, bias=True)
        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, bias=True)
        self.fc1 = nn.Linear(in_features=256 * 6 * 6, out_features=133, bias=True)
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2,ceil_mode=True)
        self.dropout = nn.Dropout(p=0.3)
        self.conv_bn1 = nn.BatchNorm2d(num_features=224,eps=1e-05)
        self.conv_bn2 = nn.BatchNorm2d(num_features=16)
        self.conv_bn3 = nn.BatchNorm2d(num_features=32)
        self.conv_bn4 = nn.BatchNorm2d(num_features=64)
        self.conv_bn5 = nn.BatchNorm2d(num_features=128)
        self.conv_bn6 = nn.BatchNorm2d(num_features=256)
    
    def forward(self, x):
        
        x = F.softmax(self.conv1(x))
        x = self.max_pool(x)
        x = self.conv_bn2(x)
        
        x = F.softmax(self.conv2(x))
        x = self.max_pool(x)
        x = self.conv_bn3(x)
        
        x = F.softmax(self.conv3(x))
        x = self.max_pool(x)
        x = self.conv_bn4(x)
        
        x = F.softmax(self.conv4(x))
        x = self.max_pool(x)
        x = self.conv_bn5(x)
        
        x = F.softmax(self.conv5(x))
        x = self.max_pool(x)
        x = self.conv_bn6(x)
        
        x = x.view(-1, 256 * 6 * 6)
        
        x = self.dropout(x)
        x = self.fc1(x)
        return x

In [11]:
class Net2(nn.Module):
    def __init__(self):
        super(Net2, self).__init__()       
        # Hyperparameters
        c1_in,c1_out,c1_k,c1_s,c1_p = train_data[0][0].size()[0],16,5,1,2
        p1_k = 2
        
        c2_in,c2_out,c2_k,c2_s,c2_p = c1_out,c1_out*2,5,1,2
        p2_k = 2
        
        w,h = train_data[0][0].size()[1],train_data[0][0].size()[2]
        
        o1 = ceil((w-c1_k+2*c1_p)/c1_s+1)
        m1 = ceil(o1/p1_k)
        o2 = ceil((m1-c2_k+2*c2_p)/c2_s+1)
        m2 = ceil(o2/p2_k)
        
        f1_in,f1_out = c2_out*m2*m2,ceil(c2_out*m2*m2/28)
        
        f2_in,f2_out = f1_out,num_classes
        
        # Convolution 1
        self.cnn1 = nn.Conv2d(in_channels=c1_in,out_channels=c1_out,kernel_size=c1_k,stride=c1_s,padding=c1_p)
        self.relu1 = nn.ReLU()
        
        # Max pool 1
        self.maxpool1 = nn.MaxPool2d(kernel_size=p1_k)
     
        # Convolution 2
        self.cnn2 = nn.Conv2d(in_channels=c2_in, out_channels=c2_out, kernel_size=c2_k, stride=c2_s, padding=c2_p)
        self.relu2 = nn.ReLU()
        
        # Max pool 2
        self.maxpool2 = nn.MaxPool2d(kernel_size=p2_k)
        
        # Fully connected 1
        self.fc1 = nn.Linear(in_features=f1_in, out_features=f1_out) 
        
        # Fully connected 2
        self.fc2 = nn.Linear(in_features=f2_in, out_features=f2_out) 
        
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)
        
        # Max pool 1
        out = self.maxpool1(out)
        
        # Convolution 2 
        out = self.cnn2(out)
        out = self.relu2(out)
        
        # Max pool 2 
        out = self.maxpool2(out)
        
        # Flatten 32*56*56
        out = out.view(out.size(0), -1)

        # Fully Connected Layers
        out = F.relu(self.fc1(out))
        out = self.fc2(out)
        
        return out

In [12]:
# instantiate the CNN
model_scratch = Net2()
print(model_scratch)
# move tensors to GPU if CUDA is available
if CUDA:
    model_scratch.cuda()

Net2(
  (cnn1): Conv2d(3, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu1): ReLU()
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (cnn2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu2): ReLU()
  (maxpool2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=100352, out_features=3584, bias=True)
  (fc2): Linear(in_features=3584, out_features=133, bias=True)
)


In [13]:
def train(n_epochs,loaders,model,optimizer,criterion,use_cuda,save_path):
    """returns trained model"""
    # initialize tracker for minimum validation loss
    valid_loss_min = np.Inf 
    
    for epoch in range(1, n_epochs+1):
        # initialize variables to monitor training and validation loss
        train_loss = 0.0
        valid_loss = 0.0
        
        ###################
        # train the model #
        ###################
        #scheduler.step()
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## find the loss and update the model parameters accordingly
            # clear the gradients of all optimized variables
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # update training loss
            ## record the average training loss, using something like
            train_loss = train_loss + ((1 / (batch_idx + 1)) * (loss.data - train_loss))
            
        ######################    
        # validate the model #
        ######################
        model.eval()
        for batch_idx, (data, target) in enumerate(validation_loader):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()
            ## update the average validation loss
             # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # update average validation loss 
            valid_loss = valid_loss + ((1 / (batch_idx + 1)) * (loss.data - valid_loss))

# calculate average losses
        train_loss = train_loss/len(train_loader.dataset)
        valid_loss = valid_loss/len(validation_loader.dataset)

# print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, 
            train_loss,
            valid_loss
            ))
        
        ## save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss    
    # return trained model
    return model

In [14]:
### TODO: select loss function
criterion_scratch = nn.CrossEntropyLoss()

### TODO: select optimizer
optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=learning_rate)

In [15]:
def test(loaders, model, criterion, use_cuda):

# monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(test_loader):
        # move to GPU
        if CUDA:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))
    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (100. * correct / total, correct, total))

In [16]:
# train the model
print('Train Results:')
model_scratch = train(n_epochs=5, 
                      loaders=train_loader, # good
                      model=model_scratch, # good
                      optimizer=optimizer_scratch, # good
                      criterion=criterion_scratch, # good 
                      use_cuda=CUDA, # good
                      save_path='model_scratch.pt' # good
                     )

# load the model that got the best validation accuracy
model_scratch.load_state_dict(torch.load('model_scratch.pt'))

print('\nTest Results:')
test(test_loader, model_scratch, criterion_scratch, CUDA)

Train Results:
Epoch: 1 	Training Loss: 0.000730 	Validation Loss: 0.005819
Validation loss decreased (inf --> 0.005819).  Saving model ...
Epoch: 2 	Training Loss: 0.000727 	Validation Loss: 0.005797
Validation loss decreased (0.005819 --> 0.005797).  Saving model ...
Epoch: 3 	Training Loss: 0.000722 	Validation Loss: 0.005742
Validation loss decreased (0.005797 --> 0.005742).  Saving model ...
Epoch: 4 	Training Loss: 0.000713 	Validation Loss: 0.005662
Validation loss decreased (0.005742 --> 0.005662).  Saving model ...
Epoch: 5 	Training Loss: 0.000701 	Validation Loss: 0.005662

Test Results:
Test Loss: 4.746204


Test Accuracy:  2% (18/836)
