# CNN

### Test for [CUDA](http://pytorch.org/docs/stable/cuda.html)

Since these are larger (224x224x3) images, it may prove useful to speed up your training time by using a GPU. CUDA is a parallel computing platform, and CUDA Tensors are the same as typical Tensors, except that they use GPUs for computation.


In [111]:
import torch
import numpy as np

# Ask PyTorch whether a CUDA device can be used
train_on_gpu = torch.cuda.is_available()

# Report which device the notebook will train on
if train_on_gpu:
    print('CUDA is available!  Training on GPU ...')
else:
    print('CUDA is not available.  Training on CPU ...')

CUDA is available!  Training on GPU ...


# The Road Ahead

We break the notebook into separate steps. Feel free to use the links below to navigate the notebook.

* [Step 0](#step0): Import Datasets
* [Step 1](#step1): Detect Birds
* [Step 2](#step2): Create a CNN to Classify Bird Breeds (from Scratch)
* [Step 3](#step3): Create a CNN to Classify Bird Breeds (using Transfer Learning)
* [Step 4](#step4): Write Algorithm
* [Step 5](#step5): Test Algorithm

# Step 0: Import Datasets

Download the bird dataset from Kaggle. Unzip the folder and place it in the project's home directory, at the location /dataset.

In [112]:
import numpy as np
from glob import glob

# Collect the path of every bird image; the pattern matches
# dataset/<split>/<class>/<file>
bird_files = np.array(glob("dataset/*/*/*"))

# Report how many image files were found
print('There are %d total bird images.' % bird_files.shape[0])

There are 33050 total bird images.


# Step 1: Detect Bird

In this section, we use a pre-trained model (VGG16) to detect birds in images.

In [3]:
import torch
import torchvision.models as models

# Build VGG-16 with its pre-trained weights
VGG16 = models.vgg16(pretrained=True)

# Record once whether a CUDA device is usable; later cells reuse this flag
use_cuda = torch.cuda.is_available()

# Keep the model on the GPU when possible, otherwise leave it on the CPU
VGG16 = VGG16.cuda() if use_cuda else VGG16

# Making Predictions with a Pre-trained Model

Function that accepts a path to an image (such as 'dataset/train/AFRICAN FIREFINCH/001.jpg') as input and returns the index corresponding to the ImageNet class that is predicted by the pre-trained VGG-16 model. The output should always be an integer between 0 and 999, inclusive.

In [194]:
from PIL import Image
import torch.nn as nn
from torchvision import datasets, transforms as T

# Set PIL to be tolerant of image files that are truncated.
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

def VGG16_predict(img_path):
    """Return the ImageNet class index VGG-16 predicts for an image file.

    Args:
        img_path: path to an image file readable by PIL.

    Returns:
        The index of the highest-scoring output of the model as a Python
        int (0-999 for the 1000-class ImageNet VGG-16).

    Raises:
        OSError: if the file is missing or PIL cannot read it as an image.
    """
    # Inference mode: disables dropout so predictions are deterministic.
    VGG16.eval()

    # Open the image with a context manager so the file handle is always
    # closed; convert to RGB so grayscale/RGBA files still yield the three
    # channels the normalisation below expects.
    with Image.open(img_path) as img:
        image = img.convert('RGB')

    # Standard ImageNet preprocessing: resize, centre-crop to 224x224,
    # convert to a tensor and normalise with the ImageNet mean/std.
    preprocess = T.Compose([
        T.Resize(256),
        T.CenterCrop(224),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # The model expects a batch dimension: (3, 224, 224) -> (1, 3, 224, 224).
    input_batch = preprocess(image).unsqueeze(0)

    # Move the input and model to the GPU for speed if available.
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        VGG16.to('cuda')

    # No gradients are needed for prediction; skipping them saves memory.
    with torch.no_grad():
        output = VGG16(input_batch)

    # Index of the largest score for the single image in the batch.
    return int(output.argmax(dim=1).item())

In [202]:
# Sanity check: run the pre-trained classifier on a single training image
VGG16_predict('dataset/train/ALBATROSS/001.jpg')

146

# define the training, test and validation dataset path

In [16]:
import torchvision
from torchvision.datasets import ImageFolder
from torchvision import datasets, transforms


# One preprocessing pipeline shared by every split: convert to a tensor and
# normalise with the ImageNet mean/std. No resizing is applied here.
# NOTE(review): the from-scratch CNN hard-codes a 128 * 28 * 28 feature size,
# so this presumably relies on the images already being 224x224 - confirm.
transform = transforms.Compose(
    [transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]
)

# Test split
test_dir='dataset/test/'
test_set = torchvision.datasets.ImageFolder(root=test_dir, transform=transform)
testloader = torch.utils.data.DataLoader(test_set, batch_size=64, num_workers=0, shuffle=True)

# Training split
train_dir='dataset/train/'
train_set = torchvision.datasets.ImageFolder(root=train_dir, transform=transform)
trainloader = torch.utils.data.DataLoader(train_set, batch_size=64, num_workers=0, shuffle=True)

# Validation split
valid_dir='dataset/valid/'
valid_set = torchvision.datasets.ImageFolder(root=valid_dir, transform=transform)
validloader = torch.utils.data.DataLoader(valid_set, batch_size=64, num_workers=0, shuffle=True)

# Class names, taken from the sub-directory names of the training split
CLASSES = list(train_set.class_to_idx.keys())

# Loaders keyed by split name
dataloaders = {
    "train": trainloader,
    "test": testloader,
    "valid" : validloader
}
# Number of images per split; "test" must report the size of the test set,
# not the training set.
datasizes = {
    "train": len(train_set),
    "test": len(test_set),
    "valid" : len(valid_set)
}

# Step 2: Create a CNN to Classify Bird Breeds (from dataset)
In this step, we create a CNN that classifies bird breeds. Step 3 then moves on from this CNN to transfer learning with a pre-trained model.

Define the CNN model to train

In [None]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    """Small convolutional classifier for 3-channel 224x224 images.

    Three 3x3 convolutions (batch norm after the first only), each followed
    by ReLU and 2x2 max-pooling, reduce a 224x224 input to 128 feature maps
    of 28x28. Two fully connected layers then produce one raw score (logit)
    per class.

    Args:
        num_classes: number of output classes. When None (the default) the
            module-level ``CLASSES`` list is used, as before.
        hidden_size: width of the hidden fully connected layer.
    """

    def __init__(self, num_classes=None, hidden_size=512):
        super(Net, self).__init__()
        if num_classes is None:
            # Default: one output per class found in the dataset.
            num_classes = len(CLASSES)

        # convolutional feature extractor
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.norm2d1 = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=1)

        # 2x2 max-pooling halves the spatial size after every convolution
        self.pool = nn.MaxPool2d(2, 2)

        # classifier head: 224 / 2 / 2 / 2 = 28, hence 128 * 28 * 28 inputs
        self.fc1 = nn.Linear(128 * 28 * 28, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map a batch of shape (N, 3, 224, 224) to logits (N, num_classes)."""
        # Do NOT call super().__init__() here: re-initialising the module
        # clears every registered layer and parameter on each forward pass.
        x = self.pool(F.relu(self.norm2d1(self.conv1(x))))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))

        # Flatten per sample; keeping the batch size explicit makes a wrongly
        # sized input fail in fc1 instead of silently changing the batch.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# build the from-scratch network
model_scratch = Net()

# keep the network on the GPU when CUDA is available
if use_cuda:
    model_scratch = model_scratch.cuda()

Specify Loss Function and Optimizer

In [None]:
import torch.optim as optim

### Loss: cross-entropy over the raw class scores produced by the network
criterion_scratch = nn.CrossEntropyLoss()

### Optimizer: SGD over every parameter of the from-scratch model
optimizer_scratch = optim.SGD(model_scratch.parameters(), lr=0.01)

# keep the loss module on the GPU when CUDA is in use
if use_cuda:
    criterion_scratch = criterion_scratch.cuda()

In [None]:
# Define the training function

In [None]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever validation loss improves.

    Args:
        n_epochs: number of passes over the training data.
        loaders: mapping with 'train' and 'valid' entries, each an iterable
            of ``(data, target)`` batches.
        model: the network to optimise (updated in place).
        optimizer: optimizer built over ``model``'s parameters.
        criterion: loss function called as ``criterion(output, target)``.
        use_cuda: when true, every batch is moved to the GPU first.
        save_path: file that receives ``model.state_dict()`` each time the
            average validation loss reaches a new minimum.

    Returns:
        The same ``model`` object, in its state after the final epoch (which
        is not necessarily the checkpointed best state).
    """
    # lowest average validation loss seen so far
    valid_loss_min = float("inf")

    for epoch in range(1, n_epochs + 1):
        # running averages of the per-batch losses for this epoch
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            # clear gradients left over from the previous step
            optimizer.zero_grad()
            # forward pass: compute predicted outputs by passing inputs to the model
            output = model(data)
            # calculate the batch loss
            loss = criterion(output, target)
            # backward pass: compute gradient of the loss with respect to model parameters
            loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()

            # incremental mean; .item() yields a plain float so no tensor
            # (or device memory) is kept alive between batches
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)

        ######################
        # validate the model #
        ######################
        model.eval()
        # gradients are not needed for evaluation; skipping them saves
        # memory and time
        with torch.no_grad():
            for batch_idx, (data, target) in enumerate(loaders['valid']):
                # move to GPU
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                loss = criterion(output, target)
                # update the average validation loss
                valid_loss += (loss.item() - valid_loss) / (batch_idx + 1)

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        # save the model if validation loss has decreased
        if valid_loss < valid_loss_min:
            torch.save(model.state_dict(), save_path)
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'
                  .format(valid_loss_min, valid_loss))
            valid_loss_min = valid_loss

    # return trained model
    return model

# train the model on the loaders defined above; `dataloaders` holds the
# 'train' and 'valid' entries that train() reads
model_scratch = train(20, dataloaders, model_scratch, optimizer_scratch,
                      criterion_scratch, use_cuda, 'model_scratch.pt')

# reload the checkpoint with the lowest validation loss
model_scratch.load_state_dict(torch.load('model_scratch.pt'))