In [0]:
!wget https://s3-us-west-1.amazonaws.com/udacity-aind/dog-project/dogImages.zip
!sudo apt-get install unzip
!unzip -q dogImages.zip
!mkdir data
!mv dogImages data/dog_images

In [5]:
import torch
import numpy as np
# Ask PyTorch once whether a CUDA device is available; the flag is reused
# below to decide whether the model and batches are moved to the GPU.
use_cuda = torch.cuda.is_available()
print(use_cuda)

True


In [0]:
import os
from torchvision import datasets
from torchvision.transforms import transforms
from torch.utils.data.sampler import SubsetRandomSampler
from PIL import ImageFile

# Allow PIL to load image files that are truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True

### TODO: Write data loaders for training, validation, and test sets
## Specify appropriate transforms, and batch_sizes

# Loader settings. `batch_size` applies to the training loader only; the
# validation and test loaders use their own fixed batch sizes (16 and 4).
num_workers = 1
batch_size = 64
data_dir = "data/dog_images"

# A single preprocessing pipeline shared by all three splits: resize every
# image to 240x240, convert it to a tensor, then normalise each channel.
# NOTE(review): the mean/std values look like the usual ImageNet channel
# statistics -- confirm if that matters for your use case.
transform = transforms.Compose([
    transforms.Resize(size=(240, 240)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# One ImageFolder dataset per split directory under `data_dir`.
train_data, validation_data, test_data = (
    datasets.ImageFolder(data_dir + "/" + split, transform=transform)
    for split in ("train", "valid", "test")
)

# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    train_data, batch_size=batch_size, shuffle=True, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(
    validation_data, batch_size=16, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(
    test_data, batch_size=4, num_workers=num_workers)

# Loaders keyed by split name, as expected by train() and test().
loaders_scratch = {
    'train': train_loader,
    'valid': valid_loader,
    'test': test_loader,
}

In [8]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    """Small three-stage CNN classifier for 240 x 240 RGB images.

    Each stage applies conv -> ReLU -> 2x2 max-pool -> batch-norm. The
    resulting feature map is flattened, passed through dropout and mapped
    to class logits by a single linear layer.

    Spatial size per stage for a 240 x 240 input:
        conv1 (padding=1): 240 -> 240, pool -> 120
        conv2 (padding=0): 120 -> 118, pool -> 59
        conv3 (padding=0):  59 ->  57, pool -> 28
    so the linear layer receives 28 * 28 * 128 features per image.

    Args:
        num_classes: Number of output logits. Defaults to 133, the value
            that was previously hard-coded, so ``Net()`` is unchanged.
    """

    def __init__(self, num_classes=133):
        super(Net, self).__init__()
        ## Define layers of a CNN

        # sees -  (240 x 240 x 3)
        self.conv1 = nn.Conv2d(3, 32, 3, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        # sees -  (120 x 120 x 32)
        self.conv2 = nn.Conv2d(32, 64, 3, padding=0)
        self.bn2 = nn.BatchNorm2d(64)
        # sees -  (59 x 59 x 64)
        self.conv3 = nn.Conv2d(64, 128, 3, padding=0)
        self.bn3 = nn.BatchNorm2d(128)
        # out -  (28 x 28 x 128)

        # One parameter-free pooling layer shared by all three stages.
        self.pool = nn.MaxPool2d(2, 2)

        self.fc1 = nn.Linear(28 * 28 * 128, num_classes)

        self.dropout_dense = nn.Dropout(0.25)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes).

        Args:
            x: Image batch of shape (batch, 3, 240, 240). Any other spatial
                size produces a feature count that does not match ``fc1``
                and raises ``RuntimeError``.
        """
        ## Define forward behavior
        x = self.bn1(self.pool(F.relu(self.conv1(x))))
        x = self.bn2(self.pool(F.relu(self.conv2(x))))
        x = self.bn3(self.pool(F.relu(self.conv3(x))))

        # Flatten per sample and keep the batch dimension fixed. The previous
        # `view(-1, 28 * 28 * 128)` let the batch dimension absorb a wrong
        # feature count, so a wrongly sized input could silently merge several
        # images into one row instead of failing.
        x = x.view(x.size(0), -1)
        x = self.dropout_dense(x)
        x = self.fc1(x)

        return x

#-#-# You do NOT have to modify the code below this line. #-#-#

# Build the from-scratch CNN and show its layer summary.
model_scratch = Net()
print(model_scratch)

# When CUDA is available, move the parameters to the GPU and wrap the model
# in DataParallel. The wrapper is only applied on this branch, so on a CPU
# machine `model_scratch` stays a plain `Net`.
if use_cuda:
    model_scratch = nn.DataParallel(model_scratch.cuda())

Net(
  (conv1): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=100352, out_features=133, bias=True)
  (dropout_dense): Dropout(p=0.25)
)


In [9]:
import torch.optim as optim

### TODO: select loss function
# Cross-entropy over the raw logits returned by the model.
criterion_scratch = nn.CrossEntropyLoss()

### TODO: select optimizer
# Adam with one parameter group covering every model parameter. The group
# overrides weight decay (0.1) and enables the AMSGrad variant; the learning
# rate is supplied as the optimizer-wide default.
_scratch_param_group = {
    'params': model_scratch.parameters(),
    'weight_decay': 0.1,
    'amsgrad': True,
}
optimizer_scratch = optim.Adam([_scratch_param_group], lr=0.00001)

print(optimizer_scratch)

Adam (
Parameter Group 0
    amsgrad: True
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 1e-05
    weight_decay: 0.1
)


In [0]:
def train(n_epochs, loaders, model, optimizer, criterion, use_cuda, save_path):
    """Train ``model`` and checkpoint it whenever the validation loss improves.

    Args:
        n_epochs: Number of passes over ``loaders['train']``.
        loaders: Mapping with ``'train'`` and ``'valid'`` iterables that
            yield ``(data, target)`` batches.
        model: The ``torch.nn.Module`` to optimise (updated in place).
        optimizer: Optimizer built over ``model``'s parameters.
        criterion: Loss callable invoked as ``criterion(output, target)``.
        use_cuda: When true, every batch is moved to the GPU first.
        save_path: Destination for ``model.state_dict()``; overwritten each
            time the validation loss is less than or equal to the best so far.

    Returns:
        The same ``model`` object, holding the weights of the LAST epoch.
        The best-validation weights are the ones stored at ``save_path``.

    Raises:
        ValueError: If the validation loader yields no samples.
    """
    # float('inf') rather than np.Inf: that alias was removed in NumPy 2.0.
    valid_loss_min = float('inf')

    for epoch in range(1, n_epochs + 1):
        # running statistics for this epoch
        train_loss = 0.0
        valid_loss = 0.0

        ###################
        # train the model #
        ###################
        model.train()
        for batch_idx, (data, target) in enumerate(loaders['train']):
            # move to GPU
            if use_cuda:
                data, target = data.cuda(), target.cuda()

            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()

            # Incremental mean of the per-batch losses. `.item()` keeps the
            # tracker a plain Python float instead of a (possibly CUDA) tensor.
            train_loss += (loss.item() - train_loss) / (batch_idx + 1)
            if batch_idx % 10 == 0:
                print("batch_idx {} , train_loss {}".format(batch_idx, train_loss))

        ######################
        # validate the model #
        ######################
        model.eval()
        n_valid = 0
        # No gradients are needed for evaluation; disabling autograd avoids
        # building a graph for every validation batch.
        with torch.no_grad():
            for data, target in loaders['valid']:
                # move to GPU
                if use_cuda:
                    data, target = data.cuda(), target.cuda()
                output = model(data)
                loss = criterion(output, target)
                # weight each batch mean by its size -> per-sample average
                valid_loss += loss.item() * data.size(0)
                n_valid += data.size(0)
        if n_valid == 0:
            raise ValueError("validation loader yielded no samples")
        # Divide by the samples actually seen, which stays correct when the
        # loader uses a sampler or drops the last batch.
        valid_loss /= n_valid

        # print training/validation statistics
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch,
            train_loss,
            valid_loss
            ))

        ## save the model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
                valid_loss_min,
                valid_loss))

            torch.save(model.state_dict(), save_path)
            valid_loss_min = valid_loss
    # return trained model
    return model


# train the model
# (kept disabled: the weights are restored from a saved checkpoint below)
#model_scratch = train(20, loaders_scratch, model_scratch, optimizer_scratch, 
#                      criterion_scratch, use_cuda, 'model_scratch.pt')


# load the model that got the best validation accuracy
# NOTE(review): the training call above saves to 'model_scratch.pt', while this
# loads 'model_scratch_12_perc.pt' -- presumably a manually renamed checkpoint;
# confirm the file exists before a Restart & Run All.
# map_location: without a GPU, remap the stored tensors to the CPU so a
# checkpoint written on a CUDA machine can still be deserialised. With a GPU
# the default (None) keeps the devices recorded in the file, as before.
model_scratch.load_state_dict(
    torch.load('model_scratch_12_perc.pt',
               map_location=None if use_cuda else 'cpu'))

In [13]:
def test(loaders, model, criterion, use_cuda):

    # monitor test loss and accuracy
    test_loss = 0.
    correct = 0.
    total = 0.

    model.eval()
    for batch_idx, (data, target) in enumerate(loaders['test']):
        # move to GPU
        if use_cuda:
            data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the loss
        loss = criterion(output, target)
        # update average test loss 
        test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data - test_loss))
        # convert output probabilities to predicted class
        pred = output.data.max(1, keepdim=True)[1]
        # compare predictions to true label
        correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
        total += data.size(0)
            
    print('Test Loss: {:.6f}\n'.format(test_loss))

    print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
        100. * correct / total, correct, total))

# Evaluate the from-scratch model on the test split; test() prints the
# loss and accuracy and returns nothing.
test(loaders_scratch, model_scratch, criterion_scratch, use_cuda)

Test Loss: 3.946966


Test Accuracy: 12% (102/836)
