In [1]:
from torchvision import datasets, transforms
import torch.utils.data as data
from skimage import feature
import PIL
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn, optim

## Preprocess data

In [2]:
def get_mean_std(dl):
    '''
    Calculates mean and std for each channel (RGB).
    Data has 4 dims [batch, C, H, W] i.e. [0, 1, 2, 3] and we want to calculate
    mean/std across (dims 0, 2, 3) but for each channel (dim 1).
    mean = sum (across all batches per channel) / n_batches
    std = sqrt(variance), 
        where variance = E(X**2) - E(X)**2
        where E(x) is expected value of x i.e. mean(x) 
        variance = mean(squared(data) per channel) - squared(mean(data per channel))
    '''
    n_batches = len(dl)
    batch_expected_x = 0
    batch_expected_x_sq = 0
    for data, _ in dl:
        batch_expected_x_sq += torch.mean(data**2, dim=[0, 2, 3]) # E(X**2) batch                                
        batch_expected_x    += torch.mean(data, dim=[0, 2, 3]) # E(X) batch
    mean = batch_expected_x / n_batches # overall E(X)
    var = (batch_expected_x_sq / n_batches) - mean**2 # E(X**2) - E(X)**2
    std = torch.sqrt(var)

    return mean, std

In [3]:
batch_size = 32

train_ds = datasets.ImageFolder(root='/Users/mt/data/Landmark_Classification/train',
                                    transform=transforms.Compose([
                                        transforms.Resize((128, 128)),
                                        # transforms.CenterCrop(128),                                    
                                        # transforms.RandomHorizontalFlip(p=0.25),
                                        transforms.ToTensor()]))
train_dl = data.DataLoader(train_ds, batch_size=batch_size)

mean, std = get_mean_std(train_dl)



In [4]:
mean, std

(tensor([0.4875, 0.5093, 0.4983]), tensor([0.2797, 0.2733, 0.3129]))

In [5]:
#
# Normalize using the statistics obtained above and apply images augumentations. 
#
train_ds = datasets.ImageFolder(root='/Users/mt/data/Landmark_Classification/train',
                                transform=transforms.Compose([                                
                                    transforms.Resize((128, 128)),
                                    # transforms.CenterCrop(128),                                    
                                    # transforms.RandomHorizontalFlip(p=0.25),
                                    transforms.ToTensor(),                                    
                                    transforms.Normalize(mean, std)]))
classes = train_ds.classes
train_dl = data.DataLoader(train_ds, batch_size=batch_size)

#
# Compute mean and std after normalization
#
norm_mean, norm_std = get_mean_std(train_dl)
print(f'norm_mean = {norm_mean}')
print(f'norm_std = {norm_std}\n')

norm_mean = tensor([-2.1115e-07,  1.8072e-07, -3.0436e-07])
norm_std = tensor([1.0000, 1.0000, 1.0000])



## Specify model

In [6]:
class Net(nn.Module):

    def __init__(self):
        super(Net, self).__init__()
        
        # Define layers of a CNN
        self.conv_1= nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.relu_1 = nn.ReLU(inplace=True)
        self.bn_1 = nn.BatchNorm2d(16)
        self.maxpool_1 = nn.MaxPool2d(kernel_size=2)        
        self.conv_2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.relu_2 = nn.ReLU(inplace=True)
        self.bn_2 = nn.BatchNorm2d(32)
        self.maxpool_2 = nn.MaxPool2d(kernel_size=2)        
        self.conv_3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.relu_3 = nn.ReLU(inplace=True) 
        self.bn_3 = nn.BatchNorm2d(64)        
        self.maxpool_3 = nn.MaxPool2d(kernel_size=2)        
        self.conv_4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, padding=1)
        self.relu_4 = nn.ReLU(inplace=True)
        self.bn_4 = nn.BatchNorm2d(128)        
        self.maxpool_4 = nn.MaxPool2d(kernel_size=2)                
        self.conv_5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, padding=1)
        self.relu_5 = nn.ReLU(inplace=True)
        self.bn_5 = nn.BatchNorm2d(256)        
        self.maxpool_5 = nn.MaxPool2d(kernel_size=2)                        
        self.flatten_1 = nn.Flatten()
        self.linear_1 = nn.Linear(in_features=4 * 4 * 256, out_features=1024)
        self.relu_6 = nn.ReLU(inplace=True)
        self.bn_6 = nn.BatchNorm1d(1024)        
        self.linear_2 = nn.Linear(in_features=1024, out_features=50)
        self.output = nn.LogSoftmax(dim=1)
        
        ## Layers list
        self.layers = [layer for layer in self.modules() if not isinstance(layer, Net)]
      
    def forward(self, x):
        ## Define forward behavior
        for layer in self.layers: 
            x = layer(x)        
        return x

# instantiate the CNN
model = Net()

## Specify loss function and optimizer

In [19]:
loss_fn = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

## Train the model

In [21]:
def train(n_epochs):
    n_sample = len(train_dl)

    for epoch in range(1, n_epochs+1):
        train_loss = 0.0
        
        for batch_idx, (data, target) in enumerate(train_dl):        
            output = model(data)
            
            loss = loss_fn(output, target)
            optimizer.zero_grad()            
            loss.backward()
            
            optimizer.step()
            train_loss += loss.detach().item() 
            
        else:
            avg_loss = train_loss / n_sample                                 
            print('epoch', epoch, 'loss', avg_loss)

In [29]:
num_epochs = 20
train(num_epochs)


epoch 1 loss 0.023807676884276357
epoch 2 loss 0.020727139393590947
epoch 3 loss 0.018057170175412234
epoch 4 loss 0.01612152494093839
epoch 5 loss 0.014308119049057998
epoch 6 loss 0.012865767167742424
epoch 7 loss 0.011694732345046198
epoch 8 loss 0.01065020253535043
epoch 9 loss 0.009755415321783499
epoch 10 loss 0.008934328700366251
epoch 11 loss 0.008258165717323093
epoch 12 loss 0.007644926716741967
epoch 13 loss 0.0070918997566394035
epoch 14 loss 0.006581640425831713
epoch 15 loss 0.006164277236824459
epoch 16 loss 0.005741489561958278
epoch 17 loss 0.005384536023484543
epoch 18 loss 0.005051236556337631
epoch 19 loss 0.004946113435893339
epoch 20 loss 0.004641354500756975


## Test the model

In [25]:
test_ds = datasets.ImageFolder(root='/Users/mt/data/Landmark_Classification/test',
                                    transform=transforms.Compose([
                                    transforms.Resize((128, 128)),                                   
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean, std)]))

test_dl = data.DataLoader(test_ds, batch_size=batch_size) 

In [31]:
test_loss = 0.
correct = 0.
total = 0.

# set the module to evaluation mode
model.eval()

for batch_idx, (data, target) in enumerate(test_dl):
    # forward pass: compute predicted outputs by passing inputs to the model
    output = model(data)
    # calculate the loss
    loss = loss_fn(output, target)
    # update average test loss 
    test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data.item() - test_loss))
    # convert output probabilities to predicted class
    output = torch.exp(output)  # Get probability from LogSoftmax
    pred = output.data.max(1, keepdim=True)[1]
    # compare predictions to true label
    correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
    total += data.size(0)

print('Test Loss: {:.6f}\n'.format(test_loss))

print('\nTest Accuracy: %2d%% (%2d/%2d)' % (
    100. * correct / total, correct, total))

Test Loss: 3.849874


Test Accuracy: 44% (670/1500)


## Save the model

In [33]:
torch.save(model.state_dict(), '../model/cnn.pkl')