In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim

In [2]:
import copy
import numpy as np
import time

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

Dataset

In [17]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((224, 224)),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])
batch_size = 4

trainset = torchvision.datasets.CIFAR10(root = './data', train = True, download = True, transform = transform)
trainset = torch.utils.data.Subset(trainset, torch.arange(5000))
trainloader = torch.utils.data.DataLoader(trainset, batch_size = batch_size, shuffle = True, num_workers = 2)
testset  = torchvision.datasets.CIFAR10(root = './data', train = False, download = True, transform = transform)
testset  = torch.utils.data.Subset(trainset, torch.arange(1000))
testloader  = torch.utils.data.DataLoader(testset, batch_size = batch_size, shuffle = False, num_workers = 2)

dataloaders = {'train' : trainloader, 'val':testloader}

Files already downloaded and verified
Files already downloaded and verified


In [20]:
def train(model, dataloaders, criterion, optimizer, scheduler=None, num_epochs=10):
    
    print("{:7}  {:10}  {:6}  {:8}\n".format("Epoch", "Stage", "Loss", "Accuracy"))
        
    best_model = copy.deepcopy(model.state_dict())
    training_info = {
        'Best Accuracy' : .0,
        'Best Loss' : np.inf,
        'Training Stats' : {'Accuracy' : [], 'Loss': []},
        'Validation Stats' : {'Accuracy' : [], 'Loss': []}
    }

    since = time.time()
    
    for epoch in range(num_epochs):
        for phase in ['train', 'val']:
            model.train() if phase == 'train' else model.eval()
            
            cumulative_loss = .0
            cumulative_hits = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, aux1, aux2  = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels) + criterion(aux1, labels) + criterion(aux2, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                cumulative_loss += loss*inputs.size(0)
                cumulative_hits += torch.sum(preds == labels.data)
                del(inputs); del(labels)
            if phase == 'train' and scheduler != None:
                scheduler.step()
            
            dataset_size = len(dataloaders[phase].dataset)
            epoch_loss = (cumulative_loss / dataset_size).to('cpu').item()
            epoch_acc  = (cumulative_hits.double() / dataset_size).to('cpu').item()
            
            if phase == 'train':
                training_info['Training Stats']['Accuracy'].append(epoch_acc)
                training_info['Training Stats']['Loss'].append(epoch_loss)
            else:
                training_info['Validation Stats']['Accuracy'].append(epoch_acc)
                training_info['Validation Stats']['Loss'].append(epoch_loss)
                
            print("{:7}  {:10}  {:<6.2f}  {:<8.2f}".format("{}/{}".format(epoch + 1, num_epochs) if phase == "train" else " ",
                                                                 "Training" if phase == "train" else "Validation",
                                                                 epoch_loss, epoch_acc))
            if phase == 'val' and epoch_acc > best_acc:
                training_info['Best Accuracy']      = epoch_acc
                training_info['Best Loss']          = epoch_loss
                best_model                          = copy.deepcopy(model.state_dict())
                
    time_elapsed = time.time() - since
    
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print("Best Validation Accuracy: {:.2f}".format(training_info['Best Accuracy']))
    print("Best Validation Loss: {:.2f}\n".format(training_info['Best Loss']))
    model.load_state_dict(best_model)
    return model, training_info

#### Convolutional Block
Every convolution is followed by an activation function and batch normalization.

In [21]:
Activations = nn.ModuleDict([
    ['relu', nn.ReLU()],
    ['identity', nn.Identity()]
])

def check_AutoPadding(kernel_size):
    if type(kernel_size) == tuple:
        return (kernel_size[0] // 2, kernel_size[1] // 2)
    else:
        return kernel_size // 2

In [22]:
class ConvBlock(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride = 1, padding = 0, use_autoPadding = False, use_batchNorm = False, activation = None):
        super(ConvBlock, self).__init__()
        
        padding = check_AutoPadding(kernel_size) if use_autoPadding else padding
        
        self.conv = nn.Conv2d(in_channels, out_channels,
                              kernel_size, stride, padding)
        self.batch_norm = nn.BatchNorm2d(out_channels) if use_batchNorm else Activations['identity']
        self.activation = Activations[activation] if activation != None else Activations['identity']
        
    def forward(self, X):
        return self.activation(self.batch_norm(self.conv(X)))

In [23]:
class InceptionModule(nn.Module):
    def __init__(self, in_channels, f_1x1, f_3x3_r, f_3x3, f_5x5_r, f_5x5, f_pp):
        super(InceptionModule, self).__init__()
        
        self.branch1 = nn.Sequential(
            ConvBlock(in_channels, f_1x1, kernel_size = 1, stride = 1, padding = 0)
        )
        self.branch2 = nn.Sequential(
            ConvBlock(in_channels, f_3x3_r, kernel_size = 1, stride=  1, padding = 0),
            ConvBlock(f_3x3_r, f_3x3, kernel_size = 3, stride = 1, padding = 1)
        )
        self.branch3 = nn.Sequential(
            ConvBlock(in_channels, f_5x5_r, kernel_size = 1, stride = 1, padding = 0),
            ConvBlock(f_5x5_r, f_5x5, kernel_size = 5, stride = 1, padding = 2)
        )
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(3, stride = 1, padding = 1, ceil_mode = True),
            ConvBlock(in_channels, f_pp, kernel_size = 1, stride = 1, padding = 0)
        )
    
    def forward(self, X):
        return torch.cat([
            self.branch1(X),
            self.branch2(X),
            self.branch3(X),
            self.branch4(X)
        ], 1)

__Adaptive average pooling__ is simply an average pooling operation that, given an input and output dimensionality, calculates the correct kernel size necessary to produce an output of the given dimensionality from the given input.

In [24]:
class InceptionAux(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(InceptionAux, self).__init__()
        
        self.pool = nn.AdaptiveAvgPool2d((4, 4))
        self.conv = ConvBlock(in_channels, 128, kernel_size = 1, activation = 'relu', use_batchNorm = False)
        self.fc1 = nn.Linear(2048, 1024)
        self.dropout = nn.Dropout(0.7)
        self.fc2 = nn.Linear(1024, num_classes)
        
    def forward(self, X):
        X = self.pool(X)
        X = self.conv(X)
        X = torch.flatten(X, 1)
        X = self.fc1(X)
        X = self.dropout(X)
        X = self.fc2(X)
        return X

In [25]:
class GoogleNet(nn.Module):
    def __init__(self, num_classes = 10):
        super(GoogleNet, self).__init__()
        self.conv1 = ConvBlock(3, 64, kernel_size = 7, stride = 2, use_autoPadding = True, use_batchNorm = True, activation = 'relu')
        self.pool1 = nn.MaxPool2d(3, stride = 2, padding = 0, ceil_mode = True)
        self.conv2 = ConvBlock(64, 64, kernel_size = 1, use_autoPadding = True, use_batchNorm = True, activation = 'relu')
        self.conv3 = ConvBlock(64, 192, kernel_size = 3, use_autoPadding = True, use_batchNorm = True, activation = 'relu')
        self.pool3 = nn.MaxPool2d(3, stride = 2, padding = 0, ceil_mode = True)
    
        self.inception3a = InceptionModule(in_channels = 192,
                                           f_1x1 = 64,
                                           f_3x3_r = 96,
                                           f_3x3 = 128,
                                           f_5x5_r = 16,
                                           f_5x5 = 32,
                                           f_pp = 32)
        self.inception3b = InceptionModule(in_channels = 256,
                                           f_1x1 = 128,
                                           f_3x3_r = 128,
                                           f_3x3 = 192,
                                           f_5x5_r = 32,
                                           f_5x5 = 96,
                                           f_pp = 64)
        self.pool4 = nn.MaxPool2d(3, stride = 2, padding = 0, ceil_mode = True)
        self.inception4a = InceptionModule(in_channels = 480,
                                           f_1x1 = 192,
                                           f_3x3_r = 96,
                                           f_3x3 = 208,
                                           f_5x5_r = 16,
                                           f_5x5 = 48,
                                           f_pp = 64)
        self.inception4b = InceptionModule(in_channels = 512,
                                           f_1x1 = 160,
                                           f_3x3_r = 112,
                                           f_3x3 = 224,
                                           f_5x5_r = 24,
                                           f_5x5 = 64,
                                           f_pp = 64)
        self.inception4c = InceptionModule(in_channels = 512,
                                           f_1x1 = 128,
                                           f_3x3_r = 128,
                                           f_3x3 = 256,
                                           f_5x5_r = 24,
                                           f_5x5 = 64,
                                           f_pp = 64)
        self.inception4d = InceptionModule(in_channels = 512,
                                           f_1x1 = 112,
                                           f_3x3_r = 144,
                                           f_3x3 = 288,
                                           f_5x5_r = 32,
                                           f_5x5 = 64,
                                           f_pp = 64)
        self.inception4e = InceptionModule(in_channels = 528,
                                           f_1x1 = 256,
                                           f_3x3_r = 160,
                                           f_3x3 = 320,
                                           f_5x5_r = 32,
                                           f_5x5 = 128,
                                           f_pp = 128)
        self.pool5 = nn.MaxPool2d(3, stride = 2, padding = 0, ceil_mode = True)
        self.inception5a = InceptionModule(in_channels = 832,
                                           f_1x1 = 256,
                                           f_3x3_r = 160,
                                           f_3x3 = 320,
                                           f_5x5_r = 32,
                                           f_5x5 = 128,
                                           f_pp = 128)
        self.inception5b = InceptionModule(in_channels = 832,
                                           f_1x1 = 384,
                                           f_3x3_r = 192,
                                           f_3x3 = 384,
                                           f_5x5_r = 48,
                                           f_5x5 = 128,
                                           f_pp = 128)
        self.pool6 = nn.AdaptiveAvgPool2d((1, 1))
        self.dropout = nn.Dropout(0.4)
        self.fc = nn.Linear(1024, num_classes)
        self.aux4a = InceptionAux(512, num_classes)
        self.aux4d = InceptionAux(528, num_classes)
    
    def forward(self, X):
        X = self.conv1(X)
        X = self.pool1(X)
        X = self.conv2(X)
        X = self.conv3(X)
        X = self.pool3(X)
        X = self.inception3a(X)
        X = self.inception3b(X)
        X = self.pool4(X)
        X = self.inception4a(X)
        
        aux1 = self.aux4a(X)
        
        X = self.inception4b(X)
        X = self.inception4c(X)
        X = self.inception4d(X)
        
        aux2 = self.aux4d(X)
        
        X = self.inception4e(X)
        X = self.pool5(X)
        X = self.inception5a(X)
        X = self.inception5b(X)
        X = self.pool6(X)
        X = torch.flatten(X, 1)
        X = self.dropout(X)
        X = self.fc(X)
        
        return X, aux1, aux2
        

In [26]:
model = GoogleNet(10).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [27]:
model, training_info = train(model, dataloaders, criterion, optimizer, num_epochs=10)

Epoch    Stage       Loss    Accuracy



AttributeError: 'dict' object has no attribute 'append'