**GoogLeNet** was a CNN architecture invented by Google research. It was the winner at ILSVRC 2014 image classification challenge. **GoogLeNet** introduced some key innovations in CNNs

1. 1 x 1 Convolutions.
2. Inception Module.
3. Auxiliary classifiers for training.

In the notebook below we will experiment with some of these ideas from **GoogLeNet**.

https://static.googleusercontent.com/media/research.google.com/en//pubs/archive/43022.pdf

In [1]:
#Perform imports
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
#Import models
from torchvision import models

In [4]:
#Download GoogLeNet
googlenet = models.googlenet(pretrained=True)

Downloading: "https://download.pytorch.org/models/googlenet-1378be20.pth" to /Users/jamieott/.cache/torch/hub/checkpoints/googlenet-1378be20.pth


HBox(children=(FloatProgress(value=0.0, max=52147035.0), HTML(value='')))




In [7]:
#Download the dataset and transform them to the format for ResNet
train_transform = transforms.Compose([
        transforms.Resize(224),             # resize shortest side to 224 pixels
        transforms.CenterCrop(224),         # crop longest side to 224 pixels at center
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

test_transform = transforms.Compose([
        transforms.Resize(224),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ])

train_data = datasets.CIFAR10(root='./', train=True, download=True, transform=train_transform)
test_data = datasets.CIFAR10(root='./', train=False, download=True, transform=test_transform)

Files already downloaded and verified
Files already downloaded and verified


In [8]:
#Create dataloaders
torch.manual_seed(101)  # for reproducible results

train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [10]:
def count_parameters(model):
    params = [p.numel() for p in model.parameters() if p.requires_grad]
    for item in params:
        print(f'{item:>6}')
    print(f'______\n{sum(params):>6}')

def train(model, criterion, optimizer):
    #Lets train the model
    import time
    start_time = time.time()

    epochs = 1

    max_trn_batch = 800
    max_tst_batch = 300

    train_losses = []
    test_losses = []
    train_correct = []
    test_correct = []

    for i in range(epochs):
        trn_corr = 0
        tst_corr = 0
    
        # Run the training batches
        for b, (X_train, y_train) in enumerate(train_loader):
            if b == max_trn_batch:
                break
            b+=1
        
            # Apply the model
            y_pred = model(X_train)
            loss = criterion(y_pred, y_train)
 
            # Tally the number of correct predictions
            predicted = torch.max(y_pred.data, 1)[1]
            batch_corr = (predicted == y_train).sum()
            trn_corr += batch_corr
        
            # Update parameters
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Print interim results
            if b%200 == 0:
                print(f'epoch: {i:2}  batch: {b:4} [{10*b:6}/8000]  loss: {loss.item():10.8f}  \
accuracy: {trn_corr.item()*100/(10*b):7.3f}%')

        train_losses.append(loss)
        train_correct.append(trn_corr)

        # Run the testing batches
        with torch.no_grad():
            for b, (X_test, y_test) in enumerate(test_loader):
                if b == max_tst_batch:
                    break

                # Apply the model
                y_val = model(X_test)

                # Tally the number of correct predictions
                predicted = torch.max(y_val.data, 1)[1] 
                tst_corr += (predicted == y_test).sum()

        loss = criterion(y_val, y_test)
        test_losses.append(loss)
        test_correct.append(tst_corr)
        return test_correct

    print(f'\nDuration: {time.time() - start_time:.0f} seconds') # print the time elapsed

def analyze(model, name, linear_in):
    print('-'*100)
    print('-'*100)
    print("Starting", name)
    print(model)
    #count parameters
    print('-'*100)
    print("Parameters in full model")
    count_parameters(model)
    print('-'*100)
    #reset grad
    for param in model.parameters():
        param.requires_grad = False
    torch.manual_seed(42)
    model.fc = nn.Sequential(nn.Linear(linear_in, out_features=10, bias=True),
                                 nn.LogSoftmax(dim=1))
    print("Parameters to optimize")
    count_parameters(model)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)
    print("Training")
    test_correct = train(model, criterion, optimizer)
    print(test_correct)
    print(f'Test accuracy: {test_correct[-1].item()*100/3000:.3f}%')
    print('-'*100)
    print('-'*100)
    print('\n\n')

In [11]:
analyze(googlenet, "googlenet", 1024)

----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------
Starting googlenet
GoogLeNet(
  (conv1): BasicConv2d(
    (conv): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=True)
  (conv2): BasicConv2d(
    (conv): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv3): BasicConv2d(
    (conv): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(192, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1,

epoch:  0  batch:  200 [  2000/8000]  loss: 1.13296509  accuracy:  43.950%
epoch:  0  batch:  400 [  4000/8000]  loss: 1.32418001  accuracy:  52.350%
epoch:  0  batch:  600 [  6000/8000]  loss: 1.33407283  accuracy:  56.200%
epoch:  0  batch:  800 [  8000/8000]  loss: 0.66557682  accuracy:  59.175%
[tensor(1968)]
Test accuracy: 65.600%
----------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------





Now lets look at $1 \times 1$ convolutional layer.

In [19]:
for (X_train, y_train) in train_data:
    break

In [20]:
X_train.shape

torch.Size([3, 224, 224])

In [28]:
conv1 = nn.Conv2d(3, 16, 3, 1,1)
conv2 = nn.Conv2d(16, 32, 3, 1,1)
conv3 = nn.Conv2d(32, 3, 1, 1,0)

In [30]:
x = X_train.view(1,3,224,224)
print(x.shape)

x = conv1(x)
print(x.shape)
x = conv2(x)
print(x.shape)
x = conv3(x)
print(x.shape)

torch.Size([1, 3, 224, 224])
torch.Size([1, 16, 224, 224])
torch.Size([1, 32, 224, 224])
torch.Size([1, 3, 224, 224])


In [39]:
print("Conv1 parameters", np.sum(np.array([p.numel() for p in conv1.parameters() if p.requires_grad])))
print("Conv2 parameters", np.sum(np.array([p.numel() for p in conv2.parameters() if p.requires_grad])))
print("Conv3 parameters", np.sum(np.array([p.numel() for p in conv3.parameters() if p.requires_grad])))

Conv1 parameters 448
Conv2 parameters 4640
Conv3 parameters 99


$1 \times 1$ convolution compresses the information from larger convolution layers to a smaller representation.