Modern CNN - GoogLeNet

Source : https://github.com/d2l-ai/d2l-en/blob/master/chapter_convolutional-modern/googlenet.md

In [2]:
# Install a pinned release of the d2l helper package used by the cells below.
# NOTE(review): later cells call d2l.Classification, d2l.add_to_class,
# d2l.Trainer and d2l.FashionMNIST — confirm that release 0.17.0 provides them.
!pip install d2l==0.17.0

Collecting d2l==0.17.0
  Downloading d2l-0.17.0-py3-none-any.whl (83 kB)
[?25l[K     |████                            | 10 kB 21.4 MB/s eta 0:00:01[K     |███████▉                        | 20 kB 24.1 MB/s eta 0:00:01[K     |███████████▉                    | 30 kB 12.8 MB/s eta 0:00:01[K     |███████████████▊                | 40 kB 9.6 MB/s eta 0:00:01[K     |███████████████████▊            | 51 kB 5.1 MB/s eta 0:00:01[K     |███████████████████████▋        | 61 kB 5.4 MB/s eta 0:00:01[K     |███████████████████████████▋    | 71 kB 5.8 MB/s eta 0:00:01[K     |███████████████████████████████▌| 81 kB 6.6 MB/s eta 0:00:01[K     |████████████████████████████████| 83 kB 1.1 MB/s 
Installing collected packages: d2l
Successfully installed d2l-0.17.0


In [4]:
# Install d2l-book straight from its GitHub repository; presumably this is
# what supplies the `d2lbook.tab` extension loaded in the next cell.
# NOTE(review): the install is not pinned to a tag or commit, so a re-run may
# pull a different revision.
!pip install git+https://github.com/d2l-ai/d2l-book

Collecting git+https://github.com/d2l-ai/d2l-book
  Cloning https://github.com/d2l-ai/d2l-book to /tmp/pip-req-build-mc29wd7m
  Running command git clone -q https://github.com/d2l-ai/d2l-book /tmp/pip-req-build-mc29wd7m
Collecting sphinx>=2.2.1
  Using cached Sphinx-4.2.0-py3-none-any.whl (3.1 MB)
Collecting recommonmark
  Downloading recommonmark-0.7.1-py2.py3-none-any.whl (10 kB)
Collecting nbformat<=5.0.7
  Downloading nbformat-5.0.7-py3-none-any.whl (170 kB)
[K     |████████████████████████████████| 170 kB 5.3 MB/s 
Collecting sphinxcontrib-bibtex<2.0.0
  Downloading sphinxcontrib_bibtex-1.0.0-py3-none-any.whl (14 kB)
Collecting pybtex-apa-style
  Downloading pybtex_apa_style-1.3-py3-none-any.whl (6.4 kB)
Collecting mu-notedown
  Downloading mu_notedown-2.0.3-py3-none-any.whl (15 kB)
Collecting mxtheme>=0.3.16
  Downloading mxtheme-0.3.16.tar.gz (8.3 MB)
[K     |████████████████████████████████| 8.3 MB 53.7 MB/s 
[?25hCollecting sphinxcontrib-svg2pdfconverter
  Downloading sphin

In [3]:
# Load the d2lbook tab extension, then (presumably) pick which framework tabs
# the `%%tab` cells below should run for.
# NOTE(review): this cell's execution count is In [3] while the d2l-book
# install cell is In [4], and the saved output is a ModuleNotFoundError —
# re-run this cell after the install cell on a fresh kernel.
%load_ext d2lbook.tab
tab.interact_select(['mxnet', 'pytorch', 'tensorflow'])

ModuleNotFoundError: ignored

In [None]:
%%tab mxnet
from d2l import mxnet as d2l
from mxnet import np, npx, init
from mxnet.gluon import nn
npx.set_np()

class Inception(nn.Block):
    """Inception block: four parallel paths whose outputs are joined on axis 1.

    `c1` and `c4` are the output-channel counts of paths 1 and 4. `c2` and
    `c3` are indexed as `[0]` (the leading 1x1 convolution) and `[1]` (the
    3x3 or 5x5 convolution that follows it).
    """
    def __init__(self, c1, c2, c3, c4, **kwargs):
        """Create the layers of the four paths; `kwargs` go to `nn.Block`."""
        super().__init__(**kwargs)
        # Path 1: a single 1x1 convolution.
        self.p1_1 = nn.Conv2D(c1, kernel_size=1, activation='relu')
        # Path 2: 1x1 convolution feeding a 3x3 convolution (padding 1).
        self.p2_1 = nn.Conv2D(c2[0], kernel_size=1, activation='relu')
        self.p2_2 = nn.Conv2D(
            c2[1], kernel_size=3, padding=1, activation='relu')
        # Path 3: 1x1 convolution feeding a 5x5 convolution (padding 2).
        self.p3_1 = nn.Conv2D(c3[0], kernel_size=1, activation='relu')
        self.p3_2 = nn.Conv2D(
            c3[1], kernel_size=5, padding=2, activation='relu')
        # Path 4: 3x3 max-pool (stride 1, padding 1) feeding a 1x1 convolution.
        self.p4_1 = nn.MaxPool2D(pool_size=3, strides=1, padding=1)
        self.p4_2 = nn.Conv2D(c4, kernel_size=1, activation='relu')

    def forward(self, x):
        """Run `x` through all four paths and concatenate along axis 1."""
        out_path1 = self.p1_1(x)

        reduced_3x3 = self.p2_1(x)
        out_path2 = self.p2_2(reduced_3x3)

        reduced_5x5 = self.p3_1(x)
        out_path3 = self.p3_2(reduced_5x5)

        pooled = self.p4_1(x)
        out_path4 = self.p4_2(pooled)

        return np.concatenate(
            (out_path1, out_path2, out_path3, out_path4), axis=1)

In [None]:
%%tab pytorch
from d2l import torch as d2l
import torch
from torch import nn
from torch.nn import functional as F

class Inception(nn.Module):
    """Inception block: four parallel paths whose outputs are joined on dim 1.

    `in_channels` is the channel count of the input. `c1` and `c4` are the
    output-channel counts of paths 1 and 4. `c2` and `c3` are indexed as
    `[0]` (the leading 1x1 convolution) and `[1]` (the 3x3 or 5x5
    convolution that follows it). ReLU is applied after every convolution
    in `forward`.
    """
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        """Create the layers of the four paths; `kwargs` go to `nn.Module`."""
        super().__init__(**kwargs)
        # Path 1: a single 1x1 convolution.
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        # Path 2: 1x1 convolution feeding a 3x3 convolution (padding 1).
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        # Path 3: 1x1 convolution feeding a 5x5 convolution (padding 2).
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        # Path 4: 3x3 max-pool (stride 1, padding 1) feeding a 1x1 convolution.
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)

    def forward(self, x):
        """Run `x` through all four paths and concatenate along dim 1."""
        out_path1 = F.relu(self.p1_1(x))

        reduced_3x3 = F.relu(self.p2_1(x))
        out_path2 = F.relu(self.p2_2(reduced_3x3))

        reduced_5x5 = F.relu(self.p3_1(x))
        out_path3 = F.relu(self.p3_2(reduced_5x5))

        # The pooled tensor goes straight into the 1x1 convolution; only the
        # convolution output is passed through ReLU.
        pooled = self.p4_1(x)
        out_path4 = F.relu(self.p4_2(pooled))

        return torch.cat((out_path1, out_path2, out_path3, out_path4), dim=1)

In [None]:
#1st module
#The first module uses a 64-channel $7\times 7$ convolutional layer.
%%tab all
class GoogleNet(d2l.Classification):
    def b1(self):
        if tab.selected('mxnet'):
            net = nn.Sequential()
            net.add(nn.Conv2D(64, kernel_size=7, strides=2, padding=3,
                              activation='relu'),
                    nn.MaxPool2D(pool_size=3, strides=2, padding=1))
            return net
        if tab.selected('pytorch'):
            return nn.Sequential(
                nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
        if tab.selected('tensorflow'):
            return tf.keras.models.Sequential([
                tf.keras.layers.Conv2D(64, 7, strides=2, padding='same',
                                       activation='relu'),
                tf.keras.layers.MaxPool2D(pool_size=3, strides=2, 
                                          padding='same')])

In [None]:
#2nd module
#The second module uses two convolutional layers: first, a 64-channel $1\times 1$ convolutional layer, 
#followed by a $3\times 3$ convolutional layer that triples the number of channels. 
#This corresponds to the second path in the Inception block and concludes the design of the stem. At this point we have 192 channels.

%%tab all
@d2l.add_to_class(GoogleNet)
def b2(self):
    if tab.selected('mxnet'):
        net = nn.Sequential()
        net.add(nn.Conv2D(64, kernel_size=1, activation='relu'),
               nn.Conv2D(192, kernel_size=3, padding=1, activation='relu'),
               nn.MaxPool2D(pool_size=3, strides=2, padding=1))
        return net
    if tab.selected('pytorch'):
        return nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=1), nn.ReLU(),
            nn.Conv2d(64, 192, kernel_size=3, padding=1), nn.ReLU(),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    if tab.selected('tensorflow'):
        return tf.keras.Sequential([
            tf.keras.layers.Conv2D(64, 1, activation='relu'),
            tf.keras.layers.Conv2D(192, 3, padding='same', activation='relu'),
            tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')])

In [None]:
#3rd module
#The third module connects two complete Inception blocks in series. 
#The number of output channels of the first Inception block is $64+128+32+32=256$.

%%tab all
@d2l.add_to_class(GoogleNet)
def b3(self):
    if tab.selected('mxnet'):
        net = nn.Sequential()
        net.add(Inception(64, (96, 128), (16, 32), 32),
               Inception(128, (128, 192), (32, 96), 64),
               nn.MaxPool2D(pool_size=3, strides=2, padding=1))
        return net
    if tab.selected('pytorch'):
        return nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                             Inception(256, 128, (128, 192), (32, 96), 64),
                             nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    if tab.selected('tensorflow'):
        return tf.keras.models.Sequential([
            Inception(64, (96, 128), (16, 32), 32),
            Inception(128, (128, 192), (32, 96), 64),
            tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')])

In [None]:
#4th module
#The fourth module connects five Inception blocks in series, 
# and they have $192+208+48+64=512$, $160+224+64+64=512$, $128+256+64+64=512$, $112+288+64+64=528$, and $256+320+128+128=832$ output channels, respectively. 
%%tab all
@d2l.add_to_class(GoogleNet)
def b4(self):
    if tab.selected('mxnet'):
        net = nn.Sequential()
        net.add(Inception(192, (96, 208), (16, 48), 64),
                Inception(160, (112, 224), (24, 64), 64),
                Inception(128, (128, 256), (24, 64), 64),
                Inception(112, (144, 288), (32, 64), 64),
                Inception(256, (160, 320), (32, 128), 128),
                nn.MaxPool2D(pool_size=3, strides=2, padding=1))
        return net
    if tab.selected('pytorch'):
        return nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                             Inception(512, 160, (112, 224), (24, 64), 64),
                             Inception(512, 128, (128, 256), (24, 64), 64),
                             Inception(512, 112, (144, 288), (32, 64), 64),
                             Inception(528, 256, (160, 320), (32, 128), 128),
                             nn.MaxPool2d(kernel_size=3, stride=2, padding=1))
    if tab.selected('tensorflow'):
        return tf.keras.Sequential([
            Inception(192, (96, 208), (16, 48), 64),
            Inception(160, (112, 224), (24, 64), 64),
            Inception(128, (128, 256), (24, 64), 64),
            Inception(112, (144, 288), (32, 64), 64),
            Inception(256, (160, 320), (32, 128), 128),
            tf.keras.layers.MaxPool2D(pool_size=3, strides=2, padding='same')])

In [None]:
#5th module
#The fifth module has two Inception blocks with $256+320+128+128=832$ and $384+384+128+128=1024$ output channels. 
#The number of channels assigned to each path is the same as that in the third and fourth modules, but differs in specific values. 
%%tab all
@d2l.add_to_class(GoogleNet)
def b5(self):
    if tab.selected('mxnet'):
        net = nn.Sequential()
        net.add(Inception(256, (160, 320), (32, 128), 128),
                Inception(384, (192, 384), (48, 128), 128),
                nn.GlobalAvgPool2D())
        return net
    if tab.selected('pytorch'):
        return nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                             Inception(832, 384, (192, 384), (48, 128), 128),
                             nn.AdaptiveAvgPool2d((1,1)), nn.Flatten())
    if tab.selected('tensorflow'):
        return tf.keras.Sequential([
            Inception(256, (160, 320), (32, 128), 128),
            Inception(384, (192, 384), (48, 128), 128),
            tf.keras.layers.GlobalAvgPool2D(),
            tf.keras.layers.Flatten()])
        

In [None]:
%%tab all
@d2l.add_to_class(GoogleNet)
def __init__(self, num_classes=10, lr=0.1):
    """Assemble the network from modules `b1`..`b5` plus an output layer.

    Args:
        num_classes: Width of the final dense/linear layer.
        lr: Learning rate; recorded through `save_hyperparameters()`.
    """
    super(GoogleNet, self).__init__()
    self.save_hyperparameters()
    if tab.selected('mxnet'):
        self.net = nn.Sequential()
        self.net.add(self.b1(), self.b2(), self.b3(), self.b4(), self.b5(),
                     nn.Dense(num_classes))
        self.net.initialize(init.Xavier())
    if tab.selected('pytorch'):
        # 1024 = 384+384+128+128, the channel count leaving `b5`.
        self.net = nn.Sequential(self.b1(), self.b2(), self.b3(), self.b4(),
                                 self.b5(), nn.Linear(1024, num_classes))
    if tab.selected('tensorflow'):
        # Use `num_classes` rather than a hard-coded 10 so this branch honours
        # the constructor argument like the other two frameworks.
        self.net = tf.keras.Sequential([
            self.b1(), self.b2(), self.b3(), self.b4(), self.b5(), 
            tf.keras.layers.Dense(num_classes)])

In [None]:
%%tab mxnet, pytorch
# Build a GoogleNet with the default hyperparameters and request a layer
# summary for an input of shape (1, 1, 224, 224).
# NOTE(review): `model` is bound to whatever `layer_summary` returns, not to
# the GoogleNet instance itself — confirm that is intended.
model = GoogleNet().layer_summary((1, 1, 224, 224))

Training

In [None]:
%%tab mxnet, pytorch
# Train a fresh GoogleNet (lr=0.1) on FashionMNIST.
model = GoogleNet(lr=0.1)
# Trainer is configured for 10 epochs and one GPU.
trainer = d2l.Trainer(max_epochs=10, num_gpus=1)
# Images are resized to 96x96 here, whereas the layer-summary cell above used
# a 224x224 input.
data = d2l.FashionMNIST(batch_size=128, resize=(96, 96))
trainer.fit(model, data)