In [73]:
import torch
import torch.nn as nn

class Inception(nn.Module):
    """Inception block: four parallel paths concatenated along the channel axis.

    Paths (each convolution is followed by a ReLU, applied in ``forward``):
        1. 1x1 conv
        2. 1x1 conv -> 3x3 conv (padding 1)
        3. 1x1 conv -> 5x5 conv (padding 2)
        4. 3x3 max pool (padding 1) -> 1x1 conv

    All layers use stride 1 with size-preserving padding, so every path keeps
    the input's spatial size and the outputs can be concatenated on dim 1.
    The output has ``c1 + c2[1] + c3[1] + c4`` channels.

    Args:
        input_size: number of input channels.
        c1: output channels of path 1.
        c2: ``(reduce, out)`` channels of the 1x1 and 3x3 convs of path 2.
        c3: ``(reduce, out)`` channels of the 1x1 and 5x5 convs of path 3.
        c4: output channels of the 1x1 conv of path 4.
        **kwargs: forwarded to ``nn.Module.__init__``.
    """

    # c1 - c4 are no. of output channels for each path
    def __init__(self, input_size, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)

        # NOTE: the activations live in forward() rather than inside the
        # Sequential containers so that parameter names (state_dict keys such
        # as "path2.1.weight") stay exactly as they were.

        # path 1 is single 1x1 convolutional layer
        self.path1 = nn.Conv2d(in_channels=input_size, out_channels=c1, kernel_size=(1, 1), stride=(1, 1), padding=0)

        # path 2 is 1x1 convolutional layer followed by a 3x3 convolutional layer
        self.path2 = nn.Sequential(
            nn.Conv2d(in_channels=input_size, out_channels=c2[0], kernel_size=(1, 1), stride=(1, 1), padding=0),
            nn.Conv2d(in_channels=c2[0], out_channels=c2[1], kernel_size=(3, 3), stride=(1, 1), padding=1),  # 3x3 conv, pad=1
        )

        # path 3 is 1x1 convolutional layer followed by a 5x5 convolutional layer
        self.path3 = nn.Sequential(
            nn.Conv2d(in_channels=input_size, out_channels=c3[0], kernel_size=(1, 1), stride=(1, 1), padding=0),
            nn.Conv2d(in_channels=c3[0], out_channels=c3[1], kernel_size=(5, 5), stride=(1, 1), padding=2),  # 5x5 conv, pad=2
        )

        # path 4 is 3x3 max pool layer followed by a 1x1 convolutional layer
        self.path4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=(3, 3), stride=(1, 1), padding=1),  # 3x3 MaxPool, pad=1
            nn.Conv2d(in_channels=input_size, out_channels=c4, kernel_size=(1, 1), stride=(1, 1), padding=0),
        )

    @staticmethod
    def _reduce_then_conv(path, x):
        """Run a two-conv path with a ReLU after each convolution."""
        reduce_conv, main_conv = path
        # Without the inner ReLU the two convolutions compose into one linear
        # map, which defeats the purpose of the 1x1 bottleneck.
        return torch.relu(main_conv(torch.relu(reduce_conv(x))))

    def forward(self, x):
        """Apply the four paths to ``x`` and concatenate their outputs on dim 1."""
        p1 = torch.relu(self.path1(x))
        p2 = self._reduce_then_conv(self.path2, x)
        p3 = self._reduce_then_conv(self.path3, x)
        # path4 is max pool -> conv, so a single ReLU after the container suffices
        p4 = torch.relu(self.path4(x))

        return torch.cat((p1, p2, p3, p4), dim=1)


# GoogLeNet Model (THIS IS GoogLeNet version-V1)

# Stage1 ==> Stage2 ==> Stage3 ==> Stage4 ==> Stage5 ==> Output

# [7x7 Conv]-->[3x3 MaxPool] ==> [1x1 Conv]-->[3x3 Conv]-->[3x3 MaxPool] ==> 2x(Inception_block)-->[3x3 MaxPool]
#            ==> 5x(Inception_block)-->[3x3 MaxPool] ==> 2x(Inception_block)-->[Global AvgPool] ==> [FC]

class googleNet(nn.Module):
    """GoogLeNet-style classifier built from five stages and a two-layer head.

    Stages: ``b1``/``b2`` form the convolutional stem, ``b3``-``b5`` stack
    ``Inception`` blocks, and ``b5`` ends with global average pooling so the
    classifier always receives 1024 features per sample.

    Args:
        img_channel: number of channels of the input images.
        num_classes: number of output logits.
    """

    def __init__(self, img_channel, num_classes):
        super(googleNet, self).__init__()

        # NOTE: the stem's ReLU activations are applied in forward() instead of
        # being inserted into b1/b2, so parameter names (state_dict keys) are
        # unchanged.
        self.b1 = nn.Sequential(
            nn.Conv2d(in_channels=img_channel, out_channels=64, kernel_size=(7, 7), stride=(2, 2), padding=3),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1),
        )

        self.b2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(1, 1), stride=(1, 1), padding=0),
            nn.Conv2d(in_channels=64, out_channels=192, kernel_size=(3, 3), stride=(1, 1), padding=1),
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1),
        )

        # Each Inception outputs c1 + c2[1] + c3[1] + c4 channels, which must
        # equal the next block's input_size.
        self.b3 = nn.Sequential(
            Inception(192, 64, (96, 128), (16, 32), 32),      # -> 256
            Inception(256, 128, (128, 192), (32, 96), 64),    # -> 480
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1),
        )

        self.b4 = nn.Sequential(
            Inception(480, 192, (96, 208), (16, 48), 64),     # -> 512
            Inception(512, 160, (112, 224), (24, 64), 64),    # -> 512
            Inception(512, 128, (128, 256), (24, 64), 64),    # -> 512
            Inception(512, 112, (144, 288), (32, 64), 64),    # -> 528
            Inception(528, 256, (160, 320), (32, 128), 128),  # -> 832
            nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1),
        )

        self.b5 = nn.Sequential(
            Inception(832, 256, (160, 320), (32, 128), 128),  # -> 832
            Inception(832, 384, (192, 384), (48, 128), 128),  # -> 1024
            nn.AdaptiveAvgPool2d((1, 1)),  # makes output (1, 1) in spatial shape
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=1024, out_features=1024, bias=True),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.65),

            nn.Linear(in_features=1024, out_features=num_classes, bias=True),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        # Stage 1: 7x7 conv -> ReLU -> 3x3 max pool
        stem_conv, stem_pool = self.b1
        x = stem_pool(torch.relu(stem_conv(x)))

        # Stage 2: 1x1 conv -> ReLU -> 3x3 conv -> ReLU -> 3x3 max pool.
        # Without the ReLUs the two convolutions would collapse into one
        # linear map.
        reduce_conv, conv3x3, pool = self.b2
        x = pool(torch.relu(conv3x3(torch.relu(reduce_conv(x)))))

        x = self.b3(x)
        x = self.b4(x)
        x = self.b5(x)

        # (batch, 1024, 1, 1) -> (batch, 1024)
        x = torch.flatten(x, start_dim=1)

        return self.classifier(x)

def test():
    """Smoke test: push a random batch through googleNet and print the results.

    Builds a 10-class model for 3-channel images, feeds it five random
    227x227 inputs, then prints the output size, the output tensor and the
    model structure, in that order.
    """
    model = googleNet(img_channel=3, num_classes=10)
    batch = torch.randn(5, 3, 227, 227)
    logits = model(batch)

    for item in (logits.size(), logits, model):
        print(item)


test()


torch.Size([5, 10])
tensor([[ 6.2885e-03,  5.7898e-03,  2.0708e-02,  1.1792e-02, -6.0783e-03,
         -1.9125e-03, -8.9316e-03, -3.5678e-02,  4.0109e-02, -1.8585e-02],
        [ 5.8893e-05, -6.1648e-03, -1.5347e-03,  1.4419e-02, -1.0724e-02,
          1.3085e-02, -7.4089e-03, -3.3282e-02,  3.1295e-02,  2.2452e-02],
        [-1.4530e-02, -1.7390e-03,  5.0956e-03,  8.7681e-03, -2.8783e-02,
          1.2580e-02, -1.5602e-03, -2.5273e-02,  3.4936e-02, -8.8128e-04],
        [-1.7824e-02, -1.1543e-02,  3.3157e-03, -1.4159e-02, -1.6989e-02,
          4.9304e-03,  2.6607e-04,  2.4195e-03,  4.5513e-02,  3.9432e-03],
        [ 2.6847e-02, -2.0530e-02,  4.0927e-04, -7.2922e-03,  6.1777e-03,
          3.0795e-02, -1.5585e-02, -8.9317e-03,  3.4721e-02, -8.1499e-03]],
       grad_fn=<AddmmBackward>)
googleNet(
  (b1): Sequential(
    (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): MaxPool2d(kernel_size=(3, 3), stride=(2, 2), padding=1, dilation=1, ceil_mode=False)
  )