In [2]:
import numpy as np
import torch
import torch.nn as nn

Link to NiN Paper: [Network in Network](https://arxiv.org/pdf/1312.4400)


A Network-in-Network block consists of a normal convolutional layer followed by two 1x1 convolutional layers. Each 1x1 convolutional layer acts as a per-pixel fully-connected layer (see [1x1 Convolution](https://www.coursera.org/lecture/convolutional-neural-networks/networks-in-networks-and-1x1-convolutions-ZTb8x)).

**Single NiN Block**

![Single NiN Block](assets/nin_block.png)

The 1x1 convolutional layer gives us more per-pixel non-linearity.

**NiN Model**
![NiN Model](assets/nin_model.png)

NiN removes the fully-connected layers and replaces them with global average pooling, applied after the last block has reduced the number of channels to the desired number of outputs (e.g., 10 for FashionMNIST).


Fully-connected layers are prone to overfitting. Overfitting arises when a model has more parameters than the task requires. A fully-connected head adds far more parameters than necessary, so the model can learn noise and other fine-grained details of the training data; learning such details leads to overfitting.



In [3]:
class NinNet(nn.Module):
    """Network-in-Network (NiN) image classifier.

    Four NiN blocks are stacked; each of the first three is followed by a 3x3,
    stride-2 max-pool. Dropout is applied before the last block, whose output
    has ``output_size`` channels. Each of those channels is then averaged over
    all spatial positions, so no fully-connected layer is needed.

    Args:
        output_size: Number of channels produced by the final block, i.e. the
            length of the per-sample output vector.
    """

    def __init__(self, output_size):
        super().__init__()

        self.output_size = output_size

        # nin_block arguments: in_channels, out_channels, kernel_size, strides, padding.
        self.layer1 = self.nin_block(3, 96, 11, 4, 0)
        self.layer2 = self.nin_block(96, 256, 5, 1, 2)
        self.layer3 = self.nin_block(256, 384, 3, 1, 1)
        self.layer4 = self.nin_block(384, output_size, 3, 1, 1)

        self.dropout = nn.Dropout2d(p=0.5)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2)
        # True global average pooling: always reduces to 1x1, whatever the
        # spatial size of the last feature map. A fixed 5x5 window is only
        # "global" for 224x224 inputs; for those the result is identical.
        self.avgpool = nn.AdaptiveAvgPool2d(output_size=1)

    def nin_block(self, in_channels, out_channels, kernel_size, strides, padding):
        """Build one NiN block: a regular convolution followed by two 1x1 convolutions.

        Every convolution is followed by an in-place ReLU.

        Args:
            in_channels: Channels of the block's input.
            out_channels: Channels produced by all three convolutions.
            kernel_size: Kernel size of the first convolution.
            strides: Stride of the first convolution.
            padding: Zero-padding of the first convolution.

        Returns:
            An ``nn.Sequential`` holding the six layers.
        """
        return nn.Sequential(
            nn.Conv2d(in_channels, out_channels, kernel_size, strides, padding),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, kernel_size=1),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Tensor of shape ``(N, 3, H, W)``. ``H`` and ``W`` must be at
                least 67 so that a 3x3 window still fits at the third max-pool.

        Returns:
            Tensor of shape ``(N, output_size)``.
        """
        x = self.maxpool(self.layer1(x))
        x = self.maxpool(self.layer2(x))
        x = self.maxpool(self.layer3(x))

        x = self.dropout(x)
        x = self.layer4(x)

        # (N, output_size, h, w) -> (N, output_size, 1, 1) -> (N, output_size)
        x = self.avgpool(x)
        return x.flatten(start_dim=1)


        
        
        

In [4]:
# Build a 10-output NiN and cast its parameters to float64 in one step.
my_nin = NinNet(10).double()
my_nin  # bare last expression: display the layer-by-layer module summary

NinNet(
  (layer1): Sequential(
    (0): Conv2d(3, 96, kernel_size=(11, 11), stride=(4, 4))
    (1): ReLU(inplace=True)
    (2): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(96, 96, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU(inplace=True)
  )
  (layer2): Sequential(
    (0): Conv2d(96, 256, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU(inplace=True)
  )
  (layer3): Sequential(
    (0): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
    (3): ReLU(inplace=True)
    (4): Conv2d(384, 384, kernel_size=(1, 1), stride=(1, 1))
    (5): ReLU(inplace=True)
  )
  (layer4): Sequential(
    (0): Conv2d(384, 10, kernel_size=(3, 3), stride=(1, 1

In [5]:
# Smoke test: a single random 3x224x224 image must map to a (1, 10) output.
# The generator is seeded so the cell gives the same input on every run;
# NumPy's uniform samples are float64, matching the model cast with .double().
X = torch.from_numpy(np.random.default_rng(0).uniform(size=(1, 3, 224, 224)))

# Only the output shape is inspected, so skip building the autograd graph.
with torch.no_grad():
    output = my_nin(X)

print(output.shape)
assert output.shape == (1, 10), output.shape

## The output is what we desired :) :)

torch.Size([1, 10])
