In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
def count_parameters(model):
    """Return the number of trainable scalar parameters in ``model``.

    Args:
        model: Object exposing ``parameters()`` (e.g. an ``nn.Module``).

    Returns:
        int: Sum of ``numel()`` over every parameter whose ``requires_grad``
        flag is set; frozen parameters are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

In [None]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    The skip connection is the identity when the block keeps both the spatial
    resolution (``stride == 1``) and the channel count
    (``in_planes == planes``). Otherwise it is a strided convolution followed
    by batch norm that projects the input to the shape of the main path.

    Args:
        in_planes: Number of input channels.
        planes: Number of output channels.
        stride: Stride of the first convolution and of the projection shortcut.
        shortcut_kernel_size: Integer kernel size of the projection shortcut
            convolution. When ``None`` the value of a module-level
            ``kernel_size`` variable is used if one is defined (this is how
            the notebook cells configure the block); otherwise it defaults
            to 1.
    """

    def __init__(self, in_planes, planes, stride=1, shortcut_kernel_size=None):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            if shortcut_kernel_size is None:
                # Backward compatibility: the kernel size used to be read
                # from a module-level `kernel_size` variable.
                shortcut_kernel_size = globals().get("kernel_size", 1)
            # Pad so that the shortcut output has the same spatial size as
            # the main path (3x3 conv, padding 1, same stride). This always
            # holds for odd kernel sizes; without padding a 3x3 shortcut is
            # smaller than the main path and the residual addition fails.
            # Padding adds no parameters.
            shortcut_padding = (shortcut_kernel_size - 1) // 2
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes,
                          kernel_size=shortcut_kernel_size, stride=stride,
                          padding=shortcut_padding, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        """Apply the residual block to ``x`` and return the activated sum."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        # Residual connection: add the (possibly projected) input.
        out += self.shortcut(x)
        out = F.relu(out)
        return out

class ResNet(nn.Module):
    """ResNet-style classifier: a 3x3 stem, four residual stages, a linear head.

    The stem maps 3 input channels to 64. The four stages produce 64, 128,
    256 and 512 channels; stages 2-4 start with a stride-2 block. The head is
    ``nn.Linear(512, num_classes)``, so the average pooling in ``forward``
    must reduce the final feature map to 1x1 (presumably 32x32 inputs with
    ``pool_size == 4`` -- confirm against the data pipeline).

    Args:
        block: Callable ``block(in_planes, planes, stride)`` returning an
            ``nn.Module`` (e.g. ``BasicBlock``).
        num_blocks: Sequence of four ints, the number of blocks per stage.
        num_classes: Size of the output layer.
        pool_size: Window of the final average pooling. When ``None`` the
            value of a module-level ``pool_size`` variable is looked up at
            forward time if one is defined (this is how the notebook cells
            configure the model); otherwise it defaults to 4.
    """

    def __init__(self, block, num_blocks, num_classes=10, pool_size=None):
        super().__init__()
        # Running channel count; updated by _make_layer as stages are built.
        self.in_planes = 64
        self.pool_size = pool_size

        self.conv1 = nn.Conv2d(3, 64, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = nn.Linear(512, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest stride 1."""
        block_strides = [stride] + [1] * (num_blocks - 1)
        stage = []
        for block_stride in block_strides:
            stage.append(block(self.in_planes, planes, block_stride))
            # Every following block receives `planes` input channels.
            self.in_planes = planes
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return class scores for the input batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        window = self.pool_size
        if window is None:
            # Backward compatibility: the pooling window used to be read from
            # a module-level `pool_size` variable on every forward call.
            window = globals().get("pool_size", 4)
        out = F.avg_pool2d(out, window)
        # Flatten to (batch, features) for the linear head.
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

In [None]:
# Configuration 1: two blocks per stage, kernel_size = 3 in the skip connection.
# Builds the model and prints its trainable-parameter count; no forward pass
# is run in this cell.

layers=[2, 2, 2, 2]  # number of blocks in each of the four ResNet stages
kernel_size = 3  # module-level global read by BasicBlock for its shortcut conv
pool_size = 4  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

12550218


Since the ResNet-18-style model with 2 blocks per stage has about 12.5 million parameters, a sensible next step is to build a model with 1 block per stage.

In [None]:
# Configuration 2: one block per stage, kernel_size = 3 in the skip connection.
# Only the trainable-parameter count is printed; no forward pass is run.

layers=[1, 1, 1, 1]  # number of blocks in each of the four ResNet stages
kernel_size = 3  # module-level global read by BasicBlock for its shortcut conv
pool_size = 4  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

6279498


With the above configuration, the parameter count drops to about 6.3 million (6,279,498), which is still above 6 million. Let's try a different configuration with the skip-connection kernel size set to 2.

In [None]:
# Configuration 3: one block per stage, kernel_size = 2 in the skip connection.
# Only the trainable-parameter count is printed; no forward pass is run.

layers=[1, 1, 1, 1]  # number of blocks in each of the four ResNet stages
kernel_size = 2  # module-level global read by BasicBlock for its shortcut conv
pool_size = 4  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

5419338


With a kernel size of 2, we get about 5.4 million parameters, which is still above 5 million, so we need to reduce the kernel size further.

In [None]:
# Configuration 4: two blocks per stage, kernel_size = 1 in the skip connection.
# Only the trainable-parameter count is printed; no forward pass is run.

layers=[2, 2, 2, 2]  # number of blocks in each of the four ResNet stages
kernel_size = 1  # module-level global read by BasicBlock for its shortcut conv
pool_size = 4  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

11173962


In [None]:
# Configuration 5: one block per stage, kernel_size = 1 in the skip connection.
# Only the trainable-parameter count is printed; no forward pass is run.

layers=[1, 1, 1, 1]  # number of blocks in each of the four ResNet stages
kernel_size = 1  # module-level global read by BasicBlock for its shortcut conv
pool_size = 4  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

4903242


From the above two configurations, we can see that with 2 blocks per stage and a kernel size of 1 in the skip connection, we get about 11 million parameters, as in the original ResNet architecture.

If we reduce the number of blocks per stage to 1 and keep a kernel size of 1, we get about 4.9 million parameters, which meets the requirement of fewer than 5 million.

In [None]:
# Configuration 6: same as configuration 5 but with pool_size = 2.
# Only the trainable-parameter count is printed; no forward pass is run.
# NOTE(review): ResNet.linear takes 512 features, so forward() presumably only
# works when pooling leaves a 1x1 map -- confirm this pool_size against the
# real input size before training.

layers=[1, 1, 1, 1]  # number of blocks in each of the four ResNet stages
kernel_size = 1  # module-level global read by BasicBlock for its shortcut conv
pool_size = 2  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

4903242


In [None]:
# Configuration 7: same as configuration 5 but with pool_size = 8.
# Only the trainable-parameter count is printed; no forward pass is run.
# NOTE(review): ResNet.linear takes 512 features, so forward() presumably only
# works when pooling leaves a 1x1 map -- confirm this pool_size against the
# real input size before training.

layers=[1, 1, 1, 1]  # number of blocks in each of the four ResNet stages
kernel_size = 1  # module-level global read by BasicBlock for its shortcut conv
pool_size = 8  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

4903242


In [None]:
# Configuration 8: same as configuration 5 but with pool_size = 16.
# Only the trainable-parameter count is printed; no forward pass is run.
# NOTE(review): ResNet.linear takes 512 features, so forward() presumably only
# works when pooling leaves a 1x1 map -- confirm this pool_size against the
# real input size before training.

layers=[1, 1, 1, 1]  # number of blocks in each of the four ResNet stages
kernel_size = 1  # module-level global read by BasicBlock for its shortcut conv
pool_size = 16  # module-level global read by ResNet.forward for average pooling
model = ResNet(BasicBlock, layers)
print(count_parameters(model))

4903242


With the above 3 configurations, we can see that changing the pool size of the average pooling layer has no effect on the number of model parameters, because pooling has no learnable parameters.

So we finalize the following settings for our model, which give fewer than 5 million parameters:



*   layers=[1, 1, 1, 1]
*   kernel_size = 1 
*   pool_size = 4






