In [1]:
# Shell escape: run nvidia-smi to show the GPU, driver and CUDA versions visible to this runtime.
!nvidia-smi

Sun Nov 22 14:19:01 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 455.38       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   34C    P8    28W / 149W |      0MiB / 11441MiB |      0%      Default |
|                               |                      |                 ERR! |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
import os
from typing import Type, Any, Callable, Union, List, Optional

import numpy as np
import torch
import torch.nn.functional as F
from torch import nn, optim
from torch import Tensor
from torchsummary import summary
from torchvision import datasets, transforms, models

In [3]:
# __all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
#            'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
#            'wide_resnet50_2', 'wide_resnet101_2']


# model_urls = {
#     'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
#     'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
#     'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
#     'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
#     'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
#     'resnext50_32x4d': 'https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth',
#     'resnext101_32x8d': 'https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth',
#     'wide_resnet50_2': 'https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth',
#     'wide_resnet101_2': 'https://download.pytorch.org/models/wide_resnet101_2-32ee1156.pth',
# }

In [3]:
class ConvBlock(nn.Module):
    """Convolution followed by a tanh activation and 2x2 average pooling.

    Args:
        in_channels: number of channels of the incoming feature map.
        out_channels: base number of output channels.
        k: multiplier applied to ``out_channels``; the convolution emits
            ``out_channels * k`` channels.
        kernel_size: convolution kernel size (default 5).
        stride: convolution stride (default 1).
    """

    def __init__(self, in_channels, out_channels, k, kernel_size=5, stride=1):
        super().__init__()

        widened_channels = out_channels * k
        self.conv = nn.Conv2d(
            in_channels,
            widened_channels,
            kernel_size=kernel_size,
            stride=stride,
        )
        self.tanh = nn.Tanh()
        self.pool = nn.AvgPool2d(kernel_size=2)

    def forward(self, x):
        """Return ``pool(tanh(conv(x)))``."""
        activated = self.tanh(self.conv(x))
        return self.pool(activated)

### Vanilla LeNet

In [4]:
class LeNet5(nn.Module):
    """LeNet-5 style CNN for 3-channel images.

    The feature extractor stacks three 5x5 convolutions with tanh
    activations and 2x2 average pooling after the first two. The classifier
    expects exactly 120 features, i.e. a 1x1 map out of the last
    convolution, which is what a 32x32 input produces
    (32 -> 28 -> 14 -> 10 -> 5 -> 1).

    Args:
        n_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, n_classes):
        super().__init__()

        conv_stack = [
            nn.Conv2d(3, 6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(6, 16, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            nn.Conv2d(16, 120, kernel_size=5, stride=1),
            nn.Tanh(),
        ]
        self.feature_extractor = nn.Sequential(*conv_stack)

        head = [
            nn.Linear(120, 84),
            nn.Tanh(),
            nn.Linear(84, n_classes),
        ]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        features = self.feature_extractor(x)
        # Keep the batch dimension, collapse everything else.
        flat = features.flatten(start_dim=1)
        return self.classifier(flat)

In [5]:
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell still runs on CPU-only machines (a bare .cuda() would raise there).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): this rebinds the name `LeNet5` from the class to an instance,
# so the cell cannot be re-run without re-running the class definition first.
# The binding is kept because later cells may use `LeNet5` as the model;
# rename the instance (e.g. `lenet5_model`) once those uses are confirmed.
LeNet5 = LeNet5(10).to(device)

# Layer-by-layer output shapes and parameter counts for a 3x32x32 input.
summary(LeNet5, (3, 32, 32), device=device.type)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
              Tanh-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 16, 10, 10]           2,416
              Tanh-5           [-1, 16, 10, 10]               0
         AvgPool2d-6             [-1, 16, 5, 5]               0
            Conv2d-7            [-1, 120, 1, 1]          48,120
              Tanh-8            [-1, 120, 1, 1]               0
            Linear-9                   [-1, 84]          10,164
             Tanh-10                   [-1, 84]               0
           Linear-11                   [-1, 10]             850
Total params: 62,006
Trainable params: 62,006
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.01
Forward/ba

### Wide LeNet with Linear Bottleneck

In [10]:
class Linear_WideLeNet5(nn.Module):
    """Widened LeNet-5 variant whose bottleneck convolutions have no activation.

    Compared with the plain LeNet-5 layout, the second and third 5x5
    convolutions are each preceded by a 1x1 convolution that reduces the
    channel count (32 -> 8 and 62 -> 11). No tanh is applied between the
    wide convolution, the 1x1 reduction and the following 5x5 convolution,
    so each of those groups is a purely linear chain.

    The classifier expects exactly 120 features, i.e. a 1x1 map out of the
    last convolution, which is what a 32x32 input produces
    (32 -> 28 -> 14 -> 12 -> 14 -> 10 -> 5 -> 5 -> 1).

    Args:
        n_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, n_classes):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            # Wide 5x5 convolution; padding=1 shrinks 14x14 only to 12x12.
            nn.Conv2d(in_channels=6, out_channels=32, kernel_size=5, stride=1, padding=1),
            # 1x1 channel reduction. NOTE(review): padding=1 on a 1x1 kernel
            # adds a border whose values are just the bias; it is kept as-is
            # because it restores the 14x14 size the next layer relies on.
            nn.Conv2d(in_channels=32, out_channels=8, kernel_size=1, stride=1, padding=1),
            nn.Conv2d(in_channels=8, out_channels=62, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            # Second 1x1 channel reduction, again without an activation.
            nn.Conv2d(in_channels=62, out_channels=11, kernel_size=1, stride=1, padding=0),
            nn.Conv2d(in_channels=11, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        The forward pass is silent: it does not write tensor shapes to
        stdout.
        """
        x = self.feature_extractor(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        return logits

In [11]:
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell still runs on CPU-only machines (a bare .cuda() would raise there).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): this rebinds the name `Linear_WideLeNet5` from the class to an
# instance, so the cell cannot be re-run without re-running the class
# definition first. The binding is kept because later cells may use the name
# as the model; rename the instance once those uses are confirmed.
Linear_WideLeNet5 = Linear_WideLeNet5(10).to(device)

# Layer-by-layer output shapes and parameter counts for a 3x32x32 input.
summary(Linear_WideLeNet5, (3, 32, 32), device=device.type)

torch.Size([2, 120, 1, 1])
torch.Size([2, 120])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
              Tanh-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 32, 12, 12]           4,832
            Conv2d-5            [-1, 8, 14, 14]             264
            Conv2d-6           [-1, 62, 10, 10]          12,462
              Tanh-7           [-1, 62, 10, 10]               0
         AvgPool2d-8             [-1, 62, 5, 5]               0
            Conv2d-9             [-1, 11, 5, 5]             693
           Conv2d-10            [-1, 120, 1, 1]          33,120
             Tanh-11            [-1, 120, 1, 1]               0
           Linear-12                   [-1, 84]          10,164
             Tanh-13                   [-1, 84]        

### Wide LeNet with Non-Linear Bottleneck

In [12]:
class Non_Linear_WideLeNet5(nn.Module):
    """Widened LeNet-5 variant with a tanh after every convolution.

    Compared with the plain LeNet-5 layout, the second and third 5x5
    convolutions are each preceded by a 1x1 convolution that reduces the
    channel count (32 -> 8 and 62 -> 11). Every convolution, including the
    1x1 reductions, is followed by a tanh activation.

    The classifier expects exactly 120 features, i.e. a 1x1 map out of the
    last convolution, which is what a 32x32 input produces
    (32 -> 28 -> 14 -> 12 -> 14 -> 10 -> 5 -> 5 -> 1).

    Args:
        n_classes: number of logits produced by the final linear layer.
    """

    def __init__(self, n_classes):
        super().__init__()

        self.feature_extractor = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            # Wide 5x5 convolution; padding=1 shrinks 14x14 only to 12x12.
            nn.Conv2d(in_channels=6, out_channels=32, kernel_size=5, stride=1, padding=1),
            nn.Tanh(),
            # 1x1 channel reduction. NOTE(review): padding=1 on a 1x1 kernel
            # adds a border whose pre-activation values are just the bias; it
            # is kept as-is because it restores the 14x14 size the next layer
            # relies on.
            nn.Conv2d(in_channels=32, out_channels=8, kernel_size=1, stride=1, padding=1),
            nn.Tanh(),
            nn.Conv2d(in_channels=8, out_channels=62, kernel_size=5, stride=1),
            nn.Tanh(),
            nn.AvgPool2d(kernel_size=2),
            # Second 1x1 channel reduction, followed by its own activation.
            nn.Conv2d(in_channels=62, out_channels=11, kernel_size=1, stride=1, padding=0),
            nn.Tanh(),
            nn.Conv2d(in_channels=11, out_channels=120, kernel_size=5, stride=1),
            nn.Tanh(),
        )

        self.classifier = nn.Sequential(
            nn.Linear(in_features=120, out_features=84),
            nn.Tanh(),
            nn.Linear(in_features=84, out_features=n_classes),
        )

    def forward(self, x):
        """Return the raw class logits for a batch of images.

        The forward pass is silent: it does not write tensor shapes to
        stdout.
        """
        x = self.feature_extractor(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        logits = self.classifier(x)
        return logits

In [13]:
# Use the GPU when one is available, otherwise fall back to the CPU so this
# cell still runs on CPU-only machines (a bare .cuda() would raise there).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# NOTE(review): this rebinds the name `Non_Linear_WideLeNet5` from the class
# to an instance, so the cell cannot be re-run without re-running the class
# definition first. The binding is kept because later cells may use the name
# as the model; rename the instance once those uses are confirmed.
Non_Linear_WideLeNet5 = Non_Linear_WideLeNet5(10).to(device)

# Layer-by-layer output shapes and parameter counts for a 3x32x32 input.
summary(Non_Linear_WideLeNet5, (3, 32, 32), device=device.type)

torch.Size([2, 120, 1, 1])
torch.Size([2, 120])
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 6, 28, 28]             456
              Tanh-2            [-1, 6, 28, 28]               0
         AvgPool2d-3            [-1, 6, 14, 14]               0
            Conv2d-4           [-1, 32, 12, 12]           4,832
              Tanh-5           [-1, 32, 12, 12]               0
            Conv2d-6            [-1, 8, 14, 14]             264
              Tanh-7            [-1, 8, 14, 14]               0
            Conv2d-8           [-1, 62, 10, 10]          12,462
              Tanh-9           [-1, 62, 10, 10]               0
        AvgPool2d-10             [-1, 62, 5, 5]               0
           Conv2d-11             [-1, 11, 5, 5]             693
             Tanh-12             [-1, 11, 5, 5]               0
           Conv2d-13            [-1, 120, 1, 1]        