In [262]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [263]:
import sys
sys.path.append("./../..")

In [264]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as T
from torch import optim

import numpy as np

# local imports
from effcn.models_mnist import BaselineCNN
from misc.utils import count_parameters


#### Test capsulwise activation

In [221]:
a = torch.rand([1,1,28,28])
a.shape

torch.Size([1, 1, 28, 28])

In [222]:
conv =  nn.Conv2d(1, 32, kernel_size=(5, 5), padding="valid")

In [223]:
x = conv(a)
x.shape

torch.Size([1, 32, 24, 24])

In [224]:
conv.weight.shape

torch.Size([32, 1, 5, 5])

In [225]:
def squash_func(x, eps=10e-21):
    """
        IN:
            x (b, n, d)
        OUT:
            squash(x) (b, n, d)
    """
    x_norm = torch.norm(x, dim=2, keepdim=True)
    return (1 - 1 / (torch.exp(x_norm) + eps)) * (x / (x_norm + eps))

In [226]:
s = squash_func(x)
s.shape
s.min()

tensor(-0.4229, grad_fn=<MinBackward1>)

In [227]:
v = x.view(x.shape[0],x.shape[1],-1)
v = v.permute(0,2,1)
s = squash_func(v)
s.shape

torch.Size([1, 576, 32])

In [228]:
x_norm = torch.norm(v, dim=2, keepdim=True)
x_norm.shape

torch.Size([1, 576, 1])

In [229]:
eps=10e-21
k = (1 - 1 / (torch.exp(x_norm) + eps)) * (v / (x_norm + eps))
k.shape

torch.Size([1, 576, 32])

In [230]:
k.min()

tensor(-0.3995, grad_fn=<MinBackward1>)

In [231]:
t = k.permute(0,2,1)
t = t.view(x.shape)
t.shape

torch.Size([1, 32, 24, 24])

#### Ref Model

In [232]:
model = BaselineCNN()

In [233]:
model

BaselineCNN(
  (conv1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (conv2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (out): Linear(in_features=1568, out_features=10, bias=True)
)

In [234]:
count_parameters(model)

28938

In [235]:
def squash_conv_func(v, eps=10e-21):
    """
        IN:
            x (b, c, h, w)
        OUT:
            squash(x) (b, c, h, w)
    """
    #shap to capsule squashing for tests
    x = v.view(v.shape[0],v.shape[1],-1)
    x = x.permute(0,2,1)
    
    x_norm = torch.norm(x, dim=2, keepdim=True)
    k = (1 - 1 / (torch.exp(x_norm) + eps)) * (x / (x_norm + eps))
    
    #reshape to comv
    t = k.permute(0,2,1)
    t = t.view(v.shape)
    
    return t

In [265]:
class Squash(nn.Module):
    def __init__(self, eps=1e-21):
        super().__init__()
        self.eps = eps

    def forward(self, v):
        """
            IN:
                x (b, c, h, w)
            OUT:
                squash(x) (b, c, h, w)
        """
        #shap to capsule squashing for tests
        x = v.view(v.shape[0],v.shape[1],-1)
        x = x.permute(0,2,1)

        x_norm = torch.norm(x, dim=2, keepdim=True)
        k = (1 - 1 / (torch.exp(x_norm) + eps)) * (x / (x_norm + eps))

        #reshape to comv
        t = k.permute(0,2,1)
        t = t.view(v.shape)

        return t

In [237]:
#test on normal conv
a = torch.rand([1,1,28,28])
conv =  nn.Conv2d(1, 32, kernel_size=(5, 5), padding="valid")
#conv =  nn.Conv2d(1, 32, kernel_size=(28, 28), groups=1, padding="valid")
x = conv(a)
squa = Squash()
s = squa(x)

s.shape

torch.Size([1, 32, 24, 24])

In [238]:
count_parameters(conv)

832

In [239]:
#test on deepwise conv conv
a = torch.rand([1,1,28,28])
conv =  nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=(5, 5), padding="valid"),
            nn.Conv2d(32, 32, kernel_size=(24, 24), groups=32, padding="valid")
        )
x = conv(a)
squa = Squash()
s = squa(x)

s.shape

torch.Size([1, 32, 1, 1])

In [240]:
count_parameters(conv)

19296

In [241]:

19296
295776

295776

#### Test on Model

In [266]:
class SquashCNN(nn.Module):
    """
        Baseline CNN Model for MNIST
    """

    def __init__(self):
        super(SquashCNN, self).__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(
                in_channels=1,
                out_channels=16,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            Squash(),
            #nn.MaxPool2d(kernel_size=2),
            #nn.AvgPool2d(kernel_size=2),
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, 5, 1, 2),
            Squash(),
            #nn.MaxPool2d(2),
            #nn.AvgPool2d(kernel_size=2),
        )
        # fully connected layer, output 10 classes
        #self.out = nn.Linear(32 * 7 * 7, 10)
        self.out = nn.Linear(32 * 28 * 28, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        # flatten the output of conv2 to (batch_size, 32 * 7 * 7)
        x = x.view(x.size(0), -1)
        output = self.out(x)
        return output

In [267]:
model = SquashCNN()
count_parameters(model)

264138

In [268]:
if torch.cuda.is_available():  
    dev = "cuda" 
else:  
    dev = "cpu"  
device = torch.device(dev)
device

device(type='cuda')

In [269]:
ds_train = datasets.MNIST(root='../../data', train=False, download=True, transform=T.ToTensor())
ds_valid = datasets.MNIST(root="../../data", train=False, download=True, transform=T.ToTensor())

In [270]:
dl_train = torch.utils.data.DataLoader(ds_train, 
                                          batch_size=256, 
                                          shuffle=True, 
                                          num_workers=4)
dl_valid = torch.utils.data.DataLoader(ds_valid, 
                                          batch_size=256, 
                                          shuffle=True, 
                                          num_workers=4)

In [260]:
model = SquashCNN()
model = model.to(device)

RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 2.00 GiB total capacity; 162.42 MiB already allocated; 0 bytes free; 188.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [261]:
loss_func = nn.CrossEntropyLoss() 
optimizer = optim.Adam(model.parameters(), lr = 0.01) 

In [250]:
num_epochs = 20
model.train()
for epoch in range(num_epochs):
    for idx, (x, y_true) in enumerate(dl_train):
        x = x.to(device)
        y_true = y_true.to(device)
        optimizer.zero_grad()
        y_pred = model(x)
        loss = loss_func(y_pred, y_true)         
        loss.backward()
        optimizer.step()
        
        if idx % 1000 == 0:
            print("Epoch[{}/{}] - step {} loss: {:.4f}".format(epoch, num_epochs, idx, loss.item()))

RuntimeError: CUDA out of memory. Tried to allocate 2.00 MiB (GPU 0; 2.00 GiB total capacity; 162.37 MiB already allocated; 0 bytes free; 188.00 MiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for x, y_true in dl_valid:
        x = x.to(device)
        y_true = y_true.to(device)
        y_pred = model(x)
        y_pred = torch.max(y_pred, 1)[1]
        correct += (y_pred == y_true).sum().item()
        total += y_true.shape[0]
    acc = correct / total

In [None]:
print(acc)
print(total - correct)

In [None]:
Epoch[19/20] - step 0 loss: 0.0036

1.0
0