In [52]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
import sys
sys.path.append("./../..")

In [54]:
import math
from tqdm import tqdm
#
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as T
from torch import optim
import torchvision.datasets as datasets
import numpy as np
#
from misc.utils import count_parameters
from effcn.layers_em import CapsNetEM
from effcn.functions_em import spread_loss, func_margin_hinton, func_margin_linear, func_step_rel, func_step_abs, exp_lr_decay, func_acc

In [55]:
ds_train = datasets.MNIST(root='../../data', train=True, download=True, transform=T.ToTensor())
ds_valid = datasets.MNIST(root="../../data", train=False, download=True, transform=T.ToTensor())

In [56]:
dl_train = torch.utils.data.DataLoader(ds_train, 
                                        batch_size=2, 
                                        shuffle=True,
                                        num_workers=2)

dl_valid = torch.utils.data.DataLoader(ds_valid, 
                                        batch_size=8, 
                                        shuffle=True,
                                        num_workers=2)

In [57]:
dev = "cpu"
device = torch.device(dev)

In [58]:
model = CapsNetEM(A=32, B=32, C=32, D=32,E=10, K=3, P=4, iter=3, hw_out=(28,28), device=dev)
model = model.to(device)

count_parameters(model)

952820

In [59]:
optimizer = optim.Adam(model.parameters(), lr = 0.01, weight_decay=2e-7) 

In [60]:
num_epochs = 2
dl_len = len(dl_train)
m_max = 0.9
m_min = 0.2
#
for epoch_idx in range(num_epochs):
    # ####################
    # TRAIN
    # ####################
    model.train()
    desc = "Train [{:3}/{:3}]:".format(epoch_idx, num_epochs)
    pbar = tqdm(dl_train, bar_format=desc + '{bar:10}{r_bar}{bar:-10b}')
    epoch_acc = 0

    for idx, (x,y_true) in enumerate(pbar):
        x = x.to(device)
        y_true = y_true.to(device)
        optimizer.zero_grad()

        _, y_pred = model(x)

        step_rel = func_step_rel(num_epochs, dl_len, epoch_idx, idx)
        margin = func_margin_linear(step_rel, m_max, m_min)
        loss = spread_loss(y_pred=y_pred, y_true=y_true, m=margin, device=device)
        acc = func_acc(y_pred, y_true)

        step_abs = func_step_abs(dl_len, epoch_idx, idx)
        exp_lr_decay(optimizer = optimizer, global_step = step_abs)

        print(y_pred, loss)
        loss.backward()
        optimizer.step()


        pbar.set_postfix(
                {'loss': loss.item(),
                 'acc': acc.item()
                 }
        )
        print(y_pred, loss)
        
        if idx >=2:
            break

    break
y_pred, loss

Train [  0/  2]:          | 0/30000 [00:00<?, ?it/s]

tensor([[6.5565e-11, 5.0000e-01, 5.0000e-01, 5.0000e-01, 5.0000e-01, 0.0000e+00,
         1.7341e-03, 5.0000e-01, 5.0000e-01, 5.0000e-01],
        [3.1809e-15, 5.0000e-01, 5.0000e-01, 5.0000e-01, 5.0000e-01, 0.0000e+00,
         1.1684e-04, 5.0000e-01, 5.0000e-01, 5.0000e-01]],
       grad_fn=<ViewBackward0>) tensor(-0.0225, grad_fn=<SubBackward0>)


Train [  0/  2]:          | 1/30000 [00:05<46:32:05,  5.58s/it, loss=-.0225, acc=0]

tensor([[6.5565e-11, 5.0000e-01, 5.0000e-01, 5.0000e-01, 5.0000e-01, 0.0000e+00,
         1.7341e-03, 5.0000e-01, 5.0000e-01, 5.0000e-01],
        [3.1809e-15, 5.0000e-01, 5.0000e-01, 5.0000e-01, 5.0000e-01, 0.0000e+00,
         1.1684e-04, 5.0000e-01, 5.0000e-01, 5.0000e-01]],
       grad_fn=<ViewBackward0>) tensor(-0.0225, grad_fn=<SubBackward0>)
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
       grad_fn=<ViewBackward0>) tensor(nan, grad_fn=<SubBackward0>)


Train [  0/  2]:          | 2/30000 [00:06<24:13:22,  2.91s/it, loss=nan, acc=0.5] 

tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
       grad_fn=<ViewBackward0>) tensor(nan, grad_fn=<SubBackward0>)
tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
       grad_fn=<ViewBackward0>) tensor(nan, grad_fn=<SubBackward0>)


Train [  0/  2]:          | 2/30000 [00:07<24:13:22,  2.91s/it, loss=nan, acc=0]  

tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
        [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
       grad_fn=<ViewBackward0>) tensor(nan, grad_fn=<SubBackward0>)


Train [  0/  2]:          | 2/30000 [00:08<33:42:50,  4.05s/it, loss=nan, acc=0]


(tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan],
         [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
        grad_fn=<ViewBackward0>),
 tensor(nan, grad_fn=<SubBackward0>))