In [1]:
import sys

# Make the project sources importable. The path is relative to the working
# directory the notebook is run from. The membership check keeps the cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
src_dir = "../src"
if src_dir not in sys.path:
    sys.path.append(src_dir)

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

import sys
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torchvision.datasets import FashionMNIST

import matplotlib.pyplot as plt
from pytorch_impl.nns import FCN
from pytorch_impl.nns.utils import warm_up_batch_norm, to_one_hot
from pytorch_impl.estimators import MatrixExpEstimator, GradientBoostingEstimator
from pytorch_impl import ClassifierTraining
from pytorch_impl.matrix_exp import matrix_exp, compute_exp_term

In [4]:
# Fix torch's global RNG seed so repeated runs start from the same state.
torch.manual_seed(0)

num_classes = 10

# CUDA is deliberately disabled for this notebook; flip USE_CUDA to opt in.
# (Replaces the former `torch.cuda.is_available() and False` condition, which
# always selected the CPU but hid that decision inside the test.)
USE_CUDA = False

if USE_CUDA and torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('Torch version: {}'.format(torch.__version__))
print('Device: {}'.format(device))

# Page-locked host memory is only useful when batches are copied to a GPU,
# so request it only when the selected device is a CUDA device.
pin_memory = device.type == 'cuda'

# Training split; downloaded into the current directory on first use.
train_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=True, download=True,
                 transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=pin_memory)

# Test split; read from the same root directory (no download requested here).
test_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=False, transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=pin_memory)

device

Torch version: 1.3.1
Device: cpu


device(type='cpu')

In [5]:
# Sanity check for matrix_exp: the exponential of a * [[0, -1], [1, 0]] is the
# 2-D rotation by angle a, so for a = 30 degrees the expected result is
# [[cos a, -sin a], [sin a, cos a]] ~ [[0.866, -0.5], [0.5, 0.866]].
a = 30 / 180 * np.pi
# Build the generator as a float tensor explicitly, so the scaling by `a` does
# not depend on integer-tensor-by-float type promotion.
M = torch.tensor([[0., -1.], [1., 0.]], dtype=torch.float32) * a
matrix_exp(M, device).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [6]:
# Sanity check for compute_exp_term: M @ compute_exp_term(M) + I is evaluated
# for the 30-degree rotation generator and should give the rotation matrix
# [[0.866, -0.5], [0.5, 0.866]].
# NOTE(review): presumably compute_exp_term(M) = M^{-1} (exp(M) - I) -- confirm
# against pytorch_impl.matrix_exp.
a = 30 / 180 * np.pi
# Explicit float dtype so the scaling does not rely on int-by-float promotion.
M = torch.tensor([[0., -1.], [1., 0.]], dtype=torch.float32) * a
M_clone = M.clone().to(device)
# The identity is created on `device` and the result is moved to the CPU before
# the NumPy conversion, so the cell also works when `device` is a CUDA device.
(torch.matmul(M_clone, compute_exp_term(M, device)) + torch.eye(2, device=device)).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [9]:
# Side length used to size the network input (D * D values per sample).
D = 28

# NOTE(review): the literal 10 presumably mirrors `num_classes` -- confirm
# against FCN's signature before replacing it.
model = FCN(10, D * D).to(device)

# Stand-alone estimator with an explicit cross-entropy criterion and a large
# learning rate.
estimator = MatrixExpEstimator(
    model,
    num_classes,
    device,
    criterion=nn.CrossEntropyLoss(),
    learning_rate=20.,
    momentum=0,
)


def get_estimator():
    """Return a new MatrixExpEstimator wrapping the module-level ``model``.

    Every call builds a fresh estimator object, but all of them share the same
    ``model`` instance. Unlike the stand-alone ``estimator``, no ``criterion``
    is passed here and the learning rate is 1.
    """
    return MatrixExpEstimator(
        model,
        num_classes,
        device,
        learning_rate=1.,
        momentum=0,
    )

In [10]:
# Boosting wrapper: receives the `get_estimator` factory itself (not an
# estimator instance), the class count, a cross-entropy loss and the device.
gb_estimator = GradientBoostingEstimator(
    get_estimator,
    num_classes,
    nn.CrossEntropyLoss(),
    device,
)

In [11]:
# Take a single mini-batch from the training loader and move it to `device`;
# the loop below fits on this same batch at every iteration.
X, y = next(iter(train_loader))
X, y = X.to(device), y.to(device)

for batch_id in range(100):

    # On every second iteration, call step() on the boosting estimator before
    # fitting.
    if batch_id % 2 == 0:
        gb_estimator.step()
        # NOTE(review): this prints predictions of the stand-alone `estimator`,
        # which this loop never fits -- the captured output shows the same
        # tensor each time. Presumably the boosted model's predictions were
        # intended; confirm against the GradientBoostingEstimator API.
        print(estimator.predict(X)[:5])

    gb_estimator.fit_batch(X, y)
    # Two blank lines separate the log output of consecutive iterations.
    print()
    print()

tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],
        [-0.0790,  0.0361, -0.0936,  0.0594,  0.0459, -0.0528, -0.0233,  0.0876,
          0.0061,  0.0332],
        [-0.1783,  0.0862, -0.0158,  0.1585,  0.2853,  0.0329, -0.0128,  0.0063,
          0.1127,  0.1249],
        [-0.0520, -0.0025, -0.0870,  0.0055,  0.0746, -0.0656, -0.1532, -0.0108,
          0.0705,  0.0664]])
tensor([[0.0820, 0.1082, 0.0934, 0.0990, 0.1210, 0.0872, 0.0996, 0.0912, 0.0999,
         0.1186],
        [0.0780, 0.0957, 0.0951, 0.1021, 0.1114, 0.0978, 0.0916, 0.1125, 0.1068,
         0.1090],
        [0.0921, 0.1033, 0.0907, 0.1057, 0.1043, 0.0945, 0.0973, 0.1088, 0.1002,
         0.1030],
        [0.0782, 0.1019, 0.0921, 0.1096, 0.1244, 0.0967, 0.0923, 0.0941, 0.1047,
         0.1060],
        [0.0962, 0.1010, 0.0929, 0.1019, 0.1091, 0.0949, 0

  if __name__ == '__main__':


accuracy before fit 0.18750, loss 2.27208
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 0.18750, loss 2.27208
computing grads ...
exponentiating kernel matrix ...


beta 2.938101053237915
debug {
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068]])
tensor([[-0.1558, -0.2141, -0.1875, -0.1974, -0.2396, -0.1717, -0.1978, -0.1808,
          1.7790, -0.2344]])
}
beta 2.938101053237915
debug {
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068]])
tensor([[-0.1558, -0.2141, -0.1875, -0.1974, -0.2396, -0.1717, -0.1978, -0.1808,
          1.7790, -0.2344]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


accuracy before fit 1.00000, loss 0.02841
computing grads ...
exponentiating kernel matrix ...


beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
}
beta 0.0
debug {
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
tensor([[ 0.2893,  0.3454,  0.3283,  0.3351,  0.3610,  0.3137,  0.3351,  0.3218,
         -2.9861,  0.3564]])
tensor([[-0.6195, -0.5141, -0.5831, -0.5538, -0.4771, -0.6053, -0.5488, -0.5863,
          5.2626, -0.4818]])
}
tensor([[-0.1619,  0.1148, -0.0321,  0.0260,  0.2270, -0.1008,  0.0324, -0.0551,
          0.0356,  0.2068],
        [-0.2565, -0.0517, -0.0577,  0.0131,  0.1002, -0.0295, -0.0954,  0.1097,
          0.0577,  0.0786],
