In [1]:
import sys

# Make the project sources under ../src importable from this notebook.
# The membership check keeps the cell idempotent: re-running it does not
# stack duplicate entries onto sys.path.
if "../src" not in sys.path:
    sys.path.append("../src")

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

import sys
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torchvision.datasets import FashionMNIST

import matplotlib.pyplot as plt
from pytorch_impl.nns import FCN
from pytorch_impl.nns.utils import warm_up_batch_norm, to_one_hot
from pytorch_impl.estimators import MatrixExpEstimator, GradientBoostingEstimator
from pytorch_impl import ClassifierTraining
from pytorch_impl.matrix_exp import matrix_exp, compute_exp_term

In [4]:
# Seed PyTorch's global RNG so that runs of this notebook are repeatable.
torch.manual_seed(0)

num_classes = 10

# GPU execution is deliberately switched off for this notebook. Set the flag
# to True to run on the first CUDA device when one is available.
use_cuda = False

if use_cuda and torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('Torch version: {}'.format(torch.__version__))
print('Device: {}'.format(device))

# Page-locked host memory only speeds up host-to-GPU copies, so request it
# only when the batches will actually be moved to a CUDA device.
pin_memory = device.type == 'cuda'

# Training split; downloaded into the current directory on first use.
train_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=True, download=True,
          transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=pin_memory)

# Test split; read from the files stored under root='.'.
test_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=False, transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=pin_memory)

device

Torch version: 1.3.1
Device: cpu


device(type='cpu')

In [5]:
# Sanity check for matrix_exp: M is the 2x2 generator [[0, -1], [1, 0]]
# scaled by a 30-degree angle (in radians). If matrix_exp computes the matrix
# exponential, the result is the rotation [[cos a, -sin a], [sin a, cos a]].
a = 30 / 180 * np.pi
M = a * torch.tensor([[0, -1], [1, 0]])

# Bring the result back to host memory before converting it for display.
rotation = matrix_exp(M, device)
rotation.cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [6]:
# Sanity check for compute_exp_term: for the scaled 2x2 generator M,
# M @ compute_exp_term(M) + I is expected to give the same rotation matrix
# that matrix_exp(M) produces.
# NOTE(review): presumably compute_exp_term returns (exp(M) - I) M^-1 — confirm.
a = 30 / 180 * np.pi
M = torch.tensor([[0, -1], [1, 0]]) * a
M_clone = M.clone().to(device)
product = torch.matmul(M_clone, compute_exp_term(M, device))

# Create the identity on the same device as the product and copy the sum back
# to the host before converting; a CPU-only identity and a bare .numpy() call
# both fail as soon as `device` is a CUDA device.
identity = torch.eye(2, device=product.device)
(product + identity).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [7]:
# D * D is passed to FCN below; FashionMNIST images are 28 x 28 pixels.
D = 28

# Instantiate the network and move its parameters to the selected device.
# NOTE(review): the leading 10 presumably matches num_classes and D * D is
# presumably the flattened input size — confirm against FCN's signature.
n_pixels = D * D
model = FCN(10, n_pixels).to(device)

In [10]:
# Base learner built around the network defined earlier in the notebook.
base_estimator = MatrixExpEstimator(model, num_classes, device, learning_rate=1., aug_grad=True)

# Take one mini-batch from the training loader and place it on the same
# device as the model before fitting, matching what the training loop does.
X, y = next(iter(train_loader))
X, y = X.to(device), y.to(device)
base_estimator.fit_batch(X, y)

# Boosting wrapper around the fitted base learner, using cross-entropy loss.
gb_estimator = GradientBoostingEstimator(base_estimator, num_classes, nn.CrossEntropyLoss(), device)

aug_c 0.3742941915988922
accuracy before fit 0.09375, loss 1.04214
computing grads ...
exponentiating kernel matrix ...
fitting done. took 1s



In [11]:
n_iter = 100

# gb_estimator.step() is called once per `step_every` batches, i.e. about ten
# times over the run. Clamp to 1 so n_iter < 10 cannot cause a modulo by zero.
step_every = max(1, n_iter // 10)


def _cycle_batches(loader):
    """Yield batches from `loader` indefinitely.

    A new pass over the loader is started whenever the previous one is
    exhausted. An empty loader ends the generator instead of spinning forever.
    """
    while True:
        produced = False
        for batch in loader:
            produced = True
            yield batch
        if not produced:
            return


# Walk a single loader iterator instead of building a fresh one per batch.
# `range` comes first in zip so no extra batch is fetched after the last step.
for batch_id, (X, y) in zip(range(n_iter), _cycle_batches(train_loader)):
    X, y = X.to(device), y.to(device)

    if batch_id % step_every == 0:
        gb_estimator.step()
        # Show the predictions for the first five samples of the batch.
        print(gb_estimator.predict(X)[:5])

    gb_estimator.fit_batch(X, y)
    print()
    print()

tensor([[-1.3116, -1.3073, -1.6780, -0.8270,  0.5999, -1.4074, -1.2200, -1.2811,
         -1.5010, -1.5031],
        [-0.9381, -0.2929, -1.0449, -0.0203, -0.9594, -1.2553, -1.6095, -0.9632,
         -1.1724, -1.1886],
        [-1.1686, -1.4108, -0.9120, -0.7105, -1.1318, -0.9942, -1.2031, -1.1310,
         -1.5252, -0.1785],
        [-1.0273,  0.6424, -0.8892, -0.7680, -0.7968, -1.3836, -1.5088, -0.9488,
         -1.0229, -1.1342],
        [-0.1427, -0.5453, -1.5399, -0.6621, -0.6578, -1.3688, -0.8946, -1.1529,
         -1.2402, -1.0434]])
accuracy before fit 0.56250, loss 1.70072
computing grads ...
exponentiating kernel matrix ...
current beta 3.139465808868408
fitting done. took 2s



accuracy before fit 0.50000, loss 1.77643
computing grads ...
exponentiating kernel matrix ...
current beta 3.0946052074432373
fitting done. took 1s



accuracy before fit 0.65625, loss 1.61806
computing grads ...
exponentiating kernel matrix ...
current beta 3.202892303466797
fitting done. took 1s





computing grads ...
exponentiating kernel matrix ...
current beta 2.7441210746765137
fitting done. took 1s



beta 2.6861887693405153
tensor([[-1.4641e+00,  3.8098e+00, -8.5554e-01,  2.9257e-04, -6.9992e-01,
         -1.3633e+00, -1.7885e+00, -1.2359e+00, -1.7003e+00, -1.7156e+00],
        [-1.0551e+00,  3.4529e+00, -1.1134e+00, -8.0081e-01, -1.3327e+00,
         -1.2288e+00, -1.5632e+00, -1.3226e+00, -1.0788e+00, -1.7038e+00],
        [-1.8779e+00, -1.4104e+00, -1.7986e+00, -1.6724e+00, -1.5651e+00,
          3.5635e-01, -1.6781e+00,  4.0651e+00, -1.0043e+00, -1.5028e+00],
        [-3.3519e-01, -1.5779e+00,  2.3778e+00, -2.5399e+00, -8.2241e-01,
         -2.0318e+00,  5.9038e-01, -2.0567e+00, -1.3114e+00, -1.8934e+00],
        [ 1.7672e-02, -8.5219e-01, -9.0522e-01, -1.1705e+00, -1.4809e+00,
          1.8428e+00, -6.3302e-01, -7.8500e-01, -3.7198e-01,  1.2994e-01]])
accuracy before fit 0.75000, loss 0.76981
computing grads ...
exponentiating kernel matrix ...
current beta 2.6003668308

accuracy before fit 0.90625, loss 0.55294
computing grads ...
exponentiating kernel matrix ...
current beta 2.261086940765381
fitting done. took 2s



accuracy before fit 0.78125, loss 0.62255
computing grads ...
exponentiating kernel matrix ...
current beta 2.3446102142333984
fitting done. took 1s



accuracy before fit 0.78125, loss 0.64513
computing grads ...
exponentiating kernel matrix ...
current beta 2.3759281635284424
fitting done. took 1s



accuracy before fit 0.71875, loss 0.99092
computing grads ...
exponentiating kernel matrix ...
current beta 2.49845027923584
fitting done. took 1s



beta 2.2522916078567503
tensor([[-0.9732, -0.9867, -1.2511, -0.7503, -1.3648,  2.1495, -1.1402, -0.6771,
         -0.5742, -0.0868],
        [-1.1668,  3.8514, -1.5034, -0.1144, -0.8765, -1.1748, -1.6132, -1.6199,
         -0.9702, -1.7498],
        [-0.1367, -1.5708,  0.2192, -0.9399,  0.0210, -0.4636,  1.6568, -1.7840,
         -0.5221, -2.5885],
        [-1.5234, -2.4036, -1.4229,  0.1246,