In [1]:
import sys
sys.path.append("../src")

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

import sys
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torchvision.datasets import FashionMNIST

import matplotlib.pyplot as plt
from pytorch_impl.nns import FCN
from pytorch_impl.nns.utils import warm_up_batch_norm, to_one_hot
from pytorch_impl.estimators import MatrixExpEstimator, GradientBoostingEstimator
from pytorch_impl import ClassifierTraining
from pytorch_impl.matrix_exp import matrix_exp, compute_exp_term

In [4]:
torch.manual_seed(0)

num_classes = 10

if torch.cuda.is_available() and False:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('Torch version: {}'.format(torch.__version__))
print('Device: {}'.format(device))

train_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=True, download=True,
          transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=False, transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=True)

device

Torch version: 1.3.1
Device: cpu


device(type='cpu')

In [5]:
a = 30 / 180 * np.pi
M = torch.tensor([[0, -1], [1, 0]]) * a
matrix_exp(M, device).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [6]:
a = 30 / 180 * np.pi
M = torch.tensor([[0, -1], [1, 0]]) * a
M_clone = M.clone().to(device)
(torch.matmul(M_clone, compute_exp_term(M, device)) + torch.eye(2)).numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [7]:
D = 28

model     = FCN(10, D * D).to(device)

estimator = MatrixExpEstimator(model, num_classes, device, criterion=nn.CrossEntropyLoss(), learning_rate=20., momentum=0)

def get_estimator():
    return MatrixExpEstimator(model, num_classes, device, learning_rate=1., momentum=0)

In [10]:
gb_estimator = GradientBoostingEstimator(get_estimator, num_classes, nn.CrossEntropyLoss(), device)

In [11]:
n_iter = 100

for batch_id in range(100):
    _, (X, y) = next(enumerate(train_loader))
    X, y = X.to(device), y.to(device)

    if batch_id % (n_iter // 10) == 0:
        gb_estimator.step()
        print(gb_estimator.predict(X)[:5])
    
    gb_estimator.fit_batch(X, y)
    print()
    print()

tensor([[ 0.0882, -0.0574, -0.1197,  0.0684,  0.0278, -0.1262,  0.0563,  0.1518,
          0.0645, -0.0233],
        [ 0.0695, -0.0536, -0.0047,  0.0895,  0.0004, -0.1268,  0.0655,  0.0041,
          0.0860,  0.0521],
        [ 0.0148, -0.0231, -0.1221,  0.0730, -0.0011, -0.1341,  0.0320, -0.0496,
          0.1455,  0.1509],
        [ 0.1883, -0.0633,  0.0066,  0.1647,  0.0337, -0.1627,  0.0835, -0.0159,
          0.0712,  0.0350],
        [ 0.1182, -0.0307, -0.1416,  0.1147,  0.0425, -0.2466,  0.0587, -0.0328,
          0.1185,  0.0367]])
accuracy before fit 0.09375, loss 2.30899
computing grads ...
exponentiating kernel matrix ...
current beta 2.9025161266326904
fitting done. took 1s



accuracy before fit 0.12500, loss 2.30829
computing grads ...
exponentiating kernel matrix ...
current beta 2.911940097808838
fitting done. took 1s



accuracy before fit 0.15625, loss 2.27337
computing grads ...
exponentiating kernel matrix ...
current beta 2.9364707469940186
fitting done. took 1s




computing grads ...
exponentiating kernel matrix ...
current beta 2.755383253097534
fitting done. took 1s



accuracy before fit 0.62500, loss 0.88663
computing grads ...
exponentiating kernel matrix ...
current beta 2.7156293392181396
fitting done. took 1s



beta 2.4860928535461424
tensor([[-0.9920, -0.9286, -1.0921, -1.1813, -0.7006,  0.9065, -0.7832,  4.5178,
         -0.6145,  1.1042],
        [-1.3591, -1.0926, -0.2711, -0.8656, -0.7624,  2.4386, -0.7053, -0.0191,
          1.3796,  1.6509],
        [-0.7177,  5.4680, -0.6094, -0.2224,  0.0190, -0.7251, -0.7292, -0.8108,
         -0.8390, -0.7085],
        [ 4.7250, -1.4686, -0.1787, -0.5227, -0.7393, -1.6891,  2.6157, -1.1150,
         -0.6955, -0.7539],
        [-0.5721, -0.4186, -0.8402, -0.3347, -0.6239,  1.6564, -0.3168,  2.7619,
          0.1169, -1.1042]])
accuracy before fit 0.71875, loss 0.89962
computing grads ...
exponentiating kernel matrix ...
current beta 2.7082135677337646
fitting done. took 1s



accuracy before f

current beta 2.1664175987243652
fitting done. took 1s



accuracy before fit 0.75000, loss 0.81927
computing grads ...
exponentiating kernel matrix ...
current beta 2.5755343437194824
fitting done. took 1s



accuracy before fit 0.84375, loss 0.56600
computing grads ...
exponentiating kernel matrix ...
current beta 2.273399591445923
fitting done. took 1s



beta 2.36766459941864
tensor([[-0.2491, -0.7847, -0.3771, -0.7966, -0.8041,  2.7798, -0.4002,  0.2526,
          0.0914,  0.4037],
        [-1.2364, -1.4959, -0.7230, -1.2634, -0.9524,  0.6151, -0.7357,  0.2592,
          0.9442,  4.7360],
        [-1.1159, -1.4402,  0.7280, -0.4886, -0.1140, -0.0707, -0.0298, -0.6804,
          4.2275, -0.6857],
        [-0.1872, -0.3138,  1.4406, -0.1225,  0.1511,  0.7055,  0.2360, -0.4364,
         -0.5603, -0.8465],
        [ 0.9187,  3.3428, -0.4137, -0.7442, -0.1064, -0.1429,  0.0914, -0.8083,
         -0.8175, -1.1293]])
accuracy before fit 0.84375, loss 0.45593
computing grads ...
exponentia