In [1]:
import sys
# Put the sibling ../src directory on the import path. The pytorch_impl
# imports further down presumably resolve from there — TODO confirm.
sys.path.append("../src")

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

import sys
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms, datasets
from torchvision.datasets import FashionMNIST

import matplotlib.pyplot as plt
from pytorch_impl.nns import FCN
from pytorch_impl.nns.utils import warm_up_batch_norm, to_one_hot
from pytorch_impl.estimators import MatrixExpEstimator, GradientBoostingEstimator
from pytorch_impl import ClassifierTraining
from pytorch_impl.matrix_exp import matrix_exp, compute_exp_term

In [4]:
# Fix PyTorch's global RNG seed so repeated runs start from the same state.
torch.manual_seed(0)

# FashionMNIST has ten clothing categories.
num_classes = 10

# CUDA is deliberately switched off for this notebook. Flip this flag to opt
# in (replaces the former `... and False` expression, which hid the intent).
USE_CUDA = False

if USE_CUDA and torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('Torch version: {}'.format(torch.__version__))
print('Device: {}'.format(device))

# Page-locked host memory only speeds up host-to-GPU copies, so request it
# only when batches will actually be moved to a CUDA device.
pin_memory = device.type == 'cuda'

train_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=True, download=True,
                 transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=pin_memory)

# download=True is a no-op when the files already exist; it just removes the
# hidden dependency on the training dataset having been fetched first.
test_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=False, download=True,
                 transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=pin_memory)

# Last expression: display the selected device as the cell output.
device

Torch version: 1.3.1
Device: cpu


device(type='cpu')

In [5]:
# Sanity check for matrix_exp: exponentiating the 2-D rotation generator
# scaled by an angle should yield the rotation matrix for that angle
# (here 30 degrees, i.e. cos = 0.866..., sin = 0.5).
a = (30 / 180) * np.pi
rotation_generator = torch.tensor([[0, -1], [1, 0]])
M = rotation_generator * a
rotation = matrix_exp(M, device)
rotation.cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [6]:
# Sanity check for compute_exp_term: I + M @ compute_exp_term(M) should give
# the same 30-degree rotation matrix that matrix_exp(M) produced above.
a = 30 / 180 * np.pi
M = torch.tensor([[0, -1], [1, 0]]) * a
M_clone = M.clone().to(device)
# Build the identity on the same device as the product, and move the result
# back to host memory before converting; otherwise this cell breaks as soon
# as `device` is a CUDA device.
identity = torch.eye(2, device=device)
(torch.matmul(M_clone, compute_exp_term(M, device)) + identity).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [7]:
# FashionMNIST images are D x D pixels; D * D is the flattened pixel count.
D = 28

# NOTE(review): the literal 10 presumably is the number of output classes and
# could be `num_classes` — confirm against FCN's signature before changing.
model = FCN(10, D * D)
model = model.to(device)

In [8]:
# Base learner wrapping the network; hyper-parameters are passed by keyword.
base_estimator = MatrixExpEstimator(
    model,
    num_classes,
    device,
    learning_rate=1.,
    aug_grad=True,
)

# Boosting wrapper around the base learner, driven by a cross-entropy loss.
criterion = nn.CrossEntropyLoss()
gb_estimator = GradientBoostingEstimator(
    base_estimator,
    num_classes,
    criterion,
    device,
)

In [9]:
# Number of mini-batches to fit on.
n_iter = 100

# Call gb_estimator.step() and print sample predictions ten times over the
# run. max(1, ...) keeps the modulus non-zero when n_iter < 10.
report_every = max(1, n_iter // 10)

# Draw consecutive batches from ONE pass over the shuffled loader instead of
# building a fresh DataLoader iterator (and reshuffling the whole dataset) on
# every step. Putting range() first makes zip() stop after n_iter batches
# without pulling an extra, unused batch. The loop ends early if n_iter
# exceeds the number of batches in train_loader.
for batch_id, (X, y) in zip(range(n_iter), train_loader):
    X, y = X.to(device), y.to(device)

    if batch_id % report_every == 0:
        gb_estimator.step()
        # Show the predictions for the first five samples of the batch.
        print(gb_estimator.predict(X)[:5])

    gb_estimator.fit_batch(X, y)
    # Two blank lines separate the log output of consecutive batches.
    print()
    print()

aug_c 0.0032174130901694298
tensor([[ 0.1142, -0.1113, -0.0138,  0.0702,  0.0507, -0.0686,  0.0039, -0.0219,
          0.0300,  0.0163],
        [ 0.0986, -0.0899, -0.0401,  0.0588, -0.0016, -0.0557,  0.0568,  0.0580,
          0.0717, -0.0187],
        [ 0.1704, -0.0307, -0.0916,  0.1134,  0.0964, -0.1029,  0.1274,  0.1146,
          0.0588,  0.0025],
        [ 0.1535, -0.0174, -0.0952,  0.1338,  0.0837, -0.1138,  0.0730,  0.0208,
          0.0158,  0.0563],
        [ 0.0397, -0.0467, -0.1416,  0.0630,  0.0553, -0.1488,  0.0778,  0.0306,
          0.1050,  0.0641]])
accuracy before fit 0.12500, loss 2.28910
computing grads ...
exponentiating kernel matrix ...
current beta 2.916586399078369
fitting done. took 1s



accuracy before fit 0.09375, loss 2.31758
computing grads ...
exponentiating kernel matrix ...
current beta 2.90033221244812
fitting done. took 1s



accuracy before fit 0.09375, loss 2.31625
computing grads ...
exponentiating kernel matrix ...
current beta 2.917775154113769

computing grads ...
exponentiating kernel matrix ...
current beta 2.747290849685669
fitting done. took 1s



beta 2.614604187011719
tensor([[-1.4518, -0.7718, -1.4052, -1.1332, -0.8145,  0.9229, -0.9932,  2.3010,
         -0.2149,  3.9711],
        [-0.6420,  4.0821, -0.4130,  0.2655, -0.4113, -0.4212, -0.3108, -0.5202,
         -0.5017, -1.0551],
        [-0.6767, -0.4908, -0.3006, -0.8362, -0.9762,  2.1707, -0.2309, -0.4446,
          0.0878,  1.8081],
        [-0.2905, -0.8462,  1.1580, -1.1542,  3.9078, -1.1137,  0.5923, -0.7841,
         -0.7751, -0.5415],
        [-0.9211,  5.5466, -0.7020, -0.1410, -0.7006, -0.6909, -0.7129, -0.6016,
         -0.4871, -0.5504]])
accuracy before fit 0.65625, loss 0.83886
computing grads ...
exponentiating kernel matrix ...
current beta 2.541694164276123
fitting done. took 1s



accuracy before fit 0.78125, loss 0.85097
computing grads ...
exponentiating kernel matrix ...
current beta 2.610654354095459
fitting done. took 1s



accuracy before fit 

current beta 2.2659950256347656
fitting done. took 1s



accuracy before fit 0.78125, loss 0.58987
computing grads ...
exponentiating kernel matrix ...
current beta 2.305229425430298
fitting done. took 1s



beta 2.1994688630104067
tensor([[ 2.9459, -1.1109, -1.1856,  0.8778,  0.9948, -1.8669,  2.6884, -1.1924,
         -1.1689, -0.7515],
        [-1.6931, -0.7447,  0.8479, -0.1051,  3.1641, -1.3683,  2.8682, -1.4989,
         -0.1306, -1.0492],
        [ 0.5618, -0.1855, -0.3874,  1.0612, -0.6427,  0.5172,  0.1318, -0.5076,
         -0.0100, -0.6423],
        [ 0.0694, -1.0809,  3.7233, -1.1444,  0.7614, -1.3430,  1.9175, -1.0495,
         -0.8090, -0.9008],
        [-0.4251, -0.9665,  3.7887, -0.2435,  0.5522, -1.5200,  2.4756, -1.1449,
         -1.1291, -1.2656]])
accuracy before fit 0.78125, loss 0.60895
computing grads ...
exponentiating kernel matrix ...
current beta 2.3336129188537598
fitting done. took 1s



accuracy before fit 0.71875, loss 0.99728
computing grads ...
exponent