In [1]:
import sys

# Make the project sources (one directory up, in "src") importable from this
# notebook. The membership check keeps the cell idempotent: re-running it does
# not pile up duplicate entries in sys.path.
if "../src" not in sys.path:
    sys.path.append("../src")

In [2]:
# Load IPython's autoreload extension so that the %autoreload magic is available.
%load_ext autoreload

In [3]:
# Mode 2: reload all imported modules before each cell execution, so edits to
# the project sources are picked up without restarting the kernel.
%autoreload 2

import sys
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torchvision.datasets import FashionMNIST

import matplotlib.pyplot as plt
from pytorch_impl.nns import FCN
from pytorch_impl.nns.utils import warm_up_batch_norm, to_one_hot
from pytorch_impl.estimators import MatrixExpEstimator
from pytorch_impl import ClassifierTraining
from pytorch_impl.matrix_exp import matrix_exp, compute_exp_term

In [4]:
# Fix the global torch RNG so the shuffled batches are reproducible.
torch.manual_seed(0)

num_classes = 10

# Explicit opt-in switch for the GPU. It is off, so the notebook runs on the
# CPU even when CUDA is present; flip it to True to use cuda:0 if available.
USE_CUDA = False
BATCH_SIZE = 32

if USE_CUDA and torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('Torch version: {}'.format(torch.__version__))
print('Device: {}'.format(device))

# Page-locked host memory only speeds up host-to-GPU copies, so request it
# only when the tensors are actually going to a CUDA device.
pin_memory = device.type == 'cuda'

train_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=True, download=True,
                 transform=transforms.ToTensor()),
    batch_size=BATCH_SIZE, shuffle=True, pin_memory=pin_memory)

# download=True is a no-op when the files are already present; it removes the
# hidden dependency on the training dataset having fetched them first.
# NOTE(review): shuffle=True is kept as in the original; evaluation normally
# does not need a shuffled test set -- confirm whether it is intentional.
test_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=False, download=True,
                 transform=transforms.ToTensor()),
    batch_size=BATCH_SIZE, shuffle=True, pin_memory=pin_memory)

# Last expression of the cell: display the selected device.
device

Torch version: 1.3.1
Device: cpu


device(type='cpu')

In [5]:
# Sanity check for matrix_exp: the exponential of a * [[0, -1], [1, 0]] is the
# 2-D rotation by angle a, i.e. [[cos a, -sin a], [sin a, cos a]].
a = 30 / 180 * np.pi  # 30 degrees in radians

# Float literals make M a floating-point tensor from the start, instead of
# relying on implicit int -> float promotion when an integer tensor is scaled
# by a Python float.
M = torch.tensor([[0., -1.], [1., 0.]]) * a

# Expected: [[0.866, -0.5], [0.5, 0.866]] (cos 30 deg, sin 30 deg).
matrix_exp(M, device).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [6]:
# Sanity check for compute_exp_term: M @ compute_exp_term(M) + I is expected to
# reproduce the rotation matrix exp(M) for a 30-degree rotation generator.
# NOTE(review): this presumes compute_exp_term(M) returns (exp(M) - I) M^{-1}
# on `device` -- confirm against its implementation.
a = 30 / 180 * np.pi  # 30 degrees in radians

# Float literals avoid depending on implicit int -> float type promotion.
M = torch.tensor([[0., -1.], [1., 0.]]) * a

# Copy of M taken before compute_exp_term is called, moved to the target device.
M_clone = M.clone().to(device)

# The identity is created on the same device as the product, and the result is
# moved to the CPU before converting to NumPy, so the cell also works when
# `device` is a CUDA device.
(torch.matmul(M_clone, compute_exp_term(M, device)) + torch.eye(2, device=device)).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [7]:
# Image side length; D * D is handed to FCN as its second argument
# (presumably the flattened input size -- confirm against FCN's signature).
D = 28

# NOTE(review): the literal 10 may be meant to equal num_classes; left as-is
# because FCN's parameter meaning is not visible here.
model = FCN(10, D * D)
model = model.to(device)

# Estimator wrapping the network, configured with unit learning rate and no
# momentum.
estimator = MatrixExpEstimator(
    model,
    num_classes,
    device,
    learning_rate=1.,
    momentum=0,
)

In [8]:
# Pull one mini-batch from the training loader and move it to the target device.
X, y = next(iter(train_loader))
X = X.to(device)
y = y.to(device)

# Initial prediction on the batch; the return value is not used here.
estimator.predict(X)

# Fit repeatedly on the same batch and show the first five prediction rows
# after every step, each followed by two blank lines.
for _ in range(30):
    estimator.fit(X, y)
    print(estimator.predict(X)[:5], end="\n\n\n")

accuracy 0.12500, loss 0.03217
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.0019218921661377
tensor([[-1.0019, -1.0029, -0.9997, -0.9995, -0.9961,  0.9891, -1.0040, -0.9948,
         -0.9993, -1.0017],
        [-0.9962, -0.9966, -1.0020, -0.9997, -0.9890, -1.0019, -0.9987, -0.9971,
         -0.9994,  0.9787],
        [-1.0074, -0.9979, -0.9957, -0.9977, -1.0032, -1.0042, -0.9993,  1.0122,
         -1.0004, -1.0110],
        [-1.0039, -1.0050, -1.0079, -1.0061, -0.9984, -0.9983, -1.0032,  0.9900,
         -1.0041, -0.9911],
        [ 0.9677, -0.9822, -1.0063, -0.9902, -0.9936, -1.0136, -1.0110, -1.0068,
         -1.0073, -1.0081]])


accuracy 1.00000, loss 0.00000
computing grads ... 1s
exponentiating kernel matrix ... 1s
beta = 1.0000238418579102
tensor([[-1.0002, -1.0001, -0.9999, -1.0000, -0.9997,  0.9996, -1.0002, -0.9998,
         -0.9999, -1.0000],
        [-0.9997, -0.9998, -1.0002, -0.9999, -0.9997, -1.0001, -0.9998, -0.9999,
         -0.9999,  0.9992],
  

accuracy 1.00000, loss 0.00000
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.0
tensor([[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         -1.0000,  1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000,
         -1.0000, -1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000,
         -1.0000, -1.0000],
        [ 1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000]])


accuracy 1.00000, loss 0.00000
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.0
tensor([[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         -1.0000,  1.0000],
        [-1.0000, -1.0000, -1.00

accuracy 1.00000, loss 0.00000
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.0
tensor([[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         -1.0000,  1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000,
         -1.0000, -1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000,
         -1.0000, -1.0000],
        [ 1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000]])


accuracy 1.00000, loss 0.00000
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.0
tensor([[-1.0000, -1.0000, -1.0000, -1.0000, -1.0000,  1.0000, -1.0000, -1.0000,
         -1.0000, -1.0000],
        [-1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000, -1.0000,
         -1.0000,  1.0000],
        [-1.0000, -1.0000, -1.00