In [1]:
import sys
sys.path.append("../src")

In [2]:
%load_ext autoreload

In [3]:
%autoreload 2

import sys
import numpy as np
from PIL import Image

import torch
import torch.nn as nn
from torchvision import transforms, datasets
from torchvision.datasets import FashionMNIST

import matplotlib.pyplot as plt
from pytorch_impl.nns import FCN
from pytorch_impl.nns.utils import warm_up_batch_norm, to_one_hot
from pytorch_impl.estimators import MatrixExpEstimator
from pytorch_impl import ClassifierTraining
from pytorch_impl.matrix_exp import matrix_exp, compute_exp_term

In [4]:
torch.manual_seed(0)

num_classes = 10

if torch.cuda.is_available() and False:
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

print('Torch version: {}'.format(torch.__version__))
print('Device: {}'.format(device))

train_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=True, download=True,
          transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=True)

test_loader = torch.utils.data.DataLoader(
    FashionMNIST(root='.', train=False, transform=transforms.ToTensor()),
    batch_size=32, shuffle=True, pin_memory=True)

device

Torch version: 1.3.1
Device: cpu


device(type='cpu')

In [5]:
a = 30 / 180 * np.pi
M = torch.tensor([[0, -1], [1, 0]]) * a
matrix_exp(M, device).cpu().numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [6]:
a = 30 / 180 * np.pi
M = torch.tensor([[0, -1], [1, 0]]) * a
M_clone = M.clone().to(device)
(torch.matmul(M_clone, compute_exp_term(M, device)) + torch.eye(2)).numpy()

array([[ 0.8660254, -0.5      ],
       [ 0.5      ,  0.8660254]], dtype=float32)

In [9]:
D = 28

model     = FCN(10, D * D).to(device)
estimator = MatrixExpEstimator(model, num_classes, device, criterion=nn.CrossEntropyLoss(), learning_rate=1., momentum=0)

In [12]:
estimator.predict(X)
for _ in range(200):
    _, (X, y) = next(enumerate(train_loader))
    X, y = X.to(device), y.to(device)
    estimator.fit(X, y)
    print(estimator.predict(X)[:5])
    print()
    print()

accuracy 0.87500, loss 0.01531
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3414092063903809
tensor([[-0.3338, -0.9810, -0.4215, -0.8494, -1.7486,  4.1046, -0.7653,  0.7009,
          0.0750,  0.2386],
        [-0.6081, -1.1190, -0.6937, -1.4812, -1.3826,  1.5553, -2.0984,  1.6034,
         -0.3516,  4.3019],
        [-1.6277, -1.2654, -1.6431, -0.8292, -1.0373,  3.7274, -0.4105,  0.8392,
          0.9327,  1.0883],
        [-1.9510, -1.0068, -1.0052, -1.3552, -1.0315,  6.8800, -0.5135,  0.2510,
          0.0382, -0.5826],
        [-1.7724, -2.0050,  1.5949,  6.1843,  1.0475, -1.8399,  3.1398, -1.1612,
         -2.7629, -3.0375]])


accuracy 0.65625, loss 0.02426
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.4355376958847046
tensor([[ 1.1148e-01, -9.4621e-01,  2.4694e+00, -1.2983e+00,  5.3115e+00,
         -2.9944e+00,  2.2582e+00, -1.6517e+00, -2.2793e+00, -1.4289e+00],
        [-3.2427e+00, -2.0414e+00, -3.8157e-01, -3.0114e+00,  2.9032e+0

accuracy 0.81250, loss 0.01601
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3259093761444092
tensor([[ 6.0202, -0.3650,  0.3566, -1.1426, -0.2221, -2.2159,  3.6583, -2.7572,
         -1.4087, -1.9534],
        [-4.6209, -2.3446,  1.2582, -1.4804,  0.4755,  1.1239, -0.2761, -1.5718,
          7.7352, -0.8159],
        [-1.2160, -1.1057, -0.0245, -1.0229, -0.8348,  5.0918, -0.2469,  1.4078,
         -0.2288, -1.9137],
        [-1.2220, -2.7371,  1.6504,  4.5872, -0.2406, -0.6995,  2.1876, -1.8351,
         -0.3798, -1.8946],
        [ 8.8103, -0.7873,  1.4893, -1.6496, -1.3290, -2.0013, -0.3549, -1.3799,
         -1.4516, -1.6282]])


accuracy 0.90625, loss 0.00664
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.141294002532959
tensor([[ 6.3349, -0.5101,  1.5697,  0.7678, -3.9745, -1.0151,  2.6074, -0.5263,
         -2.8489, -2.6245],
        [-0.8000,  3.0881,  0.7949,  6.4339, -2.3529, -2.1241, -2.7704, -1.7675,
         -0.7037, -0.4355],
   

computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.2642661333084106
tensor([[-2.7143, -0.2942,  2.8643, -3.3567,  5.6348, -1.9827,  2.4255, -0.2178,
         -0.5452, -1.8741],
        [ 4.6339, -0.7667,  0.7323, -0.4025, -0.2646, -1.6744,  2.1713, -1.4743,
         -2.0642, -1.2368],
        [-3.3764, -2.3182, -2.0480, -1.6180, -1.3575,  3.5630, -2.8348,  7.5840,
          1.1475,  0.6015],
        [-1.0831, -1.8573,  2.2573,  0.5944,  0.3891, -1.1974,  5.5607, -1.8869,
         -1.7595, -1.4201],
        [-3.6593, -0.4895, -1.1622,  3.4921,  5.8677, -3.3567,  2.6522, -1.4000,
         -0.5547, -1.7305]])


accuracy 0.87500, loss 0.02609
computing grads ... 1s
exponentiating kernel matrix ... 1s
beta = 1.4906269311904907
tensor([[ 2.5881e-01, -1.7935e+00, -3.4170e+00, -3.0959e+00, -5.1752e-01,
          1.9352e-01, -3.9505e+00,  3.1225e+00,  1.8136e+00,  6.8566e+00],
        [ 1.4491e+00,  1.1705e-01,  1.7793e-02,  1.2421e+00,  1.9912e+00,
         -3.8082e+00,  5.025

beta = 1.3740425109863281
tensor([[13.3175, -2.9621, -0.9755,  1.3968, -4.4277, -4.0676,  1.0401, -2.0955,
         -0.2069, -1.4446],
        [-0.5732, -1.0874,  7.5285, -0.1191, -0.2987, -4.1854,  2.3362, -1.0363,
         -1.2130, -1.6818],
        [-2.3612, -1.7306, -2.1908, -2.1756, -1.2418, -1.0890, -0.7874,  3.8176,
          0.4777,  7.0381],
        [ 3.8634,  0.3303, -2.4503,  6.7897, -1.4503, -2.2223,  0.5027, -1.9699,
         -3.4709, -0.4248],
        [-2.6723,  1.3848,  2.6096, -2.9526,  6.0307, -1.6009,  2.5949, -1.0681,
         -2.6863, -2.1209]])


accuracy 0.84375, loss 0.01773
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.2783401012420654
tensor([[-2.0291, -3.9428,  2.3014, -2.9976,  2.7531,  1.5771, -1.2045, -0.8275,
          6.5882, -2.5085],
        [-2.6154, 11.6089, -1.0208,  3.3692, -2.9012, -2.7944, -2.5340, -0.9230,
         -1.7369, -0.8697],
        [-2.2887, -1.0208, -1.0228, -0.4439, -1.6855,  0.9985, -2.1290,  6.9032,
         -0

accuracy 0.75000, loss 0.03525
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.6662330627441406
tensor([[-3.8730, -3.1946, -0.3340, -1.2333,  1.1827,  2.5161, -4.9683, -1.1716,
          8.4053,  1.8463],
        [-1.3179,  0.4071, -0.4197,  9.6696,  1.3439, -5.6206,  2.1726, -3.5120,
         -1.8110, -1.4289],
        [-3.7486, -2.1037,  0.6133,  0.6017, -0.5139,  0.3233, -1.6907,  1.1440,
          7.1533, -1.9790],
        [-4.4328, -0.1030,  3.3119, -2.3899, -3.7097,  2.4727, -0.7908, -2.0757,
         -6.3021, 13.2872],
        [ 7.4011, -0.7081,  1.4144, -1.4426, -0.4157, -3.3974,  2.1756, -2.4785,
         -0.1379, -2.9754]])


accuracy 0.75000, loss 0.02343
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3839386701583862
tensor([[-2.0034, -1.0665, -0.0367, -0.9119, -2.2737,  1.7095, -1.0344,  5.0844,
          0.4884, -0.2053],
        [-1.1342,  0.0709,  0.5213,  1.6361, -0.7256, -1.0372,  5.1631, -2.5519,
          0.2203, -2.5644],
  

accuracy 0.84375, loss 0.02045
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3971946239471436
tensor([[-9.6666e-01, -1.0981e+00, -2.2491e-01, -7.5606e-01, -1.1656e+00,
          4.8418e+00,  1.5074e-01,  7.7036e-01, -3.4696e-01, -1.3137e+00],
        [ 3.7703e+00, -4.3917e+00,  3.1594e+00, -4.1588e+00,  1.1708e-01,
         -8.6672e+00,  3.2145e+00, -6.1061e-01,  7.0804e+00, -5.0190e-02],
        [ 2.2351e-02, -1.4270e-01, -1.2409e+00, -1.2029e+00, -2.5935e+00,
          4.6269e+00, -1.2993e+00,  1.4124e+00,  1.1510e+00, -6.3122e-01],
        [ 6.5791e-01, -9.3423e-01,  7.9181e-03, -1.2005e-01, -2.4916e+00,
          4.0120e+00, -1.0770e-01, -6.8457e-01, -2.1178e-01, -3.4168e-01],
        [ 2.8782e+00, -2.6449e+00,  2.7585e+00, -5.7959e-01,  3.2910e+00,
         -2.6880e+00,  4.7523e+00, -5.5926e+00, -1.9099e+00, -5.3271e-01]])


accuracy 0.90625, loss 0.01230
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.2337971925735474
tensor([[-2.5543e+00

accuracy 0.78125, loss 0.03024
computing grads ... 1s
exponentiating kernel matrix ... 1s
beta = 1.418859839439392
tensor([[ 7.4600, -1.1497,  0.2857,  1.0185, -1.4255, -2.5731,  1.5635, -0.7871,
         -1.2544, -3.3626],
        [-6.2043, -3.5266, 11.0166, -1.1046,  5.4190, -1.4442,  0.0426, -2.7729,
          0.6303, -2.4275],
        [-0.5598, 14.0235, -0.1184,  0.1678, -4.9278, -3.6153, -0.5463, -1.3042,
         -3.2015, -0.4939],
        [ 6.5012, -1.9207, -4.5247, -1.0437, -3.7562, -2.4553,  8.0920, -1.1358,
          2.7201, -3.1055],
        [-3.8463, 14.9685, -0.8015,  1.3689, -0.5903, -4.4898, -2.6495, -1.9459,
         -1.7645, -0.5765]])


accuracy 0.81250, loss 0.02681
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.5127185583114624
tensor([[-1.1243, -0.8541, -2.7195, -2.2746, -0.1333,  8.7327, -0.5919,  2.7410,
         -0.9549, -2.9506],
        [-0.3433, -0.4323,  2.1715, -0.9122, -0.1958, -1.8376,  5.4368, -1.5714,
         -1.6710, -1.0218],
   

beta = 1.3301643133163452
tensor([[-1.2481, -1.0652,  6.3959,  1.4791,  0.1690, -3.3410,  3.9417, -3.0902,
         -1.9216, -1.9241],
        [-1.9089,  9.6303, -0.3671,  4.0281, -4.3106, -3.7387, -3.6923, -1.2378,
          1.2598, -0.1580],
        [-0.9534, -1.2671, -0.3838,  0.0470, -1.4101,  6.0383, -0.3719,  0.8319,
         -0.7602, -2.0610],
        [-0.1156, -2.4782, -2.0747, -4.6260, -2.2587, -4.0940, -2.4284,  1.8400,
         15.9569, -0.1570],
        [ 0.5946,  0.8198,  1.3670,  1.7049,  9.3995, -3.5096,  0.1666, -4.3836,
         -3.6188, -2.8723]])


accuracy 0.75000, loss 0.02546
computing grads ... 1s
exponentiating kernel matrix ... 1s
beta = 1.3685225248336792
tensor([[-1.3924, -2.4033, -3.5455, -0.3806, -2.2446, -0.0403, -0.5028,  2.5147,
          0.9593,  6.6337],
        [-0.9490, -1.9930,  1.1207,  0.3869,  5.6305, -2.9684,  1.8288, -3.0775,
          1.8940, -2.0884],
        [ 8.0277,  0.3600, -2.6419,  3.0715, -5.9774, -1.5887,  4.1940, -0.8033,
         -2

computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3526310920715332
tensor([[-2.0939, -0.4599, -3.7077, -3.8280, -3.1893,  0.8804,  0.4430,  4.5167,
         -0.5353,  7.5774],
        [-0.2215,  0.7839, -0.2709, -6.2557, -5.8687,  4.9573, -3.0053,  1.3126,
         -4.5501, 12.6312],
        [ 1.0203, -0.3250,  9.5013,  0.7724,  1.8271, -5.0665, -1.7020, -2.5734,
         -1.3133, -2.7204],
        [ 9.0120,  1.5205, -1.7809, -0.4096, -2.6063, -5.1315,  3.2357, -0.4384,
         -0.7594, -3.1584],
        [-0.3374,  5.6017, -1.0315,  1.2107, -1.0523, -1.0769, -0.1817, -0.1341,
         -1.9720, -1.3635]])


accuracy 0.87500, loss 0.01312
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.2803423404693604
tensor([[-2.0084,  0.4834,  7.4903, -2.8190, -2.0298, -0.6138,  5.0027, -1.5069,
         -3.5759, -0.6885],
        [-2.2138, -1.5698, -2.6185, -2.6584, -3.7977,  1.4145, -2.0766,  4.5510,
         -0.2693,  8.5301],
        [-1.9675, -0.6202,  1.031

accuracy 0.78125, loss 0.02281
computing grads ... 1s
exponentiating kernel matrix ... 1s
beta = 1.371100664138794
tensor([[-0.9898, -1.0836, -1.7042, -0.8721, -1.6312,  5.3573, -0.5749,  1.7787,
         -0.1055, -0.3855],
        [-0.0885, -0.7274, -3.2641, -4.7347, -5.6176,  3.3590, -1.2057,  3.4310,
         -3.5868, 11.9601],
        [-0.5528, -0.8945, -1.4242, -2.2852, -3.3160,  3.2022, -1.5506, -0.0704,
         -1.5808,  8.2144],
        [-2.2349,  0.3347,  8.9678, -1.7611,  9.4178, -5.6987, -6.3916, -1.4147,
         -0.9427, -0.9851],
        [ 1.0673, -0.5253, -2.8486, -0.4008, -3.0555,  4.5493, -0.7341,  0.7369,
         -0.3582,  1.1796]])


accuracy 0.78125, loss 0.01609
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3139450550079346
tensor([[ 2.4727e+00, -1.7548e+00,  2.1592e+00, -2.8733e+00,  8.6681e+00,
         -3.4642e+00, -2.0824e+00, -2.6073e+00,  3.4682e-01, -1.4798e+00],
        [-1.8332e+00, -1.4149e+00, -1.1270e+00,  4.1843e-03, -1.1596e+00

computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.4294575452804565
tensor([[-5.7749e-01, -6.0260e-01, -5.2626e+00, -2.2898e+00, -5.5108e+00,
          3.2292e+00, -4.0485e+00, -5.3915e-02,  1.5550e+00,  1.2987e+01],
        [-4.4581e+00, -3.9151e-01, -2.9786e+00, -3.1277e+00, -3.9854e+00,
         -2.3434e+00, -2.6874e+00,  1.1884e+01,  2.0846e+00,  5.3761e+00],
        [-2.0937e+00,  7.6727e+00, -2.7538e+00,  4.1397e+00, -1.8422e-01,
         -4.7937e+00,  1.7730e+00, -2.0182e+00, -2.4316e+00,  2.1210e-01],
        [-7.5087e-01, -1.2875e+00,  3.5501e-03, -5.2694e-01, -2.3146e+00,
          5.2189e+00, -1.4090e+00,  7.9884e-01,  7.3112e-01, -5.3636e-01],
        [-9.1107e-01, -2.2888e+00,  1.6913e+00, -2.2998e+00, -8.0982e-01,
         -1.7379e+00,  4.1460e-01,  7.1849e-01,  6.0026e+00, -1.2416e+00]])


accuracy 0.81250, loss 0.02911
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.4017221927642822
tensor([[-2.7881, -1.7153,  2.4196, -2.6954, -0.352

accuracy 0.90625, loss 0.01122
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.19803786277771
tensor([[-3.9066, -1.5418, -3.2224, -1.6705, -2.1444,  2.5076, -4.4933,  9.4665,
          0.2299,  4.2733],
        [ 3.5487, -1.4100,  0.9087,  0.5177,  1.2469, -0.9419,  1.3610, -2.1750,
         -0.1085, -3.0474],
        [-3.6174,  1.6544, 10.5824, -1.4633,  0.5165, -2.2735,  3.6322, -2.2743,
         -2.9970, -4.1458],
        [-0.9640, -1.2891,  0.2241, -1.2325, -2.3541,  6.9272,  0.4586,  1.8838,
         -1.4522, -2.2884],
        [-0.1566, -1.6427,  4.0753, -1.2803,  7.7653, -3.2143,  3.9739, -6.4011,
         -2.3578, -1.4117]])


accuracy 0.78125, loss 0.02355
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3935433626174927
tensor([[-3.5521, -0.2866, 10.7402, -5.1217,  7.3772, -4.1116, -3.3855,  0.0197,
         -1.2982, -0.8332],
        [-2.9934, -2.0573,  3.9343,  1.2169,  6.0591, -2.5508, -1.1445, -1.2043,
          0.0707, -1.8245],
    

accuracy 0.90625, loss 0.01727
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3198630809783936
tensor([[-0.8619, -1.1009, -1.7355,  0.0725, -2.7079,  1.2371,  0.6546, -1.4695,
         -6.0261, 11.3391],
        [-5.1011,  1.2181,  2.3503, -4.2375, 12.7966, -1.1615,  0.9385, -6.6075,
          0.2714, -0.9606],
        [-7.2477, -1.0132,  4.8911, -5.7153, 11.6225, -2.7750,  7.5604, -1.6837,
         -0.9624, -4.7953],
        [ 0.2448,  2.2969,  0.3895, 12.4727, -5.0897, -1.9104, -6.8657, -0.4161,
         -0.1185, -1.4998],
        [ 6.9826, -1.0465,  0.8744,  2.1440, -6.1117, -1.4038,  3.7823, -0.9878,
         -1.4093, -3.0035]])


accuracy 0.84375, loss 0.01280
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.2362383604049683
tensor([[-0.7518, -0.8615, -1.9252, -1.4259, -3.5181,  6.1881, -0.5002,  3.5537,
         -1.5955,  0.5447],
        [-6.1562, -1.6197,  9.2959, -3.4443,  6.6954, -4.3447,  3.3554, -1.4437,
          0.3535, -3.2941],
  

beta = 1.4178884029388428
tensor([[ 5.0428e+00, -9.5008e-01, -3.0890e-03, -1.1825e+00, -7.7629e-01,
         -4.5464e-01,  1.1515e+00,  8.1002e-01, -8.3577e-01, -2.9454e+00],
        [ 1.7291e+00,  2.1355e+00, -3.4346e+00,  1.0020e+01, -1.7504e+00,
         -2.6427e+00, -1.9504e+00, -3.9764e+00,  7.4391e-01, -1.2372e+00],
        [-1.1958e+00, -1.2646e+00, -3.1707e+00, -2.1516e+00, -2.4462e+00,
          2.3943e+00, -6.1021e-01,  1.0829e+01, -1.6482e+00, -9.7069e-01],
        [-1.6776e+00, -1.3058e+00,  7.0607e+00, -4.1627e+00,  1.0402e+01,
         -4.6994e+00,  7.4073e-02, -3.7513e+00, -1.0559e+00, -1.4772e+00],
        [-2.8932e+00,  3.1268e-01, -5.3224e+00, -1.7055e+00, -6.8879e+00,
          7.4225e+00, -2.4134e+00,  6.3739e-01, -4.5024e+00,  1.5107e+01]])


accuracy 0.84375, loss 0.02596
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3898147344589233
tensor([[-2.3155, -3.6791,  2.4065, -3.8966, 11.1304, -2.8031,  6.9346, -5.2630,
         -0.6151, -2.2135],
 

accuracy 0.81250, loss 0.01762
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3116955757141113
tensor([[ 0.7227, -2.5265, -4.2465, -3.7941, -1.3045, -1.0135,  1.0504, -0.4586,
         11.2916,  0.0149],
        [-3.9742, -1.3801, -2.8002, -0.3006, -2.4518,  5.3976, -6.2190, 10.6447,
         -0.2281,  0.7958],
        [-5.5973, -2.1490, -0.8354,  0.0508, -1.9977,  5.0816,  1.2905,  1.0294,
          1.6015,  1.3295],
        [-1.9955, 10.9051, -1.6090,  0.0909, -0.9567, -2.4897, -1.2072,  0.2443,
         -1.5712, -1.7140],
        [ 2.4954, -0.5500, -2.1582,  7.0348, -3.4578,  0.9609, -3.7710, -2.1871,
          2.5198, -1.2543]])


accuracy 0.78125, loss 0.02429
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3250354528427124
tensor([[ 2.1117e+00,  3.9362e-01, -8.7497e-01,  2.0218e+00, -2.3313e+00,
         -1.2390e+00,  6.3573e+00, -1.6520e+00, -1.8151e+00, -3.2405e+00],
        [-3.8075e+00, -2.1410e+00, -3.1499e+00, -3.3887e+00, -1.7854e+0

accuracy 0.87500, loss 0.01365
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3047198057174683
tensor([[-1.3436, -1.5222, -1.6302, -1.4785, -1.6857,  3.7070, -0.7870,  8.3363,
         -0.5684, -3.1221],
        [-5.6477,  1.6282,  4.7852, -6.4598, 12.5162, -5.7624,  5.6083, -2.5135,
         -1.5906, -3.1170],
        [-1.7463, -1.3331, -1.0133, -0.1693, -2.5237,  6.7964,  1.4619, -0.5059,
         -0.6007, -0.4476],
        [11.3484, -1.2536, -2.9346,  3.6136, -1.7225, -7.5598,  4.6707, -2.0006,
         -2.6119, -1.8632],
        [-1.7395, -1.1915, -0.4385, -1.2680, -3.2525,  7.7824, -0.7687,  2.2158,
          0.4156, -1.9515]])


accuracy 0.93750, loss 0.00489
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.1025035381317139
tensor([[-1.4880, -2.1197,  5.5077, -0.5015, 11.5826, -5.1996,  1.1183, -6.8685,
         -0.8974, -1.7461],
        [-1.4457, -0.5113,  3.3439, -3.0866,  9.4441, -1.5323,  1.6268, -1.7146,
         -2.3323, -3.7459],
  

accuracy 0.78125, loss 0.02493
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3432649374008179
tensor([[ 4.8606,  8.2131,  0.1531,  3.7629, -2.8281, -3.4040, -2.5108, -2.9220,
         -3.1278, -2.4614],
        [-3.9778,  2.1741,  8.7028,  0.0231,  4.9452, -3.0777, -2.4172, -0.7847,
         -2.5939, -3.7799],
        [-0.8536, -1.7672, -0.6069, -0.0978,  4.6173, -2.5319,  7.7916, -4.0346,
         -1.3024, -1.5275],
        [ 2.6699,  6.5128, -2.6813, 10.5363, -4.7622, -3.2446, -4.5646, -2.7979,
         -0.7358, -1.5056],
        [-2.3900, -1.8950, -0.0622, -0.6555, -1.3960,  6.9098, -1.0250, -1.0190,
          1.7650, -0.5180]])


accuracy 0.84375, loss 0.01730
computing grads ... 0s
exponentiating kernel matrix ... 0s
beta = 1.3102726936340332
tensor([[-0.9202, -2.4238, -0.2205, -1.8054, -4.8752,  2.7172, -7.7443,  3.0605,
         -0.6301, 12.3208],
        [-0.5834, -2.1460, -0.4005,  0.3307, -2.7145,  5.2700, -3.6250,  2.2606,
          1.0659,  0.2785],
  