In [1]:
import numpy as np
from sklearn.decomposition import KernelPCA
import matplotlib.pyplot as plt
from time import perf_counter
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
from tqdm import tqdm, trange
# from tqdm.notebook import tqdm
import multivariatet as mt
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

%matplotlib inline

## read in data

In [2]:
mnist = pd.read_csv('../datasets/mnist/train.csv')

full_X = mnist[mnist.columns[1:]].values / 255
full_y = mnist.label.values

X = full_X[:1000]
y = full_y[:1000]

n,d = X.shape

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=15)

## kernel generation functions

In [66]:

    
def generate_kernel(m=220, s=1/d):
    b = np.random.uniform(low=0, high=2*np.pi, size=(1,m))
    W = np.random.multivariate_normal(mean=np.zeros(d), cov=2*s*np.eye(d), size=m) # m x d
    def ker(x, y):
        z1 = np.cos(x @ W.T + b)
        z2 = np.cos(y @ W.T + b)
        return z1 @ z2.T / m
    return ker

In [4]:
import torch

## Parallel

In [67]:
parameters = (1/784) * np.arange(0.1,10,0.1)#range(0.1,0.3,0.1)
m = 220

# X_train is train_batch x d
# X_test is test_batch x d

start = perf_counter()

n_param = parameters.shape[0]

# m x d x n_param
W = np.random.multivariate_normal(mean=np.zeros(d), cov=2*np.eye(d), size=(n_param, m)).transpose(1,2,0) * np.sqrt(parameters)

# n_param x m x 1
b = np.random.uniform(low=0, high=2*np.pi, size=(n_param,m,1))

# Wtranspose below is n_param x m x d, X_train.T is d x train_batch, their product is n_param x m x train_batch

placeholder = np.cos(np.dot(W.transpose(2,0,1), X_train.T) + b) # n_param x m x train_batch

z11 = torch.from_numpy(placeholder.transpose(0,2,1)) # n_param x train_batch x m
z2 = torch.from_numpy(placeholder.transpose(0,1,2)) # n_param x m x train_batch

z12 = torch.from_numpy(np.cos(np.dot(W.transpose(2,0,1), X_test.T) + b).transpose(0,2,1)) # n_param x test_batch x m

out1 = (1/m) * np.asarray(torch.bmm(z11, z2)) # n_param x train_batch x train_batch
out2 = (1/m) * np.asarray(torch.bmm(z12, z2)) # n_param x test_batch x train_batch

results = np.empty(n_param)

for i in range(n_param):
    svm = SVC(kernel='precomputed')
    svm.fit(out1[i], y_train)
    results[i] = svm.score(out2[i], y_test)

totaltime = perf_counter() - start

print(totaltime, rngtime)
print(results)

(99, 200, 800)
18.001125499999944 0.5822069999999258
[0.11  0.125 0.215 0.395 0.545 0.605 0.62  0.695 0.7   0.735 0.755 0.78
 0.785 0.79  0.82  0.845 0.825 0.81  0.815 0.84  0.805 0.83  0.83  0.835
 0.815 0.845 0.79  0.845 0.825 0.835 0.81  0.84  0.855 0.82  0.85  0.835
 0.825 0.8   0.83  0.855 0.815 0.865 0.835 0.87  0.85  0.83  0.83  0.84
 0.845 0.83  0.84  0.82  0.825 0.85  0.85  0.855 0.84  0.825 0.845 0.83
 0.86  0.855 0.83  0.83  0.815 0.83  0.845 0.83  0.845 0.855 0.845 0.84
 0.83  0.835 0.84  0.82  0.85  0.835 0.855 0.815 0.82  0.815 0.84  0.83
 0.86  0.83  0.86  0.835 0.8   0.85  0.835 0.835 0.81  0.805 0.8   0.825
 0.84  0.815 0.825]


## Loop random

In [68]:
st = perf_counter()

values = np.empty(parameters.shape[0])
for i,val in enumerate(parameters):
    svm = SVC(kernel=generate_kernel(s=val))
    svm.fit(X_train, y_train)
    values[i] = svm.score(X_test, y_test)

print(perf_counter() - st)
print(values)

43.89197130000002
[0.11  0.13  0.265 0.29  0.485 0.61  0.67  0.645 0.725 0.755 0.77  0.755
 0.75  0.78  0.805 0.78  0.825 0.795 0.825 0.79  0.8   0.815 0.845 0.84
 0.805 0.815 0.81  0.795 0.85  0.825 0.81  0.82  0.815 0.83  0.84  0.86
 0.855 0.81  0.84  0.835 0.83  0.82  0.83  0.815 0.84  0.845 0.83  0.86
 0.855 0.855 0.84  0.84  0.82  0.835 0.84  0.845 0.875 0.8   0.845 0.84
 0.84  0.845 0.81  0.865 0.845 0.84  0.86  0.84  0.86  0.82  0.83  0.815
 0.81  0.825 0.835 0.855 0.83  0.865 0.81  0.86  0.845 0.85  0.865 0.835
 0.845 0.84  0.81  0.85  0.84  0.83  0.86  0.83  0.845 0.83  0.865 0.805
 0.835 0.835 0.82 ]


## Loop det

In [69]:
st = perf_counter()

values = np.empty(parameters.shape[0])
for i,val in enumerate(parameters):
    svm = SVC(gamma=val)
    svm.fit(X_train, y_train)
    values[i] = svm.score(X_test, y_test)

print(perf_counter() - st)
print(values)

79.79559129999961
[0.135 0.38  0.63  0.71  0.78  0.82  0.83  0.85  0.85  0.855 0.86  0.86
 0.86  0.865 0.865 0.865 0.87  0.87  0.87  0.865 0.865 0.865 0.865 0.865
 0.86  0.86  0.865 0.865 0.865 0.865 0.86  0.86  0.865 0.87  0.875 0.875
 0.88  0.88  0.88  0.88  0.88  0.88  0.88  0.88  0.89  0.89  0.895 0.895
 0.895 0.895 0.9   0.9   0.905 0.905 0.905 0.905 0.905 0.905 0.905 0.905
 0.905 0.905 0.905 0.905 0.905 0.91  0.91  0.91  0.91  0.91  0.905 0.905
 0.905 0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9
 0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.9   0.905
 0.905 0.905 0.905]


In [98]:
from sklearn.model_selection import GridSearchCV

start = perf_counter()

params = {'gamma': parameters}
svc = SVC(kernel='rbf')
clf = GridSearchCV(svc, params, cv=3)
clf.fit(X_train, y_train)

print(perf_counter() - start)
print(clf.cv_results_['mean_test_score'])




133.02951949999988
[0.1275  0.21375 0.34125 0.5375  0.65625 0.725   0.7475  0.76625 0.7825
 0.795   0.79875 0.81    0.815   0.81625 0.82375 0.83    0.8275  0.83125
 0.8375  0.84375 0.84125 0.8425  0.8475  0.84625 0.85    0.84875 0.8475
 0.85    0.85375 0.855   0.85625 0.85625 0.8575  0.8575  0.8575  0.85875
 0.85875 0.85875 0.86    0.85875 0.85875 0.85875 0.86125 0.86125 0.86125
 0.8625  0.8625  0.8625  0.865   0.86625 0.865   0.865   0.86625 0.8675
 0.8675  0.86875 0.87    0.87125 0.87125 0.87125 0.87125 0.87    0.87
 0.87    0.87125 0.87125 0.87375 0.87375 0.875   0.87625 0.87625 0.87625
 0.8775  0.8775  0.8775  0.8775  0.8775  0.8775  0.87875 0.87875 0.87875
 0.87875 0.87875 0.88    0.88    0.88125 0.88125 0.88125 0.88125 0.88125
 0.88125 0.88125 0.88125 0.88125 0.88125 0.88125 0.88125 0.88125 0.88125]


## Cross-val, random fourier features, computation in parallel

In [99]:
from sklearn.model_selection import StratifiedKFold


# X_train is train_batch x d
# X_test is test_batch x d

n_cv = 3

scores = np.empty((n_cv,n_param))

start = perf_counter()

skf = StratifiedKFold(n_splits = n_cv)
for i,(train_index, test_index) in enumerate(skf.split(X,y)):
    X_tr, X_te = X[train_index], X[test_index]
    y_tr, y_te = y[train_index], y[test_index]
    
    # m x d x n_param
    W = np.random.multivariate_normal(mean=np.zeros(d), cov=2*np.eye(d), size=(n_param, m)).transpose(1,2,0) * np.sqrt(parameters)

    # n_param x m x 1
    b = np.random.uniform(low=0, high=2*np.pi, size=(n_param,m,1))

    # Wtranspose below is n_param x m x d, X_train.T is d x train_batch, their product is n_param x m x train_batch

    placeholder = np.cos(np.dot(W.transpose(2,0,1), X_tr.T) + b) # n_param x m x train_batch

    z11 = torch.from_numpy(placeholder.transpose(0,2,1)) # n_param x train_batch x m
    z2 = torch.from_numpy(placeholder.transpose(0,1,2)) # n_param x m x train_batch

    z12 = torch.from_numpy(np.cos(np.dot(W.transpose(2,0,1), X_te.T) + b).transpose(0,2,1)) # n_param x test_batch x m

    out1 = (1/m) * np.asarray(torch.bmm(z11, z2)) # n_param x train_batch x train_batch
    out2 = (1/m) * np.asarray(torch.bmm(z12, z2)) # n_param x test_batch x train_batch

    for j in range(n_param):
        svm = SVC(kernel='precomputed')
        svm.fit(out1[j], y_tr)
        scores[i,j] = svm.score(out2[j], y_te)

totaltime = perf_counter() - start

print(totaltime)
print(np.mean(scores, axis=0))

41.17694929999925
[0.12399673 0.13094118 0.20887498 0.33409173 0.38597504 0.48778988
 0.55303169 0.61608482 0.66591983 0.69713728 0.73801826 0.72502574
 0.72416882 0.73909789 0.77107078 0.76001744 0.77706208 0.78512714
 0.77604004 0.77814721 0.78011004 0.78510059 0.78604192 0.80707793
 0.80608587 0.79412486 0.79803868 0.79515583 0.79007879 0.80805528
 0.80608009 0.80407204 0.80112293 0.81407654 0.7981081  0.82711402
 0.82611879 0.80600227 0.81203852 0.80208159 0.80511827 0.80905628
 0.82000789 0.80112582 0.81121446 0.81306344 0.8160817  0.8270454
 0.79811415 0.8259934  0.81407338 0.82112245 0.81814049 0.8290261
 0.81808948 0.80905339 0.83299752 0.83208327 0.83610542 0.81805662
 0.82205169 0.82607383 0.83700152 0.82207299 0.80605643 0.81107327
 0.82409025 0.8239254  0.82107777 0.82215108 0.83103442 0.83611147
 0.80510274 0.82509125 0.82203119 0.82810319 0.8111064  0.83207722
 0.83710642 0.82311868 0.8260231  0.81205323 0.83203858 0.81808975
 0.81904002 0.83714217 0.81308448 0.81906945 0

In [100]:
randmeans = np.mean(scores,axis=0)
detmeans = clf.cv_results_['mean_test_score']

In [101]:
#Error of results
np.linalg.norm(randmeans-detmeans) / np.linalg.norm(detmeans)

0.08789816078818842

In [107]:
#Check where random is maximized, and identify the order of that index in detmeans

random_max_idx = np.argmax(randmeans)

det_val_for_best_rand = detmeans[random_max_idx]

sorted_det = np.sort(detmeans)[::-1]

rank = np.where(sorted_det == det_val_for_best_rand)[0][0]

print(rank)

0


In [103]:
sorted_det

array([0.1275 , 0.21375, 0.34125, 0.5375 , 0.65625, 0.725  , 0.7475 ,
       0.76625, 0.7825 , 0.795  , 0.79875, 0.81   , 0.815  , 0.81625,
       0.82375, 0.8275 , 0.83   , 0.83125, 0.8375 , 0.84125, 0.8425 ,
       0.84375, 0.84625, 0.8475 , 0.8475 , 0.84875, 0.85   , 0.85   ,
       0.85375, 0.855  , 0.85625, 0.85625, 0.8575 , 0.8575 , 0.8575 ,
       0.85875, 0.85875, 0.85875, 0.85875, 0.85875, 0.85875, 0.86   ,
       0.86125, 0.86125, 0.86125, 0.8625 , 0.8625 , 0.8625 , 0.865  ,
       0.865  , 0.865  , 0.86625, 0.86625, 0.8675 , 0.8675 , 0.86875,
       0.87   , 0.87   , 0.87   , 0.87   , 0.87125, 0.87125, 0.87125,
       0.87125, 0.87125, 0.87125, 0.87375, 0.87375, 0.875  , 0.87625,
       0.87625, 0.87625, 0.8775 , 0.8775 , 0.8775 , 0.8775 , 0.8775 ,
       0.8775 , 0.87875, 0.87875, 0.87875, 0.87875, 0.87875, 0.88   ,
       0.88   , 0.88125, 0.88125, 0.88125, 0.88125, 0.88125, 0.88125,
       0.88125, 0.88125, 0.88125, 0.88125, 0.88125, 0.88125, 0.88125,
       0.88125])

## Cross-val, random fourier features, computation with loops

In [113]:
# X_train is train_batch x d
# X_test is test_batch x d

n_cv = 3

scores_cv_rnd_np = np.empty((n_cv,n_param))

start = perf_counter()

skf = StratifiedKFold(n_splits = n_cv)
for i,(train_index, test_index) in enumerate(skf.split(X,y)):
    X_tr, X_te = X[train_index], X[test_index]
    y_tr, y_te = y[train_index], y[test_index]
    
    for j,val in enumerate(parameters):
        svm = SVC(kernel=generate_kernel(s=val))
        svm.fit(X_tr, y_tr)
        scores_cv_rnd_np[i,j] = svm.score(X_te, y_te)


totaltime = perf_counter() - start

print(totaltime)
print(np.mean(scores_cv_rnd_np, axis=0))

78.97071219999998
[0.12399673 0.12399673 0.23300472 0.31283536 0.42793692 0.48690876
 0.57297967 0.62105408 0.6592575  0.6891114  0.71908572 0.7380698
 0.75802121 0.7590038  0.76205862 0.77306096 0.78703056 0.7709267
 0.7731748  0.78908356 0.79296479 0.77905803 0.79309046 0.80299927
 0.78102401 0.80302608 0.80711711 0.78407802 0.80009223 0.79905467
 0.79413669 0.81310867 0.79707397 0.78706342 0.80402157 0.80913121
 0.80507909 0.81915042 0.82507888 0.82413467 0.80711079 0.8250518
 0.81715763 0.80504911 0.81505046 0.81607854 0.80905339 0.82008597
 0.83604575 0.82700596 0.82905634 0.80200982 0.82016379 0.82697363
 0.82704225 0.81705535 0.82605912 0.82413494 0.81802087 0.83005103
 0.82109853 0.8100812  0.81406444 0.84602894 0.82907106 0.8030834
 0.82613089 0.82605045 0.82604386 0.81807188 0.82308266 0.82510281
 0.83899458 0.80912254 0.81708243 0.83306929 0.81605778 0.82306505
 0.8190847  0.80411726 0.83108281 0.81409757 0.82917018 0.82198308
 0.80293959 0.83207722 0.81209295 0.83007838 0.8

## Cross-val, deterministic gaussian, parallel

In [None]:
# X_train is train_batch x d
# X_test is test_batch x d

n_cv = 3

scores = np.empty((n_cv,n_param))

start = perf_counter()

skf = StratifiedKFold(n_splits = n_cv)
for i,(train_index, test_index) in enumerate(skf.split(X,y)):
    X_tr, X_te = X[train_index], X[test_index]
    y_tr, y_te = y[train_index], y[test_index]
    
    # m x d x n_param
    W = np.random.multivariate_normal(mean=np.zeros(d), cov=2*np.eye(d), size=(n_param, m)).transpose(1,2,0) * np.sqrt(parameters)

    # n_param x m x 1
    b = np.random.uniform(low=0, high=2*np.pi, size=(n_param,m,1))

    # Wtranspose below is n_param x m x d, X_train.T is d x train_batch, their product is n_param x m x train_batch

    placeholder = np.cos(np.dot(W.transpose(2,0,1), X_tr.T) + b) # n_param x m x train_batch

    z11 = torch.from_numpy(placeholder.transpose(0,2,1)) # n_param x train_batch x m
    z2 = torch.from_numpy(placeholder.transpose(0,1,2)) # n_param x m x train_batch

    z12 = torch.from_numpy(np.cos(np.dot(W.transpose(2,0,1), X_te.T) + b).transpose(0,2,1)) # n_param x test_batch x m

    out1 = (1/m) * np.asarray(torch.bmm(z11, z2)) # n_param x train_batch x train_batch
    out2 = (1/m) * np.asarray(torch.bmm(z12, z2)) # n_param x test_batch x train_batch

    for j in range(n_param):
        svm = SVC(kernel='precomputed')
        svm.fit(out1[j], y_tr)
        scores[i,j] = svm.score(out2[j], y_te)

totaltime = perf_counter() - start

print(totaltime)
print(np.mean(scores, axis=0))