In [32]:
from __future__ import print_function
import numpy as np
from time import time

In [33]:
# Problem size: N points, each with d coordinates.
d, N = 1000, 10000

# Seed the global NumPy RNG so the benchmark inputs (and therefore the printed
# "result difference" values) are identical on every fresh kernel run.
np.random.seed(0)

X = np.random.randn(N,d)   # data set, one point per row
z = np.random.randn(d)     # single query point

In [34]:
# squared Euclidean distance between two vectors
def dist_pp(z, x):
    """Return the sum of squared coordinate differences between ``z`` and ``x``.

    ``x`` is reshaped to ``z.shape`` before subtracting, so the two arrays only
    need to hold the same number of elements.
    """
    diff = z - x.reshape(z.shape)
    return np.sum(np.square(diff))

# squared distance from one point to every point of a set, one row at a time
def dist_ps_naive(z, X):
    """Squared distances from ``z`` to each row of ``X`` using a Python loop.

    Returns an array of shape ``(1, X.shape[0])`` whose entry ``[0, i]`` is
    ``dist_pp(z, X[i])``.
    """
    out = np.zeros((1, X.shape[0]))
    for idx, row in enumerate(X):
        out[0, idx] = dist_pp(z, row)
    return out

In [44]:
# squared distance from one point to every point of a set, vectorised
def dist_ps_fast(z, X):
    """Squared Euclidean distances from ``z`` to each row of ``X``.

    Uses the expansion ``||x - z||^2 = ||x||^2 + ||z||^2 - 2 x.z`` so that all
    rows are handled by a single matrix-vector product instead of a loop.

    Parameters
    ----------
    z : 1-D array of length ``X.shape[1]`` (the query point).
    X : 2-D array, one point per row.

    Returns
    -------
    1-D array with one squared distance per row of ``X``; never negative.
    """
    X2 = np.sum(X*X, 1)   # squared norm of every row
    z2 = np.sum(z*z)      # squared norm of the query point
    # The expansion subtracts nearly equal numbers when a row is close to z,
    # so rounding can leave a tiny negative value; a squared distance cannot
    # be negative, so clip at zero (keeps a later sqrt from producing NaN).
    return np.maximum(X2 + z2 - 2*X.dot(z), 0)

# Time the row-by-row Python loop.
t1 = time()
D1 = dist_ps_naive(z, X)
naive_seconds = time() - t1
print('naive point2set, running time: ', naive_seconds, 's')

# Time the vectorised version on the same inputs.
t2 = time()
D2 = dist_ps_fast(z, X)
fast_seconds = time() - t2
print('fast point2set, running time: ', fast_seconds, 's')

# D1 has shape (1, N) and D2 has shape (N,); the subtraction broadcasts.
print('result difference: ', np.linalg.norm(D1 - D2))

naive point2set, running time:  0.09869265556335449 s
fast point2set, running time:  0.023698806762695312 s
result difference:  2.3020953036420277e-11


In [45]:
# squared distances between two sets of points: 100 query points of dimension d
Z = np.random.standard_normal((100, d))

def dist_ss_0(Z, X):
    """Squared distances between every row of ``Z`` and every row of ``X``.

    Loops over the rows of ``Z`` in Python and hands each one to
    ``dist_ps_fast``. The result has shape ``(Z.shape[0], X.shape[0])`` and
    row ``i`` equals ``dist_ps_fast(Z[i], X)``.
    """
    out = np.zeros((Z.shape[0], X.shape[0]))
    for row, query in enumerate(Z):
        out[row] = dist_ps_fast(query, X)
    return out


In [46]:
def dist_ss_fast(Z, X):
    """Squared Euclidean distances between all rows of ``Z`` and all rows of ``X``.

    Uses ``||z - x||^2 = ||z||^2 + ||x||^2 - 2 z.x`` so the whole table comes
    from one matrix product plus broadcasting.

    Parameters
    ----------
    Z : 2-D array, one query point per row.
    X : 2-D array, one data point per row, same number of columns as ``Z``.

    Returns
    -------
    Array of shape ``(Z.shape[0], X.shape[0])``; entry ``[i, j]`` is the
    squared distance between ``Z[i]`` and ``X[j]``; never negative.
    """
    Z2 = np.sum(Z*Z, 1)   # squared norm of each query row
    X2 = np.sum(X*X, 1)   # squared norm of each data row
    # Column vector + row vector broadcasts to the full (M, N) table.
    # Rounding in the expansion can produce tiny negatives for (near-)identical
    # points, so clip at zero to keep the result a valid squared distance.
    return np.maximum(Z2.reshape(-1,1) + X2.reshape(1,-1) - 2*Z.dot(X.T), 0)

In [47]:
# Time the version that still loops over the rows of Z in Python.
t1 = time()
D3 = dist_ss_0(Z, X)
loop_seconds = time() - t1
print('half fast set2set running time: ', loop_seconds, 's')

# Time the fully vectorised version on the same inputs.
t2 = time()
D4 = dist_ss_fast(Z, X)
vectorised_seconds = time() - t2
print('fast set2set running time: ', vectorised_seconds, 's')

# Norm of the element-wise difference between the two result tables.
print('result difference: ', np.linalg.norm(D3 - D4))

half fast set2set running time:  2.769139051437378 s
fast set2set running time:  0.05274391174316406 s
result difference:  1.0193798628392088e-10


In [48]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn import datasets, neighbors

In [49]:
# Load the iris data set and split it into features and class labels.
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target

In [51]:
# Distinct class ids present in the target vector.
class_ids = np.unique(iris_y)
print('Labels:', class_ids)

Labels: [0 1 2]


In [53]:
# train_test_split is given no random_state here, so seed the global NumPy RNG
# beforehand to make the split repeatable.
np.random.seed(7)

# Hold out 130 samples for testing; the remaining ones form the training set.
X_train, X_test, y_train, y_test = train_test_split(
    iris_X, iris_y, test_size=130)

print('Training size:', len(X_train), ', test size:', len(X_test))

Training size: 20 , test size: 130


In [57]:
# Baseline: each test point takes the label of its single nearest training
# point (p=2 selects the Euclidean form of the Minkowski metric).
model = neighbors.KNeighborsClassifier(p=2, n_neighbors=1)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy_pct = 100 * accuracy_score(y_test, y_pred)
print('Accuracy of 1NN: %.2f%%' % accuracy_pct)

Accuracy of 1NN: 92.31%


In [58]:
# Vote among the 7 nearest training points, using the classifier's default
# weighting (no `weights` argument is passed).
model = neighbors.KNeighborsClassifier(p=2, n_neighbors=7)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy_pct = 100 * accuracy_score(y_test, y_pred)
print('Accuracy of 7NN: %.2f%%' % accuracy_pct)

Accuracy of 7NN: 93.85%


In [59]:
# Same 7 neighbours, but with scikit-learn's built-in 'distance' weighting so
# closer neighbours count for more than farther ones.
model = neighbors.KNeighborsClassifier(p=2, n_neighbors=7, weights='distance')
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
accuracy_pct = 100 * accuracy_score(y_test, y_pred)
print('Accuracy of 7NN (1/distance weights): %.2f%%' % accuracy_pct)

Accuracy of 7NN (1/distance weights): 94.62%


In [62]:
def myweight(distances, sigma2=.4):
    """Gaussian-shaped neighbour weights ``exp(-distances**2 / sigma2)``.

    Parameters
    ----------
    distances : array-like
        Distances to the neighbours; any shape.
    sigma2 : float, default 0.4
        Width of the kernel. Larger values make the weight fall off more slowly
        with distance. The default reproduces the previously hard-coded value,
        so the function can still be called with ``distances`` alone.

    Returns
    -------
    Array with the same shape as ``distances``: 1 at distance 0, decreasing
    toward 0 as the distance grows.
    """
    # Accept plain lists/tuples as well as arrays.
    distances = np.asarray(distances)
    return np.exp(-distances**2/sigma2)

# 7-NN where each neighbour's vote is weighted by the user-defined callable
# `myweight` rather than one of scikit-learn's built-in schemes.
model = neighbors.KNeighborsClassifier(n_neighbors=7, p = 2, weights=myweight)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# The label names the weighting actually used here; it previously repeated the
# "1/distance weights" text of the built-in 'distance' scheme.
print('Accuracy of 7NN (customized weights): %.2f%%' %(100*accuracy_score(y_test, y_pred)))

Accuracy of 7NN (1/distance weights): 95.38%
