In [3]:
from time import perf_counter, time

import numpy as np
# Problem size: d is the dimensionality of each point, N the number of
# points stored in X.
d = 1000
N = 10000

# Seed the global NumPy generator so z and X are identical on every run.
np.random.seed(4)
z = np.random.rand(d)      # single query point, shape (d,)
X = np.random.rand(N, d)   # data set, one point per row, shape (N, d)


def dis_pp(z, x):
    """Return the squared Euclidean distance between points ``z`` and ``x``.

    ``x`` is reshaped to ``z.shape`` before the subtraction, so the two
    arrays only need to contain the same number of elements.
    """
    delta = z - x.reshape(z.shape)
    squared = np.square(delta)
    return np.sum(squared)


def dis_ps_naive(z, X):
    """Squared distances from point ``z`` to every row of ``X``, one at a time.

    Loops over the rows of ``X`` and calls ``dis_pp`` for each of them.

    Returns a float array of shape ``(1, X.shape[0])``.
    """
    out = np.zeros((1, X.shape[0]))
    row = out[0]  # view on the single output row; writes land in ``out``
    for idx, point in enumerate(X):
        row[idx] = dis_pp(z, point)
    return out


def dis_ps_fast(z, X):
    """Squared distances from point ``z`` to every row of ``X``, vectorized.

    Uses the expansion ``||x - z||^2 = ||x||^2 + ||z||^2 - 2 * x.z`` so the
    whole computation is a matrix-vector product plus two reductions.

    Parameters
    ----------
    z : 1-D array of length d (the query point).
    X : 2-D array of shape (N, d), one point per row.

    Returns
    -------
    1-D array of length N holding the squared distance to each row of ``X``.
    Every entry is >= 0.
    """
    X2 = np.sum(X * X, 1)   # squared norm of each row of X
    z2 = np.sum(z * z)      # squared norm of z
    sq_dist = X2 + z2 - 2 * X.dot(z)
    # The expanded form subtracts nearly equal numbers when a row is close to
    # z, so rounding can leave a tiny negative value where the true result is
    # ~0. A squared distance cannot be negative (and sqrt of it would be NaN),
    # so clamp at zero.
    return np.maximum(sq_dist, 0)


# Time the loop-based and the vectorized point-to-set distances on the same
# inputs. perf_counter() is a monotonic clock with the highest available
# resolution; time() follows the system wall clock, whose resolution can be
# far too coarse for intervals of a few tens of milliseconds.
t1 = perf_counter()
D1 = dis_ps_naive(z, X)
print("Running time of test 1:", perf_counter() - t1)
t1 = perf_counter()
D2 = dis_ps_fast(z, X)
print("Running time of test 2", perf_counter() - t1)
# D1 has shape (1, N) and D2 has shape (N,); broadcasting lines them up.
print("Difference results:", np.linalg.norm(D2 - D1))


Running time of test 1: 0.07811379432678223
Running time of test 2 0.04687333106994629
Difference results: 9.67808243788816e-12


In [2]:
# M query points, one per row; the dimensionality d is set in an earlier cell.
# No seed is set here, so Z depends on the state the global generator was
# left in by previously executed cells.
M = 100
Z = np.random.rand(M, d)

def dis_ss_naive(Z, X):
    """Squared distances between every row of ``Z`` and every row of ``X``.

    Processes ``Z`` one row at a time, delegating each row to
    ``dis_ps_naive``.

    Returns a float array of shape ``(Z.shape[0], X.shape[0])``.
    """
    out = np.zeros((Z.shape[0], X.shape[0]))
    for row_idx, point in enumerate(Z):
        # dis_ps_naive returns a (1, N) array; it is stored as row row_idx.
        out[row_idx] = dis_ps_naive(point, X)
    return out
def dis_ss_fast(Z, X):
    """Squared distances between every row of ``Z`` and every row of ``X``.

    Uses ``||z - x||^2 = ||z||^2 + ||x||^2 - 2 * z.x`` so the pairwise part
    is a single matrix product.

    Parameters
    ----------
    Z : 2-D array of shape (M, d), one point per row.
    X : 2-D array of shape (N, d), one point per row.

    Returns
    -------
    2-D array of shape (M, N); entry (i, j) is the squared distance between
    ``Z[i]`` and ``X[j]``. Every entry is >= 0.
    """
    X2 = np.sum(X ** 2, 1)   # squared norm of each row of X
    Z2 = np.sum(Z ** 2, 1)   # squared norm of each row of Z
    # Column vector + row vector broadcasts to the full (M, N) grid.
    sq_dist = Z2.reshape(-1, 1) + X2.reshape(1, -1) - 2 * Z.dot(X.T)
    # Cancellation in the expanded form can produce tiny negative values for
    # (near-)coincident points; a squared distance cannot be negative.
    return np.maximum(sq_dist, 0)
# Time the loop-based and the vectorized set-to-set distances on the same
# inputs. perf_counter() is a monotonic clock with the highest available
# resolution, which makes it the appropriate timer for short intervals;
# time() follows the system wall clock and can be much coarser.
t1 = perf_counter()
D3 = dis_ss_naive(Z, X)
print("Running time of test 3:", perf_counter() - t1)
t1 = perf_counter()
D4 = dis_ss_fast(Z, X)
print("Running time of test 4:", perf_counter() - t1)
# Both results have shape (M, N); the norm of their difference should be ~0.
print("Difference results:", np.linalg.norm(D4 - D3))




Running time of test 3: 10.387345552444458
Running time of test 4: 0.2909712791442871
Difference results: 1.0659028135774289e-10


In [12]:
import numpy as np
from sklearn import neighbors, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Seed the global NumPy generator before the split below, which is not given
# an explicit random_state.
np.random.seed(3)

# Load the iris data set and pull out the feature matrix and label vector.
iris = datasets.load_iris()
iris_X, iris_y = iris.data, iris.target
print("Labels:", np.unique(iris_y))

# Hold out 130 samples for testing; the remaining samples form the training set.
X_train, X_test, y_train, y_test = train_test_split(
    iris_X, iris_y, test_size=130
)
print("Train size:", X_train.shape[0], "Test_size:", X_test.shape[0])

Labels: [0 1 2]
Train size: 20 Test_size: 130


In [14]:
# 1-nearest-neighbor classifier with the Minkowski power p=2.
model = neighbors.KNeighborsClassifier(n_neighbors=1, p=2)
# fit() returns the estimator itself, so prediction can be chained onto it.
y_pre = model.fit(X_train, y_train).predict(X_test)
print("Accuracy of 1NN:%.2f %%" % (100 * accuracy_score(y_test, y_pre)))


Accuracy of 1NN:92.31 %


In [13]:
# 7 nearest neighbors with the default (uniform) vote among the neighbors.
model = neighbors.KNeighborsClassifier(n_neighbors=7, p=2)
# fit() returns the estimator itself, so prediction can be chained onto it.
y_pre = model.fit(X_train, y_train).predict(X_test)
print("Accuracy of 7NN with major voting: %.2f%%" % (100 * accuracy_score(y_test, y_pre)))

Accuracy of 7NN: 87.69%


In [16]:
# 7 nearest neighbors; weights="distance" makes closer neighbors count more
# in the vote.
model = neighbors.KNeighborsClassifier(
    n_neighbors=7, p=2, weights="distance"
)
# fit() returns the estimator itself, so prediction can be chained onto it.
y_pre = model.fit(X_train, y_train).predict(X_test)
print("Accuracy of 7NN (1/distance weight): %.2f%%" % (100 * accuracy_score(y_test, y_pre)))

Accuracy of 7NN (1/distance weight): 89.23%


In [17]:
def myweight(distance, sigma2=0.4):
    """Exponentially decaying neighbor weight: ``exp(-distance / sigma2)``.

    Parameters
    ----------
    distance : numpy array (or scalar) of neighbor distances.
    sigma2 : float, default 0.4
        Scale of the decay; larger values make the weights fall off more
        slowly with distance. Must be positive.

    Returns
    -------
    Array of the same shape as ``distance`` with weights in ``(0, 1]`` for
    non-negative distances.

    Raises
    ------
    ValueError
        If ``sigma2`` is not positive.

    NOTE(review): the name ``sigma2`` suggests a Gaussian kernel
    ``exp(-distance**2 / sigma2)``, but the distance is not squared here.
    The formula is kept as-is; confirm which form is intended.
    """
    if sigma2 <= 0:
        raise ValueError("sigma2 must be positive")
    return np.exp(-distance / sigma2)
# 7 nearest neighbors, with each neighbor's vote weighted by myweight().
model = neighbors.KNeighborsClassifier(
    n_neighbors=7, p=2, weights=myweight
)
# fit() returns the estimator itself, so prediction can be chained onto it.
y_pre = model.fit(X_train, y_train).predict(X_test)
print("Accuracy of 7NN (customized weight): %.2f%%" % (100 * accuracy_score(y_test, y_pre)))


Accuracy of 7NN (customized weight): 90.77%
