In [1]:
from sklearn import neighbors, datasets
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from collections import Counter

# Dataset

## Load data

In [2]:
df = datasets.load_iris()
X = df.data
y = df.target
print('Number of classes: %d' %len(np.unique(y)))
print('Number of data points: %d' %len(y))
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

Number of classes: 3
Number of data points: 150


# Không sử dụng thư viện

In [3]:
def euclidean_distance(point1, point2):
    return np.sqrt(np.sum((np.array(point1) - np.array(point2))**2))

In [4]:
def knn_predict(training_data, training_labels, test_point, k):
    distances = []
    for i in range(len(training_data)):
        dist = euclidean_distance(test_point, training_data[i])
        distances.append((dist, training_labels[i]))
    distances.sort(key=lambda x: x[0])
    k_nearest_labels = [label for _, label in distances[:k]]
    return Counter(k_nearest_labels).most_common(1)[0][0]

In [5]:
y_pred = [knn_predict(X_train, y_train, x, k=3) for x in X_test]

# Đánh giá độ chính xác
print("Accuracy: %.2f%%" % (100 * accuracy_score(y_test, y_pred)))


Accuracy: 90.00%


# K-nearest neighbors sử dụng thư viện có sẵn

## k = 1

In [6]:
model = neighbors.KNeighborsClassifier(n_neighbors = 1, p = 2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print ("Accuracy of 1NN: %.2f %%" %(100*accuracy_score(y_test, y_pred)))

Accuracy of 1NN: 90.00 %


## k = 10

In [51]:
model = neighbors.KNeighborsClassifier(n_neighbors = 12, p = 2)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print ("Accuracy of 1NN: %.2f %%" %(100*accuracy_score(y_test, y_pred)))

Accuracy of 1NN: 86.67 %


## k = 10, trọng số theo khoảng cách

In [53]:
model = neighbors.KNeighborsClassifier(n_neighbors = 12, p = 2, weights = 'distance')
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print ("Accuracy of 1NN: %.2f %%" %(100*accuracy_score(y_test, y_pred)))

Accuracy of 1NN: 86.67 %
