In [72]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [73]:
class KNN:
    def __euclidean_distance(self, row1, row2):
        distance = 0.0
        for i in range(len(row1)-1):
            distance += (float(row1[i]) - float(row2[i]))**2
        return np.sqrt(distance)


    def __get_neighbors(self, train, test_row, num_neighbors):
        distances = list()
        for train_row in train:
            train_row_list = train_row[:-1].tolist()
            dist = self.__euclidean_distance(test_row, train_row_list)
            distances.append((train_row_list, dist))
        distances.sort(key=lambda tup: tup[1])
        neighbors = [distances[i][0] for i in range(num_neighbors)]
        return neighbors

    def predict_classification(self, train, test_row, num_neighbors):
        neighbors = self.__get_neighbors(train, list(test_row), num_neighbors)
        output_values = [row[-1] for row in neighbors]
        prediction = max(set(output_values), key=output_values.count)
        return prediction



In [74]:
df = pd.read_csv("./dataset.csv")
np_dataset = df.to_numpy()


test_row = 0.4073167465959663,0.9671155219265555,0

knn = KNN()



for k in range(1, 10, 2):
    print(f"K = {k}")
    print(f"KNN accuracy: {knn.predict_classification(np_dataset, test_row, k)}")



print(f"Accuracy for K as 3: {knn.predict_classification(np_dataset, test_row, 3)}")

Accuracy for K as 3: 0.9671155209265556
K = 1
KNN accuracy: 0.9671155209265556
K = 3
KNN accuracy: 0.9671155209265556
K = 5
KNN accuracy: 0.9671155209265556
K = 7
KNN accuracy: 0.9671155209265556
K = 9
KNN accuracy: 0.9671155209265556


In [77]:
def evaluate_knn(train_data, test_data, num_neighbors, threshold=0.5):
    knn = KNN()
    correct_predictions = 0
    total_instances = len(test_data)

    for test_instance in test_data:
        test_features = test_instance[:-1]
        actual_label = test_instance[-1]
        predicted_value = knn.predict_classification(train_data, test_features, num_neighbors)

        predicted_label = 1 if float(predicted_value) >= threshold else 0

        if int(predicted_label) == int(actual_label):
            correct_predictions += 1

    accuracy = correct_predictions / total_instances
    return accuracy


train_data, test_data = train_test_split(np_dataset, test_size=0.2, random_state=42)

for k in range(1, 16, 2):
    accuracy = evaluate_knn(train_data, test_data, num_neighbors=k)
    print(f"K = {k}, Accuracy: {accuracy:.2f}")




K = 1, Accuracy: 0.50
K = 3, Accuracy: 0.50
K = 5, Accuracy: 0.60
K = 7, Accuracy: 0.60
K = 9, Accuracy: 0.60
K = 11, Accuracy: 0.80
K = 13, Accuracy: 0.80
K = 15, Accuracy: 0.90
