In [46]:
# importing required functions
import random
import csv
import numpy as np

In [47]:
with open("seeds.csv", "r") as file:
    lines = file.readlines()
    attribute_names = lines[0].strip().split(",")
    data = [list(map(float, line.strip().split(","))) for line in lines[1:]]

labels = [row[-1] for row in data]
data_without_labels = [row[:-1] for row in data]

#Shuffling the data and labels randomly
combined_data = list(zip(data_without_labels, labels))
random.shuffle(combined_data)
data_without_labels, labels = zip(*combined_data)

# Splitting
split_ratio = 0.8
split_idx = int(split_ratio * len(data))


training_data = data_without_labels[:split_idx]
training_labels = labels[:split_idx]
testing_data = data_without_labels[split_idx:]
testing_labels = labels[split_idx:]

In [48]:
# Defining the KNNClassifier
class KNNClassifier:
    def __init__(self, k=3, p=2):
        self.k = k
        self.p = p

    def fit(self, X, y):
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        predictions = [self._predict(x) for x in X]
        return predictions

    def _predict(self, x):
        distances = [sum(abs(xi - xj)**self.p for xi, xj in zip(x_train, x))**(1/self.p) for x_train in self.X_train]
        k_indices = sorted(range(len(distances)), key=lambda i: distances[i])[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        most_common = max(set(k_nearest_labels), key=k_nearest_labels.count)
        return most_common


In [49]:
best_accuracy = 0.0
best_k = 1
best_p = 1

for k in range(1, 11):
    for p in range(1, 11):
        accuracies = []

        for i in range(3):
            start_idx = i * len(data_without_labels) // 3
            end_idx = (i + 1) * len(data_without_labels) // 3

            validation_data = data_without_labels[start_idx:end_idx]
            validation_labels = labels[start_idx:end_idx]

            training_data = data_without_labels[:start_idx] + data_without_labels[end_idx:]
            training_labels = labels[:start_idx] + labels[end_idx:]

            #fit the KNN classifier
            knn = KNNClassifier(k=k, p=p)
            knn.fit(training_data, training_labels)

            # Make predictions
            predictions = knn.predict(validation_data)

            # Calculate accuracy for the fold
            fold_accuracy = sum(1 for true, pred in zip(validation_labels, predictions) if true == pred) / len(validation_labels)
            accuracies.append(fold_accuracy)

        # Calculate average accuracy for all the folds
        average_accuracy = sum(accuracies) / 3

        print(f"For K = {k} and P = {p}\nAvg accuraccy = {average_accuracy:.5f}")

        if average_accuracy > best_accuracy:
            best_accuracy = average_accuracy
            best_k = k
            best_p = p
print(f"Best k: {best_k} and Best p : {best_p}")
print(f"Best Accuracy: {best_accuracy:.3f}")

For K = 1 and P = 1
Avg accuraccy = 0.89929
For K = 1 and P = 2
Avg accuraccy = 0.90427
For K = 1 and P = 3
Avg accuraccy = 0.90939
For K = 1 and P = 4
Avg accuraccy = 0.91942
For K = 1 and P = 5
Avg accuraccy = 0.92447
For K = 1 and P = 6
Avg accuraccy = 0.92447
For K = 1 and P = 7
Avg accuraccy = 0.92447
For K = 1 and P = 8
Avg accuraccy = 0.92447
For K = 1 and P = 9
Avg accuraccy = 0.92447
For K = 1 and P = 10
Avg accuraccy = 0.92447
For K = 2 and P = 1
Avg accuraccy = 0.90457
For K = 2 and P = 2
Avg accuraccy = 0.91949
For K = 2 and P = 3
Avg accuraccy = 0.90954
For K = 2 and P = 4
Avg accuraccy = 0.91459
For K = 2 and P = 5
Avg accuraccy = 0.90457
For K = 2 and P = 6
Avg accuraccy = 0.89462
For K = 2 and P = 7
Avg accuraccy = 0.89462
For K = 2 and P = 8
Avg accuraccy = 0.89959
For K = 2 and P = 9
Avg accuraccy = 0.89959
For K = 2 and P = 10
Avg accuraccy = 0.89959
For K = 3 and P = 1
Avg accuraccy = 0.91437
For K = 3 and P = 2
Avg accuraccy = 0.91942
For K = 3 and P = 3
Avg accura

In [50]:
split_idx = int(0.8 * len(data_without_labels))
training_data = data_without_labels[:split_idx]
training_labels = labels[:split_idx]
testing_data = data_without_labels[split_idx:]
test_labels = labels[split_idx:]

knn = KNNClassifier(k=best_k, p=best_p)
knn.fit(training_data, training_labels)

preds = knn.predict(testing_data)

# Calculating TP, FP, FN for each label
tp_l1 = sum(1 for true, pred in zip(test_labels, preds) if true == pred and true == 1)
fp_l1 = sum(1 for true, pred in zip(test_labels, preds) if true != pred and true == 2)
fn_l1 = sum(1 for true, pred in zip(test_labels, preds) if true != pred and true == 1 or true == 3)

lp_l2 = sum(1 for true, pred in zip(test_labels, preds) if true == pred and true == 2)
fp_l2 = sum(1 for true, pred in zip(test_labels, preds) if true != pred and true == 1 or true == 3)
fn_l2 = sum(1 for true, pred in zip(test_labels, preds) if true != pred and true == 2)

tp_l3 = sum(1 for true, pred in zip(test_labels, preds) if true == pred and true == 3)
fp_l3 = sum(1 for true, pred in zip(test_labels, preds) if true != pred and true == 1 or true == 2)
fn_l3 = sum(1 for true, pred in zip(test_labels, preds) if true != pred and true == 3)

# Calculating TN for each label
tn_l1 = len(test_labels) - (tp_l1 + fp_l1 + fn_l1)
tn_l2 = len(test_labels) - (lp_l2 + fp_l2 + fn_l2)
tn_l3 = len(test_labels) - (tp_l3 + fp_l3 + fn_l3)

# Calculating metrics for each label
acc_l1 = (tp_l1 + tn_l2 + tn_l3) / len(test_labels)
prec_l1 = tp_l1 / (tp_l1 + fp_l1) if tp_l1 + fp_l1 != 0 else 0
rec_l1 = tp_l1 / (tp_l1 + fn_l1) if tp_l1 + fn_l1 != 0 else 0
f1_l1 = 2 * (prec_l1 * rec_l1) / (prec_l1 + rec_l1) if prec_l1 + rec_l1 != 0 else 0

acc_l2 = (lp_l2 + tn_l1 + tn_l3) / len(test_labels)
prec_l2 = lp_l2 / (lp_l2 + fp_l2) if lp_l2 + fp_l2 != 0 else 0
re_l2 = lp_l2 / (lp_l2 + fn_l2) if lp_l2 + fn_l2 != 0 else 0
f1_l2 = 2 * (prec_l2 * re_l2) / (prec_l2 + re_l2) if prec_l2 + re_l2 != 0 else 0

acc_l3 = (tp_l3 + tn_l1 + tn_l2) / len(test_labels)
pre_l3 = tp_l3 / (tp_l3 + fp_l3) if tp_l3 + fp_l3 != 0 else 0
rec_l3 = tp_l3 / (tp_l3 + fn_l3) if tp_l3 + fn_l3 != 0 else 0
f1_l3 = 2 * (pre_l3 * rec_l3) / (pre_l3 + rec_l3) if pre_l3 + rec_l3 != 0 else 0

# Printing metrics for each label
print("Label 1 Metrics are:")
print(f"Accuracy: {acc_l1:.3f}")
print(f"Precision: {prec_l1:.3f}")
print(f"Recall: {rec_l1:.3f}")
print(f"F1 Score: {f1_l1:.3f}")

print("\nLabel 2 Metrics are :")
print(f"Accuracy: {acc_l2:.3f}")
print(f"Precision: {prec_l2:.3f}")
print(f"Recall: {re_l2:.3f}")
print(f"F1 Score: {f1_l2:.3f}")

print("\nLabel 3 Metrics:")
print(f"Accuracy: {acc_l3:.3f}")
print(f"Precision: {pre_l3:.3f}")
print(f"Recall: {rec_l3:.3f}")
print(f"F1 Score: {f1_l3:.3f}")



Label 1 Metrics:
Accuracy: 0.60
Precision: 0.80
Recall: 0.32
F1 Score: 0.46

Label 2 Metrics:
Accuracy: 0.85
Precision: 0.43
Recall: 0.87
F1 Score: 0.58

Label 3 Metrics:
Accuracy: 0.90
Precision: 0.48
Recall: 0.94
F1 Score: 0.64
