In [5]:
import random

import numpy as np

# Load the data
data = np.loadtxt('pp_tra.dat')

# Extract features (first 192 columns)
X = data[:, :-1]

# Extract labels (last column)
y = data[:, -1]



# Shuffle the dataset
combined_data = list(zip(X, y))
random.shuffle(combined_data)
X, y = zip(*combined_data)

fold_size = len(X) // 3
maxAccuracy = 0


def getAccuracy(y_val,y_pre):
    count = 0;
    n = len(y_val)
    for i in range(n):
        if y_val[i] == y_pre[i]: count+=1;

    return count/n;



class KNNClassifier:
    def __init__(self, k,p):
        self.k = k
        self.p = p
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def _minkowski_distance(self, x1, x2):
        return np.power(np.sum(np.abs(x1 - x2)**self.p), 1/self.p)

    def _get_neighbors(self, x):
        distances = [self._minkowski_distance(x, x_train) for x_train in self.X_train]
        sorted_indices = np.argsort(distances)
        neighbors_indices = sorted_indices[:self.k]
        return neighbors_indices

    def predict(self, X_test):
        predictions = [self._predict(x) for x in X_test]
        return predictions

    def _predict(self, x):
        neighbors_indices = self._get_neighbors(x)
        neighbors_labels = [self.y_train[i] for i in neighbors_indices]
        most_common = np.bincount(neighbors_labels).argmax()
        return most_common


for j in range(20):
    fold_accuracies = []
    for i in range(3):
        start_idx = i * fold_size
        end_idx = start_idx + fold_size

        X_val = X[start_idx:end_idx]  #test data
        y_val = y[start_idx:end_idx]

        X_train = X[:start_idx] + X[end_idx:]  #Training data
        y_train = y[:start_idx] + y[end_idx:]

        # Initialize your machine learning model

        model = KNNClassifier(j+1,2)  # Initialize your model instance

        # Train your model on the training data
        model.fit(X_train, y_train)

        # Predict on the validation data
        y_pred = model.predict(X_val)

        # Calculate accuracy for this fold
        accuracy = getAccuracy(y_val, y_pred)
        fold_accuracies.append(accuracy)
    average_accuracy = sum(fold_accuracies) / 5
    if(average_accuracy > maxAccuracy):
        maxAccuracy = average_accuracy
        preferedK = j+1
    std_deviation = np.std(fold_accuracies)
    print(f"Avg Accuracy for k = {j+1} is : {average_accuracy:.3f} and SD is: {std_deviation:.3f}")



print(f"Maximum average accuracy is : {maxAccuracy} for k = {preferedK}" )


data = np.loadtxt('pp_tes.dat')

# Extract features (first 192 columns)
X_Test = data[:, :-1]

# Extract labels (last column)
y_Test = data[:, -1]

best_p = None
p_values = [1, 2, 3, 4]
best_accuracy = 0

for p in p_values:
    model = KNNClassifier(preferedK,p)  # Initialize your model instance

    # Train your model on the training data
    model.fit(X, y)

    # Predict on the validation data
    y_pred = model.predict(X_Test)

    # Calculate accuracy for this fold
    accuracy = getAccuracy(y_Test, y_pred)

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_p = p
    print(f"Average Accuracy for P {p} is : {accuracy:.3f} ")


print("Best p value is :", best_p)

Avg Accuracy for k = 1 is : 0.547 and SD is: 0.004
Avg Accuracy for k = 2 is : 0.534 and SD is: 0.003
Avg Accuracy for k = 3 is : 0.544 and SD is: 0.002
Avg Accuracy for k = 4 is : 0.545 and SD is: 0.005
Avg Accuracy for k = 5 is : 0.545 and SD is: 0.009
Avg Accuracy for k = 6 is : 0.545 and SD is: 0.007
Avg Accuracy for k = 7 is : 0.543 and SD is: 0.004
Avg Accuracy for k = 8 is : 0.542 and SD is: 0.004
Avg Accuracy for k = 9 is : 0.543 and SD is: 0.005
Avg Accuracy for k = 10 is : 0.541 and SD is: 0.006
Avg Accuracy for k = 11 is : 0.541 and SD is: 0.005
Avg Accuracy for k = 12 is : 0.538 and SD is: 0.006
Avg Accuracy for k = 13 is : 0.538 and SD is: 0.006
Avg Accuracy for k = 14 is : 0.538 and SD is: 0.007
Avg Accuracy for k = 15 is : 0.538 and SD is: 0.005
Avg Accuracy for k = 16 is : 0.537 and SD is: 0.007
Avg Accuracy for k = 17 is : 0.537 and SD is: 0.005
Avg Accuracy for k = 18 is : 0.535 and SD is: 0.008
Avg Accuracy for k = 19 is : 0.535 and SD is: 0.006
Avg Accuracy for k = 