In [7]:
import numpy as np
from collections import Counter

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float arrays before subtracting, so plain
    Python sequences are accepted and unsigned-integer inputs cannot wrap
    around during the subtraction.

    Parameters
    ----------
    x1, x2 : array-like of numbers
        Coordinates of the two points; their shapes must be broadcastable.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    a = np.asarray(x1, dtype=float)
    b = np.asarray(x2, dtype=float)
    return np.sqrt(np.sum((a - b) ** 2))

class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of closest training samples that vote on each prediction.
        If ``k`` exceeds the number of training samples, all of them vote.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        if k < 1:
            raise ValueError(f"k must be at least 1, got {k!r}")
        self.k = k
        # Filled in by fit(); None marks a model that has not been fitted yet.
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Memorise the training samples and their labels.

        Inputs are converted to NumPy arrays so that later lookups are
        positional (a pandas Series with a shuffled index would otherwise be
        indexed by label) and so that iterating yields rows, not column names.

        Parameters
        ----------
        X : array-like
            Training samples, one per entry along the first axis.
        y : array-like
            Label of each training sample, in the same order as ``X``.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim == 0 or y.ndim == 0 or X.shape[0] == 0:
            raise ValueError("X and y must be non-empty sequences of samples")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have different lengths: {X.shape[0]} != {y.shape[0]}"
            )
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : array-like
            Query samples, one per entry along the first axis.

        Returns
        -------
        list
            Predicted label for each query sample, in input order.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNN.predict called before fit")
        return [self._predict(x) for x in np.asarray(X, dtype=float)]

    def _predict(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``."""
        train = np.asarray(self.X_train, dtype=float)
        diffs = train - np.asarray(x, dtype=float)

        # One vectorised pass: flatten each sample's squared differences and
        # sum them, giving the Euclidean distance to every training sample.
        squared = (diffs ** 2).reshape(diffs.shape[0], -1).sum(axis=1)
        distances = np.sqrt(squared)

        # A stable sort keeps equidistant neighbours in training order, so the
        # selected neighbours (and therefore the vote) are deterministic.
        nearest = np.argsort(distances, kind="stable")[:self.k]

        # Majority vote; on a tied count the label seen first (i.e. belonging
        # to the closer neighbour) wins.
        votes = Counter(self.y_train[i] for i in nearest)
        most_common_label, _ = votes.most_common(1)[0]
        return most_common_label
  

In [8]:
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd 
from sklearn.preprocessing import LabelEncoder


# Read the dataset; only the MFCC columns named below are used as features.
data = pd.read_csv('Q1Data.csv')

# NOTE(review): 'MFCCs_9' has no space after the underscore while
# 'MFCCs_ 1' .. 'MFCCs_ 8' do — confirm this matches the CSV header.
selected_features = [
    'MFCCs_ 1', 'MFCCs_ 2', 'MFCCs_ 3', 'MFCCs_ 4', 'MFCCs_ 5', 'MFCCs_ 6',
    'MFCCs_ 7', 'MFCCs_ 8', 'MFCCs_9', 'MFCCs_10', 'MFCCs_11', 'MFCCs_12',
    'MFCCs_13', 'MFCCs_14', 'MFCCs_15', 'MFCCs_16', 'MFCCs_17', 'MFCCs_18',
    'MFCCs_19', 'MFCCs_20', 'MFCCs_21',
]

# Feature matrix as a plain NumPy array (one row per recording in the CSV).
X = data[selected_features].to_numpy()

# Encode the 'Genus' column as integer class ids; `le` is kept so the ids
# can be mapped back to the original labels later.
le = LabelEncoder()
Y = le.fit_transform(data['Genus'])

knn = KNN(k=5)




0.985406532314107


In [9]:
import matplotlib.pyplot as plt

# Hold out 20% of the samples for evaluation; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.20, random_state=42)

# Sweep odd neighbourhood sizes and record the held-out accuracy of each.
# A fresh classifier is built per k, so no model from an earlier cell is needed.
k_values = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21]
accuracies = []
for k in k_values:
    knn = KNN(k=k)
    knn.fit(X_train, y_train)
    predictions = knn.predict(X_test)
    # predict() returns a list; convert explicitly so the comparison is
    # element-wise rather than relying on NumPy's reflected `==`.
    accuracy = np.mean(np.asarray(predictions) == y_test)
    accuracies.append(accuracy)

plt.plot(k_values, accuracies)
plt.xlabel("k")
plt.ylabel("Validation Accuracy")
plt.title("KNN Classifier Accuracy")
plt.show()

In [None]:
# Find the optimal value of k: walk the accuracy curve and stop at the first
# drop, keeping the k that produced the peak just before it. This is the first
# local maximum, which is not necessarily the global one.
# If accuracy never drops, the peak is the last k that was tried.
optimal_k = k_values[-1]
for prev_idx, (prev_acc, next_acc) in enumerate(zip(accuracies, accuracies[1:])):
    if next_acc < prev_acc:
        optimal_k = k_values[prev_idx]
        break

In [None]:

# Predict labels for the held-out test samples.
# NOTE(review): `knn` is whichever model the earlier cells left bound — after
# the k sweep that is the classifier for the last k tried, not `optimal_k`;
# confirm this is the model that should be scored here.
predions = knn.predict(X_test)

def accuracy(y_test, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both inputs are converted to NumPy arrays first, so the comparison is
    element-wise even when two plain Python lists are passed (``list == list``
    would otherwise collapse to a single boolean).

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels, in the same order and of the same shape as ``y_test``.

    Returns
    -------
    numpy.float64
        Share of positions where ``y_pred`` matches ``y_test``, in [0, 1].

    Raises
    ------
    ValueError
        If the inputs are empty or their shapes differ.
    """
    true_labels = np.asarray(y_test)
    predicted_labels = np.asarray(y_pred)
    if true_labels.shape != predicted_labels.shape:
        raise ValueError(
            f"y_test and y_pred must have the same shape, "
            f"got {true_labels.shape} and {predicted_labels.shape}"
        )
    if true_labels.size == 0:
        raise ValueError("accuracy is undefined for empty inputs")
    return np.mean(true_labels == predicted_labels)

# Score the predictions above against the true test labels and show the result.
acc = accuracy(y_test, predions)
print(acc)


## B

In [None]:
# Train on a 30% subset of the data and evaluate on a separate 10% subset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.3,
    test_size=0.1,
    random_state=42,
)

# Neighbourhood sizes 1..50 and the test accuracy measured for each of them.
k_values = [*range(1, 51)]
accuracy_values = []

for k in k_values:
    # Build and fit a fresh classifier for this k.
    knn = KNN(k=k)
    knn.fit(X_train, y_train)

    # Score it on the held-out samples: share of exact label matches.
    predictions = knn.predict(X_test)
    acc = np.sum(y_test == predictions) / len(y_test)
    accuracy_values.append(acc)

# Accuracy-vs-k curve, one marker per evaluated k.
plt.plot(k_values, accuracy_values, marker="o")
plt.xlabel("k")
plt.ylabel("Test Accuracy")
plt.title("Test Accuracy for Different k Values")
plt.show()
