In [6]:
import pandas as pd 
import numpy as np 
from collections import Counter 
from sklearn.model_selection import train_test_split 
from sklearn.metrics import accuracy_score, confusion_matrix 

# Load dataset
data = pd.read_csv('C:\\Users\\Musakalim Khan\\Downloads\\csv-dataset\\diabetes.csv')
X = data.drop(columns='Outcome').values 
y = data['Outcome'].values 

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=42) 

# Euclidean distance function
def euclidean_distance(point1, point2): 
    return np.sqrt(np.sum((point1 - point2) ** 2)) 

# k-NN prediction function
def knn_predict(X_train, y_train, X_test, k): 
    pred = [] 
    for test_point in X_test: 
        distances = [euclidean_distance(train_point, test_point) for train_point in X_train] 
        nearest_indices = np.argsort(distances)[:k] 
        nearest_labels = y_train[nearest_indices] 
        # Use majority voting to determine the predicted label 
        predicted_label = Counter(nearest_labels).most_common(1)[0][0] 
        pred.append(predicted_label) 
    return np.array(pred) 

# Calculate accuracy
y_pred = knn_predict(X_train, y_train, X_test, k=7) 
accuracy = accuracy_score(y_test, y_pred) 
print(f'Accuracy: {accuracy}') 

# Confusion matrix
cm = confusion_matrix(y_test, y_pred) 
tp, fp, fn, tn = cm.ravel() 

print("True Positives:", tp) 
print("False Positives:", fp) 
print("False Negatives:", fn) 
print("True Negatives:", tn) 

# Accuracy calculation
accuracy = (tp + tn) / (tp + fp + tn + fn) 
print("Accuracy:", accuracy)


Accuracy: 0.6923076923076923
True Positives: 20
False Positives: 5
False Negatives: 7
True Negatives: 7
Accuracy: 0.6923076923076923
