In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import numpy as np

# Read the heart dataset from the working directory.
df = pd.read_csv('heart.csv')

# Separate the label column from the predictors
# (the label column is assumed to be named 'target').
y = df['target']
X = df.drop(columns='target')

# Hold out a fixed fraction of the rows for testing; the fixed seed makes
# the shuffle (and therefore the split) reproducible between runs.
t_size = 0.20
seed = 7
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=t_size,
    random_state=seed,
)

# Report the dimensions of each partition as a quick sanity check.
for _caption, _part in (
    ("X_train shape:", X_train),
    ("X_test shape:", X_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(_caption, _part.shape)

# Sweep the number of neighbours for a k-NN classifier and record the
# accuracy obtained on the held-out test split for every k.
#
# scikit-learn rejects n_neighbors larger than the number of fitted samples
# (ValueError when neighbours are queried), so the sweep is capped at the
# training-set size instead of assuming at least 250 training rows exist.
max_k = min(250, len(X_train))
neighbors = range(1, max_k + 1)

# NOTE(review): k is selected using test-set accuracy, so the reported best
# score is an optimistic estimate; consider cross-validation on the training
# split if an unbiased figure is needed.
accuracies = []
for k in neighbors:
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    accuracies.append(accuracy)

accuracies = np.array(accuracies)

highest_accuracy = np.max(accuracies)
lowest_accuracy = np.min(accuracies)

# argmax/argmin return the first matching position, so when several k values
# tie, the smallest such k is the one reported.
best_k = neighbors[np.argmax(accuracies)]
worst_k = neighbors[np.argmin(accuracies)]

print(f"Highest accuracy: {highest_accuracy:.4f} at k={best_k}")
print(f"Lowest accuracy: {lowest_accuracy:.4f} at k={worst_k}")

X_train shape: (820, 13)
X_test shape: (205, 13)
y_train shape: (820,)
y_test shape: (205,)
Highest accuracy: 0.9805 at k=1
Lowest accuracy: 0.6439 at k=230
