In [None]:
import io

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


In [None]:
from google.colab import files

# Colab's upload widget returns a dict mapping each uploaded filename to its
# raw bytes.
uploaded = files.upload()

if not uploaded:
    raise ValueError("No file was uploaded; upload the dataset CSV and re-run this cell.")

# Every later cell reads `df`, so build it here from the uploaded bytes.
# If several files were uploaded, the first one is used.
# NOTE(review): assumes the upload is a CSV containing an "Outcome" column
# (the column the later cells drop and predict) -- confirm against the dataset.
csv_name = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[csv_name]))

In [None]:
# Quick size check: (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null count, plus overall memory usage.
df.info()


In [None]:

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns. The imputation cell further down treats 0 as "missing" in several
# columns, so the `min` row is worth a look here.
df.describe()

In [None]:
# Number of explicit NaN/None entries per column (zeros are not counted here).
df.isna().sum()


In [None]:
# Columns in which a literal 0 is treated as a missing measurement.
cols = ["Glucose","BloodPressure","SkinThickness","Insulin","BMI"]

# Replace 0 with NaN, then fill the gaps with the column median (the median is
# computed after the zeros are removed, so they do not drag it down).
#
# The result is assigned back explicitly instead of calling
# `df[col].fillna(..., inplace=True)`: that form is chained assignment through
# an inplace method, which is deprecated in pandas 2.x and has no effect on
# `df` once Copy-on-Write is enabled.
#
# NOTE(review): the medians are computed on the full frame before the
# train/test split below, so test rows influence the imputed values; consider
# moving the imputation after the split.
for col in cols:
    with_nan = df[col].replace(0, np.nan)
    df[col] = with_nan.fillna(with_nan.median())


In [None]:
# Separate the label column from the predictors.
target_col = "Outcome"
y = df[target_col]
X = df.drop(columns=[target_col])


In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible. `stratify=y` keeps the class proportions of the target the
# same in both splits, so the test metrics are not skewed by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
# Standardise the features. The scaler learns its mean/std from the training
# split only, and those same statistics are then applied to both splits.
scaler = StandardScaler().fit(X_train)

X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
# Baseline classifier: k-nearest neighbours with k = 5, trained on the scaled
# training split.
knn = KNeighborsClassifier(n_neighbors=5)
# fit() stays the cell's last expression so the fitted estimator is displayed.
knn.fit(X=X_train, y=y_train)


In [None]:
# Predicted class labels for the held-out (scaled) test features.
y_pred = knn.predict(X_test)


In [None]:
# Fraction of test rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)


In [None]:
# Confusion matrix on the test split: rows are the true classes, columns the
# predicted ones. Drawn as an annotated heatmap with integer cell counts.
cm = confusion_matrix(y_test, y_pred)
ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
ax.set_xlabel("Predicted")
ax.set_ylabel("Actual")
plt.show()


In [None]:
# Per-class precision / recall / F1 and support as a plain-text table.
report = classification_report(y_test, y_pred)
print(report)


In [None]:
# Sweep K and record the mean 5-fold cross-validated accuracy for each value.
#
# The sweep is scored on the training split only: picking K by its accuracy on
# X_test would tune a hyperparameter on the hold-out set and make the reported
# test accuracy optimistic.
#
# NOTE(review): X_train was already standardised with statistics from the
# whole training split, so each CV fold sees scaling information from its own
# validation part; wrap the scaler and classifier in a Pipeline if that matters.
k_values = range(1, 21)
scores = []

for k in k_values:
    model = KNeighborsClassifier(n_neighbors=k)
    fold_accuracies = cross_val_score(model, X_train, y_train, cv=5)
    scores.append(fold_accuracies.mean())

plt.plot(k_values, scores, marker="o")
plt.title("5-fold cross-validated accuracy vs. K")
plt.xlabel("K Value")
plt.ylabel("Accuracy")
plt.show()
