In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

import os
# Walk the input directory tree and print the full path of every file found.
input_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# **Importing Dataset**

In [None]:
# Read the heart-disease CSV (relative path) into the working DataFrame.
heart_csv_path = "../input/heart-disease-uci/heart.csv"
df = pd.read_csv(heart_csv_path)

In [None]:
# Preview the first five rows (the pandas default) to sanity-check the load.
df.head(n=5)

# **Feature Selection**

In [None]:
# Pairwise correlation between the columns of df (pandas' default method).
corrmat = df.corr()
top_corr_features = corrmat.index
plt.figure(figsize=(20,20))
# top_corr_features is corrmat's own index, so corrmat already is the matrix
# df[top_corr_features].corr() would produce; plot it instead of recomputing.
g = sns.heatmap(corrmat, annot=True, cmap="RdYlGn")
g.set_title("Feature correlation matrix");

In [None]:
# Histogram of the values in the target column, labelled so the figure
# can be read on its own when the notebook is skimmed.
target_ax = df['target'].hist()
target_ax.set(xlabel="target", ylabel="Count", title="Distribution of target");

# **Getting Dummy Variables for Categorical Features**

In [None]:
# One-hot encode the listed columns; every other column of df is carried over as-is.
categorical_columns = ['sex', 'cp', 'fbs', 'restecg', 'exang', 'slope', 'ca', 'thal']
dataset = pd.get_dummies(df, columns=categorical_columns)

# **Standardization**

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
standardScaler = StandardScaler()
columns_to_scale = ['age', 'trestbps', 'chol', 'thalach', 'oldpeak']
dataset[columns_to_scale] = standardScaler.fit_transform(dataset[columns_to_scale])

# **Splitting into Dependent and Independent Variables**

In [None]:
y = dataset['target']
X = dataset.drop(['target'], axis = 1)

# **Perform Test Train Split**

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30)

# **Using KNN Algorithm**

In [None]:
#Importing KNN
from sklearn.neighbors import KNeighborsClassifier

# **Finding suitable value of k**

In [None]:
# Candidate neighbour counts to evaluate: 1 through 40 inclusive.
k = list(range(1, 41))
print(k)

# **Evaluation of model for various values of K**

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

# Score each candidate K by 5-fold cross-validation on the training split only.
# Picking K by its accuracy on X_test would tune the model on the very data
# used for the final score, which makes that score optimistically biased.
accuracy = []
for n_neighbors in k:
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    fold_scores = cross_val_score(knn, X_train, y_train, cv=5, scoring="accuracy")
    # One number per K: the mean accuracy across the validation folds.
    accuracy.append(fold_scores.mean())

# **Plot a graph of Accuracy vs K value**

In [None]:
# Accuracy for each candidate K: dashed blue line with red-filled circle markers.
accuracy_line_style = dict(color='blue', linestyle='dashed', marker='o', markerfacecolor='red', markersize=10)
plt.title("Accuracy vs K Value")
plt.xlabel("Value of K")
plt.ylabel("Accuracy")
plt.plot(k, accuracy, **accuracy_line_style)

# **Plot a graph of Error vs K value**

In [None]:
# Error rate is the complement of the accuracy recorded for each K.
error = [1 - acc for acc in accuracy]
error_line_style = dict(color='blue', linestyle='dashed', marker='o', markerfacecolor='red', markersize=10)
plt.title("Error rate vs K Value")
plt.xlabel("Value of K")
plt.ylabel("Error Rate")
plt.plot(k, error, **error_line_style)

# **The best value of K is K=6, as it has the lowest error rate and the highest accuracy**

In [None]:
# Fit the classifier with K=6 and report its accuracy on the test split.
knn = KNeighborsClassifier(n_neighbors=6).fit(X_train, y_train)  # fit() returns the estimator itself
y_pred = knn.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", test_accuracy)

# **Accuracy achieved using the KNN algorithm: 0.879**