# Predicting Heart Disease with K-NN and SVM

This notebook is a simple introduction to the k-nearest neighbours (K-NN) and support vector machine (SVM) classification algorithms, applied to a heart disease dataset available at https://data.world/informatics-edu/heart-disease-prediction.

# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
import sklearn.neighbors
from sklearn.neighbors import KNeighborsClassifier

# Data

In [None]:
# Load the heart disease CSV into a DataFrame (path is relative to the notebook).
csv_path = '../input/heart-disease-prediction/Heart_Disease_Prediction.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first five rows to get a feel for the columns and value types.
df.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) as produced by
# pandas' default describe().
df.describe()

In [None]:
# Dataset dimensions as a (rows, columns) tuple.
df.shape

In [None]:
# Missing-value check: True if at least one cell anywhere in the frame is null.
df.isna().to_numpy().any()

In [None]:
# Class balance by sex: number of rows for each distinct value of 'Sex'.
df.loc[:, 'Sex'].value_counts()

In [None]:
# Class balance of the target: number of rows for each 'Heart Disease' outcome.
df.loc[:, 'Heart Disease'].value_counts()

In [None]:
# Bar chart of the target distribution (one bar per 'Heart Disease' value).
sns.countplot(data=df, x='Heart Disease')

In [None]:
# Checking for any correlations.
# Only numeric columns are passed to corr(): on pandas >= 2.0 a plain
# df.corr() raises if the frame contains any non-numeric column (e.g. a
# string-valued label), whereas older versions silently skipped such columns.
# Selecting numeric dtypes explicitly gives the same table on every version.
df.select_dtypes(include='number').corr()

# K-Nearest Neighbours 

In [None]:
# Splitting the dataset into training and testing sets.
# Features are every column except the last one; the last column is the label.
# (The earlier slice `[:, :-2]` also threw away the final predictor column.)
# NOTE(review): assumes the target ('Heart Disease') is the last column - confirm.
x = df.iloc[:, :-1]
y = df.iloc[:, -1]
# 35% of the rows are held out for testing; random_state pins the shuffle so
# the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 0, test_size = 0.35)

In [None]:
# Standardise the features with StandardScaler.
# The scaler is fitted on the training split only, and the same fitted
# transformation is then applied to both the training and the test split.
sc_x = StandardScaler()
sc_x.fit(x_train)
x_train, x_test = sc_x.transform(x_train), sc_x.transform(x_test)

In [None]:
# Rule-of-thumb starting point for the number of neighbours: the square root
# of the number of samples in the test split.
# NOTE(review): this heuristic is more commonly applied to the training-set
# size - confirm which one is intended.
import math
math.sqrt(y_test.shape[0])

In [None]:
# Build the K-NN classifier (9 neighbours, Euclidean distance) and train it
# on the training split.
classifier = KNeighborsClassifier(metric='euclidean', p=2, n_neighbors=9)
classifier.fit(x_train, y_train)

In [None]:
# Predict labels for the held-out test split with the fitted K-NN model and
# display them. `y_pred` is read by the confusion-matrix and accuracy cells
# that follow.
y_pred = classifier.predict(x_test)
y_pred

In [None]:
# Confusion matrix of true vs. predicted test labels
# (rows: actual class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
print(accuracy_score(y_true=y_test, y_pred=y_pred))

# SVM Model

In [None]:
# Creating SVM model.
from sklearn import svm
# RBF-kernel support vector classifier, trained on the same standardised
# training split as the K-NN model. Prediction is done in the next cell,
# mirroring the K-NN section, so the duplicate predict() call that used to
# sit here has been removed.
clf = svm.SVC(kernel='rbf')
clf.fit(x_train,y_train)

In [None]:
# Predict labels for the test split with the fitted SVM and display them.
# This reuses the name `y_pred`, overwriting the K-NN predictions made
# earlier, so the metric cells below evaluate the SVM.
y_pred = clf.predict(x_test)
y_pred

In [None]:
# Confusion matrix for the current contents of `y_pred`
# (rows: actual class, columns: predicted class).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

In [None]:
# Fraction of test samples whose predicted label matches the true label.
print(accuracy_score(y_true=y_test, y_pred=y_pred))