## 1. IMPORT LIBRARIES

In [1]:
import math

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

from scipy import stats

In [2]:
# Read the heart-disease CSV into a DataFrame and preview its first rows.
DATA_PATH = '../dataset/heart-disease-uci/heart.csv'
df = pd.read_csv(DATA_PATH)
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [3]:
# (rows, columns) of the loaded table — a quick sanity check on the load.
df.shape

(303, 14)

## Rename Columns

In [4]:
# Give the two abbreviated columns readable names; every other column is kept as-is.
readable_names = {'cp' : 'chest_pain', 'thalach' : 'max_heart_rate'}
df = df.rename(columns=readable_names)
df.head()

Unnamed: 0,age,sex,chest_pain,trestbps,chol,fbs,restecg,max_heart_rate,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


## Checking for Missing Values

In [None]:
# Draw the boolean null-mask of the table as a heatmap so that any
# missing entry would be visible at a glance.
fig, ax = plt.subplots(figsize=(15, 5))
sns.heatmap(df.isnull(), ax=ax)
plt.show()

## Exploratory Data Analysis (EDA)

In [None]:
# Per-column summary statistics of the table.
df.describe()

In [None]:
# Pairwise correlations between all columns, rendered as an annotated heatmap.
correlation_matrix = df.corr()
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(correlation_matrix, annot=True, ax=ax)
plt.show()

## 2. SPLITTING DATASET (TRAIN & TEST)

In [None]:
# Separate the predictors from the label by column name rather than by
# position, so the split does not silently depend on the number or order
# of columns in the CSV. On the table loaded above ('target' is the last of
# 14 columns) this selects exactly the same data as the positional slices
# iloc[:, 0:13] and iloc[:, -1].
X = df.drop(columns='target')
y = df['target']

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

In [None]:
# Preview the feature matrix (all columns selected as predictors).
X.head()

In [None]:
# Preview the label vector.
y.head()

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so the test data never influences the fit.
# NOTE: X_train / X_test are overwritten with their scaled versions here.
sc_X = StandardScaler().fit(X_train)
X_train, X_test = sc_X.transform(X_train), sc_X.transform(X_test)

In [None]:
# Square root of the test-set size.
# NOTE(review): presumably a rule-of-thumb starting point for choosing
# n_neighbors in the KNN section below — confirm the intended use.
# (`math` is imported with the other libraries in the first cell.)
math.sqrt(len(y_test))

## 3.1 DEFINE CLASSIFIER (SVM)

In [None]:
# Build an RBF-kernel support vector classifier and fit it on the scaled
# training data; the fitted estimator is displayed as the cell output.
classifier_svm = SVC(kernel = 'rbf', random_state = 0).fit(X_train, y_train)
classifier_svm

In [None]:
# Predicted labels of the SVM for the held-out test rows.
y_pred_svm = classifier_svm.predict(X=X_test)

In [None]:
# Confusion matrix of the SVM on the test set
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cm_svm = confusion_matrix(y_true=y_test, y_pred=y_pred_svm)
print(cm_svm)

In [None]:
# F1 score of the SVM on the test set.
f1_score(y_true=y_test, y_pred=y_pred_svm)

In [None]:
# Fraction of test rows the SVM labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred_svm)

## 3.2 DEFINE CLASSIFIER (KNN)

In [None]:
# Build a 7-nearest-neighbours classifier using Euclidean distance and fit it
# on the scaled training data; the fitted estimator is displayed as the cell output.
classifier_knn = KNeighborsClassifier(n_neighbors=7, p=2, metric='euclidean').fit(X_train, y_train)
classifier_knn

In [None]:
# Predicted labels of the KNN model for the held-out test rows.
y_pred_knn = classifier_knn.predict(X=X_test)

In [None]:
# Confusion matrix of the KNN model on the test set
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cm_knn = confusion_matrix(y_true=y_test, y_pred=y_pred_knn)
print(cm_knn)

In [None]:
# F1 score of the KNN model on the test set.
f1_score(y_true=y_test, y_pred=y_pred_knn)

In [None]:
# Fraction of test rows the KNN model labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred_knn)

## 3.3 DEFINE CLASSIFIER (Decision Trees)

In [None]:
# Build a decision tree that splits on entropy and fit it on the scaled
# training data; the fitted estimator is displayed as the cell output.
classifier_dt = DecisionTreeClassifier(criterion = 'entropy', random_state = 0).fit(X_train, y_train)
classifier_dt

In [None]:
# Predicted labels of the decision tree for the held-out test rows.
y_pred_dt = classifier_dt.predict(X=X_test)

In [None]:
# Confusion matrix of the decision tree on the test set
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cm_dt = confusion_matrix(y_true=y_test, y_pred=y_pred_dt)
print(cm_dt)

In [None]:
# F1 score of the decision tree on the test set.
f1_score(y_true=y_test, y_pred=y_pred_dt)

In [None]:
# Fraction of test rows the decision tree labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred_dt)

## 3.4 DEFINE CLASSIFIER (Random Forest)

In [None]:
# Build a 100-tree random forest that splits on entropy and fit it on the scaled
# training data; the fitted estimator is displayed as the cell output.
classifier_rf = RandomForestClassifier(n_estimators = 100, criterion = 'entropy', random_state = 0).fit(X_train, y_train)
classifier_rf

In [None]:
# Predicted labels of the random forest for the held-out test rows.
y_pred_rf = classifier_rf.predict(X=X_test)

In [None]:
# Confusion matrix of the random forest on the test set
# (scikit-learn convention: rows are true labels, columns are predicted labels).
cm_rf = confusion_matrix(y_true=y_test, y_pred=y_pred_rf)
print(cm_rf)

In [None]:
# F1 score of the random forest on the test set.
f1_score(y_true=y_test, y_pred=y_pred_rf)

In [None]:
# Fraction of test rows the random forest labelled correctly.
accuracy_score(y_true=y_test, y_pred=y_pred_rf)

## COMBINING ACCURACY RESULTS:

In [None]:
# Print every model's test-set accuracy as a percentage rounded to two decimals,
# one line per model, in the order the models were trained above.
for label, y_pred in (
    ('SVM:', y_pred_svm),
    ('KNN:', y_pred_knn),
    ('Decision Trees:', y_pred_dt),
    ('Random Forest:', y_pred_rf),
):
    print(label + str(round(accuracy_score(y_test, y_pred)*100,2)) + '%')

## COMBINING F1 RESULTS:

In [None]:
# Print every model's test-set F1 score scaled by 100 and rounded to two decimals,
# one line per model, in the order the models were trained above.
for label, y_pred in (
    ('SVM:', y_pred_svm),
    ('KNN:', y_pred_knn),
    ('Decision Trees:', y_pred_dt),
    ('Random Forest:', y_pred_rf),
):
    print(label + str(round(f1_score(y_test, y_pred)*100,2)) + '%')

## Predicting Result

In [None]:
# NOTE(review): this cell is disabled. As written it calls `classifier`, a name
# that no cell in this notebook defines; substitute one of the fitted models
# (classifier_svm, classifier_knn, classifier_dt or classifier_rf) before
# re-enabling it.
# The sample holds 13 values — the same feature values as the first row shown
# in the dataset preview — and is scaled with the fitted `sc_X` before prediction.
# new_list = [[63,1,3,145,233,1,0,150,0,2.3,0,0,1]]

# new_list = sc_X.transform(new_list)
# x_predict = classifier.predict(new_list)

# if x_predict == 0:
#     print('No Heart Disease')
# else:
#     print('Heart Disease')