#### Import Libraries

In [None]:
# Deal with data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Splitting Data & Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Evaluation
from sklearn.metrics import classification_report, accuracy_score

# Algorithms
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBRFClassifier

# Prevent Warnings
import warnings
warnings.filterwarnings('ignore')

#### Read Data

In [None]:
# Load the heart-attack dataset CSV from the Kaggle input directory into a
# DataFrame; every later cell reads from `df`.
df = pd.read_csv('../input/heart-attack-analysis-prediction-dataset/heart.csv')

In [None]:
# Preview the first rows to sanity-check that the file loaded as expected.
df.head()

#### Exploratory Data Analysis

In [None]:
# Headline numbers: average age and the sex split. `sex` is a 0/1 column, so
# its mean is the share of rows coded 1 (mapped to 'Male' in the plots below).
mean_age = df.age.mean()
male_share = df.sex.mean()

print('Mean of Age:', round(mean_age))
print('Male Ratio:', round(male_share, 2))
print('Female Ratio:', round(1 - male_share, 2))

In [None]:
# Print column dtypes and non-null counts to check for missing values.
df.info()

#### Data Visualization

In [None]:
# How many female & male?

plt.figure(dpi = 90)
# Translate the 0/1 codes into readable labels for the x-axis.
sex_data = df.sex.map({1: 'Male', 0: 'Female'})
# The series must be passed as `x=`: seaborn reserves the only positional
# slot for `data`, so a bare positional series is misinterpreted or rejected.
sns.countplot(x = sex_data, palette = 'mako', alpha = 0.8)
plt.show()

In [None]:
# How many female & male based on target value (output)?

plt.figure(dpi = 90)
# Translate the 0/1 codes into readable labels for the x-axis.
sex_data = df.sex.map({1: 'Male', 0: 'Female'})
# Pass the series as `x=` (seaborn's only positional argument is `data`);
# `hue` splits each bar by the target column.
sns.countplot(x = sex_data, hue = df.output, palette = 'mako', alpha = 0.8)
plt.show()

In [None]:
# Age based on sex

# One density curve per value of `sex`, drawn on a single wide facet.
fig = sns.FacetGrid(df, palette = 'mako', hue = 'sex', aspect = 5)
# `fill` is the current name for kdeplot's deprecated `shade` keyword.
fig.map(sns.kdeplot, 'age', fill = True, palette = 'mako')
fig.add_legend()
plt.show()

In [None]:
# Cholesterol based on sex

# One density curve per value of `sex`, drawn on a single wide facet.
fig = sns.FacetGrid(df, palette = 'mako', hue = 'sex', aspect = 5)
# `fill` is the current name for kdeplot's deprecated `shade` keyword.
fig.map(sns.kdeplot, 'chol', fill = True, palette = 'mako')
fig.add_legend()
plt.show()

#### Model Data

In [None]:
# Separate the label column (`output`) from the predictor columns.
Y = df['output']
X = df.drop(columns = 'output')

In [None]:
# Hold out part of the data for evaluation. No test_size is given, so
# scikit-learn's default proportion applies; random_state pins the shuffle
# so the split is reproducible.
x_train, x_test, y_train, y_test = train_test_split(X, Y, random_state = 10)

In [None]:
# Standardize the features. The scaler learns its statistics from the
# training split only and the same transform is then applied to the test
# split, so no test information leaks into the scaling.
std = StandardScaler()
x_train = std.fit_transform(x_train)
x_test = std.transform(x_test)

In [None]:
# Instantiate the three candidate classifiers, all with default settings.
xgb, logReg, knn = (
    XGBRFClassifier(),
    LogisticRegression(),
    KNeighborsClassifier(),
)

In [None]:
# Train the XGBRFClassifier on the standardized training split.
xgb.fit(x_train, y_train)

In [None]:
# Train the logistic-regression model on the standardized training split.
logReg.fit(x_train, y_train)

In [None]:
# Train the default-K nearest-neighbours model on the standardized training split.
knn.fit(x_train, y_train)

In [None]:
# Report XGB performance on the training split and on the held-out split.
# classification_report takes (y_true, y_pred); with the arguments reversed
# the per-class precision and recall are swapped and `support` counts
# predictions instead of true labels.
print('XGB Train')
y_pred = xgb.predict(x_train)
print(classification_report(y_train, y_pred))

print('XGB Test')
y_pred = xgb.predict(x_test)
print(classification_report(y_test, y_pred))

In [None]:
# Report logistic-regression performance on the training split and on the
# held-out split. classification_report takes (y_true, y_pred); with the
# arguments reversed the per-class precision and recall are swapped and
# `support` counts predictions instead of true labels.
print('LogReg Train')
y_pred = logReg.predict(x_train)
print(classification_report(y_train, y_pred))

print('LogReg Test')
y_pred = logReg.predict(x_test)
print(classification_report(y_test, y_pred))

In [None]:
# Report default-K KNN performance on the training split and on the held-out
# split. classification_report takes (y_true, y_pred); with the arguments
# reversed the per-class precision and recall are swapped and `support`
# counts predictions instead of true labels.
print('KNN Train')
y_pred = knn.predict(x_train)
print(classification_report(y_train, y_pred))

print('KNN Test')
y_pred = knn.predict(x_test)
print(classification_report(y_test, y_pred))

#### Find Best `K` for KNN

In [None]:
# Sweep K from 1 to 100, recording (k, train accuracy, test accuracy) for a
# KNN model fitted with each K.
result = []

for k in range(1, 101):
    new_knn = KNeighborsClassifier(n_neighbors = k)
    new_knn.fit(x_train, y_train)
    # accuracy_score is called in its documented (y_true, y_pred) order.
    acc_train = accuracy_score(y_train, new_knn.predict(x_train))
    acc_test = accuracy_score(y_test, new_knn.predict(x_test))
    result.append((k, acc_train, acc_test))

# Highest test accuracy wins; train accuracy breaks ties, and max() keeps the
# smallest K among entries that are still tied.
# NOTE(review): K is selected on the test split, so the reported test accuracy
# is an optimistic estimate; a validation split or cross-validation would
# avoid that.
best_result = max(result, key = lambda item: (item[2], item[1]))
print('K:', best_result[0])
print('Train Acc:', best_result[1])
# Index 2 holds the test accuracy (index 1 is the train accuracy).
print('Test Acc:', best_result[2])

In [None]:
# Refit KNN with the chosen neighbourhood size.
# NOTE(review): 8 is hard-coded; presumably it is the K printed by the search
# cell (best_result[0]) — confirm, or read it from best_result directly.
best_knn = KNeighborsClassifier(n_neighbors = 8)
best_knn.fit(x_train, y_train)

In [None]:
# Report the tuned KNN model on the training split and on the held-out split.
# classification_report takes (y_true, y_pred); with the arguments reversed
# the per-class precision and recall are swapped and `support` counts
# predictions instead of true labels.
print('KNN Train')
y_pred = best_knn.predict(x_train)
print(classification_report(y_train, y_pred))

print('KNN Test')
y_pred = best_knn.predict(x_test)
print(classification_report(y_test, y_pred))

## `KNN (K = 8) WON`