In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import set_option
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import precision_score, recall_score, confusion_matrix, accuracy_score
from sklearn.metrics import classification_report, f1_score,roc_auc_score,cohen_kappa_score
import os

In [None]:
# Read the heart-disease CSV from the Kaggle input directory into a DataFrame.
heart_csv_path = '/kaggle/input/heart-disease-uci/heart.csv'
dataset = pd.read_csv(heart_csv_path)


In [None]:
# `pd.read_csv` already returns a DataFrame, so re-wrapping it in
# `pd.DataFrame(...)` was a no-op and has been removed.
# Preview the first rows of the loaded table.
dataset.head()

In [None]:
# Report the dtype pandas inferred for each column.
column_dtypes = dataset.dtypes
print(column_dtypes)

In [None]:
# Console display settings for the summary table printed below.
# The fully qualified key 'display.precision' is used because the bare
# pattern 'precision' is ambiguous on pandas versions that also define
# 'styler.format.precision', where it raises OptionError.
set_option('display.width', 100)
set_option('display.precision', 3)

# Summary statistics as produced by DataFrame.describe().
description = dataset.describe()
print(description)

In [None]:
# Count missing values per column (`isna` is the canonical name of `isnull`).
dataset.isna().sum()

In [None]:
# Number of rows for each distinct value of the `target` column.
rows_by_target = dataset.groupby('target')
target_counts = rows_by_target.size()
print(target_counts)

In [None]:
# Console display settings for the correlation table printed below.
# 'display.precision' is spelled out in full: the bare pattern 'precision'
# is ambiguous on pandas versions that also define
# 'styler.format.precision', where it raises OptionError.
set_option('display.width', 100)
set_option('display.precision', 3)

# Pairwise Pearson correlation between all columns.
correlations = dataset.corr(method='pearson')
print(correlations)

In [None]:

# One histogram per column, drawn on a single 10x10-inch figure.
hist_axes = dataset.hist(figsize=(10, 10))
plt.show()

In [None]:
# Histogram of the `age` column only: 15 bins, black bars drawn at 90% of
# the bin width, no grid lines.
dataset.hist(
    column='age',
    bins=15,
    rwidth=0.9,
    color='#000000',
    grid=False,
    zorder=2,
    figsize=(10, 10),
)

In [None]:
# Density plot per column, each in its own subplot on a 4x4 grid with
# independent x-axes.
density_options = dict(
    kind='density',
    subplots=True,
    layout=(4, 4),
    sharex=False,
    figsize=(10, 10),
)
dataset.plot(**density_options)
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlation matrix of the raw table
# (DataFrame.corr with its default method).
corr_matrix = dataset.corr()
fig, ax = plt.subplots(figsize=(10, 5))
sns.heatmap(data=corr_matrix, ax=ax, annot=True, linewidths=0.5)

# Preparing Data for Machine Learning

In [None]:
# Preview the first five rows before the categorical columns are encoded.
dataset.head(n=5)

### Editing Categorical Variables

In [None]:
# Columns treated as categorical; each one is replaced by indicator
# columns named "<column>_<value>".
cat_col = [
    "sex", "cp", "fbs", "restecg",
    "exang", "slope", "ca", "thal",
]
dataset = pd.get_dummies(data=dataset, columns=cat_col)

In [None]:
# List the column names after one-hot encoding.
dataset.columns

In [None]:
# Remove the "_0" indicator column of every encoded variable.
zero_level_dummies = [
    "sex_0", "cp_0", "fbs_0", "restecg_0",
    "exang_0", "slope_0", "ca_0", "thal_0",
]
dataset = dataset.drop(columns=zero_level_dummies)

In [None]:
# Preview the first five rows after the "_0" indicator columns were dropped.
dataset.head(n=5)

In [None]:
# List the remaining column names after the "_0" indicators were dropped.
dataset.columns

In [None]:
# Annotated correlation heatmap of the encoded table, on a larger canvas
# because there are more columns than in the raw table.
corr_matrix = dataset.corr()
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(data=corr_matrix, ax=ax, annot=True, linewidths=1)

In [None]:
# Remove two more indicator columns.
# NOTE(review): presumably chosen from the correlation heatmap above - confirm.
redundant_dummies = ["slope_2", "thal_2"]
dataset = dataset.drop(columns=redundant_dummies)

In [None]:
# Annotated correlation heatmap again, now without `slope_2` and `thal_2`.
corr_matrix = dataset.corr()
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(data=corr_matrix, ax=ax, annot=True, linewidths=1)

In [None]:
# List the final set of column names used for modelling.
dataset.columns

In [None]:
# Preview the first five rows of the final modelling table.
dataset.head(n=5)

### Preparing Training and Test Data

In [None]:
# Label vector: the `target` column. Feature matrix: every other column.
Y = dataset["target"]
X = dataset.drop(columns=["target"])

In [None]:
# Hold out 25% of the rows for testing; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.25, random_state=42
)

In [None]:
# Print the training feature frame as plain text.
print(x_train)

# KNeighborsClassifier

In [None]:
# Baseline: k-nearest-neighbours with default settings, trained on the
# unscaled features (`fit` returns the estimator itself).
model = KNeighborsClassifier().fit(x_train, y_train)

# Predictions on the training set (y_pred1) and the held-out set (y_pred).
y_pred1 = model.predict(x_train)
y_pred = model.predict(x_test)

In [None]:
# Accuracy of the baseline model; the printed labels read
# "accuracy rate for training data" and "accuracy for test data".
# Arguments are given in the documented (y_true, y_pred) order; accuracy
# is symmetric, so the values are unchanged.
train_accuracy = accuracy_score(y_train, y_pred1)
test_accuracy = accuracy_score(y_test, y_pred)
print('eğitim verisi için doğruluk oranı: ', train_accuracy)
print('test verisi için doğruluk: ', test_accuracy)

# Standardization

In [None]:
# Standardize the features. The scaler's mean and variance are learned
# from the training set only and then re-used for the test set.
# Calling fit_transform on the test set would re-fit the scaler on test
# data, so train and test would be scaled with different statistics.
sc = StandardScaler()
x_train_std = sc.fit_transform(x_train)
x_test_std = sc.transform(x_test)

# Show the first five standardized training rows.
x_train_std[0:5]

# Feature Extraction - PCA (Principal Component Analysis)

In [None]:
from sklearn.decomposition import PCA

# Fit a full PCA (all components kept) on the standardized training data
# to see how much variance each component explains.
# PCA is fitted on the training set only: a second fit on the test set
# would overwrite the model, and the ratios below would then describe the
# test data instead.
pca = PCA(n_components=None, random_state=42)
pca.fit(x_train_std)
exp_var = pca.explained_variance_ratio_

# Print plain decimals instead of scientific notation.
np.set_printoptions(suppress=True)
print(exp_var)

In [None]:
# Explained variance per component as a percentage, rounded to one decimal.
variance_percent = pca.explained_variance_ratio_ * 100
exp_var = np.round(variance_percent, decimals=1)

# Tick labels "PC1", "PC2", ... - one per component.
labels = [f'PC{component}' for component in range(1, len(exp_var) + 1)]

In [None]:
# Bar chart of the percentage of variance explained by each component.
component_positions = range(1, len(exp_var) + 1)
plt.bar(x=component_positions, height=exp_var, tick_label=labels)
plt.xlabel("principal component")
plt.ylabel("percentage of variance explained")
plt.show()

In [None]:
# Reduce the standardized features to 14 principal components.
# The components are learned from the training set only; the test set is
# projected onto those same components with `transform`. Re-fitting on
# the test set would put the two sets in different coordinate systems and
# make the test-set evaluation meaningless.
pca = PCA(n_components=14, random_state=42)

x_train_pca = pca.fit_transform(x_train_std)
x_test_pca = pca.transform(x_test_std)

# Share of the training-set variance captured by each kept component.
exp_var = pca.explained_variance_ratio_

# Print plain decimals instead of scientific notation.
np.set_printoptions(suppress=True)
print(exp_var)

In [None]:
# Print the PCA-projected training array as plain text.
print(x_train_pca)

# Parameter Optimization

In [None]:
# Disabled IPython help lookup, kept for reference: ?KNeighborsClassifier

In [None]:
# Unfitted k-NN estimator with default settings; used as the base
# estimator for the grid search below.
knc = KNeighborsClassifier()

In [None]:
# Hyper-parameter grid: every integer k from 1 to 49, crossed with a set
# of leaf sizes.
# NOTE(review): per the scikit-learn docs, leaf_size affects tree
# build/query speed and memory rather than predictions - confirm that
# tuning it is intended.
knc_params = {
    "n_neighbors": np.arange(1, 50),
    "leaf_size": [2, 5, 10, 20, 30, 50, 80, 100],
}

In [None]:
# Exhaustive search over `knc_params` with 10-fold cross-validation,
# scored by accuracy, using all available cores and verbose progress logs.
knc_search = GridSearchCV(
    estimator=knc,
    param_grid=knc_params,
    scoring="accuracy",
    cv=10,
    n_jobs=-1,
    verbose=2,
)
# `fit` returns the search object itself.
knc_tuned = knc_search.fit(x_train_pca, y_train)

In [None]:
# Mean cross-validated accuracy of the best parameter combination.
knc_tuned.best_score_

In [None]:
# Parameter combination that achieved the best cross-validated score.
knc_tuned.best_params_

In [None]:
# Train the final classifier with the parameters selected by the grid
# search. They are read from `knc_tuned.best_params_` rather than
# hard-coded, so the final model always matches the search result, even
# when the data or preprocessing changes.
knc_final = KNeighborsClassifier(**knc_tuned.best_params_).fit(x_train_pca, y_train)

# Predict the held-out set and report its accuracy.
y_pred = knc_final.predict(x_test_pca)
accuracy_score(y_test, y_pred)

# Precision and Recall 

In [None]:
# Precision and recall of the final model on the held-out set.
test_precision = precision_score(y_test, y_pred)
test_recall = recall_score(y_test, y_pred)
print("Precision:", test_precision)
print("Recall:", test_recall)


In [None]:
# Confusion matrix of the held-out set (rows: true class, columns:
# predicted class, per scikit-learn's convention).
test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)

In [None]:
# Heatmap of the held-out confusion matrix, with integer cell annotations.
print('test')
cm = confusion_matrix(y_test, y_pred)
# `center` takes a numeric value at which to centre a diverging colormap.
# The previous `center=True` was read as 1, which is not meaningful for
# count data, so it is omitted and the default sequential colormap is used.
sns.heatmap(cm, annot=True, fmt='d')

In [None]:
# Per-class precision, recall, F1 and support for the held-out set.
report_text = classification_report(y_test, y_pred)
print(report_text)

In [None]:
# ROC AUC must be computed from continuous scores, not hard 0/1 labels:
# with labels the curve collapses to a single operating point.
# Use the predicted probability of the positive class instead.
# Assumes a binary target whose positive class is the second entry of
# `knc_final.classes_` - TODO confirm.
y_score = knc_final.predict_proba(x_test_pca)[:, 1]

# The first printed label reads "area under the ROC curve".
print('ROC eğrisindeki alanı:', roc_auc_score(y_test, y_score))
print("f1 score", f1_score(y_test, y_pred))
print('cohen kappa test', cohen_kappa_score(y_test, y_pred))
