In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from pandas_profiling import ProfileReport
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVC

import warnings
# Notebook-wide settings. The blanket "ignore" filter silences every warning
# category (including deprecation/future warnings from the plotting and ML
# libraries), so remove it temporarily when debugging unexpected behaviour.
warnings.simplefilter("ignore")
plt.style.use("ggplot")

# Data Preprocessing

In [None]:
# Read the heart-disease CSV and discard exact duplicate rows in one expression,
# then label the (default) row index as 'id'.
df = pd.read_csv("/kaggle/input/heart-disease-uci/heart.csv").drop_duplicates()
df.index.name = "id"

# Preview ten randomly chosen rows (unseeded, so the preview differs per run).
df.sample(10)

In [None]:
# Share of missing values per column, expressed as a percentage of all rows.
null_counts = df.isnull().sum()
percent_missing = null_counts * 100 / len(df)

# One row per column: its name alongside the percentage of nulls it contains.
missing_value_df = pd.DataFrame(
    dict(column_name=df.columns, percent_missing=percent_missing)
)
missing_value_df

# Data Distribution

In [None]:
# Distribution of the target classes: raw counts first, then a bar chart.
print(df.target.value_counts())

# Pass the column by keyword. From seaborn 0.12 the only positional parameter
# of countplot is `data`, so a bare positional Series is no longer read as `x`
# (0.11 still accepted it but emitted a FutureWarning).
sns.countplot(x=df['target'])
plt.show()  # render the figure without echoing the Axes repr

In [None]:
# Distribution of the numeric variables. DataFrame.hist draws one histogram per
# numeric column, so integer-coded columns are plotted too, not only
# continuous ones.
df.hist(figsize = (10,10))
plt.show()  # render the grid without echoing the array of Axes reprs

# Split Data into Train and Test Sets

In [None]:
# Separate the label column from the features, then hold out 25% of the rows
# for testing. The fixed random_state keeps the partition identical across runs.
y = df["target"]
x = df.drop(columns="target")
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=37,
)

# Data Scaling

In [None]:
# Standardize the features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits, so nothing about
# the test rows influences the scaling parameters.
SS = StandardScaler().fit(x_train)
x_train, x_test = SS.transform(x_train), SS.transform(x_test)

In [None]:
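# #Normalize Data (disabled; if enabled after the StandardScaler cell it would rescale the already-standardized arrays)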
# Norm = MinMaxScaler()
# Norm.fit(x_train)
# x_train = Norm.transform(x_train)
# x_test = Norm.transform(x_test)

# Train Model

## K-Nearest Neighbors

In [None]:
# Sweep the number of neighbours for k-NN and record train/test accuracy.
# k_values is defined once so the loop and the plot's x-axis cannot drift apart.
k_values = range(2, 10)
train_accuracy = []
test_accuracy = []
for i in k_values:
    KN_model = KNeighborsClassifier(n_neighbors = i)
    KN_model.fit(x_train,y_train)
    KN_predictions = KN_model.predict(x_test)

    train_accuracy.append(KN_model.score(x_train, y_train))
    # Reuse the predictions computed above instead of calling score(), which
    # would predict the whole test set a second time; for a classifier,
    # score() is the same mean accuracy that accuracy_score returns.
    test_accuracy.append(accuracy_score(y_test, KN_predictions))

    print("for n =",i ,":",test_accuracy[-1])

# After the loop KN_model / KN_predictions belong to the last k in the sweep,
# so label the report to make clear which model it describes.
print("Classification report for n =", KN_model.n_neighbors)
print(classification_report(y_test, KN_predictions))

#Generate plot
plt.title('k-NN Varying number of neighbors')
plt.plot(k_values, test_accuracy, label='Testing Accuracy')
plt.plot(k_values, train_accuracy, label='Training accuracy')
plt.legend()
plt.xlabel('Number of neighbors')
plt.ylabel('Accuracy')
plt.show()

## Support Vector Machine (SVM)

In [None]:
# Fit a support vector classifier with its default hyperparameters, then report
# accuracy followed by the per-class precision/recall/F1 table on the test split.
SVM_model = SVC().fit(x_train, y_train)
SVM_predictions = SVM_model.predict(x_test)
print(
    accuracy_score(y_test, SVM_predictions),
    classification_report(y_test, SVM_predictions),
    sep="\n",
)

## Random Forest

In [None]:
# Random forest baseline evaluated on the held-out test split.
# Bootstrap sampling and per-split feature sub-sampling are random, so the seed
# is fixed (same value as the train/test split) to make the reported scores
# reproducible on every Restart & Run All.
RF_model = RandomForestClassifier(random_state=37)
RF_model.fit(x_train, y_train)
RF_predictions = RF_model.predict(x_test)
print(accuracy_score(y_test,RF_predictions))
print(classification_report(y_test, RF_predictions))

## Decision Tree

In [None]:
# Decision tree baseline evaluated on the held-out test split.
# scikit-learn permutes the candidate features at every split, so ties between
# equally good splits are broken randomly; fixing the seed (same value as the
# train/test split) makes the fitted tree and its scores reproducible.
DT_model = DecisionTreeClassifier(random_state=37)
DT_model.fit(x_train,y_train)
DT_predictions = DT_model.predict(x_test)
print(accuracy_score(y_test,DT_predictions))
print(classification_report(y_test, DT_predictions))