In [None]:
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import scale, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

In [None]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=FutureWarning) 

In [None]:
# Read the semicolon-delimited CSV and discard the 'id' column in one chain.
df = (
    pd.read_csv('../input/cardiovascular-disease-dataset/cardio_train.csv', delimiter=';')
    .drop(columns=['id'])
)

## Dataset Structure

In [None]:
# Show five randomly chosen rows.
df.sample(n=5)

In [None]:
# Summary statistics, transposed so each column of df becomes a row.
df.describe().transpose()

In [None]:
# Target is the 'cardio' column; every remaining column is a feature.
y = df['cardio']
X = df.drop(columns=['cardio'])

In [None]:
# First five target values (head() defaults to five rows).
y.head()

In [None]:
# First five feature rows (head() defaults to five rows).
X.head()

## Logistic Regression

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Model

In [None]:
# Build the logistic-regression estimator, then fit it on the training split.
log_model = LogisticRegression(solver="liblinear")
log_model.fit(X_train, y_train)

#### Prediction Values

In [None]:
# Predict on the test split with the fitted logistic-regression model,
# then preview the first ten predictions.
y_pred = log_model.predict(X_test)
y_pred[:10]

#### Real Values

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test.head(10)

#### Accuracy Rate

In [None]:
# Accuracy of the logistic-regression predictions on the test split.
log_rate = accuracy_score(y_true=y_test, y_pred=y_pred)
print(log_rate)

## K-Nearest Neighbors - KNN

In [None]:
# Re-create the 80/20 split with seed 42 for the KNN section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Model

In [None]:
# K-nearest-neighbours classifier with default settings, fitted on the training split.
knn_model = KNeighborsClassifier()
knn_model.fit(X_train, y_train)

#### Prediction Values

In [None]:
# Predict on the test split with the fitted KNN model,
# then preview the first ten predictions.
y_pred = knn_model.predict(X_test)
y_pred[:10]

#### Real Values

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test.head(10)

#### Accuracy Rate

In [None]:
# Accuracy of the KNN predictions on the test split.
knn_rate = accuracy_score(y_true=y_test, y_pred=y_pred)
print(knn_rate)

## Support Vector Machines - SVM

In [None]:
# Re-create the 80/20 split with seed 42 for the SVM section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Model

In [None]:
# Linear-kernel support vector classifier, fitted on the training split.
# NOTE(review): the features are passed in unscaled here; fitting may be slow
# on large inputs — confirm whether scaling is wanted.
svm_model = SVC(kernel="linear")
svm_model.fit(X_train, y_train)

#### Prediction Values

In [None]:
# Predict on the test split with the fitted SVM model,
# then preview the first ten predictions.
y_pred = svm_model.predict(X_test)
y_pred[:10]

#### Real Values

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test.head(10)

#### Accuracy Rate

In [None]:
# Accuracy of the SVM predictions on the test split.
svm_rate = accuracy_score(y_true=y_test, y_pred=y_pred)
print(svm_rate)

## Artificial Neural Network - MLPClassifier

In [None]:
# Re-create the 80/20 split with seed 42 for the neural-network section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Model

In [None]:
# Scaler instance used in the following cells to standardise the features.
scaler = StandardScaler()

In [None]:
# Learn the scaling statistics from the training features and apply them in one step.
X_train = scaler.fit_transform(X_train)

In [None]:
# Transform the test features with the scaler already fitted on the training
# split. Refitting on X_test would standardise train and test with different
# means/variances and leak test-set statistics into the evaluation.
X_test = scaler.transform(X_test)

In [None]:
# Multi-layer perceptron fitted on the standardised training features.
# random_state pins the weight initialisation and shuffling so the reported
# accuracy is reproducible across runs.
mlpc_model = MLPClassifier(random_state=42).fit(X_train, y_train)

#### Prediction Values

In [None]:
# Predict on the (scaled) test split with the fitted MLP model,
# then preview the first ten predictions.
y_pred = mlpc_model.predict(X_test)
y_pred[:10]

#### Real Values

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test.head(10)

#### Accuracy Rate

In [None]:
# Accuracy of the MLP predictions on the test split.
mlpc_rate = accuracy_score(y_true=y_test, y_pred=y_pred)
print(mlpc_rate)

## Classification and Regression Tree - CART


In [None]:
# Re-create the 80/20 split with seed 42 from the original X and y; this also
# replaces the scaled arrays left in X_train / X_test by the previous section.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#### Model

In [None]:
# Decision tree fitted on the training split.
# random_state fixes the tie-breaking between equally good splits so the
# reported accuracy is reproducible across runs.
cart_model = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

#### Prediction Values

In [None]:
# Predict on the test split with the fitted decision-tree model,
# then preview the first ten predictions.
y_pred = cart_model.predict(X_test)
y_pred[:10]

#### Real Values

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test.head(10)

#### Accuracy Rate

In [None]:
# Accuracy of the decision-tree predictions on the test split.
cart_rate = accuracy_score(y_true=y_test, y_pred=y_pred)
print(cart_rate)

## Random Forests

In [None]:
# Use the same 80/20 split (seed 42) as every other model in this notebook so
# the final accuracy comparison is made on an identical test set.
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=42)

#### Model

In [None]:
# Random forest fitted on the training split.
# random_state pins the bootstrap sampling and feature selection so the
# reported accuracy is reproducible across runs.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

#### Prediction Values

In [None]:
# Predict on the test split with the fitted random-forest model,
# then preview the first ten predictions.
y_pred = rf_model.predict(X_test)
y_pred[:10]

#### Real Values

In [None]:
# First ten true targets of the test split, for comparison with the predictions.
y_test.head(10)

#### Accuracy Rate

In [None]:
# Accuracy of the random-forest predictions on the test split.
rf_rate = accuracy_score(y_true=y_test, y_pred=y_pred)
print(rf_rate)

## Comparison of Classification Algorithms

In [None]:
# Collect every model's test accuracy (as a percentage) into one table.
# The frame is built in a single constructor call: DataFrame.append was removed
# in pandas 2.0, and appending row by row also left every row with index
# label 0 and an object-dtype Accuracy column.
results = pd.DataFrame(
    {
        "Models": [
            "LogisticRegression",
            "KNeighborsClassifier",
            "SVC",
            "MLPClassifier",
            "DecisionTreeClassifier",
            "RandomForestClassifier",
        ],
        "Accuracy": [
            log_rate * 100,
            knn_rate * 100,
            svm_rate * 100,
            mlpc_rate * 100,
            cart_rate * 100,
            rf_rate * 100,
        ],
    }
)

In [None]:
# Horizontal bar chart of accuracy per model, labelled through the returned Axes.
ax = sns.barplot(data=results, x='Accuracy', y='Models', color="r")
ax.set_xlabel('Accuracy %')
ax.set_title(' Comparison of Classification Algorithms');

In [None]:
# Display the table of model names and their accuracy percentages.
results