# Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns
import sys
# Silence every warning, but only when no warning options were supplied to the
# interpreter (sys.warnoptions is empty), so an explicit -W setting still wins.
# NOTE(review): this also hides deprecation and data-conversion warnings raised
# by the plotting and modelling cells below.
if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")
# Apply seaborn's "darkgrid" theme to all subsequent plots.
sns.set_theme(style="darkgrid")
from sklearn.metrics import accuracy_score, precision_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.naive_bayes import BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, log_loss
from sklearn.model_selection import train_test_split

# Reading the csv file

In [None]:
# Load the heart-attack CSV from the Kaggle input directory into a DataFrame.
heart_csv_path = "/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv"
dataset = pd.read_csv(heart_csv_path)

# Some information about the dataset

In [None]:
# Print a concise summary of the frame: columns, non-null counts and dtypes.
dataset.info()

In [None]:
# Preview the first rows (head() returns five by default; the notebook renders the result).
dataset.head()

# Correlation of attributes

In [None]:
# Annotated heatmap of the pairwise correlations between the DataFrame's columns.
colormap = plt.cm.viridis
corr_matrix = dataset.corr()

plt.figure(figsize=(15, 15))
plt.title('Dataset Correlation of attributes', y=1.05, size=19)
sns.heatmap(
    corr_matrix,
    annot=True,
    cmap=colormap,
    square=True,
    vmax=1.0,
    linewidths=0.1,
    linecolor='white',
)

# Total count of heart attack

In [None]:
# Bar chart of how many rows fall into each value of the `output` column.
sns.countplot(data=dataset, x="output")



# Bivariate analysis 

* ###  Pain type and heart attack

#### Reminder

##### cp: chest pain type
* Value 1: typical angina
* Value 2: atypical angina
* Value 3: non-anginal pain
* Value 4: asymptomatic

In [None]:
# Bar plot of `output` aggregated per chest-pain type (`cp`); seaborn's
# default bar estimator is the mean, so each bar is the share of rows with
# output == 1 when `output` is a 0/1 column.
plt.figure(figsize=(12, 4))
sns.set_color_codes()
# The data vectors must be passed by keyword: seaborn >= 0.12 no longer
# accepts x and y positionally and raises a TypeError for the old call form.
sns.barplot(x=dataset["cp"], y=dataset["output"])



* ### Age and heart attack

In [None]:
# Distribution of patient age: density-normalised histogram with a KDE overlay.
plt.figure(figsize=(18, 5))
sns.set_color_codes()
# sns.distplot is deprecated and scheduled for removal; this is the histplot
# call seaborn documents as its equivalent (density scale, KDE extended by
# cut=3 bandwidths beyond the data range, as distplot did).
sns.histplot(dataset["age"], kde=True, stat="density", kde_kws={"cut": 3})

In [None]:
# Categorical scatter (catplot's default kind) of age for each `output` value.
sns.catplot(data=dataset, x="output", y="age")

* ### Age and cp

In [None]:
# Box plot of the age distribution within each chest-pain type.
sns.set_color_codes()
plt.figure(figsize=(12, 4))
sns.boxplot(data=dataset, x="cp", y="age")

* ### Heart attack by gender

In [None]:
# Contingency table of output (rows) by sex (columns), drawn as stacked bars.
output_by_sex = pd.crosstab(dataset['output'], dataset['sex'])
output_by_sex.plot.bar(stacked=True)

In [None]:
# Pairwise relationships between the selected columns, coloured by outcome.
col = [
    "age",
    "trtbps",
    "chol",
    "thalachh",
    "oldpeak",
    "output",
]
pair_data = dataset[col]
g = sns.pairplot(data=pair_data, hue='output', diag_kind="kde")
# Overlay 4-level KDE contours in dark grey on the lower-triangle panels.
g.map_lower(sns.kdeplot, levels=4, color=".2")
plt.show()

# Modelling

In [None]:
# Feature matrix: every column except the last one.
X = dataset.iloc[:, :-1].values
# Target vector: the last column (presumably `output` -- confirm column order).
# A scalar column index yields a 1-D array of shape (n_samples,), which is what
# scikit-learn estimators expect; the previous slice `-1:` produced an
# (n_samples, 1) column vector and triggered a DataConversionWarning on fit.
y = dataset.iloc[:, -1].values

In [None]:

# Hold out 33% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=0,
)

In [None]:
# Standardise features to zero mean / unit variance.
sc = StandardScaler()
# Learn the scaling statistics (mean, std) from the training data only.
X_train = sc.fit_transform(x_train)
# Re-use the training statistics on the test set. Calling fit_transform here
# would re-fit the scaler on test data, so train and test would be scaled
# differently and test-set information would leak into preprocessing.
X_test = sc.transform(x_test)



In [None]:
# Candidate classifiers, all trained and scored on the same train/test split.
model_list = [
    LogisticRegression(random_state=0),
    # probability=True enables predict_proba on the SVM.
    SVC(probability=True),
    GaussianNB(),
    BernoulliNB(),
    KNeighborsClassifier(n_neighbors=3, metric='minkowski'),
    DecisionTreeClassifier(random_state=0),
    RandomForestClassifier(n_estimators=10, criterion='entropy'),
    AdaBoostClassifier(n_estimators=50),
    XGBClassifier(n_estimators=100),
]



In [None]:
# Fit every classifier in `model_list` on the scaled training data, evaluate it
# on the held-out test split, and collect the results:
#   - `model_name` / `accuracy`: parallel lists consumed by the comparison plot
#   - `log`: DataFrame with one row per model (name, accuracy in %, log loss)
log_cols = ["Classifier", "Accuracy", "Log Loss"]
log_rows = []  # rows are accumulated here and turned into `log` once at the end

model_name = []
accuracy = []

# Loop invariants: tick labels and 1-D views of the targets (ravel makes this
# work whether the targets arrive as (n,) or (n, 1) arrays).
categories = ['No', 'Yes']
y_train_1d = np.ravel(y_train)
y_test_1d = np.ravel(y_test)

for clf in model_list:
    clf.fit(X_train, y_train_1d)
    name = clf.__class__.__name__
    model_name.append(name)
    print("="*60)
    print(name, 'Classification Reports')

    # Predictions on the *test* split (hard class labels).
    test_predictions = clf.predict(X_test)
    acc = accuracy_score(y_test_1d, test_predictions)
    accuracy.append(acc)

    # Log loss is defined on predicted probabilities; feeding it hard 0/1
    # labels only yields a clipped, rescaled error rate. Passing `labels`
    # keeps the probability columns aligned with the classifier's classes.
    test_probabilities = clf.predict_proba(X_test)
    ll = log_loss(y_test_1d, test_probabilities, labels=clf.classes_)

    log_rows.append([name, acc*100, ll])
    print(classification_report(y_test_1d, test_predictions))
    print("Log Loss: {}".format(ll))

    cm = confusion_matrix(y_test_1d, test_predictions)
    sns.heatmap(cm, cmap = 'Blues', fmt = '', annot = True,
                xticklabels = categories, yticklabels = categories)

    plt.xlabel("Predicted values", fontdict = {'size':14}, labelpad = 10)
    plt.ylabel("Actual values"   , fontdict = {'size':14}, labelpad = 10)
    plt.title ("{} Confusion Matrix".format(name), fontdict = {'size':18}, pad = 20)
    plt.show()

print("="*60)

# DataFrame.append was removed in pandas 2.0; build the frame once from the
# collected rows instead of growing it inside the loop.
log = pd.DataFrame(log_rows, columns=log_cols)

## Comparison

In [None]:
# Horizontal bar chart comparing the test accuracy of every trained model.
df = pd.DataFrame({'model_name': model_name, 'accuracy': accuracy}, index=model_name)

df.plot.barh(figsize=(9, 5))

# Annotate each bar with its accuracy. The stored values are fractions in
# [0, 1] (accuracy_score's default), so scale by 100 before adding the percent
# sign; the previous label rendered e.g. 0.85 as "0.85%".
for index, value in enumerate(accuracy):
    plt.text(value, index, "{:.1f}%".format(value * 100), va="center")

plt.title("Models and Accuracy")

plt.show()
