* Let's import the libraries

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
# Apply seaborn's default theme to every matplotlib figure drawn below.
# `sns.set` is an alias of `sns.set_theme`, which is the preferred spelling.
sns.set_theme()

* **Loading the data set**

In [None]:
# Path of the heart-attack CSV, relative to the notebook's working directory.
HEART_CSV_PATH = '../input/heart-attack-analysis-prediction-dataset/heart.csv'

# Read the raw table once; later cells only read from `data`.
data = pd.read_csv(HEART_CSV_PATH)

# Show the first rows as a quick sanity check of the columns.
data.head()

In [None]:
# Summary statistics, transposed so each feature is a row (easier to read
# than one very wide table).
data.describe().T

* **Checking for null values**

In [None]:
# Count missing entries per column (`isna` is the same check as `isnull`).
data.isna().sum()

**We can see that there are no null values in the dataset**

In [None]:
# Feature groups reused by later cells: `cont_var` holds the numeric
# measurements, `cate_var` the features the notebook treats as categories.
cont_var = ["age", "trtbps", "chol", "thalachh", "oldpeak"]
cate_var = ['sex', 'exng', 'caa', 'cp', 'fbs', 'restecg', 'slp', 'thall']

# Spelling of the printed labels corrected ("continoius colums" -> "continuous columns").
print("continuous columns are: ", cont_var)
print("categorical columns are: ", cate_var)

* **Checking for correlations**

In [None]:
# Pairwise correlation restricted to the continuous features.
continuous_features = data[cont_var]
corrMat = continuous_features.corr().T

# Display the matrix as the cell output.
corrMat

* **Visualising the correlation matrix**

In [None]:
# plt.subplots returns a (Figure, Axes) pair. Unpack it so the heatmap is drawn
# on an explicit axes instead of binding the whole tuple to `fig`.
fig, ax = plt.subplots(figsize=(10, 10))

# annot=True writes each coefficient into its cell; fmt='.1g' keeps one
# significant digit so the annotations stay readable.
sns.heatmap(data[cont_var].corr(), annot=True, fmt='.1g', ax=ax)
ax.set_title("Correlation between continuous features")
plt.show()

**Performing variable analysis**

In [None]:
def plot_density_by_output(frame, column):
    """Draw a filled KDE of ``column``, coloured by the ``output`` label.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table that contains ``column`` and an ``output`` column used as hue.
    column : str
        Name of the feature shown on the x-axis.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the density was drawn on.
    """
    # A fresh figure per feature keeps the densities from stacking on one axes.
    _, ax = plt.subplots()
    sns.kdeplot(
        data=frame,
        x=column,
        hue="output",
        fill=True,
        palette=["#8000ff", "#da8829"],
        alpha=.5,
        linewidth=0,
        ax=ax,
    )
    ax.set_title(f"Distribution of {column} by output")
    return ax


# One plot per feature. The original cells plotted 'age' twice; every feature
# now appears exactly once, in the original order.
for feature in ["age", "cp", "trtbps", "thalachh", "caa", "oldpeak", "chol"]:
    plot_density_by_output(data, feature)
    plt.show()

**INSIGHTS:**

* **No significant correlation between the continuous variables**.

* **Age doesn't seem to be directly causing heart attacks**

* **People with chest pain type 2 seem to be at greater risk**  

* **People with old peak 0 seem to be at greater risk** 

* **People with caa 0 seem to be at greater risk**

 **PREPROCESSING**

In [None]:
# Separate the feature columns from the label column.
unscaled_inputs = data.drop(['output'], axis=1)
targets = data['output']

# get_dummies() only encodes object/category columns by default. The features
# in `cate_var` are presumably stored as integer codes (confirm with
# `data.dtypes`), so they must be named explicitly or they pass through
# unencoded. dtype=float keeps the design matrix purely numeric.
data_with_dummies = pd.get_dummies(
    unscaled_inputs, columns=cate_var, drop_first=True, dtype=float
)

# NOTE(review): this standardises the full table, so statistics of rows that
# later land in the test set influence the scaling. Fitting the scaler on the
# training rows only avoids that leakage.
scaledInputs = preprocessing.scale(data_with_dummies)
samples_count = data_with_dummies.shape[0]

# Preview the first rows only instead of rendering the whole frame.
data_with_dummies.head()


 **Train-Test split**

In [None]:
 from sklearn.model_selection import train_test_split
# Split the *unscaled* design matrix first so the scaler never sees test rows.
# The same test_size and random_state keep the row partition unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    data_with_dummies, targets, test_size=0.2, random_state=365
)

# Learn mean and standard deviation from the training rows only, then apply the
# same transform to both partitions (avoids train/test leakage).
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

print("The shape of X_train is      ", X_train.shape)
print("The shape of X_test is       ",X_test.shape)
print("The shape of y_train is      ",y_train.shape)
print("The shape of y_test is       ",y_test.shape)

**Performing Logistic Regression**

In [None]:
# LogisticRegression and accuracy_score now come from the top import cell, so
# later cells no longer depend on this cell for `accuracy_score`.
logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)

# For a classifier, .score() returns the mean accuracy on the given data.
logistic_acc = logistic_model.score(X_test, y_test)
print("accuracy = ", logistic_acc)

**Kernel SVM**

In [None]:
# Support-vector classifier with an RBF kernel (SVC comes from the top import cell).
clf = SVC(kernel='rbf', random_state=42).fit(X_train, y_train)
y_pred = clf.predict(X_test)
svm_acc = accuracy_score(y_test, y_pred)

# Report train and test accuracy like the other model cells; the score was
# previously computed but never shown.
print(f"Accuracy Score of Training Data is {accuracy_score(y_train, clf.predict(X_train))}")
print(f"Accuracy Score of Test Data is {svm_acc}\n")

**KNN**

In [None]:
# k-nearest-neighbours with scikit-learn's default settings
# (KNeighborsClassifier comes from the top import cell).
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# Predict once per partition and score the stored predictions.
knn_train_pred = knn.predict(X_train)
knn_test_pred = knn.predict(X_test)
knn_acc = accuracy_score(y_test, knn_test_pred)

print(f"Accuracy Score of Training Data is {accuracy_score(y_train, knn_train_pred)}")
print(f"Accuracy Score of Test Data is {knn_acc}\n")

**Naive Bayes**

In [None]:
# Gaussian Naive Bayes (GaussianNB comes from the top import cell).
nbc = GaussianNB()
nbc.fit(X_train, y_train)

# Predict once per partition and score the stored predictions.
nbc_train_pred = nbc.predict(X_train)
nbc_test_pred = nbc.predict(X_test)
nbc_acc = accuracy_score(y_test, nbc_test_pred)

print(f"Accuracy Score of Training Data is {accuracy_score(y_train, nbc_train_pred)}")
print(f"Accuracy Score of Test Data is {nbc_acc}\n")

**Random Forest**

In [None]:
# Random forest (RandomForestClassifier comes from the top import cell).
# A fixed random_state makes the reported accuracy reproducible across
# Restart & Run All; without it every run grows a different forest.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

rf_acc = accuracy_score(y_test, rf.predict(X_test))
print(f"Accuracy Score of Training Data is {accuracy_score(y_train, rf.predict(X_train))}")
print(f"Accuracy Score of Test Data is {rf_acc}\n")

**Comparing the prediction results**

In [None]:
# Collect the test accuracies and order them best-first. sort_values() returns
# a new frame, so the result must be assigned; the original call discarded it
# and the chart was drawn unsorted.
models = pd.DataFrame({
    'Model': ['Logistic Regression', 'SVM', 'KNN', 'Naive Bayes', 'Random Forest'],
    'Score': [logistic_acc, svm_acc, knn_acc, nbc_acc, rf_acc],
}).sort_values(by='Score', ascending=False)

# Horizontal bars, one per model, in the sorted order of `models`.
fig, ax = plt.subplots(figsize=(6, 6))
sns.barplot(x='Score', y='Model', data=models, ax=ax)
ax.set_title("Test accuracy by model")

plt.show()