Let us see the columns description
age - Age of the patient

sex - Sex of the patient

cp - Chest pain type ~ 0 = Typical Angina, 1 = Atypical Angina, 2 = Non-anginal Pain, 3 = Asymptomatic

trtbps - Resting blood pressure (in mm Hg)

chol - Cholesterol in mg/dl fetched via BMI sensor

fbs - (fasting blood sugar > 120 mg/dl) ~ 1 = True, 0 = False

restecg - Resting electrocardiographic results ~ 0 = Normal, 1 = ST-T wave abnormality, 2 = Left ventricular hypertrophy

thalachh - Maximum heart rate achieved

oldpeak - Previous peak (ST depression induced by exercise relative to rest)

slp - Slope

caa - Number of major vessels

thall - Thallium Stress Test result ~ (0,3)

exng - Exercise induced angina ~ 1 = Yes, 0 = No

output - Target variable

## Our task is to perform EDA and predict whether a person is prone to a heart attack.

In [None]:
## Let us import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

In [None]:
## Load the heart-attack dataset from the Kaggle input directory into a DataFrame
df=pd.read_csv("/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv")

In [None]:
## Preview the first rows of the dataset
df.head()

In [None]:
## Shape of the dataset as (rows, columns)
df.shape

In [None]:
## Dataset description: summary statistics for the numeric columns
df.describe()

In [None]:
## Let's check the null values: number of missing entries per column
df.isnull().sum()
### There are no null values in our dataset

In [None]:
# Let's check the number of unique values in each column of our dataset.
# `nunique()` replaces the hand-written loop and avoids rebinding the name
# `dict`, which would shadow the builtin for every later cell in the notebook.
unique_counts = df.nunique()

# One row per dataset column, with a single "unique count" column
unique_counts.to_frame(name="unique count")

In [None]:
## Let's check the data type pandas assigned to each column
df.dtypes

In [None]:
## Univariate analysis
# Outlined (transparent-fill) bar chart of the number of rows per `sex` value.
# The palette is sized from the data so there is exactly one edge colour per
# bar, instead of a hard-coded count that can drift from the number of categories.
sex_edge_colors = sns.color_palette("dark", df["sex"].nunique())
ax = sns.countplot(x="sex", data=df,
                   facecolor=(0, 0, 0, 0),
                   linewidth=5,
                   edgecolor=sex_edge_colors)
ax.set(xlabel='sex', ylabel='count')
### The count of male is more in our dataset

In [None]:
# Outlined count plot of exercise-induced angina (`exng`)
exng_outline = sns.color_palette("dark", 2)
ax = sns.countplot(
    data=df,
    x="exng",
    edgecolor=exng_outline,
    linewidth=5,
    facecolor=(0, 0, 0, 0),
)
ax.set(xlabel="exng", ylabel="count")

In [None]:
# Let's plot age (continuous variable) as a 20-bin outline histogram
plt.hist(data=df, x="age", color="#28B463", histtype="step", bins=20)
### Our dataset contains age group between 40-70

In [None]:
## Resting blood pressure (trtbps) distribution, 20 bins
plt.hist(data=df, x="trtbps", color="#9B59B6", histtype="barstacked", bins=20)

In [None]:
## Cholesterol (chol) distribution, 20 filled-step bins
plt.hist(data=df, x="chol", color="#B9770E", histtype="stepfilled", bins=20)
## We can observe outliers as well

In [None]:
# Outlined count plot of the number of major vessels (`caa`)
caa_outline = sns.color_palette("dark", 5)
ax = sns.countplot(
    data=df,
    x="caa",
    edgecolor=caa_outline,
    linewidth=5,
    facecolor=(0, 0, 0, 0),
)
ax.set(xlabel="caa", ylabel="count")

In [None]:
# Outlined count plot of chest pain type (`cp`)
cp_outline = sns.color_palette("dark", 4)
ax = sns.countplot(
    data=df,
    x="cp",
    edgecolor=cp_outline,
    linewidth=5,
    facecolor=(0, 0, 0, 0),
)
ax.set(xlabel="cp", ylabel="count")

In [None]:
# Outlined count plot of resting ECG results (`restecg`)
restecg_outline = sns.color_palette("dark", 3)
ax = sns.countplot(
    data=df,
    x="restecg",
    edgecolor=restecg_outline,
    linewidth=5,
    facecolor=(0, 0, 0, 0),
)
ax.set(xlabel="restecg", ylabel="count")

In [None]:
# Distribution of `oldpeak`, 20 bins
plt.hist(data=df, x="oldpeak", color="#3498DB", bins=20)

In [None]:
### Correlation
# Pairwise correlation matrix of the dataset columns, rendered as a colour-shaded table
corr = df.corr()
corr_styler = corr.style
corr_styler.background_gradient(cmap="coolwarm")

In [None]:
# Strip plots of the first nine columns against the target, laid out on a 3x3 grid.
plt.figure(figsize=(20, 25), facecolor='white')

# The grid only has nine axes, so iterate over just the first nine columns
# instead of walking every column and skipping the rest.
for plotnumber, column in enumerate(df.columns[:9], start=1):
    plt.subplot(3, 3, plotnumber)
    # x/y must be passed by keyword: seaborn >= 0.12 no longer accepts them
    # positionally (the first positional argument is `data`).
    sns.stripplot(x=df['output'], y=df[column])
plt.tight_layout()

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict
from sklearn.preprocessing import scale
from sklearn import model_selection
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_curve
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler 
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, roc_auc_score
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Categorical feature columns
cat_col = [
    "sex", "exng", "caa", "cp",
    "fbs", "restecg", "slp", "thall",
]
# Continuous feature columns
con_col = [
    "age", "trtbps", "chol", "thalachh", "oldpeak",
]

In [None]:
df1 =df

In [None]:
df1 = pd.get_dummies(df1,columns=cat_col,drop_first=True)

In [None]:
df1.head()

In [None]:
X = df1.drop(['output'],axis=1)
y = df1['output']

In [None]:
y.head()

In [None]:
## Scaling
scaler = StandardScaler()
### Lets scale the continous variable column
X[con_col] = scaler.fit_transform(X[con_col])

In [None]:
X.head()

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=52)

In [None]:
X_test.head()

In [None]:
###Lets start with using algorithm:Support Vector Machine
svm = SVC(kernel='sigmoid',C=1,random_state=42).fit(X_train,y_train)

In [None]:
svm

In [None]:
y_pred = svm.predict(X_test)

In [None]:
accuracy_score(y_test,y_pred)
### The accuracy score is 81.31%

In [None]:
### Let's do hyperparameter tuning
# Search space for the SVC grid search: candidate values for C, kernel and gamma
param_grid = [
    {
        'C': [1, 10, 100, 1000],
        'kernel': ['linear', 'sigmoid', 'rbf'],
        'gamma': [0.001, 0.0001, 0.0005],
    },
]

In [None]:
param_grid

In [None]:
### GridSearchCV
gridsearch = GridSearchCV(svm,param_grid)

In [None]:
gridsearch

In [None]:
gridsearch.fit(X_train,y_train)

In [None]:
gridsearch.best_params_

In [None]:
svm = SVC(kernel='linear',C=10,gamma=0.001,random_state=42).fit(X_train,y_train)

In [None]:
svm.score(X_train,y_train)

In [None]:
y_pred = svm.predict(X_test)

In [None]:
accuracy_score(y_test,y_pred)
## Our accuracy is reduced after hyper paramter tuning.. we need to tweak the values

In [None]:
### In meanwhile lets try to apply LogisticRegression Algorithm
log_reg = LogisticRegression()
log_reg.fit(X_train,y_train)

In [None]:
y_pred = log_reg.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test,y_pred)
accuracy
### Accuracy is 81.31%

In [None]:
# Confusion Matrix
conf_mat = confusion_matrix(y_test,y_pred)
conf_mat

In [None]:
dtc = DecisionTreeClassifier()

In [None]:
dtc

In [None]:
dtc.fit(X_train,y_train)

In [None]:
y_pred = dtc.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test,y_pred)
accuracy
### Accuracy is 79.12%

In [None]:
### Random Forest Classifier
rf = RandomForestClassifier()

In [None]:
rf

In [None]:
rf.fit(X_train, y_train)

In [None]:
y_pred_rf = rf.predict(X_test)

In [None]:
accuracy = accuracy_score(y_test,y_pred_rf)
accuracy
### Accuracy is 79.12%

In [None]:
### Gradient boosting Classifier without tuning
gbc = GradientBoostingClassifier(n_estimators = 300,max_depth=1,subsample=0.8,max_features=0.2,random_state=42)

In [None]:
gbc.fit(X_train,y_train)

In [None]:
y_pred = gbc.predict(X_test)

In [None]:
accuracy_score(y_test, y_pred)
### 81.31% Accuracy