In this notebook I am going to perform exploratory data analysis (EDA) and prediction on the given data.


Features:

    age: Age of the patient

    sex: Sex of the patient

    cp: Chest pain type, 0 = Typical Angina, 1 = Atypical Angina, 2 = Non-anginal Pain, 3 = Asymptomatic

    trtbps: Resting blood pressure (in mm Hg)

    chol: Cholesterol in mg/dl fetched via BMI sensor

    fbs: (fasting blood sugar > 120 mg/dl), 1 = True, 0 = False

    restecg: Resting electrocardiographic results, 0 = Normal, 1 = ST-T wave abnormality, 2 = showing probable or definite left ventricular hypertrophy by Estes' criteria

    thalachh: Maximum heart rate achieved

    oldpeak: Previous peak (ST depression induced by exercise relative to rest)

    slp: Slope

    caa: Number of major vessels

    thall: Thallium Stress Test result, (0-3)

    exng: Exercise induced angina, 1 = Yes, 0 = No

    output: 0 = less chance of heart attack, 1 = more chance of heart attack


# Import

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn import svm
from sklearn.compose import ColumnTransformer
from sklearn.metrics import ConfusionMatrixDisplay,precision_score,recall_score,accuracy_score
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.model_selection import RandomizedSearchCV,GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder,StandardScaler

try:
    from sklearn.metrics import plot_confusion_matrix
except ImportError:
    # plot_confusion_matrix was removed in scikit-learn 1.2; keep the name usable.
    plot_confusion_matrix = ConfusionMatrixDisplay.from_estimator




In [None]:

# Load the Kaggle heart-attack dataset; `train` is the frame every later cell reads.
HEART_CSV = "/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv"
train = pd.read_csv(filepath_or_buffer=HEART_CSV)

In [None]:
# Preview the first five rows of the frame.
train.iloc[:5]

In [None]:
# Report the (rows, columns) size of the loaded frame.
data_shape = train.shape
print(f"The shape of the data is :{data_shape}")

In [None]:

# Column groups used by the EDA cells. Note that the target column 'output'
# is listed together with the categorical features.
num_cols = "age trtbps chol thalachh oldpeak".split()
cat_cols = "sex cp fbs restecg exng slp caa thall output".split()

print(f"The categorical columns are :{cat_cols}")
print(f"The continuous columns are :{num_cols}")


In [None]:
# Summary statistics of the continuous columns, one row per column.
train.loc[:, num_cols].describe().T

In [None]:
# Number of missing values in each column.
train.isnull().sum()


In [None]:

# Remove exact duplicate rows while keeping the first occurrence of each.
# keep=False would discard *every* copy of a duplicated record (losing the
# patient entirely), and re-binding instead of inplace=True leaves the cell
# idempotent when it is re-run.
train = train.drop_duplicates(keep="first")

# Plotting the data

**Univariate graphs**

In [None]:
plt.style.use(['dark_background'])
fig1, cat_ax = plt.subplots(
    3, 3,
    gridspec_kw={'hspace': 0.5, 'wspace': 0.5},
    figsize=(15, 10),
)
fig1.suptitle('Count Plot', fontsize=25)

# (column, panel title) pairs listed in row-major panel order of the 3x3 grid.
count_panels = [
    ('sex', "Sex"), ('fbs', "Fbs"), ('cp', "Cp"),
    ('restecg', "Rest Ecg"), ('exng', "Exng"), ('slp', "Slp"),
    ('caa', "Caa"), ('thall', "Thall"), ('output', "Output"),
]

# One count plot per categorical column, each on its own panel.
for panel, (column, title) in zip(cat_ax.flat, count_panels):
    panel.set_title(title, fontsize=10)
    sns.countplot(x=column, data=train, ax=panel)
    panel.set_xlabel(" ")
    panel.set_facecolor("black")





In [None]:
fig2, ax = plt.subplots(
    2, 3,
    gridspec_kw={'hspace': 0.5, 'wspace': 0.5},
    figsize=(15, 10),
)
fig2.suptitle('Continuous Data', fontsize=25)
fig2.set_facecolor("black")

# Only five variables are drawn on the 2x3 grid: blank out the bottom-right panel.
spare_panel = ax[1, 2]
for side in ('bottom', 'left', 'top', 'right'):
    spare_panel.spines[side].set_visible(False)
spare_panel.tick_params(left=False, bottom=False)
spare_panel.set_xticklabels([])
spare_panel.set_yticklabels([])

# (column, panel title) pairs in row-major panel order.
violin_panels = [
    ('age', "Age"),
    ('trtbps', "Trt bps"),
    ('chol', "Chol"),
    ('thalachh', "Thalachh"),
    ('oldpeak', "Oldpeak"),
]

# zip stops after the five listed columns, so the blanked panel is never drawn on.
for panel, (column, title) in zip(ax.flat, violin_panels):
    panel.set_title(title, fontsize=10)
    sns.violinplot(x=column, data=train, ax=panel)
    panel.set_xlabel(" ")
    panel.set_ylabel(" ")
    panel.set_facecolor("black")



**Bivariate Analysis**

In [None]:
fig3, ax = plt.subplots(
    2, 3,
    gridspec_kw={'hspace': 0.5, 'wspace': 0.5},
    figsize=(15, 10),
)
fig3.suptitle('Continuous Data & Output', fontsize=25)

# (column, panel title) pairs in row-major panel order.
hist_panels = [
    ('age', "Age"),
    ('trtbps', "trtbps"),
    ('chol', "chol"),
    ('oldpeak', "OldPeak"),
    ('thalachh', "Thalachh"),
]

# Histogram of each continuous column, split by the target class.
for panel, (column, title) in zip(ax.flat, hist_panels):
    panel.set_title(title, fontsize=15)
    sns.histplot(x=column, hue='output', ax=panel, data=train)

# The sixth panel is unused: hide its frame, ticks and tick labels.
spare_panel = ax[1, 2]
for side in ('bottom', 'left', 'top', 'right'):
    spare_panel.spines[side].set_visible(False)
spare_panel.tick_params(left=False, bottom=False)
spare_panel.set_xticklabels([])
spare_panel.set_yticklabels([])





In [None]:
# Pairwise Pearson correlation between all columns (Pearson is the pandas default).
corr_data = train.corr(method="pearson")

In [None]:
# Heatmap of the full correlation matrix on a 10x10 inch figure.
fig = plt.figure(figsize=(10, 10))
sns.heatmap(data=corr_data)

In [None]:
# Correlation of every column with the target.
corr_data.loc[:, 'output']

In [None]:
# Horizontal bar chart of each column's correlation with the target.
fig = plt.figure(figsize=(5, 5))
target_corr = corr_data['output']
sns.barplot(x=target_corr.values, y=target_corr.index)
plt.yticks(fontsize=12)

# Some conclusions:
* People older than 40 and younger than 55 appear to be at higher risk of heart attack. This may be due to work pressure and anxiety, since most people in this age range are working.
* The variables `chol` and `oldpeak` contain outliers.
* `slp`, `thalachh` and `cp` are directly correlated with the output variable.
* If `thalachh` is between 150 and 178, there is a higher chance of heart attack.


# Model Building


**Preprocessing**

In [None]:
# Separate the predictors from the target column.
X = train.drop(columns=['output'])
Y = train['output']

# Hold out 20% of the rows for testing.
# random_state pins the split so metrics are reproducible on every
# Restart & Run All (21 is the same seed the SVC below uses);
# stratify=Y keeps the class ratio the same in the train and test parts,
# which matters on a dataset this small.
train_x, test_x, train_y, test_y = train_test_split(
    X, Y, test_size=0.2, random_state=21, stratify=Y
)

In [None]:
# Baseline model: standardise every feature, then fit a support-vector classifier.
pipeline = Pipeline(steps=[
    ('ss', StandardScaler()),
    ('model', svm.SVC(random_state=21)),
])

# Keep the individual steps reachable under their original names.
ss = pipeline.named_steps['ss']
model = pipeline.named_steps['model']


In [None]:
# Fit the scaler and the classifier on the training part only.
pipeline.fit(X=train_x, y=train_y)


In [None]:

# Confusion matrix of the baseline pipeline on the held-out rows.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was deprecated in scikit-learn 1.0 and removed in 1.2.
ConfusionMatrixDisplay.from_estimator(pipeline, test_x, test_y)

In [None]:
# Score the baseline pipeline on the held-out rows.
y_pred = pipeline.predict(test_x)
base_precision = precision_score(test_y, y_pred)
base_recall = recall_score(test_y, y_pred)
base_accuracy = accuracy_score(test_y, y_pred)
print(f"Precision:{base_precision} \n Recall:{base_recall} \n Accuracy:{base_accuracy}")

In [None]:
# Hyper-parameter grid for the 'model' step of the pipeline
# (the `model__` prefix routes each entry to that step).
params = {
    'model__C': [1, 10, 100],
    'model__kernel': ['rbf', 'poly', 'linear'],
    'model__gamma': [0.001, 0.01, 0.1],
}

# Exhaustive cross-validated search over the grid, fitted on the training part.
search = GridSearchCV(estimator=pipeline, param_grid=params)
search.fit(train_x, train_y)

In [None]:
# List the parameter names the pipeline exposes (including nested step parameters).
pipeline.get_params(deep=True).keys()

In [None]:
# Predict the held-out rows with the fitted grid search.
y_pred_search = search.predict(X=test_x)

In [None]:
# Confusion matrix of the tuned model on the held-out rows.
# ConfusionMatrixDisplay.from_estimator replaces plot_confusion_matrix, which
# was deprecated in scikit-learn 1.0 and removed in 1.2.
ConfusionMatrixDisplay.from_estimator(search, test_x, test_y)

In [None]:
# Score the tuned model on the held-out rows.
y_pred_search = search.predict(test_x)
tuned_precision = precision_score(test_y, y_pred_search)
tuned_recall = recall_score(test_y, y_pred_search)
tuned_accuracy = accuracy_score(test_y, y_pred_search)
print(f"Precision:{tuned_precision} \n Recall:{tuned_recall} \n Accuracy:{tuned_accuracy}")