In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
logmodel = LogisticRegression()
from sklearn.metrics import accuracy_score,confusion_matrix
from sklearn.metrics import classification_report
from sklearn.preprocessing  import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [None]:
# Location of the heart-attack dataset CSV, relative to the notebook.
heart_csv_path = '../input/heart-attack-analysis-prediction-dataset/heart.csv'
# Load the raw data into the frame every later cell works on.
df = pd.read_csv(heart_csv_path)

In [None]:
# Print the column names, non-null counts and dtypes of the loaded frame.
df.info()

In [None]:
# Preview the first rows of the data.
df.head()

In [None]:
#1 age - age in years
#2 sex - sex (1 = male; 0 = female)
#3 cp - chest pain type (1 = typical angina; 2 = atypical angina; 3 = non-anginal pain; 0 = asymptomatic)
#4 trtbps - resting blood pressure (in mm Hg on admission to the hospital)
#5 chol - serum cholesterol in mg/dl
#6 fbs - fasting blood sugar > 120 mg/dl (1 = true; 0 = false)
#7 restecg - resting electrocardiographic results (1 = normal; 2 = having ST-T wave abnormality; 0 = hypertrophy)
#8 thalachh - maximum heart rate achieved
#9 exng - exercise induced angina (1 = yes; 0 = no)
#10 oldpeak - ST depression induced by exercise relative to rest
#11 slp - the slope of the peak exercise ST segment (2 = upsloping; 1 = flat; 0 = downsloping)
#12 caa - number of major vessels (0-4) colored by fluoroscopy
#13 thall - 2 = normal; 1 = fixed defect; 3 = reversible defect
#14 output - the predicted attribute - diagnosis of heart disease (angiographic disease status) (Value 0 = < 50% diameter narrowing; Value 1 = > 50% diameter narrowing)

In [None]:
# Rename the target column from 'output' to 'heart_attack', keeping it as the
# last column of the frame.
# The guard makes the cell safe to re-run: once 'output' has been replaced
# there is nothing left to do, whereas an unconditional drop would raise
# KeyError on a second execution.
if 'output' in df.columns:
    df = df.assign(heart_attack=df['output']).drop(columns='output')

In [None]:
# Number of rows per target class.
df['heart_attack'].value_counts()

In [None]:
# Replace the numeric target codes with readable class names.
reoutput = {0: 'No heart disease', 1: 'Heart disease'}
# replace() rather than map(): values that are already text are left alone,
# so re-running the cell does not turn the whole column into NaN.
df['heart_attack'] = df['heart_attack'].replace(reoutput)

# Build the counts table with explicit column names. The names produced by a
# bare value_counts().reset_index() differ between pandas versions, so the
# plot must not rely on them.
target_counts = (
    df['heart_attack']
    .value_counts()
    .rename_axis('diagnosis')
    .reset_index(name='patients')
)

# Donut chart of the class balance.
fig = px.pie(target_counts, values='patients', names='diagnosis', width=800, height=800)
fig.update_traces(textposition='inside',
                  textinfo='percent + label',
                  hole=0.8,
                  marker=dict(colors=['#8d230f', '#1e434c'], line=dict(color='white', width=2)))

# Caption in the centre of the donut; the returned figure is the cell output.
fig.update_layout(annotations=[dict(text='Distribution of  heart disease  in patients',
                                    x=0.5, y=0.5, font_size=20, showarrow=False,
                                    font_family='monospace',
                                    font_color='black')],
                  showlegend=False)
                  

In [None]:
fig = plt.figure(figsize=(13, 12))

# Left panel: number of patients in each target class.
ax_count = fig.add_subplot(221)
sns.countplot(data=df, x='heart_attack', palette='pastel', ax=ax_count)
ax_count.set_title('heart-attack Vs Non heart-attack')

# Right panel: density of the target. A KDE needs numeric input, so the target
# is encoded as 0/1 here; both the raw code (1) and the text label
# ('Heart disease') are recognised as the positive class.
target_as_int = df['heart_attack'].isin([1, 'Heart disease']).astype(int)
ax_kde = fig.add_subplot(222)
sns.kdeplot(x=target_as_int, fill=True, alpha=0.5, ax=ax_kde)
ax_kde.set_title('heart-attack Vs Non heart-attack')

plt.tight_layout()
plt.show()

In [None]:
#Sex Column
# 2x2 grid of count plots relating sex to the target, chest pain type and
# exercise-induced angina.
fig = plt.figure(figsize=(13.5, 14))
sns.set_style("darkgrid")
sns.set_context("poster", font_scale=0.8)

# Overall sex distribution.
ax_sex = fig.add_subplot(221)
sns.countplot(x='sex', data=df, palette='coolwarm', ax=ax_sex)
ax_sex.set_title('Sex Distribution')
sns.despine(ax=ax_sex)

# Sex split by target class.
ax_target = fig.add_subplot(222)
sns.countplot(x='sex', hue='heart_attack', data=df, palette='coolwarm', ax=ax_target)
ax_target.set_title('Sex Vs Heart-Attack')
sns.despine(ax=ax_target)

# Chest pain type split by sex.
ax_cp = fig.add_subplot(223)
sns.countplot(hue='sex', x='cp', data=df, palette='coolwarm', ax=ax_cp)
ax_cp.set_title('Sex Vs Chest Pain')

# Exercise-induced angina split by sex.
ax_exng = fig.add_subplot(224)
sns.countplot(hue='sex', x='exng', data=df, palette='coolwarm', ax=ax_exng)
ax_exng.set_title('Sex Vs Exng')

plt.tight_layout()
plt.show()

In [None]:
# Box plots of four numeric features grouped by chest pain type (cp).
fig = plt.figure(figsize=(14.5, 13))

# (subplot position, y column, title). The oldpeak panel is titled after what
# the column holds (ST depression induced by exercise relative to rest).
cp_panels = [
    (222, 'oldpeak', 'Chest Pain Vs ST Depression (oldpeak)'),
    (221, 'age', 'Chest Pain Vs Age'),
    (223, 'trtbps', 'Chest Pain Vs Blood Pressure'),
    (224, 'thalachh', 'Chest Pain Vs Heart Rate'),
]
for position, y_column, panel_title in cp_panels:
    ax = fig.add_subplot(position)
    sns.boxplot(x='cp', y=y_column, data=df, palette='coolwarm', ax=ax)
    ax.set_title(panel_title)

plt.tight_layout()
plt.show()

In [None]:
# Age densities for patients without (left column) and with (right column)
# heart disease, split by sex, exercise-induced angina and resting ECG.
fig, axes = plt.subplots(3, 2, figsize=(15, 21))
fig.suptitle('Heart-Attack Vs Non Heart-Attack')

# The target may hold the raw codes (0/1) or the text labels, depending on
# which cells have run; accept both so the cohorts are never empty by accident.
has_disease = df['heart_attack'].isin([1, 'Heart disease'])
cohorts = [('No heart disease', df[~has_disease]), ('Heart disease', df[has_disease])]

# One row per hue column; each row lists (alpha, palette) for the left and
# right panel respectively.
panel_rows = [
    ('sex', (0.8, 'pastel'), (0.5, 'colorblind')),
    ('exng', (0.5, 'colorblind'), (0.6, 'pastel')),
    ('restecg', (0.5, 'colorblind'), (0.5, 'colorblind')),
]
for row, (hue_column, *panel_styles) in enumerate(panel_rows):
    for col, ((cohort_name, cohort), (alpha, palette)) in enumerate(zip(cohorts, panel_styles)):
        ax = axes[row, col]
        sns.kdeplot(ax=ax, x='age', data=cohort, hue=hue_column,
                    fill=True, alpha=alpha, palette=palette)
        ax.set_title(f'{cohort_name}: age by {hue_column}')

In [None]:
#Chol column
# Bivariate densities of cholesterol against four other features, coloured by
# target class. Arguments shared by all four panels are collected once.
fig = plt.figure(figsize=(15, 16))
density_kwargs = dict(data=df, x='chol', hue='heart_attack', fill=True, alpha=0.8)

top_left = fig.add_subplot(2, 2, 1)
sns.kdeplot(y='oldpeak', ax=top_left, **density_kwargs)
top_left.set_title('oldpeak Vs Chol')

top_right = fig.add_subplot(2, 2, 2)
sns.kdeplot(y='trtbps', ax=top_right, **density_kwargs)
top_right.set_title('trtbps Vs Chol')

bottom_left = fig.add_subplot(2, 2, 3)
sns.kdeplot(y='age', ax=bottom_left, **density_kwargs)
bottom_left.set_title('Age Vs Chol')

bottom_right = fig.add_subplot(2, 2, 4)
sns.kdeplot(y='thalachh', ax=bottom_right, **density_kwargs)
bottom_right.set_title('Heart Rate Vs Chol')

In [None]:
#exng Column
# Histograms of four numeric features, coloured by exercise-induced angina.
plt.figure(figsize=(15, 14))
sns.set_style('darkgrid')

# (subplot position, x column, title, extra histplot arguments).
# Only the first panel uses seaborn's default palette.
exng_panels = [
    (221, 'trtbps', 'trtbps Vs exng', {}),
    (222, 'thalachh', 'Heart Rate Vs exng', {'palette': 'colorblind'}),
    (223, 'age', 'Age Vs exng', {'palette': 'colorblind'}),
    (224, 'chol', 'Chol Vs exng', {'palette': 'colorblind'}),
]
for position, feature, panel_title, extra_kwargs in exng_panels:
    plt.subplot(position)
    sns.histplot(data=df, x=feature, hue='exng', fill=True, alpha=0.8, **extra_kwargs)
    plt.title(panel_title)

plt.tight_layout()

In [None]:
# value_counts() orders its result by frequency, not by category code, so the
# slice labels are derived from the index of each count series instead of
# being paired positionally with a hard-coded list.
sex_names = {1: 'Male', 0: 'Female'}
# The target may hold raw codes or text labels depending on which cells ran.
target_names = {1: 'Heart-Attack', 0: 'No Heart-Attack',
                'Heart disease': 'Heart-Attack', 'No heart disease': 'No Heart-Attack'}

sex_counts = df['sex'].value_counts()
target_counts = df['heart_attack'].value_counts()
label1 = [sex_names.get(code, str(code)) for code in sex_counts.index]
label2 = [target_names.get(code, str(code)) for code in target_counts.index]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=label1, values=sex_counts.values, name='Sex'),
              1, 1)
fig.add_trace(go.Pie(labels=label2, values=target_counts.values, name='heart-attack'),
              1, 2)
# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.65, hoverinfo="label+value+name",
                  marker=dict(colors=['#ffa600', '#ff6361'], line=dict(color='white', width=2)))
fig.update_layout(
    title_text="Categorical Distribution",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='heart-attack', x=0.84, y=0.5, font_size=20, showarrow=False),
                 dict(text='Sex', x=0.18, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [None]:
# Chest pain names keyed by category code.
# NOTE(review): this encoding is the one given in the notebook's own data
# dictionary comment - confirm against the dataset documentation.
cp_names = {1: 'typical angina', 2: 'atypical angina', 3: 'non-anginal pain', 0: 'asymptomatic'}

# value_counts() is ordered by frequency, so labels are looked up from the
# index of each count series rather than assigned by position.
caa_counts = df['caa'].value_counts()
cp_counts = df['cp'].value_counts()
label = [cp_names.get(code, str(code)) for code in cp_counts.index]

# Create subplots: use 'domain' type for Pie subplot
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
# Left donut: number of major vessels, labelled with the actual vessel count.
fig.add_trace(go.Pie(labels=caa_counts.index.astype(str), values=caa_counts.values, name="Major Vessels"),
              1, 1)
# Right donut: chest pain type.
fig.add_trace(go.Pie(labels=label, values=cp_counts.values, name="Chest Pain"),
              1, 2)

# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.65, hoverinfo="label+value+name")
fig.update_layout(
    title_text="Categorical Distribution",
    # Add annotations in the center of the donut pies; each caption sits over
    # the donut it describes (Major Vessels on the left, Chest-Pain on the right).
    annotations=[dict(text='Major Vessels', x=0.15, y=0.5, font_size=20, showarrow=False),
                 dict(text='Chest-Pain', x=0.87, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [None]:
# Without explicit labels plotly names the slices by their position in the
# frequency-sorted counts, which is not the category code. Pass the codes
# from the value_counts index so each slice is identified correctly.
restecg_counts = df['restecg'].value_counts()
exng_counts = df['exng'].value_counts()

fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=restecg_counts.index.astype(str), values=restecg_counts.values,
                     name=" Resting electrocardiographic"),
              1, 1)
fig.add_trace(go.Pie(labels=exng_counts.index.astype(str), values=exng_counts.values,
                     name="Exercise induced angina "),
              1, 2)
# Use `hole` to create a donut-like pie chart
fig.update_traces(hole=.65, hoverinfo="label+value+name",
                  marker=dict(colors=['#bc5090', '#ff6361'], line=dict(color='white', width=2)))
fig.update_layout(
    title_text="Resting ECG And Exercise-Induced Angina",
    # Add annotations in the center of the donut pies.
    annotations=[dict(text='Restecg ', x=0.17, y=0.5, font_size=20, showarrow=False),
                 dict(text='Exng', x=0.80, y=0.5, font_size=20, showarrow=False)])
fig.show()

In [None]:
# Maximum heart rate: trend over age (top) and distribution (bottom), both
# coloured by target class.
plt.style.use("fivethirtyeight")
fig = plt.figure(figsize=(16, 8))

ax_trend = fig.add_subplot(2, 1, 1)
sns.lineplot(data=df, x='age', y='thalachh', hue='heart_attack',
             palette='colorblind', lw=3, ls='-', ax=ax_trend)
ax_trend.set_title('Heart Rate Vs Age')

ax_density = fig.add_subplot(2, 1, 2)
sns.kdeplot(data=df, x='thalachh', hue='heart_attack', fill=True, alpha=0.5, ax=ax_density)

plt.tight_layout()

In [None]:
#Thus, maximum heart rate decreases as age increases

In [None]:
# Resting blood pressure: trend over age (top) and distribution (bottom),
# both coloured by target class.
plt.style.use("fivethirtyeight")
pressure_column = 'trtbps'
by_target = dict(data=df, hue='heart_attack')

fig = plt.figure(figsize=(16, 8))
plt.subplot(2, 1, 1)
sns.lineplot(x='age', y=pressure_column, palette='colorblind', lw=3, ls='-', **by_target)
plt.title('Blood Pressure Vs Age')
plt.subplot(2, 1, 2)
sns.kdeplot(x=pressure_column, fill=True, alpha=0.5, **by_target)
plt.tight_layout()

In [None]:
#Model Building

In [None]:
#Random Forest

In [None]:
# Random forest on the raw features with a 70/30 train/test split.
X = df.drop('heart_attack', axis=1)
y = df['heart_attack']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

# Seed the forest as well as the split; otherwise bootstrap and feature
# sampling differ on every run and the reported accuracy is not reproducible.
rfc = RandomForestClassifier(n_estimators=1000, random_state=101)
rfc.fit(X_train, y_train)
rfc_pred = rfc.predict(X_test)

# k holds the accuracy (in percent) for the model comparison chart.
k = accuracy_score(y_test, rfc_pred) * 100
print('\n')
print('Random Forest Accuracy score is: ', k)
print('\n')
print(confusion_matrix(y_test, rfc_pred))
print('\n')
print(classification_report(y_test, rfc_pred))

In [None]:
#KNN ALGORITHM

In [None]:
# KNN baseline (k=1) on standardised features.
# Split first, then fit the scaler on the training rows only: fitting it on
# the full dataset would leak test-set statistics into training.
X = df.drop('heart_attack', axis=1)
y = df['heart_attack']
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=101)

scale = StandardScaler()
scale.fit(X_train_raw)
# Keep column names and row index so the scaled frames line up with y_train / y_test.
X_train = pd.DataFrame(scale.transform(X_train_raw), columns=X.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scale.transform(X_test_raw), columns=X.columns, index=X_test_raw.index)

knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)
print('\n')
print('KNN Accuracy score is: ', accuracy_score(y_test, pred) * 100)
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))

In [None]:
#Choose KNN Value
# Fit one KNN model per candidate K and record its misclassification rate.
# NOTE: the error is measured on X_test / y_test, so K is picked using the
# same data that later reports the final score.
fig = plt.figure(figsize=(17, 6))
sns.set_context("poster", font_scale=0.7)
sns.set_style("darkgrid")

k_values = range(1, 40)
error = []
for i in k_values:
    knn = KNeighborsClassifier(n_neighbors=i).fit(X_train, y_train)
    pred_i = knn.predict(X_test)
    # Share of test rows whose prediction differs from the true label.
    error.append(np.mean(pred_i != y_test))

sns.set_style('whitegrid')
plt.plot(k_values, error,
         color='blue', linestyle='dashed',
         marker='o', markerfacecolor='red', markersize=10)
plt.title('Error Rate vs. K Value')
plt.xlabel('K')
plt.ylabel('Error Rate')

In [None]:
#Best KNN Accuracy Score
# Refit KNN with K=13 and report its test metrics.
knn = KNeighborsClassifier(n_neighbors=13)
knn.fit(X_train, y_train)
pred = knn.predict(X_test)

# m holds the accuracy (in percent) for the model comparison chart.
m = accuracy_score(y_test, pred) * 100
print('\n')
print('Best KNN Accuracy score is: ', m)
print('\n')

print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))

In [None]:
#Logistic Regression

In [None]:
# Logistic regression on the raw features with a 70/30 train/test split.
X = df.drop('heart_attack', axis=1)
y = df['heart_attack']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=101)

# The features are unscaled, so the default iteration cap (100) can stop the
# solver before it converges; allow more iterations.
logmodel = LogisticRegression(max_iter=1000)
logmodel.fit(X_train, y_train)
predictions = logmodel.predict(X_test)

# p holds the accuracy (in percent) for the model comparison chart.
p = accuracy_score(y_test, predictions) * 100
print('\n')
print('Logistics Accuracy score is: ', p)
print('\n')
print(confusion_matrix(y_test, predictions))
print('\n')
print(classification_report(y_test, predictions))

In [None]:
# Heat map of the logistic-regression confusion matrix.
# Pin the row/column order to the classes the model was fitted on and use
# those names on the axes, so the ticks describe what each cell really counts
# whatever the target encoding (numeric codes or text labels).
class_labels = list(logmodel.classes_)
cm = confusion_matrix(y_test, predictions, labels=class_labels)
tick_positions = range(len(class_labels))

fig, ax = plt.subplots(figsize=(10, 7))
ax.imshow(cm)
ax.grid(False)
ax.xaxis.set(ticks=tick_positions, ticklabels=[f'Predicted {name}' for name in class_labels])
ax.yaxis.set(ticks=tick_positions, ticklabels=[f'Actual {name}' for name in class_labels])
# Put the first class on the top row.
ax.set_ylim(len(class_labels) - 0.5, -0.5)
# Write the count in the middle of each cell.
for i in tick_positions:
    for j in tick_positions:
        ax.text(j, i, cm[i, j], ha='center', va='center', color='white')
plt.show()

In [None]:
#Support Vector Machine

In [None]:
# Support vector classifier with default settings on the raw features,
# using the same 70/30 split as the other models.
X = df.drop(columns='heart_attack')
y = df['heart_attack']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=101
)

model = SVC()
model.fit(X_train, y_train)
pred = model.predict(X_test)

svc_accuracy = accuracy_score(y_test, pred) * 100
print('Vector Machine Accuracy score is: ', svc_accuracy)
print('\n')
print(confusion_matrix(y_test, pred))
print('\n')
print(classification_report(y_test, pred))

In [None]:
#Grid Search
#Choosing C and Gamma values
# Exhaustive search over C and gamma for an RBF-kernel SVC; refit=True
# retrains the best combination on the full training split.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=3)
grid.fit(X_train, y_train)

# Report the winning parameters, the refitted estimator and its CV score.
for search_result in (grid.best_params_, grid.best_estimator_, grid.best_score_):
    print(search_result)

In [None]:
# Score the tuned SVC on the held-out split.
grid_pred = grid.predict(X_test)

# g holds the accuracy (in percent) for the model comparison chart.
g = accuracy_score(y_test, grid_pred) * 100
print('Best Vector Machine Accuracy score is: ', g)
print('\n')
print(confusion_matrix(y_test, grid_pred))
print('\n')
print(classification_report(y_test, grid_pred))

In [None]:
# Donut chart comparing the test accuracy (in percent) of the four models.
label = ['Random Forest','K Nearest Neighbours','Logistics Regression','Vector Machine']
# k, m, p, g are the accuracies stored by the model cells, in the order of `label`.
scores = [k, m, p, g]

# Slice names go through `names`; px.pie's `labels` argument is a mapping for
# renaming columns, not a list of slice labels, so it is not passed.
fig = px.pie(values=scores, names=label, width=800, height=700)
# Show each model's accuracy on its slice. The default percent text would be
# the slice's share of the summed accuracies, which says nothing about how
# well the model performs.
fig.update_traces(textposition='inside',
                  texttemplate='%{label}<br>%{value:.1f}%',
                  hole=0.8,
                  marker=dict(colors=['#8d230f', '#ff6361', '#ffa600', '#bc5090'],
                              line=dict(color='white', width=2)))

# Caption in the centre of the donut; the returned figure is the cell output.
fig.update_layout(annotations=[dict(text='Performance Comparison',
                                    x=0.5, y=0.5, font_size=28, showarrow=False,
                                    font_family='monospace',
                                    font_color='black')],
                  showlegend=False)
                  

In [None]:
#Thanks