## Predictor

age - Age of the patient  
  
sex - Sex of the patient  

cp - Chest pain type ~ 0 = Typical Angina, 1 = Atypical Angina, 2 = Non-anginal Pain, 3 = Asymptomatic

trtbps - Resting blood pressure (in mm Hg)

chol - Cholesterol in mg/dl fetched via BMI sensor

fbs - (fasting blood sugar > 120 mg/dl) ~ 1 = True, 0 = False

restecg - Resting electrocardiographic results ~ 0 = Normal, 1 = ST-T wave abnormality, 2 = Left ventricular hypertrophy

thalachh - Maximum heart rate achieved

oldpeak - ST depression induced by exercise relative to rest (listed as "Previous peak" in the dataset description)

slp - Slope

caa - Number of major vessels

thall - Thallium Stress Test result ~ (0,3)

exng - Exercise induced angina ~ 1 = Yes, 0 = No

output - Target variable

In [None]:
#Importing basic libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set_style("whitegrid")
plt.rcParams['figure.figsize']=[8,6]

In [None]:
# Read the heart dataset CSV into a DataFrame.
# The path is relative to the notebook's working directory.
HEART_CSV_PATH = "../input/heart-attack-analysis-prediction-dataset/heart.csv"
df = pd.read_csv(HEART_CSV_PATH)

In [None]:
# Preview the first five rows to sanity-check the load.
df.head(n=5)

In [None]:
# Print a concise summary of the frame: per-column dtype and non-null count,
# together with the number of rows and columns.
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

## Data Visualization

In [None]:
# Age distribution split by the target column (hue="output"), with a KDE overlay per class.
ax = sns.histplot(data=df, x="age", hue="output", bins=30, kde=True)
ax.set_title("Distribution of age in output class")
plt.show()

In [None]:
# Age distribution split by sex (hue="sex"), with a KDE overlay per group.
ax = sns.histplot(data=df, x="age", hue="sex", bins=30, kde=True)
ax.set_title("Distribution of age based on sex")
plt.show()

In [None]:
# Maximum heart rate achieved (thalachh), split by the target column, with a KDE overlay.
ax = sns.histplot(data=df, x="thalachh", hue="output", bins=30, kde=True)
ax.set_title("Maximum Heart Rate achieved in each output class")
plt.show()

In [None]:
# Resting blood pressure (trtbps) per target class, with one box per sex inside each class.
ax = sns.boxplot(data=df, x="output", y="trtbps", hue="sex")
ax.set_title("Resting blood pressure for each gender and output class")
plt.show()

In [None]:
# oldpeak per sex, with one box per target class inside each sex.
# oldpeak is the exercise-induced ST depression relative to rest, not a heart rate,
# so the title and y-label name the quantity that is actually plotted.
ax = sns.boxplot(data=df, x="sex", y="oldpeak", hue="output")
ax.set_title("ST depression (oldpeak) for each gender and output class")
ax.set_ylabel("oldpeak (ST depression)")
plt.show()

In [None]:
# Row counts per sex, split by the target column.
ax = sns.countplot(data=df, x="sex", hue="output")
ax.set_title("Population distribution of output in each gender class")
plt.show()

In [None]:
# Row counts per fasting-blood-sugar flag (fbs), split by the target column.
ax = sns.countplot(data=df, x="fbs", hue="output")
ax.set_title("Population distribution of output in fbs class")
plt.show()

In [None]:
# Row counts per exercise-induced-angina flag (exng), split by the target column.
ax = sns.countplot(data=df, x="exng", hue="output")
ax.set_title("Population distribution of output in exercise induced cases")
plt.show()

In [None]:
# Cholesterol (chol) distribution split by the target column, with a KDE overlay per class.
# (The previous comment was copied from the heart-rate cell and did not describe this plot.)
ax = sns.histplot(data=df, x="chol", hue="output", bins=40, kde=True)
ax.set_title("Distribution of cholesterol level in output class")
plt.show()

In [None]:
# Cholesterol (chol) per sex, with one box per target class inside each sex.
ax = sns.boxplot(data=df, x="sex", y="chol", hue="output")
ax.set_title("Cholesterol levels in sex and output class")
plt.show()

In [None]:
# Row counts per chest-pain type (cp), split by the target column.
ax = sns.countplot(data=df, x="cp", hue="output")
ax.set_title("Population distribution of output class in chest pain category")
plt.show()

In [None]:
# Row counts per thallium stress test result (thall), split by the target column.
ax = sns.countplot(data=df, x="thall", hue="output")
ax.set_title("Population Distribution of output class according to Thallium Stress Test Results")
plt.show()

In [None]:
# Row counts per slope category (slp), split by the target column.
ax = sns.countplot(data=df, x="slp", hue="output")
ax.set_title("Population Distribution of output class according to slope")
plt.show()

In [None]:
# Annotated heatmap of the pairwise correlations returned by df.corr().
# The Figure and the Axes get separate names: previously `fig` was rebound to the
# Axes returned by sns.heatmap, which left a misleading name in the namespace.
fig, ax = plt.subplots(figsize=(15, 10))
sns.heatmap(df.corr(), annot=True, cmap="RdYlGn_r", ax=ax)
ax.set_title("Correlation Matrix")
plt.show()

## Data Manipulation

In [None]:
# Swap the integer chest-pain codes for readable labels ahead of one-hot encoding.
CHEST_PAIN_LABELS = {0: "typical", 1: "atypical", 2: "non-anginal", 3: "asymptomatic"}
df = df.replace({"cp": CHEST_PAIN_LABELS})

In [None]:
# One-hot encode the chest-pain labels and remove the source column.
# drop_first=True leaves one level out of the encoding to act as the baseline.
cp_dummies = pd.get_dummies(df["cp"], drop_first=True)
df = df.join(cp_dummies).drop(columns="cp")

In [None]:
# Swap the integer resting-ECG codes for readable labels ahead of one-hot encoding.
RESTECG_LABELS = {0: "Normal", 1: "ST-T", 2: "Left_ventricular-hypertrophy"}
df = df.replace({"restecg": RESTECG_LABELS})

In [None]:
# One-hot encode the resting-ECG labels and remove the source column.
# drop_first=True leaves one level out of the encoding to act as the baseline.
restecg_dummies = pd.get_dummies(df["restecg"], drop_first=True)
df = df.join(restecg_dummies).drop(columns="restecg")

## Model Building - Training and Test Data

In [None]:
# Importing model building libraries
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score
from sklearn.metrics import roc_curve,roc_auc_score

In [None]:
# Feature matrix: every column except the target. Target vector: the "output" column.
X = df.drop(columns="output")
y = df["output"]

In [None]:
# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=101,
)

## Logistic Regression

In [None]:
# Logistic regression using the liblinear solver, capped at 500 iterations and seeded.
model = LogisticRegression(
    solver="liblinear",
    max_iter=500,
    random_state=101,
)

In [None]:
# Fit on the training split; the fitted estimator is the cell's displayed value.
model.fit(X=X_train, y=y_train)

In [None]:
# Predicted class labels for the training rows.
X_train_pred = model.predict(X=X_train)

In [None]:
# Per-class precision / recall / F1 on the training split.
print(classification_report(y_true=y_train, y_pred=X_train_pred))

In [None]:
# Per-class precision / recall / F1 on the held-out test split.
print(classification_report(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Keep column 1 of predict_proba for every test row.
# assumes `output` is coded 0/1 so that column 1 is the positive class — TODO confirm
test_proba_matrix = model.predict_proba(X_test)
X_test_prob = test_proba_matrix[:, 1]

In [None]:
# Start from an empty frame; the metric columns and rows are filled in by later cells.
scores = pd.DataFrame(data=None)

In [None]:
# Declare the metric columns on the (still row-less) frame.
score_columns = ["Cutoff", "Accuracy", "Precision", "Recall", "F1_Score"]
scores.loc[:, score_columns] = ""

In [None]:
# Sweep the probability cutoff from 0.00 to 1.00 in steps of 0.01 and record the
# test-set accuracy, precision, recall and F1 obtained at each cutoff.
score_rows = []
for i in range(101):
    cutoff = i / 100
    # Threshold once per cutoff and reuse the labels for all four metrics.
    y_pred_at_cutoff = np.where(X_test_prob >= cutoff, 1, 0)
    score_rows.append(
        {
            "Cutoff": cutoff,
            "Accuracy": accuracy_score(y_test, y_pred_at_cutoff),
            # zero_division=0: when a cutoff yields no positive predictions the metric
            # is undefined; report 0 (the value sklearn falls back to) without the
            # UndefinedMetricWarning.
            "Precision": precision_score(y_test, y_pred_at_cutoff, zero_division=0),
            "Recall": recall_score(y_test, y_pred_at_cutoff),
            "F1_Score": f1_score(y_test, y_pred_at_cutoff, zero_division=0),
        }
    )

# Build the frame in one go from the collected records: this keeps the columns numeric
# (instead of object dtype) and avoids growing a DataFrame row by row.
scores = pd.DataFrame(
    score_rows,
    columns=["Cutoff", "Accuracy", "Precision", "Recall", "F1_Score"],
)

In [None]:
# Display the cutoff sweep table (one row per cutoff value).
scores

In [None]:
# Plot each metric against the probability cutoff itself (previously the x-axis was
# the row index 0..100) and label the axes so the figure stands on its own.
fig, ax = plt.subplots(figsize=(15, 8))
# astype(float) makes the columns numeric even if `scores` was filled as object dtype.
scores.astype(float).plot(
    x="Cutoff",
    y=["Accuracy", "Precision", "Recall", "F1_Score"],
    ax=ax,
)
ax.set_xlabel("Probability cutoff")
ax.set_ylabel("Score")
ax.set_title("Test-set metrics by probability cutoff")
ax.legend()
plt.show()

In [None]:
# Test-split report using model.predict's own labels, shown again as the baseline
# for the custom-cutoff report that follows.
print(classification_report(y_true=y_test, y_pred=model.predict(X_test)))

In [None]:
# Test-split report when a row is labelled positive at a predicted probability >= 0.44.
# NOTE(review): 0.44 appears to have been chosen from the test-set cutoff sweep, which
# would make this report optimistic — confirm, and consider tuning on a validation split.
print(
    classification_report(
        y_test,
        (model.predict_proba(X_test)[:, 1] >= 0.44).astype(int),
    )
)

In [None]:
# roc_curve returns (false-positive rates, true-positive rates, thresholds).
# a*/b*/c* hold those arrays for the training (1) and testing (2) splits, scored
# with the predicted probabilities from column 1 of predict_proba.
a1, b1, c1 = roc_curve(y_true=y_train, y_score=model.predict_proba(X_train)[:, 1])
a2, b2, c2 = roc_curve(y_true=y_test, y_score=model.predict_proba(X_test)[:, 1])

In [None]:
# ROC curves for the training and testing splits, scored with predicted probabilities.
# The AUC in each legend entry is computed from the same probabilities as the plotted
# curve. Previously it was computed from hard model.predict labels, so the reported
# number did not describe the curve that was drawn.
# The curve points are recomputed here so the cell does not depend on a1/b1/a2/b2,
# which a later cell rebinds.
train_proba = model.predict_proba(X_train)[:, 1]
test_proba = model.predict_proba(X_test)[:, 1]
train_fpr, train_tpr, train_thresholds = roc_curve(y_train, train_proba)
test_fpr, test_tpr, test_thresholds = roc_curve(y_test, test_proba)

train_auc = round(roc_auc_score(y_train, train_proba), 2)
test_auc = round(roc_auc_score(y_test, test_proba), 2)

fig, ax = plt.subplots(dpi=100)
ax.plot(train_fpr, train_tpr, label="Training Data - AUC: {}".format(train_auc))
ax.plot(test_fpr, test_tpr, label="Testing Data - AUC: {}".format(test_auc))
# Diagonal reference line: the ROC of a classifier that guesses at random.
ax.plot([0, 1], [0, 1], linestyle="--", color="grey", label="Chance")
ax.set_xlabel("False positive rate")
ax.set_ylabel("True positive rate")
ax.set_title("ROC curve")
ax.legend()
plt.show()

In [None]:
# Hard 0/1 labels obtained by thresholding the column-1 probability at 0.44.
X_train_pred_new = (model.predict_proba(X_train)[:, 1] >= 0.44).astype(int)
X_test_pred_new = (model.predict_proba(X_test)[:, 1] >= 0.44).astype(int)

In [None]:
# ROC points on the training split computed from hard labels rather than probabilities:
# (a1, b1) for model.predict's labels, (a2, b2) for the 0.44-cutoff labels.
# Note: this rebinds a1/b1/c1 and a2/b2/c2 from the earlier probability-based cell.
a1, b1, c1 = roc_curve(y_true=y_train, y_score=model.predict(X_train))
a2, b2, c2 = roc_curve(y_true=y_train, y_score=X_train_pred_new)

In [None]:
# ROC points on the testing split computed from hard labels rather than probabilities:
# (x1, y1) for model.predict's labels, (x2, y2) for the 0.44-cutoff labels.
x1, y1, z1 = roc_curve(y_true=y_test, y_score=model.predict(X_test))
x2, y2, z2 = roc_curve(y_true=y_test, y_score=X_test_pred_new)

In [None]:
# Side-by-side comparison of model.predict's labels against the 0.44-cutoff labels:
# left panel for the training split, right panel for the testing split.
# NOTE(review): every curve and AUC here is computed from hard 0/1 labels, not from
# probabilities, so each "AUC" describes a single operating point — confirm intent.
fig, (train_ax, test_ax) = plt.subplots(1, 2, figsize=[12, 6], dpi=200)

# Training split
train_auc = round(roc_auc_score(y_train, model.predict(X_train)), 2)
train_auc_new = round(roc_auc_score(y_train, X_train_pred_new), 2)
train_ax.plot(a1, b1, label=f"Cutoff - 0.5 - AUC: {train_auc}")
train_ax.plot(a2, b2, label=f"Cutoff - 0.44 - AUC: {train_auc_new}")
train_ax.set_title("Training Set")
train_ax.legend()

# Testing split
test_auc = round(roc_auc_score(y_test, model.predict(X_test)), 2)
test_auc_new = round(roc_auc_score(y_test, X_test_pred_new), 2)
test_ax.plot(x1, y1, label=f"Cutoff - 0.5 - AUC: {test_auc}")
test_ax.plot(x2, y2, label=f"Cutoff - 0.44 - AUC: {test_auc_new}")
test_ax.set_title("Testing Set")
test_ax.legend()

plt.show()

In [None]:
# Fitted coefficient for each feature column, sorted from largest to smallest.
# NOTE(review): no feature scaling is visible in this notebook, so coefficient
# magnitudes may not be directly comparable across features — confirm.
coeffs = pd.DataFrame({"Co-efficient": model.coef_[0]}, index=list(X.columns))
coeffs = coeffs.sort_values(by="Co-efficient", ascending=False)
coeffs

In [None]:
# Horizontal bar chart of the coefficient table: one bar per feature.
fig, ax = plt.subplots(dpi=100)
sns.barplot(x=coeffs["Co-efficient"], y=coeffs.index, ax=ax)
plt.show()