# **Heart Disease** - Classification Problem

In [None]:
# importing libraries
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings(action="ignore")
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the heart-disease CSV from the Kaggle input directory.
DATA_PATH = "/kaggle/input/heart-disease-uci/heart.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows of the dataset (DataFrame.head shows 5 rows by default)
df.head()

Data Attributes:

* age - age in years 
* sex - (1 = male; 0 = female) 
* cp - chest pain type 
* trestbps - resting blood pressure (in mm Hg on admission to the hospital) 
* chol - serum cholesterol in mg/dl 
* fbs - (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false) 
* restecg - resting electrocardiographic results 
* thalach - maximum heart rate achieved 
* exang - exercise induced angina (1 = yes; 0 = no) 
* oldpeak - ST depression induced by exercise relative to rest 
* slope - the slope of the peak exercise ST segment 
* ca - number of major vessels (0-3) colored by fluoroscopy 
* thal - 3 = normal; 6 = fixed defect; 7 = reversible defect 
* target - have disease or not (1=yes, 0=no)

In [None]:
# Check the datatype and non-null count of every attribute
df.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns
df.describe()

In [None]:
# Count the missing (NaN) values in each column
df.isna().sum()

In [None]:
# Distinct chest-pain codes present in the data.
print("Chest Pain types:", df["cp"].unique())

In [None]:
# Class balance: number of rows for each target label.
df["target"].value_counts()

In [None]:
# Number of patients of each sex (1 = male, 0 = female), split by target.
# The column is passed by name through `x=`: recent seaborn releases no longer
# accept a data vector positionally, and combining one with `data=` fails.
sns.countplot(x="sex", hue="target", data=df)
plt.show()

In [None]:
# Per-class averages: one row per target value, one column per feature.
df.groupby(by="target").mean()

# Data Visualization

In [None]:
# Bar chart of how many rows fall into each target class.
# The column is named through `x=` with `data=df`; passing the Series as the
# first positional argument is not supported by recent seaborn releases.
sns.countplot(x="target", data=df, palette="Blues_d")
plt.show()

In [None]:
# Cross-tabulate age against target and draw the counts as grouped bars.
age_by_target = pd.crosstab(df["age"], df["target"])
ax = age_by_target.plot.bar(figsize=(20, 8))
ax.set_xlabel("Age")
ax.set_ylabel("Frequency")
ax.set_title("Heart Disease for Different Ages")
plt.show()

In [None]:
# Maximum heart rate against age, one colour per target class.
# Each series gets a label and a legend is drawn so the colours are explained
# on the figure itself (target: 1 = disease, 0 = no disease).
with_disease = df["target"] == 1
without_disease = df["target"] == 0
plt.scatter(df.loc[with_disease, "age"], df.loc[with_disease, "thalach"],
            c="red", label="Disease")
plt.scatter(df.loc[without_disease, "age"], df.loc[without_disease, "thalach"],
            label="No disease")
plt.ylabel("Heart Rate(Max)")
plt.xlabel("Age")
plt.title("Heart Rate vs Age Scatter Plot")
plt.legend()
plt.show()

In [None]:
# Resting blood pressure against age, one colour per target class.
# Each series gets a label and a legend is drawn so the colours are explained
# on the figure itself (target: 1 = disease, 0 = no disease).
with_disease = df["target"] == 1
without_disease = df["target"] == 0
plt.scatter(df.loc[with_disease, "age"], df.loc[with_disease, "trestbps"],
            c="red", label="Disease")
plt.scatter(df.loc[without_disease, "age"], df.loc[without_disease, "trestbps"],
            label="No disease")
plt.xlabel("Age")
plt.ylabel("Resting Blood Pressure")
plt.title("Resting Blood Pressure vs Age Scatter Plot")
plt.legend()
plt.show()

In [None]:
# Cross-tabulate the ST-segment slope against target and plot the counts.
slope_by_target = pd.crosstab(df["slope"], df["target"])
ax = slope_by_target.plot.bar(figsize=(10, 7))
ax.set_title('Heart Disease Frequency for Slope')
ax.set_xlabel('Slope of The Peak Exercise ST Segment')
ax.set_ylabel('Frequency')
plt.show()

# Feature Engineering

In [None]:
# Look at the first rows again before the categorical columns are encoded
df.head()

In [None]:
# One-hot encode the three categorical columns; every indicator column is
# prefixed with the name of the column it was derived from.
a, b, c = (
    pd.get_dummies(df[column], prefix=column)
    for column in ("cp", "thal", "slope")
)

In [None]:
# Place the indicator columns side by side with the existing features.
frames = [df, a, b, c]
df = pd.concat(objs=frames, axis="columns")
df.head()

In [None]:
# The raw categorical columns are now redundant with their indicator columns.
df = df.drop(["cp", "thal", "slope"], axis="columns")
df.head()

In [None]:
# Labels as a NumPy array; features are every remaining column.
y = df["target"].values
X = df.drop("target", axis="columns")

In [None]:
# Hold out a fifth of the rows for testing; the seed keeps the split repeatable.
TEST_FRACTION = 0.2
SPLIT_SEED = 24
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=SPLIT_SEED, test_size=TEST_FRACTION
)

In [None]:
# Shapes of the train/test features and labels, in that order.
for split_part in (X_train, y_train, X_test, y_test):
    print(split_part.shape)

In [None]:
# Normalizing Data
# NOTE(review): Normalizer rescales every row (sample) to unit norm; it does not
# bring the columns onto a common scale. If per-feature scaling was the goal,
# StandardScaler or MinMaxScaler is the usual choice - confirm the intent.
from sklearn.preprocessing import Normalizer
scaler = Normalizer()
X_train = scaler.fit_transform(X_train)
# The held-out split is only transformed: a preprocessing step must never be
# re-fitted on test data.
X_test = scaler.transform(X_test)

# Model Selection

### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Fit a default logistic regression and predict the held-out labels.
model = LogisticRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Per-class precision/recall/F1, followed by overall accuracy.
report = classification_report(y_test, y_pred)
print(report)
print(accuracy_score(y_test, y_pred))

In [None]:
# Side-by-side view of the true labels and the logistic-regression predictions.
# Both arrays are passed as named columns; the second positional argument of
# pd.DataFrame is the index, so passing the predictions there would hide them
# in the row labels instead of showing them as data.
df1 = pd.DataFrame({"Actual": y_test, "Predicted": y_pred})
df1.head(10)

### Support Vector Machine Classifier

In [None]:
from sklearn.svm import SVC

# Default support vector classifier; verbose=True keeps the solver log visible.
model2 = SVC(verbose=True).fit(X_train, y_train)
y_pred2 = model2.predict(X_test)

# Per-class precision/recall/F1, followed by overall accuracy.
report_svc = classification_report(y_test, y_pred2)
print(report_svc)
print(accuracy_score(y_test, y_pred2))

### Decision Tree Classifier

In [None]:
from sklearn.tree import DecisionTreeClassifier
# random_state is fixed so the tree (and therefore the reported scores) is the
# same on every run.
model3 = DecisionTreeClassifier(criterion="entropy", max_depth=200, random_state=1)
model3.fit(X_train,y_train)
y_pred3 = model3.predict(X_test)
# classification_report expects (y_true, y_pred); swapping them exchanges
# precision with recall in the printed table.
report_dtree = classification_report(y_test,y_pred3)
print(report_dtree)
print(accuracy_score(y_test,y_pred3))

In [None]:
# Gaussian Naive Bayes classifier
from sklearn.naive_bayes import GaussianNB
nb = GaussianNB()
nb.fit(X_train, y_train)
y_pred_nb = nb.predict(X_test)
# classification_report expects (y_true, y_pred); swapping them exchanges
# precision with recall in the printed table.
report_nb = classification_report(y_test,y_pred_nb)
print(report_nb)
# Unlike the other model cells, accuracy is shown here as a percentage.
acc = nb.score(X_test,y_test)*100
print("Accuracy of Naive Bayes:",acc)

### Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier
# 1000 trees with a fixed seed so the forest is reproducible.
model_rf = RandomForestClassifier(n_estimators=1000, random_state=1)
model_rf.fit(X_train,y_train)
y_pred_rf = model_rf.predict(X_test)
report_rf2 = classification_report(y_test,y_pred_rf)
print(report_rf2)
# Arguments follow the (y_true, y_pred) order used by every other metric call.
print(accuracy_score(y_test,y_pred_rf))

In [None]:
from sklearn.metrics import confusion_matrix

# One confusion matrix per model, each computed against the same test labels.
cm_lr, cm_svc, cm_dtree, cm_rf, cm_nb = (
    confusion_matrix(y_test, predicted)
    for predicted in (y_pred, y_pred2, y_pred3, y_pred_rf, y_pred_nb)
)

# Confusion Matrices

In [None]:
# Draw every model's confusion matrix on one 2x3 grid.
# The panels are filled in order from the first slot so the grid has no hole
# in the middle; the single unused slot at the end is hidden.
confusion_matrices = {
    "Logistic Regression Confusion Matrix": cm_lr,
    "Support Vector Machine Confusion Matrix": cm_svc,
    "Naive Bayes Confusion Matrix": cm_nb,
    "Decision Tree Classifier Confusion Matrix": cm_dtree,
    "Random Forest Confusion Matrix": cm_rf,
}

fig, axes = plt.subplots(2, 3, figsize=(24, 12))
fig.suptitle("Confusion Matrixes", fontsize=24)
fig.subplots_adjust(wspace=0.4, hspace=0.4)

panels = axes.ravel()
for ax, (title, matrix) in zip(panels, confusion_matrices.items()):
    sns.heatmap(matrix, annot=True, cmap="Blues", fmt="d", cbar=False,
                annot_kws={"size": 24}, ax=ax)
    ax.set_title(title)
    # sklearn's confusion_matrix puts true labels on rows, predictions on columns.
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")

# Hide whichever grid slots were not used by a model.
for ax in panels[len(confusion_matrices):]:
    ax.axis("off")

plt.show()