# Import Libraries

In [None]:
# Install dtreeviz into the notebook environment (it is imported in the next cell).
!pip install dtreeviz

In [None]:
import numpy as np 
import pandas as pd 
import seaborn as sns
import dtreeviz
import pickle
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import plot_tree
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score, confusion_matrix, ConfusionMatrixDisplay, classification_report

import os
# List every file under the Kaggle input directory so dataset paths are visible.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))
import warnings
# NOTE: this silences every warning for the rest of the session, including
# any emitted by the models fitted later in the notebook.
warnings.filterwarnings("ignore")

# Load Data

In [None]:
df = pd.read_csv('/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv')

In [None]:
df.head(10)

# Data Visualization

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
df.describe().T

In [None]:
sns.heatmap(df.isnull())

In [None]:
df.plot()

In [None]:
sns.histplot(data=df, x='age', bins=40, color='blue')

In [None]:
women=df.loc[df.sex==0]
w=len(women)

In [None]:
men=df.loc[df.sex==1]
m=len(men)

In [None]:
total=len(df['sex'])

In [None]:
rate_women=(w/total)*100
rate_men=(m/total)*100

In [None]:
# Pie chart of the women/men percentage split.
p = [rate_women, rate_men]
# Fixed the misspelled label ('Percenatge') that was shown on the chart.
labels = ['Women Percentage', 'Men Percentage']
plt.pie(p, labels=labels, autopct='%3.f%%')

In [None]:
plt.figure(figsize=(25,10))
sns.heatmap(df.corr(), annot=True)

In [None]:
sns.pairplot(df, hue='output')

In [None]:
x = df.iloc[:, :13]
x

In [None]:
x = x.values
x

In [None]:
y = df.iloc[:, -1]
y

In [None]:
y = y.values
y

# Train & Test Splitting the Data

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=1,
)

# Function to Measure Performance

In [None]:
def perform(y_pred, y_true=None):
    """Print classification metrics and plot the confusion matrix.

    Parameters
    ----------
    y_pred : array-like
        Predicted labels to evaluate.
    y_true : array-like, optional
        Ground-truth labels. When omitted, the notebook-level ``y_test``
        is used, so existing ``perform(y_pred)`` calls keep working.

    Returns
    -------
    None
        Results are printed and plotted; nothing is returned.
    """
    if y_true is None:
        # Fall back to the hold-out labels produced by the train/test split.
        y_true = y_test

    print("Precision : ", precision_score(y_true, y_pred))
    print("Recall : ", recall_score(y_true, y_pred))
    print("Accuracy : ", accuracy_score(y_true, y_pred))
    print("F1 Score : ", f1_score(y_true, y_pred))
    print('')

    # Compute the matrix once and reuse it for both the printout and the plot.
    matrix = confusion_matrix(y_true, y_pred)
    print(matrix, '\n')
    cm = ConfusionMatrixDisplay(confusion_matrix=matrix)
    cm.plot()

# Logistic Regression

In [None]:
model_lr = LogisticRegression()
model_lr.fit(x_train, y_train)

In [None]:
y_pred_lr = model_lr.predict(x_test)

In [None]:
perform(y_pred_lr)

In [None]:
print(classification_report(y_test, y_pred_lr))

# Decision Tree 

In [None]:
# Seed the tree so the fitted model (which is later pickled and plotted) is
# reproducible, in line with the seeded split and random forest in this notebook.
model_dt = DecisionTreeClassifier(random_state=0)
model_dt.fit(x_train, y_train)

In [None]:
# Predict with the decision tree itself; this previously called model_lr, so
# the decision-tree metrics were really the logistic regression's.
y_pred_dt = model_dt.predict(x_test)

In [None]:
perform(y_pred_dt)

In [None]:
print(classification_report(y_test, y_pred_dt))

In [None]:
filename = 'Heart_Attack_Prediction_DT.h5'
# NOTE: the payload is a pickle, not HDF5, despite the ".h5" extension; the
# name is kept so anything that loads this path keeps working.
# The context manager makes sure the file is flushed and closed.
with open(filename, 'wb') as model_file:
    pickle.dump(model_dt, model_file)

In [None]:
# Feature names are the first 13 column labels.
feature_names = df.columns[0:13]
# Work on a copy whose target column is converted to strings.
viz = df.copy()
viz["output"] = viz["output"].values.astype(str)
print(viz.dtypes)
# plot_tree expects class names in ascending class order. unique() returns
# values in order of first appearance, which can swap the labels, so sort the
# original class values before converting them to strings.
target_names = [str(label) for label in sorted(df['output'].unique())]

In [None]:
target_names

In [None]:
plt.figure(figsize=(25, 20))
plot_tree(model_dt, feature_names = feature_names, class_names = target_names, filled = True, rounded = False)
plt.savefig('tree_visualization.png')

# Random Forest 

In [None]:
model_rf = RandomForestClassifier(n_estimators = 10, criterion = 'entropy', random_state = 0)
model_rf.fit(x_train, y_train)

In [None]:
y_pred_rf = model_rf.predict(x_test)

In [None]:
perform(y_pred_rf)

In [None]:
print(classification_report(y_test, y_pred_rf))