In [None]:
import numpy as numpy
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
!pip install pywaffle
from pywaffle import Waffle

from sklearn import tree
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from yellowbrick.classifier import ClassificationReport
from yellowbrick.classifier import ConfusionMatrix



In [None]:
# Read the heart-attack CSV through a relative path; the notebook has to run
# from a directory where `../input/...` resolves.
df1 = pd.read_csv("../input/heart-attack-analysis-prediction-dataset/heart.csv")
# Print the frame summary (columns, dtypes, non-null counts) as a sanity check.
df1.info()

In [None]:
# Preview the first rows of the loaded frame.
df1.head()

In [None]:
# Swap the dataset's abbreviated column names for descriptive ones.
# Columns that are not listed (e.g. age, sex, output) keep their names.
df1 = df1.rename(
    columns={
        "cp": "ChestPain",
        "trtbps": "restingBloodPressure",
        "chol": "cholestoral",
        "fbs": "fastingBloodSugar",
        "restecg": "restElectro",
        "thalachh": "maxHeartRate",
        "exng": "exeInducedAngina",
        "caa": "NoOfMajorVessels",
        "thall": "presentState",
    }
)
df1.head()

In [None]:
# Pairwise correlation between the frame's columns, displayed as a table.
df1.corr()

In [None]:
# Heat map of the correlation matrix, drawn on a 15x10-inch figure with the
# reversed "hot" colour map.
plt.figure(figsize=(15, 10))
sns.heatmap(data=df1.corr(), cmap="hot_r")

In [None]:
# Count patients per outcome by explicit label. `value_counts()` orders its
# result by frequency, so indexing it by position would silently swap the two
# classes whenever "no heart attack" is the larger group.
# output == 1 is treated as "heart attack", matching how the rest of this
# notebook uses the column.
heart_attack = int((df1["output"] == 1).sum())
heart_attack_non = int((df1["output"] == 0).sum())

# Round to the nearest whole percent instead of truncating, and derive the
# complement so the two shares always add up to exactly 100.
heart_attack_pct = int(round(heart_attack / (heart_attack + heart_attack_non) * 100))
heart_attack_non_pct = 100 - heart_attack_pct

In [None]:
# Waffle chart of the overall heart-attack share on a 10x10 grid.
share_values = [heart_attack_pct, heart_attack_non_pct]
share_labels = [
    f'Heart attack {heart_attack_pct}%',
    f'No heart attack {heart_attack_non_pct}%',
]
title_options = {
    'label': 'Heart Attack Rate',
    'loc': 'Left',
    'fontdict': {'fontsize': 10},
}
legend_options = {
    'loc': 'lower left',
    'bbox_to_anchor': (0, -0.2),  # place the legend just below the grid
    'fontsize': 10,
}

fig = plt.figure(
    FigureClass=Waffle,
    rows=10,
    columns=10,
    values=share_values,
    title=title_options,
    labels=share_labels,
    legend=legend_options,
)


In [None]:
# Age distribution split by outcome, as an interactive Plotly histogram.
import plotly.express as px

# Histogram over `age` (nbins=20), coloured by `output`; every column of the
# frame is added to the hover tooltip.
fig = px.histogram(df1, x='age', color='output', hover_data=df1.columns, nbins=20)
fig.show()

In [None]:
# Heart-attack rate for each sex.
# NOTE(review): the UCI Heart Disease attribute documentation encodes `sex` as
# 1 = male and 0 = female, and the filters below follow that encoding.
# Confirm against this dataset's data card.
male_outcomes = df1[df1['sex'] == 1]['output']
female_outcomes = df1[df1['sex'] == 0]['output']

# Whole-number percentages, rounded to nearest rather than truncated so the
# legend does not under-report the rate by up to one point.
male_pct = int(round(male_outcomes.sum() / len(male_outcomes) * 100))
female_pct = int(round(female_outcomes.sum() / len(female_outcomes) * 100))


def _sex_panel(pct, group, colors):
    """Build the pywaffle subplot spec for one sex.

    pct    -- whole-number heart-attack percentage for the group
    group  -- group name inserted into the legend labels ('male' / 'female')
    colors -- two colours: [heart attack, no heart attack]
    """
    return {
        'values': [pct, 100 - pct],
        'colors': colors,
        'vertical': True,
        'labels': [
            'Heart attack in ' + group + ' ' + str(pct) + "%",
            'Heart attack not in ' + group + ' ' + str(100 - pct) + "%",
        ],
        'legend': {
            'loc': 'lower left',
            'bbox_to_anchor': (0, -0.2),
        },
    }


# Two side-by-side waffle panels (subplot codes 121 and 122), ten rows each.
fig = plt.figure(
    FigureClass=Waffle,
    figsize=(10, 10),
    rows=10,
    vertical=False,
    plots={
        '121': _sex_panel(male_pct, 'male', ["#4C8CB5", "#B7CBD7"]),
        '122': _sex_panel(female_pct, 'female', ["#c23454", "#e0a2b0"]),
    }
)


In [None]:
# Cholesterol and resting-blood-pressure distributions, one panel per feature.
# Each panel overlays the rows with output == 0 and the rows with output == 1.
outcome_styles = (
    (0, '#4C8CB5', 'No heart Disease'),
    (1, "#c23454", 'Heart Disease'),
)

fig = plt.figure(figsize=(15, 10))
for panel_no, feature in enumerate(('cholestoral', 'restingBloodPressure'), start=1):
    # `fig` is rebound to the Axes of the panel being drawn.
    fig = plt.subplot(1, 2, panel_no)
    for outcome, colour, legend_text in outcome_styles:
        sns.histplot(df1[df1['output'] == outcome][feature], color=colour, label=legend_text, kde=True)
    plt.legend()

plt.show()



In [None]:
# Heart-attack rate for each chest-pain type (ChestPain codes 0-3, labelled
# CP1-CP4 in the charts).
def _chest_pain_attack_pct(code):
    """Return sum(output) / row count * 100, rounded to 2 decimals, for one ChestPain code.

    With a 0/1 `output` column this is the share of rows with output == 1.
    """
    outcomes = df1[df1['ChestPain'] == code]['output']
    return round(outcomes.sum() / len(outcomes) * 100, 2)


def _chest_pain_panel(pct, cp_label, colors):
    """Build the pywaffle subplot spec for one chest-pain type.

    pct      -- heart-attack percentage for the type
    cp_label -- short name used in the legend (e.g. 'CP1')
    colors   -- two colours: [heart attack, no heart attack]
    """
    return {
        'values': [pct, 100 - pct],
        'colors': colors,
        'vertical': True,
        'labels': [
            'Heart attack with ' + cp_label + ' ' + str(pct) + "%",
            # round() again: the float subtraction can leave trailing noise digits
            'No heart attack with ' + cp_label + ' ' + str(round(100 - pct, 2)) + "%",
        ],
        'legend': {
            'loc': 'lower left',
            'bbox_to_anchor': (0, -0.2),
        },
    }


cp_1, cp_2, cp_3, cp_4 = (_chest_pain_attack_pct(code) for code in range(4))

# Four waffle panels in one row (subplot codes 141-144), one per chest-pain type.
fig = plt.figure(
    FigureClass=Waffle,
    figsize=(20, 20),
    rows=10,
    columns=10,
    vertical=False,
    plots={
        '141': _chest_pain_panel(cp_1, 'CP1', ["#4C8CB5", "#B7CBD7"]),
        '142': _chest_pain_panel(cp_2, 'CP2', ["#c23454", "#e0a2b0"]),
        '143': _chest_pain_panel(cp_3, 'CP3', ["#46c764", "#b8e6c2"]),
        '144': _chest_pain_panel(cp_4, 'CP4', ["#cfe84f", "#e8e7c1"]),
    }
)


In [None]:
# Predictions: separate the target from the features.
# Target vector: the `output` column.
y = df1['output']
# Feature matrix: every other column of the frame.
x = df1.drop('output', axis=1)
x.head()

In [None]:
from sklearn.model_selection import train_test_split

# 80/20 train/test split; random_state=0 fixes the shuffle so the split is
# the same on every run.
x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.8, test_size=0.2, random_state=0)

In [None]:
# Candidate classifiers. random_state is pinned on every estimator that draws
# random numbers, so the repeated fits of these same instances (here and in the
# later evaluation cells) produce identical results on every run.
models = [
    tree.DecisionTreeClassifier(max_depth=10, random_state=0),
    RandomForestClassifier(n_estimators=15, random_state=0),
    KNeighborsClassifier(),
    GaussianNB(),
    MLPClassifier(alpha=1, max_iter=1000, random_state=0),
    LogisticRegression(max_iter=1000)
]

# Display names for the target labels. Yellowbrick pairs this list with the
# sorted class labels discovered in fit(), so index 0 names output == 0 and
# index 1 names output == 1 (treated as "heart attack" throughout this notebook).
classes = [
    'No Heart Attack',
    'Heart Attack'
]

def visualize_matrix(estimator=None):
    """Fit a classifier and draw its Yellowbrick confusion matrix.

    The estimator is fitted on (x_train, y_train), scored on (x_test, y_test),
    and the resulting matrix is shown.

    estimator -- classifier to evaluate. When omitted, the module-level
                 `model` variable is used, which keeps zero-argument calls
                 working.
    """
    if estimator is None:
        estimator = model
    visualizer = ConfusionMatrix(estimator, classes=classes)
    visualizer.fit(x_train, y_train)
    visualizer.score(x_test, y_test)
    visualizer.show()

for model in models:
    visualize_matrix(model)


In [None]:
def visualize_classification(estimator=None):
    """Fit a classifier and draw its Yellowbrick classification report.

    The estimator is fitted on (x_train, y_train) and scored on
    (x_test, y_test); the report is drawn with support=True.

    estimator -- classifier to evaluate. When omitted, the module-level
                 `model` variable is used, which keeps zero-argument calls
                 working.
    """
    if estimator is None:
        estimator = model
    visualizer = ClassificationReport(estimator, classes=classes, support=True)
    visualizer.fit(x_train, y_train)
    visualizer.score(x_test, y_test)
    visualizer.show()

# Pass each model explicitly instead of relying on the loop variable leaking
# into the function through the global namespace.
for model in models:
    visualize_classification(model)

In [None]:
from sklearn.metrics import accuracy_score
# Display names, in the same order as `models`.
names = [
    'DecisionTree', 'RandomForestClassifier', 'KNeighborsClassifier', 'GaussianNB', 'MLPClassifier', 'LogisticRegression'
]

def model_score(estimator=None):
    """Fit a classifier on the training split and print its train/test scores.

    Prints the value of `estimator.score(...)` for (x_train, y_train) and for
    (x_test, y_test).

    estimator -- classifier to evaluate. When omitted, the module-level
                 `model` variable is used, which keeps zero-argument calls
                 working.
    """
    if estimator is None:
        estimator = model
    estimator.fit(x_train, y_train)

    # score() runs its own prediction pass, so no separate predict() calls
    # are needed here.
    print('Training set', estimator.score(x_train, y_train))
    print('Test set', estimator.score(x_test, y_test))

for model, name in zip(models, names):
    print(name)
    model_score(model)