In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the read-only Kaggle input tree and print the full path of every file found.
for folder, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Heart Attack Prediction Data

Important Insights

1. Surprisingly, in this data the mean age is lower for the group with a higher chance of heart attack

2. Some features, such as resting blood pressure (trtbps), show almost no difference between the two outcome groups

3. Maximum heart rate achieved (thalachh) is positively correlated with the chance of heart attack

4. Oldpeak is negatively correlated with the output

5. For certain categories the chance of heart attack was found to be high:

- sex = 0
- cp = 2, 3
- thall = 2
- caa = 0, 4
- slp = 2

In [None]:
import pandas as pd
import numpy as np
import os
import re
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.subplots import make_subplots
import matplotlib

Dataset Details:

Age : Age of the patient

Sex : Sex of the patient

exng : exercise-induced angina (1 = yes; 0 = no)

caa : number of major vessels (0-3)

cp : chest pain type

Value 1: typical angina

Value 2: atypical angina

Value 3: non-anginal pain

Value 4: asymptomatic

trtbps : resting blood pressure (in mm Hg)

chol : cholesterol in mg/dl fetched via BMI sensor

fbs : (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)

restecg : resting electrocardiographic results

Value 0: normal

Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)

Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria

thalachh : maximum heart rate achieved

output : 0 = less chance of heart attack; 1 = more chance of heart attack

In [None]:
# Read the heart-attack dataset into `df`, the frame every later cell works from,
# and preview its first rows as the cell output.
heart_csv = '/kaggle/input/heart-attack-analysis-prediction-dataset/heart.csv'
df = pd.read_csv(heart_csv)
df.head()

In [None]:
# Build `df2`, a copy of the data in which the numeric target (0/1) is replaced by
# readable labels, so Plotly Express legends show text instead of codes.
outcome_names = {0: "Less chance of HA", 1: 'More chance of HA'}
df2 = df.copy()
df2['output'] = [outcome_names[code] for code in df['output']]

In [None]:
# Per-column missing-value check: True marks a column holding at least one null.
df.isna().any()

In [None]:
# Count of patients in each target class (0 vs 1), one coloured bar per class.
target_colors = ['#85bfdc', '#f64c72']
ax = sns.countplot(x='output', data=df, palette=target_colors)
ax.set_xticklabels(['less chance of heart attack', 'more chance of heart attack'])
ax.set_title("Target Distribution")
# Hide the tick marks on the x axis; the text labels stay.
ax.tick_params(bottom=False)

In [None]:
# Age distribution split by outcome, with a box-plot marginal above the histogram.
# Colours are pinned per label with color_discrete_map (the same mapping the
# cholesterol box plot uses) instead of a colour sequence, whose assignment
# depends on which label happens to appear first in the data.
fig = px.histogram(
    df2,
    x="age",
    color="output",
    marginal="box",
    hover_data=df2.columns,  # take the hover columns from the frame being plotted
    color_discrete_map={'Less chance of HA': '#85bfdc', 'More chance of HA': '#f64c72'},
)
fig.update_layout(
    title="Heart attack chance corresponding to age"
)
fig.show()

In [None]:
# Density curves of resting blood pressure (trtbps, in mm Hg) for each outcome group.
# The title and axis label previously said "resting heart rate", which is not what
# trtbps measures; they now name the plotted feature correctly.
more = df.loc[df['output'] == 1, 'trtbps']
less = df.loc[df['output'] == 0, 'trtbps']
fig = ff.create_distplot(
    [less, more],
    ['less chance of heart attack', 'more chance of heart attack'],
    show_hist=False,
    colors=['#85bfdc', '#f64c72'],
)
fig.update_layout(
    title="Heart Attack chance corresponding to resting blood pressure",
    xaxis_title="Resting blood pressure (mm Hg)",
)
fig.show()

In [None]:
# Distribution plot (bins of width 5) of the maximum heart rate achieved (thalachh),
# drawn separately for the two outcome groups.
group_labels = ['less chance of heart attack', 'more chance of heart attack']
less, more = (df.loc[df['output'] == flag, 'thalachh'] for flag in (0, 1))
fig = ff.create_distplot([less, more], group_labels, bin_size=5,
                         colors=['#85bfdc', '#f64c72'])
fig.update_layout(
    xaxis_title="Maximum heart rate achieved",
    title="Heart Attack chance corresponding to maximum heart rate achieved",
)
fig.show()

In [None]:
# Cholesterol by chest-pain type, one box per outcome label, with fixed colours per label.
outcome_palette = {'Less chance of HA': '#85bfdc', 'More chance of HA': '#f64c72'}
fig = px.box(
    df2,
    x="cp",
    y="chol",
    color='output',
    color_discrete_map=outcome_palette,
)
fig.update_layout(
    title="Effects of cholestrol corresponding to chest pain type on chances of heart attack"
)
fig.show()

In [None]:
# Correlation heatmap of the non-categorical columns (the target `output` included).
# The categorical columns are dropped first; 'exng' used to be listed twice here.
categorical_cols = ['sex', 'cp', 'fbs', 'exng', 'restecg', 'thall', 'caa', 'slp']
temp = df.drop(columns=categorical_cols)
fig, ax = plt.subplots(1, 1, figsize=(6, 6))
df_cor = temp.corr()
# Mask the upper triangle (diagonal included) so each pair is shown once.
# The builtin `bool` is used because the `np.bool` alias raises AttributeError
# on NumPy 1.24-1.26.
half = np.triu(np.ones_like(df_cor, dtype=bool))

my_colors = ['#85bfdc', '#f64c72']
cmap = matplotlib.colors.LinearSegmentedColormap.from_list('Custom', my_colors)

# NOTE: the colour scale is clipped to [0, 0.4], so every negative correlation
# gets the lowest colour and can only be told apart through the annotations.
heatmap = sns.heatmap(
    df_cor,
    square=True,
    mask=half,
    linewidths=2.5,  # seaborn's documented keyword for the cell separator width
    vmax=0.4, vmin=0,
    cmap=cmap,
    cbar=False,
    ax=ax,
    annot=True,
)

heatmap.set(title="Heatmap of continous variables")
# Keep the y labels horizontal; take them from the y axis itself rather than
# borrowing the x-axis labels.
heatmap.set_yticklabels(heatmap.get_yticklabels(), rotation=0)
heatmap.spines['top'].set_visible(True)
fig.text(1.2, 0.85, '''* thalachh(Maximum heart rate achieved) is positively correlated while,
* oldpeak is negatively correlated with the output ''',
         fontweight='light', fontfamily='serif', fontsize=11, va='top', ha='right')

plt.tight_layout()

In [None]:
# Parallel-categories diagram linking every categorical feature to the outcome.

# Categorical columns in the left-to-right order in which they appear in the diagram.
parcats_columns = ['exng', 'slp', 'restecg', 'fbs', 'thall', 'caa', 'cp', 'sex']

# One dimension per categorical column, each fed from its OWN column.
# (restecg, thall, caa and slp previously all used df.sex as their values, so
# four axes showed the sex split under a different label.)
feature_dims = [
    go.parcats.Dimension(values=df[name], label=name)
    for name in parcats_columns
]

# Final axis: the binary target, with readable tick text for 0 and 1.
survival_dim = go.parcats.Dimension(
    values=df.output, label="Outcome", categoryarray=[0, 1],
    ticktext=['Less chance', 'More chance']
)

# Ribbons are coloured by the target value: 0 -> '#85bfdc', 1 -> '#f64c72'.
color = df.output
colorscale = [[0, '#85bfdc'], [1, '#f64c72']]

fig = go.Figure(data=[go.Parcats(
    dimensions=feature_dims + [survival_dim],
    line={'color': color, 'colorscale': colorscale},
    hoveron='color', hoverinfo='count+probability',
    labelfont={'size': 18, 'family': 'Times'},
    tickfont={'size': 16, 'family': 'Times'},
    arrangement='freeform')])
fig.update_layout(title="Plotly parallel categorical plot for all the categorical labels", )
fig.show()

In [None]:
# For every categorical feature, draw one bar per category showing the percentage
# of patients in that category whose target is 1 ("more chance of heart attack").
lbs = ['sex', 'cp', 'fbs', 'exng', 'restecg', 'thall', 'caa', 'slp']

rows = 3
cols = 3

subplot_titles = list(lbs)

# 3x3 grid; the ninth cell stays empty because there are only eight features.
specs = [[{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
         [{"type": "bar"}, {"type": "bar"}, {"type": "bar"}],
         [{"type": "bar"}, {"type": "bar"}, None]]

fig = make_subplots(
    rows=rows,
    cols=cols,
    subplot_titles=subplot_titles,
    specs=specs,
    print_grid=False
)

bar_colors = ['#85bfdc', '#9999c9', '#aa77aa', '#cc6397', '#f64c72']

for i, name in enumerate(lbs):
    # Row-major placement: BOTH indices come from the column count. The column
    # used to be `i % rows`, which is only right while rows == cols.
    row, col = divmod(i, cols)
    # Group by the category values actually present (sorted by groupby) rather
    # than assuming they are exactly 0..k-1.
    grouped = df.groupby(name)['output']
    pct_more = 100 * grouped.sum() / grouped.size()
    fig.add_trace(
        go.Bar(
            x=pct_more.index.tolist(),
            y=pct_more.tolist(),
            marker_color=bar_colors,
        ),
        row=row + 1,
        col=col + 1,
    )

fig.update_layout(autosize=True,
                  title="Percertage of people having 'more chance of heart attack' for each type",
                  title_x=0.5,
                  showlegend=False)
fig.show()

For certain categories the chance of heart attack was found to be high:

- sex = 0
- cp = 2, 3
- thall = 2
- caa = 0, 4
- slp = 2

In [None]:
# 3D scatter of oldpeak, thalachh and age, coloured by outcome label, with the
# marker size taken from trtbps.
# Colours are pinned per label with color_discrete_map (the same mapping the
# cholesterol box plot uses) so they do not depend on which label appears first
# in the data.
fig = px.scatter_3d(
    df2,
    x='oldpeak',
    y='thalachh',
    z='age',
    color='output',
    size='trtbps',
    color_discrete_map={'Less chance of HA': '#85bfdc', 'More chance of HA': '#f64c72'},
)
fig.show()

In [None]:
# Cholesterol against maximum heart rate achieved, faceted by chest-pain type
# (columns) and sex (rows), coloured by outcome label.
# Colours are pinned per label with color_discrete_map (the same mapping the
# cholesterol box plot uses) so they do not depend on which label appears first
# in the data.
fig = px.scatter(
    df2,
    x='thalachh',
    y='chol',
    color='output',
    facet_col='cp',
    facet_row='sex',
    color_discrete_map={'Less chance of HA': '#85bfdc', 'More chance of HA': '#f64c72'},
)

fig.show()

#  Your suggestions will be highly appreciated. Please upvote if you like it...