In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input mount and print the full path of every file found,
# so dataset paths can be copied straight into the loading cell.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import pandas as pd

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Location of the dataset CSV relative to the notebook's working directory.
HEART_CSV_PATH = '../input/heart-attack-analysis-prediction-dataset/heart.csv'
heart_df = pd.read_csv(HEART_CSV_PATH)

In [None]:
# Preview the first five rows to see the columns and typical values.
heart_df.head(n=5)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
heart_df.describe()

In [None]:
# Column dtypes: quick check of how pandas parsed each column of the CSV.
heart_df.dtypes

In [None]:
# Number of missing values in each column.
heart_df.isna().sum()

In [None]:
# Age distribution: one bar per distinct age value.
fig, ax = plt.subplots(figsize=(20, 10))
sns.countplot(x=heart_df['age'], ax=ax)
# Label the axes AFTER plotting: depending on the seaborn version, a label set
# beforehand is replaced by the Series name ('age') when the plot is drawn.
ax.set_xlabel("Age")
ax.set_ylabel("Count")
ax.set_title("Age distribution of patients")
plt.show()

In [None]:
# Sex distribution (pie chart on the left, bar chart on the right).
# Labels and colours are looked up from the sex code (1 = male, 0 = female)
# instead of being assigned by position: value_counts() orders rows by
# frequency, so positional labels are only right if males happen to outnumber
# females.
sex_names = {1: 'Male', 0: 'Female'}
sex_palette = {1: 'blue', 0: 'pink'}
sex_counts = heart_df['sex'].value_counts()
sex_labels = [sex_names.get(code, str(code)) for code in sex_counts.index]
sex_colors = [sex_palette.get(code, 'grey') for code in sex_counts.index]
sex_explode = (.1,) * len(sex_counts)  # one offset per slice actually present

fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(20, 10))

pie_ax.pie(sex_counts, labels=sex_labels, startangle=90, shadow=True, explode=sex_explode,
           autopct='%1.1f%%', colors=sex_colors)
pie_ax.set_title('Sex of patients')
pie_ax.legend(loc='lower right')

# Same counts as bars, with readable category names on the x axis.
bar_ax.bar(sex_labels, sex_counts.to_numpy(), color=sex_colors)
bar_ax.set_xlabel('Sex')
bar_ax.set_ylabel('Count')
plt.show()

In [None]:
# Count of patients per chest-pain type, with readable names.
# Names are looked up from the cp CODE (1 = typical, 2 = atypical,
# 3 = non-anginal, 0 = asymptomatic). value_counts() sorts by frequency, so a
# row's position is its frequency rank, not its code; relabelling by position
# puts names on the wrong categories.
# The frame is built explicitly because the column names produced by
# value_counts().reset_index() changed in pandas 2.0; the 'index' / 'cp'
# columns are kept so the plotting cell can keep using them.
cp_names = {0: 'asymptomatic', 1: 'typical', 2: 'atypical', 3: 'non-anginal'}
cp_counts = heart_df['cp'].value_counts()
cp_data = pd.DataFrame({
    'index': [cp_names.get(code, str(code)) for code in cp_counts.index],
    'cp': cp_counts.to_numpy(),
})
cp_data

In [None]:
# Chest pain distribution (pie chart on the left, bar chart on the right).
# Labels are derived from the cp code of each value_counts() row
# (1 = typical, 2 = atypical, 3 = non-anginal, 0 = asymptomatic) so that every
# slice and bar carries the name of the category it actually counts, whatever
# the frequency order is.
cp_names = {0: 'asymptomatic', 1: 'typical', 2: 'atypical', 3: 'non-anginal'}
cp_counts = heart_df['cp'].value_counts()
cp_labels = [cp_names.get(code, str(code)) for code in cp_counts.index]
cp_explode = (.1,) * len(cp_counts)  # one offset per slice actually present

fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(20, 10))

pie_ax.pie(cp_counts, labels=cp_labels, startangle=90, shadow=True, explode=cp_explode,
           autopct='%1.1f%%')
pie_ax.set_title('Type of chest pain')
pie_ax.legend(loc='lower right')

sns.barplot(x=cp_labels, y=cp_counts.to_numpy(), ax=bar_ax)
bar_ax.set_xlabel('Chest pain type')
bar_ax.set_ylabel('Count')
plt.show()

In [None]:
# Resting blood pressure (left) and maximum heart rate (right) distributions.
# sns.distplot is deprecated; histplot with kde=True / stat='density' is the
# replacement seaborn documents for it (cut=3 matches distplot's KDE extent).
fig, (bp_ax, hr_ax) = plt.subplots(1, 2, figsize=(20, 10))

# Blood pressure
sns.histplot(heart_df['trtbps'], kde=True, stat='density', kde_kws=dict(cut=3),
             color='purple', ax=bp_ax)
bp_ax.set_xlabel('resting blood pressure (in mm Hg)')

# Maximum heart rate
sns.histplot(heart_df['thalachh'], kde=True, stat='density', kde_kws=dict(cut=3),
             color='teal', ax=hr_ax)
hr_ax.set_xlabel('maximum heart rate achieved')
plt.show()

In [None]:
# Cholesterol distribution (left) and fasting-blood-sugar split (right).
fig, (chol_ax, fbs_ax) = plt.subplots(1, 2, figsize=(20, 10))

# cholesterol: histplot replaces the deprecated sns.distplot
# (kde=True / stat='density' / cut=3 reproduce distplot's appearance).
sns.histplot(heart_df['chol'], kde=True, stat='density', kde_kws=dict(cut=3),
             color='yellow', ax=chol_ax)
chol_ax.set_xlabel('cholesterol in mg/dl fetched via BMI sensor')

# fasting blood sugar: labels and colours are looked up from the fbs code
# (1 = true, 0 = false) rather than assumed from value_counts() order, which
# is by frequency.
fbs_names = {0: 'False', 1: 'True'}
fbs_palette = {0: 'lime', 1: 'grey'}
fbs_counts = heart_df['fbs'].value_counts()
fbs_labels = [fbs_names.get(code, str(code)) for code in fbs_counts.index]
fbs_colors = [fbs_palette.get(code, 'silver') for code in fbs_counts.index]
fbs_explode = (.1,) * len(fbs_counts)  # one offset per slice actually present
fbs_ax.pie(fbs_counts, labels=fbs_labels, startangle=90, shadow=True, explode=fbs_explode,
           autopct='%1.1f%%', colors=fbs_colors)
fbs_ax.set_title('fasting blood sugar > 120 mg/dl')
fbs_ax.legend(loc='lower right')
plt.show()

In [None]:
# Count of patients per resting-ECG result, with readable names.
# Names are looked up from the restecg CODE (1 = normal, 2 = having ST-T,
# 0 = hypertrophy). value_counts() sorts by frequency, so a row's position is
# its frequency rank, not its code; relabelling by position mislabels
# categories.
# The frame is built explicitly because the column names produced by
# value_counts().reset_index() changed in pandas 2.0; the 'index' / 'restecg'
# columns are kept so the plotting cell can keep using them.
ecg_names = {0: 'hypertrophy', 1: 'normal', 2: 'having ST-T'}
ecg_counts = heart_df['restecg'].value_counts()
ecg_data = pd.DataFrame({
    'index': [ecg_names.get(code, str(code)) for code in ecg_counts.index],
    'restecg': ecg_counts.to_numpy(),
})
ecg_data

In [None]:
# Resting electrocardiographic results (pie chart left, bar chart right).
# Labels are derived from the restecg code of each value_counts() row
# (1 = normal, 2 = having ST-T, 0 = hypertrophy) so every slice and bar is
# named after the category it actually counts, whatever the frequency order.
ecg_names = {0: 'hypertrophy', 1: 'normal', 2: 'having ST-T'}
ecg_counts = heart_df['restecg'].value_counts()
ecg_labels = [ecg_names.get(code, str(code)) for code in ecg_counts.index]
ecg_explode = (.1,) * len(ecg_counts)  # one offset per slice actually present

fig, (pie_ax, bar_ax) = plt.subplots(1, 2, figsize=(20, 10))

pie_ax.pie(ecg_counts, labels=ecg_labels, startangle=90, shadow=True, explode=ecg_explode,
           autopct='%1.1f%%')
pie_ax.set_title('resting electrocardiographic results')
pie_ax.legend(loc='lower right')

sns.barplot(x=ecg_labels, y=ecg_counts.to_numpy(), ax=bar_ax)
bar_ax.set_xlabel('Resting ECG result')
bar_ax.set_ylabel('Count')
plt.show()

In [None]:
# Correlation heatmap: pairwise correlations between the columns of heart_df,
# with the coefficient printed in every cell.
corr_matrix = heart_df.corr()
fig, ax = plt.subplots(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True, cmap='jet', ax=ax)

In [None]:
# Pairplot: pairwise relationships between columns, coloured by the target.
# sns.pairplot is a figure-level function that creates its own figure, so a
# preceding plt.figure(...) call would only emit an extra blank figure.
sns.pairplot(heart_df, hue='output')

# Analysis


1. age - age in years

2. sex - sex (1 = male; 0 = female)

3. cp - chest pain type (1 = typical angina; 2 = atypical angina; 3 = non-anginal pain; 0 = asymptomatic)

4. trtbps (trestbps) - resting blood pressure (in mm Hg on admission to the hospital)

5. chol - serum cholesterol in mg/dl

6. fbs - fasting blood sugar > 120 mg/dl (1 = true; 0 = false)

7. restecg - resting electrocardiographic results (1 = normal; 2 = having ST-T wave abnormality; 0 = hypertrophy)

8. thalachh (thalach) - maximum heart rate achieved

9. exang - exercise induced angina (1 = yes; 0 = no)

10. oldpeak - ST depression induced by exercise relative to rest

11. slope - the slope of the peak exercise ST segment (2 = upsloping; 1 = flat; 0 = downsloping)

12. ca - number of major vessels (0-3) colored by fluoroscopy

13. thal - 2 = normal; 1 = fixed defect; 3 = reversible defect

14. output (num) - the predicted attribute - diagnosis of heart disease (angiographic disease status) (Value 0 = < 50% diameter narrowing; Value 1 = > 50% diameter narrowing)

In [None]:
import plotly.figure_factory as ff

In [None]:
# Age distribution split by outcome: rows with output == 1 vs output == 0.
# Legend labels are spelled 'Heart Disease' to match the categorical plots in
# this notebook.
age_with_disease = heart_df.loc[heart_df['output'] == 1, 'age']
age_without_disease = heart_df.loc[heart_df['output'] == 0, 'age']
ff_age = ff.create_distplot(
    [age_with_disease, age_without_disease],
    ['Heart Disease', 'No Heart Disease'],
    colors=['red', 'blue'],
)
ff_age.update_layout(title='Distribution of Heart Disease wrt Age', xaxis_title='Age')
ff_age.show()

In [None]:
# Resting blood pressure distribution split by outcome (output == 1 vs 0).
# The figure gets its own name (it is not an age plot) and the legend labels
# are spelled 'Heart Disease' to match the categorical plots in this notebook.
trtbps_with_disease = heart_df.loc[heart_df['output'] == 1, 'trtbps']
trtbps_without_disease = heart_df.loc[heart_df['output'] == 0, 'trtbps']
ff_trtbps = ff.create_distplot(
    [trtbps_with_disease, trtbps_without_disease],
    ['Heart Disease', 'No Heart Disease'],
    colors=['red', 'blue'],
)
ff_trtbps.update_layout(title='Distribution of Heart Disease wrt resting blood pressure (in mm Hg)',
                        xaxis_title='resting blood pressure')
ff_trtbps.show()

In [None]:
# Maximum heart rate distribution split by outcome (output == 1 vs 0).
# The figure gets its own name (it is not an age plot) and the legend labels
# are spelled 'Heart Disease' to match the categorical plots in this notebook.
thalachh_with_disease = heart_df.loc[heart_df['output'] == 1, 'thalachh']
thalachh_without_disease = heart_df.loc[heart_df['output'] == 0, 'thalachh']
ff_thalachh = ff.create_distplot(
    [thalachh_with_disease, thalachh_without_disease],
    ['Heart Disease', 'No Heart Disease'],
    colors=['red', 'blue'],
)
ff_thalachh.update_layout(title='Distribution of Heart Disease wrt maximum heart rate achieved',
                          xaxis_title='maximum heart rate achieved')
ff_thalachh.show()

In [None]:
# Cholesterol distribution split by outcome (output == 1 vs 0).
# The figure gets its own name (it is not an age plot); 'Disease' and
# 'cholesterol' are spelled correctly in the legend, title and axis label.
chol_with_disease = heart_df.loc[heart_df['output'] == 1, 'chol']
chol_without_disease = heart_df.loc[heart_df['output'] == 0, 'chol']
ff_chol = ff.create_distplot(
    [chol_with_disease, chol_without_disease],
    ['Heart Disease', 'No Heart Disease'],
    colors=['red', 'blue'],
)
ff_chol.update_layout(title='Distribution of Heart Disease wrt cholesterol',
                      xaxis_title='cholesterol in mg/dl fetched via BMI sensor')
ff_chol.show()

In [None]:
# Oldpeak (ST depression) distribution split by outcome (output == 1 vs 0).
# The figure gets its own name (it is not an age plot) and the legend labels
# are spelled 'Heart Disease' to match the categorical plots in this notebook.
oldpeak_with_disease = heart_df.loc[heart_df['output'] == 1, 'oldpeak']
oldpeak_without_disease = heart_df.loc[heart_df['output'] == 0, 'oldpeak']
ff_oldpeak = ff.create_distplot(
    [oldpeak_with_disease, oldpeak_without_disease],
    ['Heart Disease', 'No Heart Disease'],
    colors=['red', 'blue'],
)
ff_oldpeak.update_layout(title='Distribution of Heart Disease wrt oldpeak', xaxis_title='Oldpeak')
ff_oldpeak.show()

In [None]:
# Heart-disease counts by sex.
# DataFrame.replace returns a new frame, so the codes are relabelled without
# assigning into a slice of heart_df (which raises SettingWithCopyWarning and
# is not guaranteed to take effect). Codes not listed are left unchanged.
sex_data = heart_df[['sex', 'output']].replace({
    'sex': {1: 'male', 0: 'female'},
    'output': {1: 'Heart Disease', 0: 'No Heart Disease'},
})
sns.catplot(x='sex', data=sex_data, kind='count', hue='output')

In [None]:
# Heart-disease counts by chest-pain type.
# - DataFrame.replace returns a new frame, so nothing is assigned into a slice
#   of heart_df (no SettingWithCopyWarning). Codes not listed are unchanged.
# - The frame has its own name so it does not overwrite the cp_data counts
#   table that the chest-pain distribution chart reads.
# - 'atypical' / 'asymptomatic' are spelled correctly.
cp_outcome_data = heart_df[['cp', 'output']].replace({
    'cp': {1: 'typical', 2: 'atypical', 3: 'non-anginal', 0: 'asymptomatic'},
    'output': {1: 'Heart Disease', 0: 'No Heart Disease'},
})
sns.catplot(x='cp', data=cp_outcome_data, kind='count', hue='output')

In [None]:
# Heart-disease counts by fasting-blood-sugar flag.
# DataFrame.replace returns a new frame, so the codes are relabelled without
# assigning into a slice of heart_df (which raises SettingWithCopyWarning and
# is not guaranteed to take effect). Codes not listed are left unchanged.
fbs_data = heart_df[['fbs', 'output']].replace({
    'fbs': {1: '>120 mg/dl', 0: '<=120 mg/dl'},
    'output': {1: 'Heart Disease', 0: 'No Heart Disease'},
})
sns.catplot(x='fbs', data=fbs_data, kind='count', hue='output')

In [None]:
# Heart-disease counts by resting-ECG result.
# - DataFrame.replace returns a new frame, so nothing is assigned into a slice
#   of heart_df (no SettingWithCopyWarning). Codes not listed are unchanged.
# - The frame has its own name so it does not overwrite the ecg_data counts
#   table that the resting-ECG distribution chart reads.
ecg_outcome_data = heart_df[['restecg', 'output']].replace({
    'restecg': {1: 'normal', 2: 'having ST-T', 0: 'hypertrophy'},
    'output': {1: 'Heart Disease', 0: 'No Heart Disease'},
})
sns.catplot(x='restecg', data=ecg_outcome_data, kind='count', hue='output')