In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Walk the Kaggle input directory and print the full path of every file found in it.
input_file_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the heart-attack CSV into a DataFrame and preview its first five rows.
filepath = "../input/heart-attack-analysis-prediction-dataset/heart.csv"
df = pd.read_csv(filepath_or_buffer=filepath)
df.head(n=5)

# About the DataSet
###### Age : Age of the patient
###### Sex : Sex of the patient
###### exang: exercise induced angina (1 = yes; 0 = no)
###### ca: number of major vessels (0-3)
###### cp : Chest pain type
   ######   - Value 1: typical angina
   ######   - Value 2: atypical angina
   ######   - Value 3: non-anginal pain
   ######   - Value 4: asymptomatic
###### trtbps : resting blood pressure (in mm Hg)
###### chol : cholesterol in mg/dl fetched via BMI sensor
###### fbs : (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)
###### restecg : resting electrocardiographic results
   ###### - Value 0: normal
   ###### - Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)
   ###### - Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria
###### thalachh : maximum heart rate achieved
###### output : 0 = less chance of heart attack, 1 = more chance of heart attack

# THE PURPOSE OF THIS NOTEBOOK IS NOT TO ANALYZE OR PREDICT THE OUTPUT, JUST TO EXPLORE THE DIFFERENT THINGS WE CAN DO USING SEABORN AND TO SEE HOW THE PLOT CHANGES USING DIFFERENT FEATURES.

### Let's introduce the Seaborn Library using the Heart Attack dataset. 

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

The target of this dataset is the "output" variable, so we are going to make some plots in order to extract some insights without using any Machine Learning algorithm.

##  1 - Scatterplots & Regplots - Looking for relationship between features

In [None]:
# Side-by-side regression plots of cholesterol and resting blood pressure against age.
fig, axes = plt.subplots(1, 2, figsize=(20,6))
panels = (
    ('chol', 'Cholesterol (mg/dl)'),
    ('trtbps', 'Resting blood pressure (mmHg)'),
)
for ax, (column, ylabel) in zip(axes, panels):
    sns.regplot(data=df, x='age', y=column, ax=ax)
    ax.set_xlabel('Age')
    ax.set_ylabel(ylabel)
plt.show()

In [None]:
# Same two relationships as scatter plots, with points coloured by the `sex` column.
fig, axes = plt.subplots(1, 2, figsize=(20,6))
ax_chol, ax_bp = axes

sns.scatterplot(data=df, x='age', y='chol', hue='sex', ax=ax_chol)
ax_chol.set(xlabel='Age', ylabel='Cholesterol (mg/dl)')

sns.scatterplot(data=df, x='age', y='trtbps', hue='sex', ax=ax_bp)
ax_bp.set(xlabel='Age', ylabel='Resting blood pressure (mmHg)')
plt.show()

In [None]:
# Cholesterol vs. age, one regression panel per value of `sex` (0 on the left, 1 on the right).
fig, axes = plt.subplots(1, 2, figsize=(20,6))
sex_panels = [(0, 'Sex = Female'), (1, 'Sex = Male')]
for ax, (sex_code, panel_title) in zip(axes, sex_panels):
    patients = df.loc[df['sex'] == sex_code]
    sns.regplot(data=patients, x='age', y='chol', ax=ax)
    ax.set_title(panel_title)
    ax.set_xlabel('Age')
    ax.set_ylabel('Cholesterol (mg/dl)')
plt.show()

In [None]:
# Resting blood pressure vs. age, one regression panel per value of `sex`.
fig, axes = plt.subplots(1, 2, figsize=(20,6))
sex_panels = (
    # (sex code, panel title, y-axis label)
    (0, 'Sex = Female', ' Resting blood pressure (mmHg)'),
    (1, 'Sex = Male', 'Resting blood pressure (mmHg)'),
)
for ax, (sex_code, panel_title, ylabel) in zip(axes, sex_panels):
    sns.regplot(data=df.loc[df['sex'] == sex_code], x='age', y='trtbps', ax=ax)
    ax.set(title=panel_title, xlabel='Age', ylabel=ylabel)
plt.show()



In [None]:
# Maximum heart rate vs. age, split by outcome:
# rows with output == 1 on the left (red), rows with output == 0 on the right.
fig, axes = plt.subplots(1,2, figsize=(20,6))
sns.regplot(data=df.loc[df['output']==1], x='age', y='thalachh', ax=axes[0], color='red', label='Heart Attack = 1')
axes[0].set_xlabel('Age')
axes[0].set_ylabel('Maximum Heart Rate Achieved')

sns.regplot(data=df.loc[df['output']==0], x='age', y='thalachh', ax=axes[1], label='Heart Attack = 0')
axes[1].set_xlabel('Age')
axes[1].set_ylabel('Maximum Heart Rate Achieved')

# A single figure-level legend gathers the labelled artists of both panels.
fig.legend()
# Finish with plt.show(), like the other plotting cells, so the Legend repr
# is not echoed as the cell's output.
plt.show()

### Some insights from these previous plots

We can see that there is a slight relationship between cholesterol & age and also a slight relationship between resting blood pressure (RBP) & age too. So we can say that older people tend to have a higher cholesterol rate; something similar happens to the RBP.
Concretely, this relationship doesn't vary a lot between men and women. So now let's see how these two variables affect the output.
Also, from the last two regplots we can see that young people who suffered a heart attack registered a higher maximum heart rate than young people who didn't. From age 60 onward, this variation is not that remarkable.

## 2 - Boxplots & Violinplots - Studying categorical variables

In [None]:
# Box plots of resting blood pressure and cholesterol per `output` value, split by `sex`.
fig, axes = plt.subplots(1, 2, figsize=(20,6))
box_panels = [
    {
        'column': 'trtbps',
        'title': 'Relationship between RBP and Heart Attack',
        'ylabel': ' Resting blood pressure (mmHg)',
    },
    {
        'column': 'chol',
        'title': 'Relationship between Cholesterol and Heart Attack',
        'ylabel': 'Cholesterol (mg/dl)',
    },
]
for ax, panel in zip(axes, box_panels):
    sns.boxplot(data=df, x='output', y=panel['column'], hue='sex', ax=ax)
    ax.set_title(panel['title'])
    ax.set_xlabel('Heart attack (0 or 1)')
    ax.set_ylabel(panel['ylabel'])
plt.show()



In [None]:
# Violin plots of the ECG result and chest-pain type per `output` value, split by `sex`.
fig, axes = plt.subplots(1, 2, figsize=(20,6))
ax_ecg, ax_cp = axes

sns.violinplot(data=df, x='output', y='restecg', hue='sex', ax=ax_ecg)
ax_ecg.set(
    title='Relationship between Electrocardiographic Results and Heart Attack',
    xlabel='Heart attack (0 or 1)',
    ylabel=' Electrocardiographic Results',
)

sns.violinplot(data=df, x='output', y='cp', hue='sex', ax=ax_cp)
ax_cp.set(
    title='Relationship between Chest Pain and Heart Attack',
    xlabel='Heart attack (0 or 1)',
    ylabel='Chest Pain Type',
)
plt.show()

## 3 - Histograms & Densities

We can plot a general histogram for the people who suffered a heart attack by indexing with Pandas using output=1. For this purpose, we draw a histogram of their ages, next to a kernel density estimate of the same data.

In [None]:
# Ages of the rows whose `output` is 1, selected once with a single .loc call
# (row mask + column) and reused by both panels.
attack_ages = df.loc[df['output'] == 1, 'age']

fig, axes = plt.subplots(1, 2, figsize=(20,6))

# Left panel: histogram of the ages.
# histplot is seaborn's replacement for the deprecated distplot(kde=False).
sns.histplot(x=attack_ages, ax=axes[0])
axes[0].set_title('Number of people who suffered a heart attack & age')
axes[0].set_xlabel('Age')
axes[0].set_ylabel(' People suffered a Heart Attack')

# Right panel: kernel density estimate of the same ages.
# fill=True is the replacement for the deprecated shade=True.
sns.kdeplot(x=attack_ages, fill=True, ax=axes[1])
axes[1].set_xlabel('Age')

# Finish with plt.show(), like the other plotting cells, so no Text repr is echoed.
plt.show()


We can also see this same plot separated by gender, in order to look for differences between men and women who have suffered a heart attack.

In [None]:
fig, axes = plt.subplots(1, 2, figsize=(20,6))

# Ages of the rows with output == 1, keyed by legend label
# (sex == 0 is labelled 'Female', sex == 1 'Male').
attack_ages_by_sex = {
    'Female': df.loc[(df['output'] == 1) & (df['sex'] == 0), 'age'],
    'Male': df.loc[(df['output'] == 1) & (df['sex'] == 1), 'age'],
}

# Left panel: overlaid histograms, 8 bins each.
# histplot is seaborn's replacement for the deprecated distplot(kde=False);
# alpha=0.4 keeps the bars see-through so both groups stay visible.
for sex_label, ages in attack_ages_by_sex.items():
    sns.histplot(x=ages, bins=8, alpha=0.4, label=sex_label, ax=axes[0])
axes[0].legend()
axes[0].set_xlabel('Age')
axes[0].set_ylabel('Number of heart attacks by gender')

# Right panel: overlaid kernel density estimates.
# fill=True is the replacement for the deprecated shade=True.
for sex_label, ages in attack_ages_by_sex.items():
    sns.kdeplot(x=ages, fill=True, label=sex_label, ax=axes[1])
axes[1].legend()
axes[1].set_xlabel('Age')

# Finish with plt.show(), like the other plotting cells, so no Text repr is echoed.
plt.show()

## 4 - Heatmaps : Finding correlation between variables

Now, for the last plot, we are going to show the correlation matrix using a seaborn heatmap.

In [None]:
# Pearson correlation matrix of all columns; computed once and reused by both heatmaps.
correlation = df.corr(method='pearson')

# Both heatmaps are drawn on the axes of this figure. No extra plt.figure() call
# is made, because that would only create a second, empty figure.
fig, axes = plt.subplots(1, 2, figsize=(25, 8))

# Left panel: the full correlation matrix.
hmap = sns.heatmap(correlation, annot=True, ax=axes[0])

# Right panel: correlation of every column with `output`, sorted from highest to lowest.
output_correlation = correlation[['output']].sort_values(by='output', ascending=False)
heatmap = sns.heatmap(output_correlation, vmin=-1, vmax=1, annot=True, cmap='BrBG', ax=axes[1])
heatmap.set_title('Features Correlating with Heart Attack', fontdict={'fontsize':18}, pad=16);