# INTRODUCTION
* This dataset contains health-status indicators for patients, such as resting blood pressure, serum cholesterol (mg/dl), and resting electrocardiographic results.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import os

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import scipy.stats as stats
import matplotlib.pyplot as plt
import seaborn as sns

import plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot, plot

init_notebook_mode(connected=True)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Print the full path of every file found under the Kaggle input directory.
for root_dir, _subdirs, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        full_path = os.path.join(root_dir, file_name)
        print(full_path)

# Any results you write to the current directory are saved as output.

# Loading and Description of Data
   * age : Age of the patient.
   * sex : Gender of the patient (1 = male, 0 = female).
   * cp : Type of the chest pain (0,1,2,3).
   * trestbps : Resting blood pressure in mm Hg.
   * chol : Serum cholesterol in mg/dl.
   * fbs : 1 if fasting blood sugar is greater than 120 mg/dl, 0 otherwise.
   * restecg : Resting electrocardiographic results (0,1,2)
   * thalach : Achieved maximum heart rate.
   * exang : 1 for exercise induced angina, 0 for not.
   * oldpeak : ST depression induced by exercise relative to rest.
   * slope : Slope of the peak exercise ST segment.
   * ca : Number of major vessels colored by fluoroscopy (0,1,2,3).
   * thal : A blood disorder called thalassemia (3 = normal; 6 = fixed defect; 7 = reversible defect)
   * target : Heart disease (1 = yes 0 = no)
   

In [None]:
# Location of the heart-disease CSV inside the Kaggle input directory.
heart_csv_path = "/kaggle/input/heart-disease-uci/heart.csv"
data = pd.read_csv(heart_csv_path)

In [None]:
# Print pandas' summary of the frame (columns, dtypes, non-null counts).
data.info()

In [None]:
# Per column: True if at least one value is missing.
data.isna().any()

# There are no null values.

In [None]:
# Show the first five rows of the frame.
data.head(n=5)

In [None]:
# Replace the numeric sex codes with readable labels
# (1 -> "male", anything else -> "female").
# Values that are already "male" are kept as-is, so re-running this cell does
# not silently relabel every patient as "female".
sex = ["male" if each in (1, "male") else "female" for each in data["sex"]]
data["sex"] = sex

# Visualization

In [None]:
# Let's take a look at the proportion of the patients' genders.

print(data.sex.value_counts())

# The column is passed by keyword: current seaborn treats the first positional
# argument of countplot as `data`, not as the variable to count.
sns.countplot(x="sex", data=data)
plt.xlabel("Sex")
plt.ylabel("Count")
plt.show()  # render the figure instead of echoing the Text repr of the label

# There are 207 male and 96 female patients.

In [None]:
# Histogram of the patients' ages.

data1 = [
    go.Histogram(
        x=data["age"],
        opacity=0.8,
        name="Ages",
        marker={"color": "rgba(235,123,25,0.7)"},
    )
]

layout = go.Layout(
    title="Distribution of Age",
    xaxis={"title": "Age"},
    yaxis={"title": "Count"},
)

fig = go.Figure(data=data1, layout=layout)

iplot(fig)


In [None]:
# How the resting electrocardiographic result (restecg) varies with gender and age.

swarm_fig, swarm_ax = plt.subplots(figsize=(15, 10))
sns.swarmplot(data=data, x="sex", y="age", hue="restecg", ax=swarm_ax)
plt.show()



We can see here that result 2 occurs only in patients aged between 50 and 60.

In [None]:
# Is there any correlation between resting blood pressure (trestbps) and
# serum cholesterol (chol)?

# seaborn renamed jointplot's `size` argument to `height` and removed
# JointGrid.annotate, so the Pearson statistic is computed and drawn explicitly.
joint_grid = sns.jointplot(x="chol", y="trestbps", data=data, kind="kde", height=7)

pearson_r, p_value = stats.pearsonr(data["chol"], data["trestbps"])
joint_grid.ax_joint.annotate(
    "pearsonr = {:.2f}; p = {:.2g}".format(pearson_r, p_value),
    xy=(0.05, 0.95),           # top-left corner of the joint axes
    xycoords="axes fraction",
    ha="left",
    va="top",
)
plt.show()



There is almost no relation between resting blood pressure and serum cholesterol.

The density plot also shows that most patients have a resting blood pressure of about 130 mm Hg and a serum cholesterol of about 210 mg/dl.

In [None]:
# Histogram of the oldpeak values.

data3 = go.Histogram(
    x=data["oldpeak"],
    opacity=0.5,
    name="Oldpeak",
    marker={"color": "rgba(123,145,25,0.8)"},
)

layout = go.Layout(
    title="Distribution of Oldpeak Values",
    xaxis={"title": "Oldpeak"},
    yaxis={"title": "Count"},
)

fig = go.Figure(data=data3, layout=layout)

iplot(fig)

* Now let's split the data into patients who have heart disease and patients who do not.

In [None]:
# Rows with target == 1 go to `ill`, rows with target == 0 go to `normal`.
ill = data.loc[data["target"] == 1]
normal = data.loc[data["target"] == 0]



In [None]:
# Report the mean age of each group. The bare `.mean()` expressions that used
# to precede the prints were dropped: their results were discarded, because a
# notebook cell only displays its last expression.
print("Average age of ill patients is", ill.age.mean())
print("Average age of normal patients is", normal.age.mean())

In [None]:
# Grouped bar chart comparing the column means of the two groups.
vital_columns = ["trestbps", "chol", "thalach"]

trace1 = go.Bar(
    x=vital_columns,
    y=[ill[column].mean() for column in vital_columns],
    name="Ill",
)

trace2 = go.Bar(
    x=vital_columns,
    y=[normal[column].mean() for column in vital_columns],
    name="Normal",
)

data5 = [trace1, trace2]

layout = go.Layout(barmode="group", title="Ill vs Normal Patient")

fig = go.Figure(data=data5, layout=layout)

iplot(fig)