In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(folder, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#Imports
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('whitegrid')
import plotly
import plotly.graph_objs as go
import warnings
warnings.filterwarnings('ignore')
from collections import Counter
from plotly.offline import init_notebook_mode, iplot, plot

# Content
1. [Information](#1)
1. [Load and Check Data](#2)
1. [Basic Data Analysis](#3)
    * [Categorical Variable Analysis](#4)
    * [Numerical Variable Analysis](#5)
    * [Basic Data Correlation Analysis ](#6)

<a id="1"></a>
## Information
About this dataset<br>
Age : Age of the patient<br>
Sex : Sex of the patient<br>
exng : exercise-induced angina (1 = yes; 0 = no)<br>
caa : number of major vessels (0-3)<br>
cp : chest pain type<br>
<br>
Value 1: typical angina<br>
Value 2: atypical angina<br>
Value 3: non-anginal pain<br>
Value 4: asymptomatic<br>

trtbps : resting blood pressure (in mm Hg)<br>
<br>
chol : cholesterol in mg/dl fetched via BMI sensor<br>
<br>
fbs : (fasting blood sugar > 120 mg/dl) (1 = true; 0 = false)<br>
<br>
restecg : resting electrocardiographic results<br>
<br>
Value 0: normal<br>
Value 1: having ST-T wave abnormality (T wave inversions and/or ST elevation or depression of > 0.05 mV)<br>
Value 2: showing probable or definite left ventricular hypertrophy by Estes' criteria<br>
thalachh : maximum heart rate achieved<br>
<br>
output : 0 = less chance of heart attack, 1 = more chance of heart attack<br>

<a id="2"></a>
## Load and Check Data

In [None]:
# Load the heart-attack table and the companion oxygen-saturation file from
# the Kaggle input directory (paths are relative to the notebook's working dir).
heart_df = pd.read_csv("../input/heart-attack-analysis-prediction-dataset/heart.csv")
o2_df = pd.read_csv("../input/heart-attack-analysis-prediction-dataset/o2Saturation.csv")

# DataFrame.info() writes its summary itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the cell output.
heart_df.info()

In [None]:
# (number of rows, number of columns) of the loaded table.
heart_df.shape

In [None]:
# Summary statistics for the columns of the loaded table.
heart_df.describe()

In [None]:
# Preview the first rows of the table.
heart_df.head()

In [None]:
# Per column: True if the column contains at least one missing value.
heart_df.isnull().any()
# no missing values

 ***Outlier detection***

In [None]:
def outlier(df,features):
    """Count, per row label, how many of the given columns flag it as an outlier.

    A value is flagged when it lies more than 1.5 * IQR below the 25th
    percentile or above the 75th percentile of its column (Tukey's fences).

    Parameters
    ----------
    df : pandas.DataFrame
        Table to scan.
    features : iterable of str
        Names of the columns of ``df`` to check.

    Returns
    -------
    collections.Counter
        Maps each flagged index label of ``df`` to the number of columns in
        which it was flagged. Labels that were never flagged are absent.
    """
    flagged_labels = []

    for column in features:
        values = df[column]
        lower_quartile = np.percentile(values, 25)
        upper_quartile = np.percentile(values, 75)
        fence = 1.5 * (upper_quartile - lower_quartile)

        too_low = values < lower_quartile - fence
        too_high = values > upper_quartile + fence
        flagged_labels.extend(df[too_low | too_high].index)

    return Counter(flagged_labels)

In [None]:
# Show the rows flagged as IQR outliers in at least one numeric column.
# outlier() returns a Counter, which is a dict subclass; pandas >= 2.0 raises
# TypeError for dict indexers in .loc, so pass the flagged labels as a list.
heart_df.loc[list(outlier(heart_df,["age","trtbps","chol","thalachh","oldpeak"]))]

In [None]:
# Remove every row flagged as an outlier in any numeric column and renumber the index.
# NOTE: this rebinds heart_df, and outlier() recomputes the quartiles on whatever
# frame it is given, so re-running this cell can remove further rows.
heart_df = heart_df.drop(list(outlier(heart_df,["age","trtbps","chol","thalachh","oldpeak"])),axis = 0).reset_index(drop = True)

# Re-run the detector on the reduced frame to show what is still flagged.
# The Counter is converted to a list because pandas >= 2.0 rejects dict indexers in .loc.
heart_df.loc[list(outlier(heart_df,["age","trtbps","chol","thalachh","oldpeak"]))]

<a id="3"></a>
## Basic Data Analysis

#### Categorical Variable
SEX<br>CP<br>FBS<br>RESTECG<br>EXNG<br>SLP<br>CAA<br>THALL<br>OUTPUT 
#### Numerical Variable
AGE<br>TRTBPS<br>CHOL<br>THALACHH<br>OLDPEAK

In [None]:
# Column groups passed to the plotting helpers below.
# Coded / categorical columns, including the `output` label.
c_list = "sex cp fbs restecg exng slp caa thall output".split()
# Continuous numeric columns.
n_list = "age trtbps chol thalachh oldpeak".split()

<a id="4"></a>
#### Categorical Variable Analysis

In [None]:
def cateplot(fea):
    """Draw one seaborn count plot per categorical column of ``heart_df``.

    Parameters
    ----------
    fea : iterable of str
        Names of the columns of the notebook-level ``heart_df`` to plot.
        Each column gets its own figure.
    """
    for column in fea:
        # Name the variable by keyword. A Series passed positionally is taken
        # as `data` by seaborn >= 0.12 (and only raised a FutureWarning in
        # 0.11), so it would not be counted as the x variable.
        ax = sns.countplot(x=column, data=heart_df)
        ax.set_title(column + " Count Plot")
        plt.show()


cateplot(c_list)

<a id="5"></a>
#### Numerical Variable Analysis

In [None]:
def numplot(fea):
    """Show one interactive Plotly histogram per numeric column of ``heart_df``.

    Parameters
    ----------
    fea : iterable of str
        Names of the columns of the notebook-level ``heart_df`` to plot.
        Bins start at 0.0 and are 1 unit wide for every column.
    """
    for column in fea:
        histogram = go.Histogram(
            x=heart_df[column],
            name=column,
            opacity=0.75,
            marker={"color": "lime"},
            # bins used for histogram
            xbins={"start": 0.0, "size": 1},
        )
        figure_layout = go.Layout(
            title=column + " Count Plot",
            barmode="overlay",
            bargap=0.1,
            autosize=True,
            xaxis={"title": column},
            yaxis={"title": "Count"},
        )
        iplot(go.Figure(data=[histogram], layout=figure_layout))


numplot(n_list)


<a id="6"></a>
# Basic Data Correlation Analysis
Age-Chol <br>
Age-Output <br>
Age-Trtbps<br>
Age-Exng<br>
Age-Fbs<br>
Age-Thalachh

In [None]:
# Pairwise scatter/density grid of all columns, coloured by the `output` label.
# pairplot is a figure-level function that builds its own figure, so the
# preceding plt.figure(figsize=...) only produced an empty extra figure and
# has been removed. Use pairplot's `height=` argument to resize the panels.
sns.pairplot(heart_df, hue = "output")

In [None]:
# Annotated heat map of the pairwise correlations between all columns.
corr_matrix = heart_df.corr()
plt.figure(figsize=(16, 10))
sns.heatmap(data=corr_matrix, annot=True, fmt=".2f")

Now we can see that there is a relationship between:<br>
output-oldpeak<br>
output-exng<br>
output-cp<br>
output-thalachh<br>
output-caa<br>
output-thall<br>
output-slp<br>
output-sex<br>
<br>
slp-oldpeak<br>
<br>
exng-oldpeak<br>
exng-cp<br>
exng-thalachh<br>
<br>
<br>
thalachh-age<br>
thalachh-oldpeak<br>
thalachh-slp<br>



In [None]:
# Four-panel figure: `output` (y) against oldpeak, exng, cp and thalachh (x).
# Each spec is (column, x-axis ref, y-axis ref, legend name, colour).
# The first panel leaves both axis refs unset, as in the original trace.
# NOTE(review): no `mode` is passed, so Plotly picks the scatter mode itself;
# confirm whether mode="markers" was intended.
panel_specs = [
    ("oldpeak", None, None, "Output-oldpeak", 'purple'),
    ("exng", 'x2', 'y2', "Output-Exng", 'blue'),
    ("cp", 'x3', 'y3', "Output-Cp", 'yellow'),
    ("thalachh", 'x4', 'y4', "Output-thalachh", 'green'),
]
data = [
    go.Scatter(
        x=heart_df[column],
        y=heart_df.output,
        xaxis=x_ref,
        yaxis=y_ref,
        name=legend_name,
        marker=dict(color=colour),
        text=heart_df[column],
    )
    for column, x_ref, y_ref, legend_name, colour in panel_specs
]
# Keep the individual trace names bound at notebook level, as before.
trace_1, trace_2, trace_3, trace_4 = data

# 2x2 grid: each axis occupies either the low or the high part of the canvas.
low_part, high_part = [0, 0.45], [0.55, 1]
layout = go.Layout(
    xaxis=dict(domain=low_part),
    yaxis=dict(domain=low_part),
    xaxis2=dict(domain=high_part, anchor='y2'),
    yaxis2=dict(domain=low_part, anchor='x2'),
    xaxis3=dict(domain=low_part, anchor='y3'),
    yaxis3=dict(domain=high_part, anchor='x3'),
    xaxis4=dict(domain=high_part, anchor='y4'),
    yaxis4=dict(domain=high_part, anchor='x4'),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# Four-panel figure: `output` (y) against caa, thall, slp and sex (x).
# Each spec is (column, x-axis ref, y-axis ref, legend name, colour).
# NOTE(review): no `mode` is passed, so Plotly picks the scatter mode itself;
# confirm whether mode="markers" was intended.
panel_specs = [
    ("caa", 'x1', 'y1', "Output-Caa", 'orange'),
    ("thall", 'x2', 'y2', "Output-Thall", 'red'),
    ("slp", 'x3', 'y3', "Output-Slp", 'black'),
    ("sex", 'x4', 'y4', "Output-Sex", 'purple'),
]
data = [
    go.Scatter(
        x=heart_df[column],
        y=heart_df.output,
        xaxis=x_ref,
        yaxis=y_ref,
        name=legend_name,
        marker=dict(color=colour),
        text=heart_df[column],
    )
    for column, x_ref, y_ref, legend_name, colour in panel_specs
]
# Keep the individual trace names bound at notebook level, as before.
trace_5, trace_6, trace_7, trace_8 = data

# 2x2 grid: each axis occupies either the low or the high part of the canvas.
low_part, high_part = [0, 0.45], [0.55, 1]
layout = go.Layout(
    xaxis=dict(domain=low_part),
    yaxis=dict(domain=low_part),
    xaxis2=dict(domain=high_part, anchor='y2'),
    yaxis2=dict(domain=low_part, anchor='x2'),
    xaxis3=dict(domain=low_part, anchor='y3'),
    yaxis3=dict(domain=high_part, anchor='x3'),
    xaxis4=dict(domain=high_part, anchor='y4'),
    yaxis4=dict(domain=high_part, anchor='x4'),
)

fig = go.Figure(data=data, layout=layout)
iplot(fig)

In [None]:
# slp vs. oldpeak: one marker per row, hover text shows the oldpeak value.
trace_1 = go.Scatter(
    x=heart_df["slp"],
    y=heart_df["oldpeak"],
    text=heart_df["oldpeak"],
    mode="markers",
    marker={"color": 'rgba(16, 112, 2, 0.8)'},
)
data = [trace_1]

layout = {
    "title": 'Slp-Oldpeak plot',
    "xaxis": {"title": 'Slp', "ticklen": 5, "zeroline": False},
    "yaxis": {"title": "Oldpeak"},
}
fig = {"data": data, "layout": layout}
iplot(fig)

In [None]:
# exng vs. oldpeak: one marker per row.
# Hover text must be one label per point. The original passed the 3-item tuple
# (Series, " ", Series), which is not a per-point sequence; build
# "<oldpeak> <exng>" for every row instead.
hover_text = heart_df.oldpeak.astype(str) + " " + heart_df.exng.astype(str)

trace_1=go.Scatter(
    x=heart_df.exng,
    y=heart_df.oldpeak,
    mode="markers",
    marker = dict(color = 'rgba(16, 112, 2, 0.8)'),
    text=hover_text
    )

data=[trace_1]
layout = dict(title = 'Exng-Oldpeak plot',
              xaxis= dict(title= 'Exng',ticklen= 5,zeroline= False),
              yaxis=dict(title="Oldpeak"))
fig = dict(data = data, layout = layout)
iplot(fig)

In [None]:

# Pairs plotted below: exng-cp and exng-thalachh.
def plotexng(fea):
    """Scatter-plot one column of ``heart_df`` against ``exng`` with Plotly.

    Parameters
    ----------
    fea : str
        Name of the column drawn on the y-axis; it is also used in the figure
        title and as the y-axis label.
    """
    # One hover label per point, "<fea value> <exng value>". The original
    # passed the 3-item tuple (Series, " ", Series), which is not a per-point
    # sequence of labels.
    hover_text = heart_df[fea].astype(str) + " " + heart_df.exng.astype(str)

    trace_1=go.Scatter(
        x=heart_df.exng,
        y=heart_df[fea],
        mode="markers",
        marker = dict(color = 'rgba(16, 112, 2, 0.8)'),
        text=hover_text
    )
    data=[trace_1]
    layout = dict(title = "Exng-"+fea +" plot",
                  xaxis= dict(title= 'Exng',ticklen= 5,zeroline= False),
                  yaxis=dict(title=fea))
    fig = dict(data = data, layout = layout)
    iplot(fig)


plotexng("cp")
plotexng("thalachh")

In [None]:
# Pairs plotted below: thalachh-age, thalachh-oldpeak and thalachh-slp.
def plotthalachh(fea):
    """Scatter-plot one column of ``heart_df`` against ``thalachh`` with Plotly.

    Parameters
    ----------
    fea : str
        Name of the column drawn on the y-axis; it is also used in the figure
        title and as the y-axis label.
    """
    # One hover label per point, "<fea value> <thalachh value>". The original
    # passed the 3-item tuple (Series, " ", Series), which is not a per-point
    # sequence of labels.
    hover_text = heart_df[fea].astype(str) + " " + heart_df.thalachh.astype(str)

    trace_1=go.Scatter(
        x=heart_df.thalachh,
        y=heart_df[fea],
        mode="markers",
        marker = dict(color = 'rgba(16, 112, 2, 0.8)'),
        text=hover_text
    )
    data=[trace_1]
    layout = dict(title = "thalachh-"+fea +" plot",
                  xaxis= dict(title= 'thalachh',ticklen= 5,zeroline= False),
                  yaxis=dict(title=fea))
    fig = dict(data = data, layout = layout)
    iplot(fig)


plotthalachh("age")
plotthalachh("oldpeak")
plotthalachh("slp")

When slp increases, oldpeak tends to decrease.