# Heart disease Prediction : Decision Tree Approach

### importing Libraries

In [1]:
import numpy as np
import pandas as pd

### Reading Dataset

In [2]:
df = pd.read_csv("heart.csv")
df.head()

Unnamed: 0,age,sex,cp,trtbps,chol,fbs,restecg,thalachh,exng,oldpeak,slp,caa,thall,output
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


### Checking Dataset for irregular data

In [3]:
df.dtypes

age           int64
sex           int64
cp            int64
trtbps        int64
chol          int64
fbs           int64
restecg       int64
thalachh      int64
exng          int64
oldpeak     float64
slp           int64
caa           int64
thall         int64
output        int64
dtype: object

In [4]:
df.columns

Index(['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall', 'output'],
      dtype='object')

In [5]:
x = df[['age', 'sex', 'cp', 'trtbps', 'chol', 'fbs', 'restecg', 'thalachh',
       'exng', 'oldpeak', 'slp', 'caa', 'thall']]
x = np.asarray(x)
x[:3]

array([[ 63. ,   1. ,   3. , 145. , 233. ,   1. ,   0. , 150. ,   0. ,
          2.3,   0. ,   0. ,   1. ],
       [ 37. ,   1. ,   2. , 130. , 250. ,   0. ,   1. , 187. ,   0. ,
          3.5,   0. ,   0. ,   2. ],
       [ 41. ,   0. ,   1. , 130. , 204. ,   0. ,   0. , 172. ,   0. ,
          1.4,   2. ,   0. ,   2. ]])

In [6]:
y = df['output']
y = np.asarray(y)
y[:5]

array([1, 1, 1, 1, 1], dtype=int64)

### Splitting Dataset into **Train** and **Test** parts

In [7]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

## Creating and Training Decision Tree Model with different criterion and depths

class sklearn.tree.DecisionTreeClassifier(*, criterion='gini', splitter='best', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features=None, random_state=None, max_leaf_nodes=None, min_impurity_decrease=0.0, class_weight=None, ccp_alpha=0.0)

In [11]:
from sklearn.tree import DecisionTreeClassifier  
from sklearn.metrics import accuracy_score

criterions = ["entropy", "gini"]
results = list()

for criterion in criterions:
    for i in range(10,21):
        tree = DecisionTreeClassifier(criterion=criterion, max_depth=i)
        tree.fit(x_train, y_train)
        
        yhat = tree.predict(x_test)
        accuracy = "{:3.2f}".format(accuracy_score(y_test, yhat))
        
        score = [criterion, i, accuracy]
        results.append(score)

### Sorting and Displaying results in a human readable pandas dataset

In [9]:
results.sort(key=lambda x: x[2], reverse=True)
results = pd.DataFrame(results, columns=["Criterion", "Max Depth", "Score"])
results

Unnamed: 0,Criterion,Max Depth,Score
0,entropy,10,0.89
1,entropy,14,0.85
2,entropy,15,0.85
3,entropy,17,0.85
4,entropy,18,0.85
5,entropy,11,0.84
6,entropy,12,0.84
7,entropy,16,0.84
8,entropy,20,0.84
9,gini,11,0.84


### Getting the most efficient result(s)

In [10]:
max_score = results["Score"].max()
max_result = results[results["Score"] == max_score]
max_result

Unnamed: 0,Criterion,Max Depth,Score
0,entropy,10,0.89


## By Sina Kazemi