In [255]:
import numpy as np
import pandas as pd
import os
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score,confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

**Load the required dataset from the CSV file and preview it.**

In [256]:
# Load the heart dataset CSV into a DataFrame.
# The trailing expression previews the first 5 rows as the cell output.
data = '../input/heartdt/heart(1).csv'
balance_data = pd.read_csv(filepath_or_buffer=data)
balance_data.head(n=5)

In [257]:
# Split the frame into the label vector (the 'target' column) and the
# feature matrix (every remaining column).
Y = balance_data['target']
X = balance_data.drop('target', axis=1)

In [258]:
# Hold out 30% of the rows as the test set; the fixed seed keeps the
# partition identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=1,
)

**Create a decision tree classifier and train it using the Gini criterion.**

In [259]:
# Decision tree that selects splits with the "gini" criterion.
# The seed is pinned so refitting on the same data builds the same tree.
clf_gini = DecisionTreeClassifier(
    random_state=100,
    criterion="gini",
)
# Train on the training split. Kept as a bare expression so the fitted
# estimator is shown as the cell output.
clf_gini.fit(X_train, y_train)

**Create a decision tree classifier and train it using the entropy criterion.**

In [260]:
# Decision tree that selects splits with the "entropy" criterion.
# Same seed as the Gini model so the two trees differ only by criterion.
clf_entropy = DecisionTreeClassifier(
    random_state=100,
    criterion="entropy",
)
# Train on the training split. Kept as a bare expression so the fitted
# estimator is shown as the cell output.
clf_entropy.fit(X_train, y_train)

**This function takes a fitted model and X_test as input and returns that model's predicted values; it is called once for each approach.**

In [261]:
# Function to make predictions
def prediction(X_test, clf_object):
    """Predict labels for ``X_test`` with a fitted model and echo them.

    Works with any object exposing ``predict``; it is not tied to the
    Gini or the entropy tree.

    Args:
        X_test: Feature rows to predict on.
        clf_object: Fitted estimator providing a ``predict`` method.

    Returns:
        Whatever ``clf_object.predict(X_test)`` returns.
    """
    predicted = clf_object.predict(X_test)
    # A single print with a newline separator produces the same two lines
    # of output as printing the header and the values separately.
    print("Predicted values:", predicted, sep="\n")
    return predicted

**Function to calculate and print the accuracy of a model's predictions.**

In [262]:
# Function to calculate accuracy
def cal_accuracy(y_test, y_pred):
    """Print the accuracy of ``y_pred`` against ``y_test`` as a percentage.

    Args:
        y_test: True labels.
        y_pred: Predicted labels, aligned with ``y_test``.

    Returns:
        None. The score is only printed.
    """
    # accuracy_score yields a fraction; scale it to a percentage.
    accuracy_pct = accuracy_score(y_test, y_pred) * 100
    print("Accuracy : ", accuracy_pct)

**Execute the above 2 functions to get the predicted values and accuracy of each model.**

In [263]:
# Operational phase: score both fitted trees on the held-out test split.

# --- Tree trained with the Gini criterion ---
print("Results Using Gini:")
y_pred_gini = prediction(X_test=X_test, clf_object=clf_gini)
cal_accuracy(y_test=y_test, y_pred=y_pred_gini)

# --- Tree trained with the entropy criterion ---
print("Results Using Entropy:")
y_pred_entropy = prediction(X_test=X_test, clf_object=clf_entropy)
cal_accuracy(y_test=y_test, y_pred=y_pred_entropy)

**Most of the time both the models have similar accuracy. Sometimes it's different too.**

**Print the trees created by both models and see which feature is the root node.**

In [264]:
#Print tree
def printTree(classifier, feature_names=None, class_names=None):
    """Render a fitted decision tree with Graphviz.

    Args:
        classifier: Fitted decision tree estimator to export.
        feature_names: Optional sequence with one label per feature the tree
            was trained on. When omitted, the names recorded on the estimator
            (``feature_names_in_``, set when it was fit on a DataFrame) are
            used; if the estimator has none, scikit-learn's generic feature
            labels are used.
        class_names: Optional sequence of class labels, in ascending order of
            the target values. Defaults to ``['HD-No', 'HD-Yes']``.

    Returns:
        A ``graphviz.Source`` object configured for PNG output.
    """
    # Imported locally: the notebook's import cell never brings in graphviz,
    # so the bare name raised NameError. A local import also keeps the rest
    # of the notebook runnable when graphviz is not installed.
    import graphviz

    if feature_names is None:
        # A fixed three-name list cannot match a tree trained on a different
        # number of columns, so take the names from the estimator instead.
        feature_names = getattr(classifier, "feature_names_in_", None)
    if feature_names is not None:
        # Plain list of str: accepted by every scikit-learn release.
        feature_names = [str(name) for name in feature_names]

    if class_names is None:
        # Ascending label order, consistent with the plotting cells of this
        # notebook, which label class 0 as 'Not heart disease'.
        class_names = ['HD-No', 'HD-Yes']

    #Build the tree
    dot_data = tree.export_graphviz(
        classifier,
        out_file=None,
        feature_names=feature_names,
        class_names=list(class_names),
        filled=True,
    )

    #Draw tree
    return graphviz.Source(dot_data, format="png")

In [265]:
#Print gini tree
# Label nodes with the columns the tree was trained on. `balance_data.columns`
# also contains the 'target' label column, which is not a feature, and a
# pandas Index is rejected for feature_names by some scikit-learn releases,
# so pass a plain list of the feature columns instead.
features = list(X.columns)
# plot_tree assigns these labels to the classes in ascending order.
classes = ['Not heart disease','heart disease']
fig, ax = plt.subplots(figsize=(20,20))
tree.plot_tree(clf_gini,feature_names=features,class_names=classes,filled=True,ax=ax)
plt.show()

In [266]:
#Print entropy tree
# Label nodes with the columns the tree was trained on. `balance_data.columns`
# also contains the 'target' label column, which is not a feature, and a
# pandas Index is rejected for feature_names by some scikit-learn releases,
# so pass a plain list of the feature columns instead.
features = list(X.columns)
# plot_tree assigns these labels to the classes in ascending order.
classes = ['Not heart disease','heart disease']
fig, ax = plt.subplots(figsize=(20,20))
tree.plot_tree(clf_entropy,feature_names=features,class_names=classes,filled=True,ax=ax)
plt.show()