### Imports needed

In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split

import numpy as np

old_settings = np.seterr(all='raise') # it lets you know the errors in floating points

## Load Iris data

In [2]:
iris = datasets.load_iris()
X = iris.data
Y = iris.target
features = iris.feature_names

In [3]:
'''
    Store feature names and its corresponding indices initially. Otherwise in every recusive call 
    we need to make new X ndarray which increases complexity.
'''
feature_indices = {}
for i in range(len(features)):
    feature_indices[features[i]] = feature_indices.get(features[i],0) + i

## Actual Implementation of Decision Tree class

In [4]:
class DecisionTree:
    def __init__(self):
        self.left = None
        self.right = None
        self.level = 0
        self.max_gain_ratio_feature = None
        self.X_feature_boundry = None # boundry of max_gain_ratio_feature on which we split in two parts
        
    
    # claclate entropy of node having output Y
    def entropy_node(self, Y):
        total_freq = len(Y)

        # store different classes possible with its frequency
        class_freq = {}
        for c in Y:
            class_freq[c] = class_freq.get(c,0) + 1

        # Now we calculate entropy or info required
        info_req =  0
        for k in class_freq:
            prob_k = class_freq[k]/total_freq
            if prob_k!=0:
                info_req += (-1 * prob_k * np.log2(prob_k))
        return info_req
    
    
    
    # calculate entropy(info required) after spliting on basis of feature having index f_i
    def info_fi(self, X, Y, f_i, boundry):
        entropy_after_split = np.float(0)
        # feature on which we goona split
        feature = X[:, f_i] # feature corresponds to index f_i
        Y_left = Y[np.where(feature <= boundry)]
        Y_right = Y[np.where(feature > boundry)]

        if len(Y_left)!=0:
            entropy_after_split += ( np.float(len(Y_left)/len(Y)) * self.entropy_node(Y_left) ) 
        if len(Y_right) !=0 :
            entropy_after_split += ( np.float(len(Y_right)/len(Y)) * self.entropy_node(Y_right) )

        return entropy_after_split
    
    
    
    # calculate split_info of feature having index f_i
    def find_split_info(self, X, Y, f_i, boundry):
        size = len(Y)
        split_info = np.float(0)

        # feature on which we goona split
        feature = X[:, f_i] # feature corresponds to index f_i
        Y_left = Y[np.where(feature <= boundry)]
        Y_right = Y[np.where(feature > boundry)]

        if len(Y_left) != 0:
            split_info += np.float( -1*np.float(len(Y_left)/size) * np.log2(len(Y_left)/size) ) 
        if len(Y_right) != 0:
            split_info += np.float( -1*np.float(len(Y_right)/size) * np.log2(len(Y_right)/size) )

        return split_info
    
    
    
    '''
        Fit function learns all parameters of a node which we use during predict function
    '''
    def fit(self,X, Y, features, level=0):
        self.level = level
        
        # Find current entropy
        entropy_current = self.entropy_node(Y)

        # Base case
        # If node is pure, 
        if len(set(Y))==1 :
            pass
        # If no feature left to split
        elif len(features) == 0:
            pass
        else:
            # Find max info_gain
            max_gain_ratio = -10000 # Initially , it will be changed later
            max_gain_ratio_feature = features[0]
            X_feature_boundry = -1 # initially, we change this inside loop

            # check info_gain for each feature
            for f in features:
                '''
                    Since in Iris dataset all features have continuous data. So we need to find a boundry
                    To find an boundry we first sort feature data, and then try boundry as middle value b/w 2 consecutive data
                '''
                X_feature = X[:, feature_indices[f]]
                X_feature.sort()
             
                # Try different boundries
                X_len = len(X_feature)
                if X_len == 1:
                    continue

                for i in range(X_len-1):
                    curr_boundry = np.float(X_feature[i]+X_feature[i+1])/2
                    info_after_split_f = self.info_fi(X, Y, feature_indices[f], curr_boundry)
                    info_gain_f = entropy_current - info_after_split_f
                    split_info_f = self.find_split_info(X, Y, feature_indices[f], curr_boundry)
                    gain_ratio_f = 0
                    try:
                        gain_ratio_f = np.float(info_gain_f/split_info_f)
                    except:
                        gain_ratio_f = 0

                    if gain_ratio_f>max_gain_ratio:
                        max_gain_ratio = gain_ratio_f
                        max_gain_ratio_feature = f
                        X_feature_boundry = curr_boundry

                        
            self.max_gain_ratio_feature = max_gain_ratio_feature
            self.X_feature_boundry = X_feature_boundry
            
            # Now split on basis of max_gain_ratio_feature and X_feature_boundry and call on two sides of boundry
            # 1st remove this max_gain_ratio_feature from current features list
            new_features = []
            for f in features:
                if f != max_gain_ratio_feature:
                    new_features.append(f)

            f_i = feature_indices[max_gain_ratio_feature] # bcz this index(f_i) gives us the exact column correspond to required featue 
            f_i_feature = X[:, f_i] # feature corresponds to index f_i

            # Recursive calls on 2 sides of boundry
            X_left = X[np.where(f_i_feature <= X_feature_boundry)]
            Y_left = Y[np.where(f_i_feature <= X_feature_boundry)]
            #left decision tree
            DT_left = DecisionTree() 
            DT_left.fit(X_left, Y_left, new_features, level+1)
            self.left = DT_left # Attach on left side
            
            X_right = X[np.where(f_i_feature > X_feature_boundry)]
            Y_right = Y[np.where(f_i_feature > X_feature_boundry)]
            # right decision tree
            DT_right = DecisionTree()
            DT_right.fit(X_right, Y_right, new_features, level+1)
            self.right = DT_right # Attach on right side
            
            
            
    def predict(self, X , Y, features):
        # Print some info as required
        print("Level",self.level) 

        # store different classes possible with its frequency
        class_freq = {}
        for c in Y:
            class_freq[c] = class_freq.get(c,0) + 1
        
        output = None
        max_ = 0
        for c in class_freq:
            if class_freq[c] != 0:
                print("Count of",c,"=",class_freq[c])
                if max_ < class_freq[c]:
                    max_ = class_freq[c]
                    output = c

        # Find current entropy
        entropy_current = 0
        if len(set(Y)) > 1:
            entropy_current = self.entropy_node(Y)

        
        if entropy_current==0:
            print("Current Entropy is =",entropy_current)
            print("Reached Leaf Node")
        elif self.max_gain_ratio_feature is None :
            print("Current Entropy is =",entropy_current)
            print("Reached Leaf Node and output is majority class i.e class", output)
        else:
            print("Current Entropy is =",entropy_current)
                            
            # boundry value on which max_gain_ratio_feature is splitted
            curr_boundry = self.X_feature_boundry 
            
            # feature to split
            f_split = self.max_gain_ratio_feature 
            
            info_after_split_f = self.info_fi(X, Y, feature_indices[f_split], curr_boundry)
            
            # Info_gain after splitting max_gain_ratio_feature with boundry X_feature_boundry
            info_gain_f = entropy_current - info_after_split_f
            
            split_info_f = self.find_split_info(X, Y, feature_indices[f_split], curr_boundry)
            
            # gain ratio = (Info_gain / split_info)
            gain_ratio_f = 0
            try:
                gain_ratio_f = np.float(info_gain_f/split_info_f)
            except:
                gain_ratio_f = 0
                
            print("Splitting on feature",f_split, "<=",curr_boundry, "with gain ratio", gain_ratio_f)

            # Now split on basis of f_split and curr_boundry and call on two sides of boundry
            # 1st remove this max_gain_ratio_feature from current features list
            new_features = []
            for f in features:
                if f != f_split:
                    new_features.append(f)

            f_i = feature_indices[f_split] # bcz this index(f_i) gives us the exact column correspond to required featue
            f_i_feature = X[:, f_i] # feature corresponds to index f_i
            
            if self.left is not None:
                X_left = X[np.where(f_i_feature <= curr_boundry)]
                Y_left = Y[np.where(f_i_feature <= curr_boundry)]
                print()
                self.left.predict(X_left, Y_left, new_features)
                
            if self.right is not None:
                X_right = X[np.where(f_i_feature > curr_boundry)]
                Y_right = Y[np.where(f_i_feature > curr_boundry)]
                print()
                self.right.predict(X_right, Y_right, new_features)
            

### split data

In [5]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=101)

### Create object of DecisionTree class

In [6]:
dt = DecisionTree()

### Build tree (learn parameters) by calling fit function

In [7]:
dt.fit(X_train,Y_train,features)

### call predict function on test data to print results

In [8]:
dt.predict(X_test,Y_test,features)

Level 0
Count of 0 = 13
Count of 2 = 12
Count of 1 = 20
Current Entropy is = 1.545990246502223
Splitting on feature petal length (cm) <= 6.0 with gain ratio 0.3391845278504918

Level 1
Count of 0 = 13
Count of 2 = 10
Count of 1 = 20
Current Entropy is = 1.524786634179887
Splitting on feature sepal length (cm) <= 4.35 with gain ratio 0

Level 2
Current Entropy is = 0
Reached Leaf Node

Level 2
Count of 0 = 13
Count of 2 = 10
Count of 1 = 20
Current Entropy is = 1.524786634179887
Splitting on feature sepal width (cm) <= 2.2 with gain ratio 0.16405500703523648

Level 3
Count of 1 = 1
Current Entropy is = 0
Reached Leaf Node

Level 3
Count of 0 = 13
Count of 2 = 10
Count of 1 = 19
Current Entropy is = 1.53432646944219
Splitting on feature petal width (cm) <= 0.15000000000000002 with gain ratio 0.25397617232567776

Level 4
Count of 0 = 1
Current Entropy is = 0
Reached Leaf Node

Level 4
Count of 0 = 12
Count of 2 = 10
Count of 1 = 19
Current Entropy is = 1.529516548204532
Reached Leaf Node 

In [9]:
dt.predict(X_train,Y_train,features)

Level 0
Count of 0 = 37
Count of 2 = 38
Count of 1 = 30
Current Entropy is = 1.5773146804529516
Splitting on feature petal length (cm) <= 6.0 with gain ratio 0.23751386521103268

Level 1
Count of 0 = 36
Count of 2 = 38
Count of 1 = 24
Current Entropy is = 1.5577950932979914
Splitting on feature sepal length (cm) <= 4.35 with gain ratio 0.18107094736685828

Level 2
Count of 0 = 1
Current Entropy is = 0
Reached Leaf Node

Level 2
Count of 2 = 38
Count of 1 = 24
Count of 0 = 35
Current Entropy is = 1.5588277790815832
Splitting on feature sepal width (cm) <= 2.2 with gain ratio 0.09865463918569896

Level 3
Count of 2 = 1
Count of 1 = 1
Current Entropy is = 1.0
Splitting on feature petal width (cm) <= 0.1 with gain ratio 0

Level 4
Count of 2 = 1
Count of 1 = 1
Current Entropy is = 1.0
Reached Leaf Node and output is majority class i.e class 2

Level 4
Current Entropy is = 0
Reached Leaf Node

Level 3
Count of 0 = 35
Count of 1 = 23
Count of 2 = 37
Current Entropy is = 1.5559966071661449
Sp