In [21]:
import numpy as np
from sklearn.datasets import load_iris
import random
    

In [25]:

class Node:
    """A single node of a binary CART classification tree.

    Attributes:
        x: Feature rows held by this node (2-D array once populated).
        y: Integer class labels, one per row of ``x``.
        feature: Index of the feature this node splits on (None for a leaf).
        threshold: Split threshold for ``feature`` (None for a leaf).
        leftChild: Child holding samples with ``x[feature] > threshold``.
        rightChild: Child holding the remaining samples.
        depth: Depth of the node in the tree as given by the creator.
        class_label: Majority class, set by ``get_class_for_node``.
        gini: Gini impurity, set by ``gini_impurity``.
    """

    def __init__(self, depth):
        self.x = []
        self.y = []
        self.feature = None
        self.threshold = None
        self.leftChild = None
        self.rightChild = None
        self.depth = depth
        # Defined up front so that reading them on a fresh node yields None
        # instead of raising AttributeError.
        self.class_label = None
        self.gini = None

    def setXAndY(self, x, y):
        """Store the samples and refresh ``class_label`` and ``gini``.

        ``x`` is converted with ``np.asarray`` so plain nested lists are
        accepted as well as arrays; ``y`` is copied into an int64 array.
        """
        self.x = np.asarray(x)
        self.y = np.array(y, dtype=np.int64)
        self.get_class_for_node()
        self.gini_impurity()

    def appendRecord(self, input_net, classification):
        """Append one sample; only usable while ``x`` and ``y`` are lists."""
        self.x.append(input_net)
        self.y.append(classification)

    def convertToNumpyArray(self):
        """Turn the accumulated ``x`` / ``y`` containers into NumPy arrays."""
        self.x = np.asarray(self.x)
        self.y = np.asarray(self.y, dtype=np.int64)

    def setLeftChild(self, leftChild):
        """Attach ``leftChild`` as the left child."""
        self.leftChild = leftChild

    def setRightChild(self, rightChild):
        """Attach ``rightChild`` as the right child."""
        self.rightChild = rightChild

    def get_class_for_node(self):
        """Compute, store and return the most frequent label in ``y``.

        Ties are resolved in favour of the smallest label (``np.argmax``).

        Raises:
            ValueError: If the node holds no samples.
        """
        if len(self.y) == 0:
            raise ValueError(
                "cannot determine the class of a node that holds no samples")
        self.class_label = np.argmax(np.bincount(self.y))
        return self.class_label

    def gini_impurity(self):
        """Compute, store and return the Gini impurity ``1 - sum(p_k ** 2)``.

        A node without samples has nothing to be impure about, so it is
        reported as 0.0.
        """
        if len(self.y) == 0:
            self.gini = 0.0
            return self.gini

        instances = np.bincount(self.y)
        p = instances / np.sum(instances)
        self.gini = 1.0 - np.sum(np.power(p, 2))
        return self.gini

    def split_node(self, feature, threshold):
        """Partition the samples on ``feature`` at ``threshold``.

        Args:
            feature: Column index of ``x`` to compare.
            threshold: Samples with a value strictly greater than this go to
                the left child; all others go to the right child.

        Returns:
            ``(leftChild, rightChild)``: two new nodes one level deeper whose
            ``x`` / ``y`` are NumPy arrays. Neither child has ``gini`` or
            ``class_label`` computed yet.
        """
        x = np.asarray(self.x)
        y = np.asarray(self.y, dtype=np.int64)

        if x.shape[0] == 0:
            # Nothing to route (e.g. a freshly created node).
            goes_left = np.zeros(0, dtype=bool)
        else:
            goes_left = x[:, feature] > threshold

        leftChild = Node(depth=self.depth + 1)
        rightChild = Node(depth=self.depth + 1)

        # Boolean masks keep the feature axis, so an empty child of an
        # (n, d) parent still has shape (0, d).
        leftChild.x, leftChild.y = x[goes_left], y[goes_left]
        rightChild.x, rightChild.y = x[~goes_left], y[~goes_left]

        return leftChild, rightChild
        


In [26]:
class PJ_Cart_Tree:
    """CART-style binary decision tree classifier built from ``Node`` objects.

    Splits are chosen greedily by minimising the size-weighted Gini impurity
    of the two children over a grid of candidate thresholds per feature.

    Args:
        max_depth: A node whose depth equals this value becomes a leaf.
        acceptable_impurity: A node whose Gini impurity is at or below this
            value becomes a leaf.
    """

    def __init__(self, max_depth, acceptable_impurity):
        self.max_depth = max_depth
        self.acceptable_impurity = acceptable_impurity

    def fit(self, x, y):
        """Build the tree from the training data and store it in ``self.root``.

        Args:
            x: 2-D array-like, one row per sample and one column per feature.
            y: 1-D array-like of integer class labels, one per row of ``x``.

        Raises:
            ValueError: If ``x`` is not 2-D, ``y`` is not 1-D, their lengths
                differ, or there are no samples.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        if x.ndim != 2:
            raise ValueError(
                "x must be 2-D (samples x features); got {} dimension(s)".format(x.ndim))
        if y.ndim != 1 or y.shape[0] != x.shape[0]:
            raise ValueError("y must be 1-D with one label per row of x")
        if x.shape[0] == 0:
            raise ValueError("cannot fit a tree on an empty data set")

        self.root = Node(depth=0)
        self.root.setXAndY(x, y)
        self.cart_split(self.root)

    def cart_split(self, node, granulation=10):
        """Recursively grow the subtree rooted at ``node``.

        ``node`` becomes a leaf (its ``class_label`` is computed) when it is
        pure enough, when it sits at ``max_depth``, or when no threshold
        separates its samples into two non-empty groups. Otherwise the best
        split is stored on the node and both children are grown in turn.

        Args:
            node: Node whose ``x``, ``y``, ``gini`` and ``depth`` are set.
            granulation: Number of steps the value range of each feature is
                divided into when generating candidate thresholds.
        """
        if node.gini <= self.acceptable_impurity or node.depth == self.max_depth:
            node.get_class_for_node()
            return

        best = self._find_best_split(node, granulation)
        if best is None:
            # Every feature is constant (or there are no features): the node
            # cannot be refined further, so it has to act as a leaf.
            node.get_class_for_node()
            return

        node.feature, node.threshold, node.leftChild, node.rightChild = best

        self.cart_split(node.rightChild, granulation)
        self.cart_split(node.leftChild, granulation)

    def _find_best_split(self, node, granulation):
        """Return ``(feature, threshold, leftChild, rightChild)`` or None.

        Candidate thresholds for a feature run from its minimum in steps of
        ``(max - min) / granulation`` and stop before the maximum. The first
        candidate with the lowest score wins.
        """
        best = None
        score_best = None

        for feature in range(node.x.shape[1]):
            column = node.x[:, feature]
            start = np.min(column)
            end = np.max(column)
            if not end > start:
                # Constant (or NaN) column: the step would be 0 and np.arange
                # cannot build a threshold grid from it.
                continue
            step = (end - start) / granulation

            for threshold in np.arange(start, end, step):
                leftChild, rightChild = node.split_node(feature, threshold)
                if leftChild.y.shape[0] == 0 or rightChild.y.shape[0] == 0:
                    # Float rounding can push the last grid point up to the
                    # maximum; such a split separates nothing.
                    continue

                score = self.get_score_for_split(node, leftChild, rightChild)
                if score_best is None or score_best > score:
                    score_best = score
                    best = (feature, np.float64(threshold), leftChild, rightChild)

        return best

    def get_score_for_split(self, node, leftChild, rightChild):
        """Return the size-weighted Gini impurity of a candidate split.

        Calls ``gini_impurity()`` on both children, which also stores the
        value on each child as ``gini``. Lower scores are better.
        """
        leftChild.gini_impurity()
        rightChild.gini_impurity()

        left_score = leftChild.gini * leftChild.y.shape[0] / node.y.shape[0]
        right_score = rightChild.gini * rightChild.y.shape[0] / node.y.shape[0]
        return left_score + right_score

    def predict(self, x):
        """Classify one sample by walking from the root down to a leaf.

        Args:
            x: A single sample, indexable by feature index.

        Returns:
            The ``class_label`` stored on the leaf that is reached.
        """
        currNode = self.root

        # A node without a left child is a leaf.
        while currNode.leftChild is not None:
            # Same routing rule as Node.split_node: strictly greater goes left.
            if x[currNode.feature] > currNode.threshold:
                currNode = currNode.leftChild
            else:
                currNode = currNode.rightChild

        return currNode.class_label

In [27]:
# Demo: train the CART classifier on the complete iris data set.
iris = load_iris()
tree = PJ_Cart_Tree(acceptable_impurity=0.2, max_depth=4)
tree.fit(iris.data, iris.target)

# Classify the sample in row 80; `cl` is the predicted class index, which is
# looked up in `target_names` to obtain the species name for display.
cl = tree.predict(iris.data[80])
predicted_name = iris.target_names[cl]
print('Classified as {}'.format(predicted_name))

Classified as versicolor
