In [2]:
import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [6]:
# Load the dataset from ./data.csv, using its "Unnamed: 0" column as the row index.
data = pd.read_csv("./data.csv", index_col="Unnamed: 0")
# Last expression of the cell: displays the first rows of the frame.
data.head()

Unnamed: 0,Outlook,Temperature,Humidity,Windy,Play Golf
0,Rainy,Hot,High,False,No
1,Rainy,Hot,High,True,No
2,Overcast,Hot,High,False,Yes
3,Sunny,Mild,High,False,Yes
4,Sunny,Cool,Normal,False,Yes


In [11]:
# Separate the "Play Golf" target column from the remaining feature columns.
y = data["Play Golf"]
X = data.drop(columns=["Play Golf"])

In [125]:
class Node:
    """One vertex of the decision tree.

    Attributes are filled in by the tree builder:
      feature  -- name of the column this node splits on (None for leaves)
      children -- mapping of feature value -> child Node
      X, y     -- the rows / labels that reached this node
      leaf     -- True once the node is marked as terminal
      pred     -- placeholder, initialised to None
    """

    def __init__(self):
        self.feature = None
        self.children = {}
        self.X = None
        self.y = None
        self.leaf = False
        self.pred = None

    def predict(self):
        """Return the "Yes"/"No" label proportions among this leaf's labels.

        Returns:
            dict with keys "Yes" and "No"; a label absent from ``self.y``
            keeps the integer value 0.

        Raises:
            ValueError: if the node has not been marked as a leaf.
        """
        if not self.leaf:
            raise ValueError("Prediction called at non-leaf node.")

        label_counts = self.y.value_counts()
        probabilities = {"Yes": 0, "No": 0}
        for label in probabilities:
            # Only divide for labels that actually occur at this leaf.
            if label in label_counts:
                probabilities[label] = label_counts[label] / label_counts.sum()

        return probabilities


class DecisionTreeCustom:
    """Decision tree for categorical features, grown by information gain.

    Each internal node splits on the feature with the largest information
    gain and gets one child per distinct value of that feature. A node
    becomes a leaf when no feature yields a positive gain or when no
    features are left to split on.
    """

    def __init__(self):
        # Root Node of the fitted tree; None until fit() is called.
        self.root = None

    @staticmethod
    def entropy(y):
        """Return the Shannon entropy (in bits) of the labels in ``y``.

        Args:
            y: pandas Series of class labels (any label values).

        Returns:
            float entropy; 0.0 for an empty or single-class Series.
        """
        counts = y.value_counts()
        total = counts.sum()
        if total == 0:
            return 0.0

        # Skip zero counts (e.g. unused categories) so log2(0) is never taken.
        probs = counts[counts > 0] / total
        # math.log2 replaces the removed ``np.math.log2`` alias.
        # Adding 0.0 normalises a possible -0.0 result to 0.0.
        return -sum(p * math.log2(p) for p in probs) + 0.0

    @staticmethod
    def entropy_after_split(feature, X, y):
        """Return the weighted label entropy after splitting on ``feature``.

        Args:
            feature: column name in ``X`` to split on.
            X: DataFrame of features.
            y: Series of labels aligned with ``X``.

        Returns:
            Sum over the distinct values of ``feature`` of
            (fraction of rows with that value) * (entropy of their labels).
        """
        total_rows = len(X)
        entropy = 0
        for val in X[feature].unique():
            subset_y = y[X[feature] == val]
            weight = len(subset_y) / total_rows
            entropy += weight * DecisionTreeCustom.entropy(subset_y)

        return entropy

    @staticmethod
    def make_split(feature, X, y):
        """Partition the data by ``feature`` and wrap each part in a Node.

        Args:
            feature: column name in ``X`` to split on.
            X: DataFrame of features.
            y: Series of labels aligned with ``X``.

        Returns:
            dict mapping each distinct value of ``feature`` to a new Node
            holding the matching rows (with ``feature`` dropped) and labels.
        """
        children = {}

        for val in X[feature].unique():
            mask = X[feature] == val
            node = Node()
            # The used feature is removed so descendants cannot split on it again.
            node.X = X[mask].drop([feature], axis=1)
            node.y = y[mask]
            children[val] = node

        return children

    @staticmethod
    def make_tree(node, X, y):
        """Recursively grow the subtree rooted at ``node`` in place.

        Args:
            node: Node to populate.
            X: DataFrame of the features still available at this node.
            y: Series of labels aligned with ``X``.

        Returns:
            None; ``node`` is mutated.
        """
        own_entropy = DecisionTreeCustom.entropy(y)
        features = X.columns
        feature_info_gains = [
            own_entropy - DecisionTreeCustom.entropy_after_split(feature, X, y)
            for feature in features
        ]

        # With every feature already consumed there is nothing to take the
        # argmax of; such a node must become a leaf even if its labels are mixed.
        ix = int(np.argmax(feature_info_gains)) if feature_info_gains else None

        if ix is not None and feature_info_gains[ix] > 0:
            node.feature = features[ix]
            node.children = DecisionTreeCustom.make_split(features[ix], X, y)
            for child in node.children.values():
                DecisionTreeCustom.make_tree(child, child.X, child.y)
        else:
            node.leaf = True
            node.y = y
        return None

    def fit(self, X, y):
        """Build the tree from features ``X`` and labels ``y``.

        Returns:
            self, so calls can be chained.
        """
        self.root = Node()
        DecisionTreeCustom.make_tree(self.root, X, y)
        return self

    @staticmethod
    def predict_tree_recursive(node, X):
        """Walk from ``node`` down to a leaf for a single sample.

        Args:
            node: current tree node.
            X: one sample, indexable by feature name (e.g. a DataFrame row).

        Returns:
            The leaf's ``predict()`` result.

        Raises:
            KeyError: if the sample's value for a split feature has no
                matching child at that node.
        """
        if node.leaf:
            return node.predict()

        val = X[node.feature]
        return DecisionTreeCustom.predict_tree_recursive(node.children[val], X)

    def predict_tree(self, X):
        """Predict a single sample ``X`` starting from the root."""
        return DecisionTreeCustom.predict_tree_recursive(self.root, X)

    def predict(self, X):
        """Predict every row of the DataFrame ``X``.

        Returns:
            list with one leaf prediction per row, in row order.
        """
        return [self.predict_tree(X.iloc[row_ix]) for row_ix in range(len(X))]

    @staticmethod
    def print_tree_recursive(node, intent):
        """Print the subtree under ``node``, prefixing lines with ``intent``.

        ``intent`` is the indentation string; it grows by one tab per level.
        """
        print(end=intent)
        if node.leaf:
            print("Leaf->", node.predict())
        else:
            print("Feature Split->", node.feature)
            for child_name, child in node.children.items():
                # No newline here: the child's own description continues the line.
                print(intent, child_name, "-->", end=" ")
                DecisionTreeCustom.print_tree_recursive(child, intent + "\t")

    def print_tree(self):
        """Print the whole fitted tree to stdout."""
        return DecisionTreeCustom.print_tree_recursive(self.root, "")

        

In [126]:
# Create an unfitted tree; its root stays None until fit() is called.
dt = DecisionTreeCustom()

In [127]:
# Grow the tree on the full dataset; fit() returns the model itself, hence the repr shown as output.
dt.fit(X, y)

<__main__.DecisionTreeCustom at 0x1d1869ff320>

In [128]:
# Print the fitted tree as indented text (one extra tab per depth level).
dt.print_tree()

Feature Split-> Outlook
 Rainy --> 	Feature Split-> Temperature
	 Hot --> 		Leaf-> {'Yes': 0, 'No': 1.0}
	 Mild --> 		Leaf-> {'Yes': 0, 'No': 1.0}
	 Cool --> 		Leaf-> {'Yes': 1.0, 'No': 0}
 Overcast --> 	Leaf-> {'Yes': 1.0, 'No': 0}
 Sunny --> 	Feature Split-> Windy
	 False --> 		Leaf-> {'Yes': 1.0, 'No': 0}
	 True --> 		Leaf-> {'Yes': 0, 'No': 1.0}


In [129]:
# Predict on the same rows used for fitting; each entry is the "Yes"/"No" proportion dict of the leaf that row reaches.
dt.predict(X)

[{'Yes': 0, 'No': 1.0},
 {'Yes': 0, 'No': 1.0},
 {'Yes': 1.0, 'No': 0},
 {'Yes': 1.0, 'No': 0},
 {'Yes': 1.0, 'No': 0},
 {'Yes': 0, 'No': 1.0},
 {'Yes': 1.0, 'No': 0},
 {'Yes': 0, 'No': 1.0},
 {'Yes': 1.0, 'No': 0},
 {'Yes': 1.0, 'No': 0}]