In [3]:
import pandas as pd
import numpy as np

class Node:
    """A single node of the decision tree.

    Attributes:
        label: Class label stored on the node (``None`` until one is assigned).
        feature: Name of the feature whose value leads to this node.
        value: Value of ``feature`` that leads to this node.
        depth: Distance from the root (the root has depth 0 by default).
        children: Child nodes, in the order they were added.
        parent: The node this one was added to, or ``None``.
    """

    def __init__(self, label=None, feature=None, value=None, depth=0, parent=None):
        self.label, self.feature, self.value = label, feature, value
        self.depth = depth
        self.children = []
        self.parent = parent

    def add_child(self, label=None, feature=None, value=None):
        """Create a node one level deeper, attach it to this node and return it."""
        new_child = Node(label, feature, value, self.depth + 1, self)
        self.children.append(new_child)
        return new_child

def entropy(data_frame, label_column):
    """Return the base-2 Shannon entropy of ``data_frame[label_column]``.

    The entropy is computed from the relative frequency of each distinct
    value found in the label column.
    """
    probabilities = data_frame[label_column].value_counts(normalize=True)
    # Same per-class term as -p * log2(p), evaluated for all classes at once.
    return (-probabilities * np.log2(probabilities)).sum()

def conditional_entropy(data_frame, feature_column, label_column):
    """Return the entropy of the label column given the feature column.

    For every distinct value of the feature, the entropy of the labels in
    the matching rows is weighted by that value's relative frequency, and
    the weighted entropies are added up.
    """
    feature_series = data_frame[feature_column]
    counts = feature_series.value_counts()
    weights = counts / counts.sum()
    weighted_sum = 0
    for feature_value, weight in weights.items():
        matching_rows = data_frame[feature_series == feature_value]
        weighted_sum += weight * entropy(matching_rows, label_column)
    return weighted_sum

def information_gain(data_frame, feature_column, label_column):
    """Return how much splitting on ``feature_column`` reduces label entropy.

    This is the entropy of the label column minus its conditional entropy
    given the feature column.
    """
    entropy_before = entropy(data_frame, label_column)
    entropy_after = conditional_entropy(data_frame, feature_column, label_column)
    return entropy_before - entropy_after

def select_next_feature(data_frame, label_column):
    """Return the non-label column with the highest information gain.

    When several columns share the highest gain, the one that comes first
    in ``data_frame.columns`` is returned.
    """
    candidates = list(data_frame.columns)
    candidates.remove(label_column)
    gain_by_feature = {
        candidate: information_gain(data_frame, candidate, label_column)
        for candidate in candidates
    }
    return max(gain_by_feature, key=lambda name: gain_by_feature[name])

def print_branch(node, n_tabs=0):
    """Print every descendant of ``node``, one line per node, depth-first.

    Each line shows the node's label (blank when it is ``None``) followed by
    ``feature=value``. Lines are indented with two tab characters per
    ``n_tabs`` level, and each recursion level adds one to ``n_tabs``.
    """
    indent = "\t" * (2 * n_tabs)
    for child in node.children:
        shown_label = "" if child.label is None else str(child.label)
        # Dots pad short labels so the feature=value part lines up.
        padding = "." * (4 - len(shown_label))
        print(f"{indent}....... {shown_label} {padding}{child.feature!s}={child.value!s}")
        print_branch(child, n_tabs + 1)

def print_tree(root_node):
    """Print a ``ROOT`` header line, then every node below ``root_node``."""
    print("ROOT............")
    # Start at one indentation level so the children sit under the header.
    print_branch(root_node, 1)

def build_node(node, data_frame, unique_value_dict, label_column):
    """Recursively grow the decision tree below ``node``.

    The node becomes a leaf (its ``label`` is set and no children are added)
    when no feature column is left or when every sample has the same label.
    Otherwise the feature with the highest information gain is selected and
    one child is added per value listed for it in ``unique_value_dict``.

    Args:
        node: Node to fill in; modified in place.
        data_frame: Samples that reach ``node``, including the label column.
        unique_value_dict: Maps each feature column name to the list of
            values a child should be created for.
        label_column: Name of the column holding the class labels.

    Returns:
        None.
    """
    # No samples at all: inherit the parent's label if there is a parent.
    # A root node (parent is None) is simply left without a label.
    if data_frame.size == 0:
        if node.parent is not None:
            node.label = node.parent.label
        return

    label_series = data_frame[label_column]

    # No feature column left to split on: use the most frequent label.
    if data_frame.drop(columns=label_column).columns.size == 0:
        node.label = label_series.value_counts().idxmax()
        return

    # All samples share one class: that class is the label.
    distinct_labels = label_series.unique()
    if distinct_labels.size == 1:
        node.label = distinct_labels[0]
        return

    # Split on the feature column with the highest information gain.
    next_feature_column = select_next_feature(data_frame, label_column)
    # Internal nodes carry no label of their own, so a child that receives no
    # samples is given the most frequent label among this node's samples.
    majority_label = label_series.value_counts().idxmax()

    # Iterate over the known values of the chosen feature (taken from
    # unique_value_dict so that values absent from this subset still get a child).
    for value in unique_value_dict[next_feature_column]:
        new_node = node.add_child(feature=next_feature_column, value=value)
        new_data_frame = data_frame.loc[
            data_frame[next_feature_column] == value, :
        ].drop(columns=next_feature_column)
        if new_data_frame.size == 0:
            new_node.label = majority_label
            continue
        build_node(new_node, new_data_frame, unique_value_dict, label_column)

def build_tree(data_frame, label_column):
    """Build a decision tree for ``data_frame`` and return its root node.

    The distinct values of every feature column are collected once from the
    full data set and handed to the recursive builder.
    """
    tree_root = Node()
    build_node(
        tree_root,
        data_frame,
        make_unique_value_dict(data_frame, label_column),
        label_column,
    )
    return tree_root

def make_unique_value_dict(data_frame, label_column):
    """Map every non-label column name to the list of its distinct values.

    Values appear in the order returned by ``Series.unique()``.
    """
    feature_names = list(data_frame.columns)
    feature_names.remove(label_column)
    return {name: list(data_frame[name].unique()) for name in feature_names}

# Demo: two categorical features "A" and "B" with a binary label column.
label_column = "species"
column_A_value_list = [1, 2, 3, 4, 5]
column_B_value_list = [1, 2, 3]
# One label per (A, B) combination: the first eight rows are class 1, the rest class 0.
label_column_value_list = [1] * 8 + [0] * 7

# Every combination of an A value with a B value, A varying slowest.
data = [[a, b] for a in column_A_value_list for b in column_B_value_list]
data_frame = pd.DataFrame(data=data, columns=["A", "B"]).assign(
    **{label_column: label_column_value_list}
)

# Build the tree on the demo data and print it.
root_node = build_tree(data_frame, label_column)
print_tree(root_node)

ROOT............
		....... 1 ...A=1
		....... 1 ...A=2
		.......  ....A=3
				....... 1 ...B=1
				....... 1 ...B=2
				....... 0 ...B=3
		....... 0 ...A=4
		....... 0 ...A=5
