# In [1]:
import math

# Training samples, one row per sample: [hours studied, attendance].
data = [
    # Rows whose label is 0.
    [1, 20],
    [2, 30],
    [3, 40],
    [3, 62],
    [1, 70],
    # Rows whose label is 1.
    [7, 70],
    [8, 80],
    [8, 85],
    [9, 80],
    [9, 85],
    [10, 90],
    [10, 96],
]
# Class label of each row in `data`, in the same order.
labels = [0] * 5 + [1] * 7

# Function to compute entropy
def entropy(zero, one):
    """Return the base-2 entropy of a two-class count pair.

    Args:
        zero: Number of samples counted for class 0.
        one: Number of samples counted for class 1.

    Returns:
        ``0`` when the two counts sum to zero; otherwise the sum of
        ``-p * log2(p)`` over the class proportions ``p`` that are non-zero.
    """
    n = zero + one
    if n == 0:
        return 0

    result = 0
    for count in (zero, one):
        share = count / n
        # A class with zero share contributes nothing; skipping it also
        # avoids evaluating log(0).
        if share != 0:
            result -= share * math.log(share, 2)
    return result

# Function to find best threshold for a feature

def best_threshold(data_indices, feature_index):
    """Pick the cut-off on one feature that maximises information gain.

    Every distinct value the feature takes among the selected rows is tried
    as a threshold ``t``: rows with ``value <= t`` go left, rows with
    ``value > t`` go right. Reads the module-level ``data`` and ``labels``
    and scores splits with ``entropy``.

    Args:
        data_indices: Row indices of the samples at this node. Iterated
            several times, so it must be re-iterable (e.g. a list).
        feature_index: Column of ``data`` to split on.

    Returns:
        A ``(threshold, gain)`` tuple for the best candidate. Candidates are
        tried in ascending order and only a strictly larger gain replaces
        the current best, so the smallest threshold wins ties. Returns
        ``(None, -1)`` when there are no candidates.
    """
    candidates = sorted({data[i][feature_index] for i in data_indices})

    # Entropy of the node before any split.
    zeros_here = sum(1 for i in data_indices if labels[i] == 0)
    ones_here = sum(1 for i in data_indices if labels[i] == 1)
    entropy_before = entropy(zeros_here, ones_here)

    winner, winner_gain = None, -1
    for cut in candidates:
        # Labels of the rows falling on each side of the cut.
        labels_le = [labels[i] for i in data_indices
                     if data[i][feature_index] <= cut]
        labels_gt = [labels[i] for i in data_indices
                     if data[i][feature_index] > cut]

        n_le, n_gt = len(labels_le), len(labels_gt)
        n_all = n_le + n_gt

        # Size-weighted average of the two child entropies.
        entropy_after = (
            (n_le / n_all) * entropy(labels_le.count(0), labels_le.count(1))
            + (n_gt / n_all) * entropy(labels_gt.count(0), labels_gt.count(1))
        )
        gain = entropy_before - entropy_after

        if gain > winner_gain:
            winner, winner_gain = cut, gain

    return winner, winner_gain

# Recursive Decision Tree Builder

def _majority_label(node_labels):
    """Return the most frequent label in the non-empty list ``node_labels``."""
    return max(set(node_labels), key=node_labels.count)


def build_tree(data_indices, feature_indices, min_gain=1e-12):
    """Recursively grow a binary decision tree over a subset of the samples.

    Reads the module-level ``data`` and ``labels`` and uses
    ``best_threshold`` to score candidate splits.

    Args:
        data_indices: Row indices of the samples reaching this node. Must be
            non-empty and re-iterable (e.g. a list).
        feature_indices: Column indices that may be split on. The same
            collection is passed to both children, so a feature can be
            reused deeper in the tree.
        min_gain: Largest information gain that is still treated as "no
            gain". Gains are floats, so an exact ``== 0`` test can be
            defeated by rounding noise; pass ``0`` to stop only when the
            best gain is not positive.

    Returns:
        A class label when the node is a leaf (pure node, no features, or
        no split with gain above ``min_gain``), otherwise a dict with keys
        ``'feature'``, ``'threshold'``, ``'left'`` and ``'right'``, where
        the two subtrees have the same shape.

    Raises:
        ValueError: If ``data_indices`` is empty. Previously this surfaced
            as an ``IndexError`` from indexing an empty label list.
    """
    node_labels = [labels[i] for i in data_indices]

    # An empty node has no label to return; fail with a clear message.
    if not node_labels:
        raise ValueError("build_tree() requires at least one sample index")

    # If pure node, return class
    first_label = node_labels[0]
    if all(label == first_label for label in node_labels):
        return first_label

    # If no features left, return majority class
    if len(feature_indices) == 0:
        return _majority_label(node_labels)

    # Find best feature + threshold (first feature wins ties).
    best_feature = None
    best_threshold_value = None
    best_IG = -1
    for feature_index in feature_indices:
        thr, IG = best_threshold(data_indices, feature_index)
        if IG > best_IG:
            best_IG = IG
            best_threshold_value = thr
            best_feature = feature_index

    # No usable split: fall back to the majority class.
    if best_feature is None or best_IG <= min_gain:
        return _majority_label(node_labels)

    # Split data. For min_gain >= 0 both sides are non-empty here: a split
    # that leaves one side empty has the parent's entropy, i.e. zero gain.
    left_indices = [i for i in data_indices
                    if data[i][best_feature] <= best_threshold_value]
    right_indices = [i for i in data_indices
                     if data[i][best_feature] > best_threshold_value]

    # Recursive build; return the node as a dictionary.
    return {
        'feature': best_feature,
        'threshold': best_threshold_value,
        'left': build_tree(left_indices, feature_indices, min_gain),
        'right': build_tree(right_indices, feature_indices, min_gain),
    }
# Build the tree

# Columns available for splitting: 0 = hours studied, 1 = attendance.
features = [0, 1]
# Train on every row of `data`.
tree = build_tree([*range(len(data))], features)


# Display the tree

import pprint
pprint.pprint(tree)


# Prediction function

def predict(sample, tree):
    """Classify ``sample`` by walking ``tree`` from the root down to a leaf.

    Args:
        sample: Indexable feature vector; at each internal node,
            ``sample[node['feature']]`` is compared with the node's
            threshold.
        tree: Either a leaf (a bare class label) or an internal node dict
            with keys ``'feature'``, ``'threshold'``, ``'left'`` and
            ``'right'``.

    Returns:
        The class label stored at the leaf that ``sample`` reaches. Values
        less than or equal to a threshold follow ``'left'``, larger values
        follow ``'right'``.
    """
    # Anything that is not a dict is a leaf. Testing for dict (rather than
    # int) keeps prediction working when class labels are strings, floats,
    # and so on. The loop also avoids recursion on deep trees.
    node = tree
    while isinstance(node, dict):
        if sample[node['feature']] <= node['threshold']:
            node = node['left']
        else:
            node = node['right']
    return node

# Example prediction
# Query point: 4 hours studied, 65 attendance.
sample = [4, 65]
predicted_label = predict(sample, tree)
print("Prediction for sample [4,65]:", predicted_label)


# Output:
# {'feature': 0, 'left': 0, 'right': 1, 'threshold': 3}
# Prediction for sample [4,65]: 1
