In [3]:
import numpy as np
import pandas as pd

def entropy(y):
    values, counts = np.unique(y, return_counts=True)
    probs = counts / len(y)
    return -np.sum(probs * np.log2(probs))

def info_gain(X, y, feature):
    unique_vals = np.unique(X[:, feature])
    weighted_entropy = 0
    for val in unique_vals:
        sub_y = y[X[:, feature] == val]
        weighted_entropy += len(sub_y) / len(y) * entropy(sub_y)
    return entropy(y) - weighted_entropy

def best_feature(X, y):
    gains = [info_gain(X, y, i) for i in range(X.shape[1])]
    return np.argmax(gains)

def decision_tree(X, y):
    if len(np.unique(y)) == 1:
        return np.unique(y)[0]
    if X.shape[1] == 0:
        return np.bincount(y).argmax()
    best_feat = best_feature(X, y)
    tree = {best_feat: {}}
    for val in np.unique(X[:, best_feat]):
        sub_X = X[X[:, best_feat] == val]
        sub_y = y[X[:, best_feat] == val]
        tree[best_feat][val] = decision_tree(np.delete(sub_X, best_feat, axis=1), sub_y)
    return tree

data = pd.read_csv('/content/sample_data/play_tennis.csv')
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values
tree = decision_tree(X, y)
print(tree)


{0: {'D1': 'No', 'D10': 'Yes', 'D11': 'Yes', 'D12': 'Yes', 'D13': 'Yes', 'D14': 'No', 'D2': 'No', 'D3': 'Yes', 'D4': 'Yes', 'D5': 'Yes', 'D6': 'No', 'D7': 'Yes', 'D8': 'No', 'D9': 'Yes'}}
