# In [5]:
import pandas as pd
import numpy as np
 
# Toy weather table: 14 observations with four attribute columns and the
# 'Play' class label.  Each keyword below becomes one DataFrame column.
data = dict(
    Outlook=[
        'Sunny', 'Sunny', 'Overcast', 'Rainy', 'Rainy', 'Rainy', 'Overcast',
        'Sunny', 'Sunny', 'Rainy', 'Sunny', 'Overcast', 'Overcast', 'Rainy',
    ],
    Temperature=[
        'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
        'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
    ],
    Humidity=[
        'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
        'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
    ],
    Windy=[
        False, True, False, False, False, True, True,
        False, False, False, True, True, False, True,
    ],
    Play=[
        'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
        'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
    ],
)
df = pd.DataFrame(data=data)
 
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the labels in ``target_col``.

    Parameters
    ----------
    target_col : array-like
        Class labels; anything ``np.unique`` accepts (list, ndarray, Series).

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label.  A pure or empty column yields ``0.0``.
    """
    _, counts = np.unique(target_col, return_counts=True)
    # Relative frequency of each distinct label, computed once.
    probs = counts / counts.sum()
    # Negating an all-zero sum (pure or empty column) gives IEEE -0.0;
    # adding 0.0 normalises it so callers never see a "-0.0" entropy.
    return -np.sum(probs * np.log2(probs)) + 0.0
 
def info_gain(data, split_attr, target="Play"):
    """Information gain obtained by partitioning ``data`` on ``split_attr``.

    The gain is the entropy of the ``target`` column minus the entropy of
    each partition (one per distinct ``split_attr`` value), with every
    partition weighted by the fraction of rows it contains.
    """
    baseline = entropy(data[target])
    levels, sizes = np.unique(data[split_attr], return_counts=True)

    partition_terms = []
    for level, size in zip(levels, sizes):
        # Labels of the rows whose split attribute equals this level.
        subset_labels = data[data[split_attr] == level][target]
        partition_terms.append((size / np.sum(sizes)) * entropy(subset_labels))

    return baseline - np.sum(partition_terms)
 
def id3(data, features, target="Play"):
    """Build an ID3 decision tree as nested dictionaries.

    Parameters
    ----------
    data : pandas.DataFrame
        Training rows containing the ``features`` columns and ``target``.
    features : list of str
        Column names still available for splitting.
    target : str, default "Play"
        Name of the class-label column.

    Returns
    -------
    dict or scalar
        A leaf is the class label itself.  An internal node has the form
        ``{feature: {value: subtree, ...}}`` with one entry per distinct
        value of the chosen feature present in ``data``.  Labels and branch
        keys are native Python objects rather than NumPy scalars.
    """
    # .tolist() turns NumPy scalars (e.g. np.bool_ from a bool column) into
    # plain Python values, so the printed tree does not depend on the NumPy
    # version's scalar repr.
    unique_classes = np.unique(data[target]).tolist()
    if len(unique_classes) == 1:
        # Every row has the same label: this branch is a leaf.
        return unique_classes[0]
    if len(features) == 0:
        # Nothing left to split on: fall back to the most frequent label.
        majority = data[target].mode()[0]
        return majority.item() if isinstance(majority, np.generic) else majority

    gains = [info_gain(data, attr, target) for attr in features]
    best_feat = features[np.argmax(gains)]
    # The chosen attribute is not reused further down this branch.
    remaining = [f for f in features if f != best_feat]

    tree = {best_feat: {}}
    for v in np.unique(data[best_feat]).tolist():
        sub = data[data[best_feat] == v]
        tree[best_feat][v] = id3(sub, remaining, target)
    return tree
 
# Fit the tree on the full table using all four attribute columns, then
# display the resulting nested dictionary.
tree = id3(
    df,
    features=['Outlook', 'Temperature', 'Humidity', 'Windy'],
    target='Play',
)
print(tree)


# Output:
# {'Outlook': {'Overcast': 'Yes', 'Rainy': {'Windy': {False: 'Yes', True: 'No'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
