In [4]:
import pandas as pd
from pprint import pprint
from sklearn.feature_selection import mutual_info_classif
from collections import Counter

def id3(df, target_attribute, attribute_names, default_class=None):
    """Grow an ID3 decision tree and return it as nested dictionaries.

    The attribute with the highest mutual information with the target is
    chosen at each node, the rows are grouped by that attribute's values,
    and the procedure recurses on every group with the chosen attribute
    removed from the candidate list.

    Args:
        df: pandas DataFrame holding the training examples.  The candidate
            columns are passed to ``mutual_info_classif`` with
            ``discrete_features=True``, i.e. each is treated as a discrete
            feature.  NOTE(review): scikit-learn presumably needs these
            columns to be numeric (integer-coded) - confirm for your data.
        target_attribute: Name of the column that holds the class label.
        attribute_names: List of column names still available for splitting.
        default_class: Label returned when ``df`` contains no rows.

    Returns:
        A class label when the node is a leaf, otherwise a dictionary of
        the form ``{attribute: {attribute_value: subtree, ...}}``.
    """
    class_counts = Counter(df[target_attribute])

    # Pure node: every example carries the same label.
    if len(class_counts) == 1:
        return next(iter(class_counts))

    # No examples to vote with: fall back to the label supplied by the caller.
    if df.empty:
        return default_class

    # Most frequent label among the examples that reached this node; on a
    # tie the label encountered first wins.
    majority_class = class_counts.most_common(1)[0][0]

    # Attributes exhausted but labels still mixed: answer with the majority
    # of the examples at hand instead of an unrelated (possibly None) default.
    if not attribute_names:
        return majority_class

    gains = mutual_info_classif(
        df[attribute_names], df[target_attribute], discrete_features=True
    )
    # argmax returns the first maximum, so ties go to the earliest attribute.
    best_attr = attribute_names[int(gains.argmax())]
    remaining_attribute_names = [
        name for name in attribute_names if name != best_attr
    ]

    # One branch per observed value of the chosen attribute; this node's
    # majority label is the fallback for any child that ends up empty.
    branches = {
        attr_val: id3(
            data_subset, target_attribute, remaining_attribute_names, majority_class
        )
        for attr_val, data_subset in df.groupby(best_attr)
    }
    return {best_attr: branches}

# Load the training examples from the CSV file.
df = pd.read_csv("tennisdata1.csv")

# Swap every text (object-dtype) column for its integer category codes.
for colname in df.select_dtypes("object").columns:
    df[colname] = pd.factorize(df[colname])[0]

# Every column except the class label is a candidate split attribute.
attribute_names = df.columns.to_list()
attribute_names.remove("Target")

print("List of attribute names:", attribute_names)

# Grow the tree from the full data set and display the nested dictionaries.
tree = id3(df, "Target", attribute_names)
print("The tree structure:")
pprint(tree)


List of attribute names: ['Outlook', 'Temperature', 'Humidity', 'Wind']
The tree structure:
{'Outlook': {0: {'Temperature': {0: 0, 1: 0, 2: 1}},
             1: 1,
             2: {'Temperature': {1: 1, 2: {'Wind': {0: 1, 1: 0}}}}}}
