In [55]:
import pandas as pd
from collections import Counter
# Read the PlayTennis table from a CSV file located in the working directory.
df_tennis = pd.read_csv('PlayTennis.csv')
# Bare trailing expression so the notebook renders the loaded frame.
df_tennis


Unnamed: 0,PlayTennis,Outlook,Temperature,Humidity,Wind
0,No,Sunny,Hot,High,Weak
1,No,Sunny,Hot,High,Strong
2,Yes,Overcast,Hot,High,Weak
3,Yes,Rain,Mild,High,Weak
4,Yes,Rain,Cool,Normal,Weak
5,No,Rain,Cool,Normal,Strong
6,Yes,Overcast,Cool,Normal,Strong
7,No,Sunny,Mild,High,Weak
8,Yes,Sunny,Cool,Normal,Weak
9,Yes,Rain,Mild,Normal,Weak


In [56]:
# Candidate split attributes: every column of the frame except the class
# label. `remove` raises ValueError if the label column is missing.
attr = df_tennis.columns.tolist()
attr.remove('PlayTennis')
attr

['Outlook', 'Temperature', 'Humidity', 'Wind']

In [57]:
import math
def entropy(probs):
    """Return the Shannon entropy, in bits, of a discrete distribution.

    Args:
        probs: iterable of probabilities (one per outcome).

    Returns:
        The sum of ``-p * log2(p)`` over the given probabilities. An empty
        iterable yields 0.

    Raises:
        ValueError: if a probability is negative (from ``math.log``).
    """
    # A zero probability contributes nothing (0 * log 0 is taken as 0);
    # skipping it avoids the math domain error that math.log(0, 2) raises.
    return sum(-prob * math.log(prob, 2) for prob in probs if prob != 0)

def entropy_of_list(yes_no_ls):
    """Return the entropy (in bits) of the class labels in ``yes_no_ls``.

    Each distinct label's relative frequency is used as its probability and
    the resulting distribution is passed to ``entropy``.
    """
    n_items = len(yes_no_ls)
    label_counts = Counter(iter(yes_no_ls))
    frequencies = [count / n_items for count in label_counts.values()]
    return entropy(frequencies)
    

In [58]:
def info_gain(df, split_attr, target_attr):
    """Return the information gain from splitting ``df`` on ``split_attr``.

    The gain is the entropy of ``target_attr`` over the whole frame minus the
    size-weighted entropy of ``target_attr`` within each group of
    ``split_attr``.
    """
    n_rows = len(df.index)

    # One row per distinct value of split_attr: the label entropy inside that
    # group and the fraction of all rows that fall into it.
    per_value = df.groupby(split_attr)[target_attr].agg(
        Entropy=entropy_of_list,
        Proportion=lambda labels: len(labels) / n_rows,
    )

    entropy_after = sum(per_value['Entropy'] * per_value['Proportion'])
    entropy_before = entropy_of_list(df[target_attr])
    return entropy_before - entropy_after

In [59]:
def id3(df, attr, target_attr, default_class=None, default_attr='S'):
    """Build a decision tree with the ID3 algorithm.

    Args:
        df: DataFrame holding the examples for this node.
        attr: collection of column names still available for splitting.
            It is not modified.
        target_attr: name of the column holding the class label.
        default_class: label returned when ``df`` has no rows.
        default_attr: unused; kept so existing callers keep working.

    Returns:
        A class label for a leaf, or a nested dict of the form
        ``{attribute: {value: subtree, ...}}`` for an internal node.
    """
    class_counts = Counter(label for label in df[target_attr])

    # Pure node: every example carries the same label.
    if len(class_counts) == 1:
        return next(iter(class_counts))

    # No examples at all: fall back to whatever the caller supplied.
    if df.empty:
        return default_class

    majority_class = class_counts.most_common(1)[0][0]

    # Attributes exhausted but labels still mixed: predict the most common
    # label instead of handing back an uninformative default.
    if not attr:
        return majority_class

    gains = {a: info_gain(df, a, target_attr) for a in attr}
    best_attr = max(gains, key=gains.get)

    # Build a fresh list rather than removing in place, so sibling branches
    # and the caller still see every attribute they are entitled to.
    remaining = [a for a in attr if a != best_attr]

    tree = {best_attr: {}}
    for val, data in df.groupby(best_attr):
        tree[best_attr][val] = id3(
            data, remaining, target_attr, majority_class, best_attr
        )

    return tree

In [60]:
# Hand id3 its own copy of the attribute list so the notebook-level `attr`
# stays intact and this cell produces the same tree when it is re-run.
tree = id3(df_tennis, list(attr), 'PlayTennis')
print(tree)

{'Outlook': {'Overcast': 'Yes', 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
