## Import required packages

In [1]:
import pandas as pd
import numpy as np
import math

### Read data

In [2]:
# Load the weather table from CSV into a DataFrame.
# NOTE(review): "/content/..." is an absolute path (looks Colab-specific) — confirm
# the file exists there before running on another machine.
df_tennis = pd.read_csv("/content/weather.csv") 
df_tennis  # bare last expression: shows the loaded frame as the cell output

Unnamed: 0,id,outlook,temperature,humidity,wind,play
0,1,sunny,hot,high,weak,no
1,2,sunny,hot,high,strong,no
2,3,overcast,hot,high,weak,yes
3,4,rainy,mild,high,weak,yes
4,5,rainy,cool,normal,weak,yes
5,6,rainy,cool,normal,strong,no
6,7,overcast,cool,normal,strong,yes
7,8,sunny,mild,high,weak,no
8,9,sunny,cool,normal,weak,yes
9,10,rainy,mild,normal,weak,yes


In [17]:
class Node:
    """A single vertex of the decision tree.

    Every instance starts out as an unlabeled, non-leaf node; the tree
    builder fills the attributes in afterwards.
    """

    def __init__(self):
        # Display label of the node and the prediction stored on a leaf;
        # both start as empty strings.
        self.value = self.pred = ""
        # Nodes are internal until explicitly marked as leaves.
        self.isLeaf = False
        # Each instance gets its own, initially empty, list of child nodes.
        self.children = list()

def entropy(examples, target_attr_name):
    """Return the binary entropy (in bits) of the target column.

    Rows whose target value equals the string ``"yes"`` are counted as
    positive; every other row is counted as negative.

    Args:
        examples: pandas DataFrame that contains ``target_attr_name``.
        target_attr_name: Label of the target column.

    Returns:
        float: ``0.0`` when the frame is empty or all rows fall on one side
        of the yes / not-yes split, otherwise
        ``-(p * log2(p) + n * log2(n))`` with ``p`` and ``n`` the positive
        and negative fractions.
    """
    labels = examples[target_attr_name]
    # Count with a vectorized comparison instead of a Python-level
    # ``iterrows`` loop; the resulting counts are the same.
    pos = float((labels == "yes").sum())
    neg = float(len(labels)) - pos
    if pos == 0.0 or neg == 0.0:
        # Pure (or empty) sample: avoids log(0) and is 0 bits by definition.
        return 0.0
    total = pos + neg
    p = pos / total
    n = neg / total
    return -(p * math.log(p, 2) + n * math.log(n, 2))

def info_gain(examples, attr, target_attr_name):
    """Return the information gain obtained by splitting on ``attr``.

    The gain is the entropy of ``examples`` minus the size-weighted
    entropies of the partitions induced by each distinct value of ``attr``.
    Intermediate values (the distinct values, the starting entropy and each
    partition) are printed as a trace.

    Args:
        examples: pandas DataFrame holding ``attr`` and ``target_attr_name``.
        attr: Label of the column to split on.
        target_attr_name: Label of the target column.

    Returns:
        float: The information gain of the split.
    """
    values = np.unique(examples[attr])
    print ("Uniq is \n",values, "for Attribute", attr)
    # Start from the entropy of the whole sample (the trace message labels it
    # "Total Gain") and subtract each partition's weighted entropy from it.
    remaining = entropy(examples, target_attr_name)
    print ("\n Total Gain for current sample",remaining)
    n_total = float(len(examples))
    for value in values:
        partition = examples[examples[attr] == value]
        print ("\n Data for sub-entropy",partition)
        weight = float(len(partition)) / n_total
        remaining -= weight * entropy(partition, target_attr_name)
    return remaining

def ID3(examples, attrs, target_attr_name):
    """Recursively build a decision tree with the ID3 procedure.

    The attribute with the largest strictly positive information gain
    becomes the split attribute of the returned node. For each distinct
    value of that attribute a child is added:

    * a leaf (``isLeaf=True``, ``value`` = attribute value, ``pred`` = array
      of the target labels present) when the subset has zero entropy, or
    * a branch node (``value`` = attribute value) whose single child is the
      subtree built from the subset with the split attribute removed.

    If ``attrs`` is empty or no attribute yields a positive gain, the data
    cannot be split further. Instead of failing with a ``KeyError`` on an
    empty attribute name, a leaf holding the most frequent target label is
    produced (ties resolve to the first label in sorted order).

    Progress is printed while the tree is built.

    Args:
        examples: pandas DataFrame with the attribute and target columns.
        attrs: List of candidate attribute column labels.
        target_attr_name: Label of the target column.

    Returns:
        Node: Root of the (sub)tree; a leaf when no split is possible.
    """
    root = Node()

    max_gain = 0
    max_feat = None
    for feature in attrs:
        gain = info_gain(examples, feature, target_attr_name)
        print("Feature: {} ; Info-Gain: {}".format(feature, gain))
        if gain > max_gain:
            max_gain = gain
            max_feat = feature

    if max_feat is None:
        # Nothing to split on: fall back to a majority-label leaf. ``pred`` is
        # kept as a one-element array so it has the same form as the ``pred``
        # of the pure leaves created below.
        labels, counts = np.unique(examples[target_attr_name], return_counts=True)
        root.isLeaf = True
        root.pred = labels[[np.argmax(counts)]] if len(labels) else labels
        return root

    root.value = max_feat
    print ("\nMax feature attr : {} ; max_gain = {}\n\n".format(max_feat, max_gain))

    # The chosen attribute is not offered again further down this branch.
    new_attrs = attrs.copy()
    new_attrs.remove(max_feat)

    uniq = np.unique(examples[max_feat])
    for u in uniq:
        subdata = examples[examples[max_feat] == u]
        if entropy(subdata, target_attr_name) == 0.0:
            # Zero-entropy subset: terminate this branch with a leaf.
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata[target_attr_name])
            root.children.append(newNode)
            print("New leaf node: {}".format(newNode.value))
        else:
            child = ID3(subdata, new_attrs, target_attr_name)
            if child.isLeaf:
                # The recursion could not split any further; attach its
                # majority leaf directly under this attribute value.
                child.value = u
                root.children.append(child)
                print("New leaf node: {}".format(child.value))
            else:
                dummyNode = Node()
                dummyNode.value = u
                dummyNode.children.append(child)
                root.children.append(dummyNode)
    return root

def printTree(root: Node, depth=0):
    """Print the tree rooted at ``root``, indenting each level by one tab.

    Each node's ``value`` is written on its own line; a leaf additionally
    shows `` -> `` followed by its ``pred`` and is followed by a blank line.

    Args:
        root: Node to print together with all of its descendants.
        depth: Nesting level of ``root``; sets the number of leading tabs.
    """
    # One tab per nesting level, emitted in a single write.
    print("\t" * depth, end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    # Terminates the node's line (adds an empty line after a leaf).
    print()
    next_depth = depth + 1
    for node in root.children:
        printTree(node, next_depth)

In [4]:
# Show the column labels of the loaded frame.
df_tennis.columns

Index(['id', 'outlook', 'temperature', 'humidity', 'wind', 'play'], dtype='object')

In [5]:
# Candidate attributes the tree may split on.
features = ['outlook', 'temperature', 'humidity', 'wind']

In [18]:
# Build the decision tree with 'play' as the target column, then print it.
# ID3 and info_gain print a trace while running, so this cell's output is long.
root = ID3(df_tennis, features, 'play')
printTree(root)

Uniq is 
 ['overcast' 'rainy' 'sunny'] for Attribute outlook

 Total Gain for current sample 0.9402859586706309

 Data for sub-entropy     id   outlook temperature humidity    wind play
2    3  overcast         hot     high    weak  yes
6    7  overcast        cool   normal  strong  yes
11  12  overcast        mild     high  strong  yes
12  13  overcast         hot   normal    weak  yes

 Info gain for sub entropy 0.9402859586706309

 Data for sub-entropy     id outlook temperature humidity    wind play
3    4   rainy        mild     high    weak  yes
4    5   rainy        cool   normal    weak  yes
5    6   rainy        cool   normal  strong   no
9   10   rainy        mild   normal    weak  yes
13  14   rainy        mild     high  strong   no

 Info gain for sub entropy 0.593517889222535

 Data for sub-entropy     id outlook temperature humidity    wind play
0    1   sunny         hot     high    weak   no
1    2   sunny         hot     high  strong   no
7    8   sunny        mild    