In [9]:
import pandas as pd
import numpy as np
from math import log

In [10]:
class Node:
    """A single node of the decision tree.

    Attributes:
        children: child ``Node`` objects hanging below this node.
        value: label stored on the node (a feature name or a feature value,
            depending on where the tree builder places the node).
        isLeaf: ``True`` when the node carries a final prediction.
        pred: predicted class label; only meaningful when ``isLeaf`` is set.
    """

    def __init__(self):
        # Every node starts out as an empty, non-leaf node with no children.
        self.children, self.value = [], ""
        self.isLeaf, self.pred = False, ""



In [11]:
def entropy(data):
    """Return the binary entropy (in bits) of the "playFootball" column.

    Only rows labelled exactly "yes" or "no" are counted; any other label is
    ignored.

    Args:
        data: DataFrame containing a "playFootball" column.

    Returns:
        0 when the data is empty or contains only one of the two classes,
        otherwise ``-(p_yes * log2(p_yes) + p_no * log2(p_no))`` as a float.
    """
    # An empty frame has nothing to count (and may not even have the column).
    if len(data) == 0:
        return 0

    labels = data["playFootball"]
    # Vectorized counting replaces the slow row-by-row iterrows() loop.
    yes = float((labels == "yes").sum())
    no = float((labels == "no").sum())

    # A pure (single-class) set has zero entropy; this also avoids log(0).
    if yes == 0.0 or no == 0.0:
        return 0

    total = yes + no
    py = yes / total
    pn = no / total
    return -(py * log(py, 2) + pn * log(pn, 2))


In [12]:
def info_gain(dataset, feature):
    """Compute the information gain obtained by splitting on ``feature``.

    The gain is the entropy of ``dataset`` minus the size-weighted entropy of
    each partition produced by the distinct values of ``feature``.

    Args:
        dataset: DataFrame holding the feature column and the label column
            read by ``entropy``.
        feature: name of the column to split on.

    Returns:
        The information gain of the split.
    """
    levels = np.unique(dataset[feature])
    remaining = entropy(dataset)
    total_rows = len(dataset)
    for level in levels:
        partition = dataset[dataset[feature] == level]
        partition_entropy = entropy(partition)
        # Weight each partition's entropy by its share of the rows.
        weight = len(partition) / total_rows
        remaining -= weight * partition_entropy
    return remaining
    


In [13]:
def ID3(dataset, features):
    """Recursively build an ID3 decision tree predicting "playFootball".

    Tree layout: a feature node (``value`` = feature name) has one child per
    distinct value of that feature (``value`` = feature value). Such a value
    node is either a leaf carrying ``pred``, or has exactly one child: the
    feature node of the subtree built on the matching rows.

    Args:
        dataset: DataFrame with the feature columns and a "playFootball"
            label column.
        features: list of candidate feature column names.

    Returns:
        The root ``Node``. When no feature yields a strictly positive
        information gain (no features left, identical rows with conflicting
        labels, or an already-pure dataset) the root itself is a leaf whose
        ``pred`` is the most frequent label ("" if there are no labels).
    """
    root = Node()

    # Select the feature with the largest strictly positive information gain.
    max_gain = 0
    max_feature = None
    for feature in features:
        gain = info_gain(dataset, feature)
        if gain > max_gain:
            max_gain = gain
            max_feature = feature

    if max_feature is None:
        # Nothing separates the classes any further. Previously this fell
        # through to dataset[""] and raised KeyError; fall back to a
        # majority-vote leaf instead.
        modes = dataset["playFootball"].mode()
        root.isLeaf = True
        root.pred = modes.iloc[0] if len(modes) else ""
        return root

    root.value = max_feature
    # The chosen feature is not reused further down this path.
    remaining = [f for f in features if f != max_feature]

    for a in np.unique(dataset[max_feature]):
        subdata = dataset[dataset[max_feature] == a]
        branch = Node()
        branch.value = a
        if entropy(subdata) == 0.0:
            # Pure partition: predict its single label.
            branch.isLeaf = True
            branch.pred = np.unique(subdata["playFootball"])[0]
        else:
            subtree = ID3(subdata, remaining)
            if subtree.isLeaf:
                # The subtree could not split; collapse its majority-vote
                # prediction onto this value node so the layout stays the same.
                branch.isLeaf = True
                branch.pred = subtree.pred
            else:
                branch.children.append(subtree)
        root.children.append(branch)
    return root



In [14]:
def printTree(root: Node, depth=0):
    """Print the tree rooted at ``root``, one node per line, tab-indented.

    Each node is indented by ``depth`` tab characters. Leaf nodes are printed
    as ``value ->  pred`` and are followed by an empty line.

    Args:
        root: node to print together with all of its descendants.
        depth: indentation level of ``root``.
    """
    print("\t" * depth + str(root.value), end="")
    if root.isLeaf:
        # The prediction line is terminated and followed by a blank line.
        print(" -> ", root.pred, end="\n\n")
    else:
        print()
    next_depth = depth + 1
    for subtree in root.children:
        printTree(subtree, next_depth)


In [15]:
def classify(root: Node, new):
    """Walk the tree for the example ``new`` and print its predicted label.

    At a feature node (``root.value`` is a feature name) the child whose
    ``value`` equals ``new[root.value]`` is followed. Reaching a leaf prints
    the prediction; otherwise the walk continues in that child's subtree.
    Nothing is printed when no child matches.

    Args:
        root: feature node to start from.
        new: mapping from feature name to the example's value.
    """
    for branch in root.children:
        if not (branch.value == new[root.value]):
            continue
        if not branch.isLeaf:
            # Internal value node: descend into its single subtree.
            classify(branch.children[0], new)
            continue
        print("Predicted Label for new example", new, " is:", branch.pred)
        return



In [16]:
# Load the training data and show it in full.
dataset = pd.read_csv("PlayFootball.csv")
print("The DATASET", dataset, sep="\n")

# Every column except the label is a candidate split feature.
features = [column for column in dataset.columns if column != "playFootball"]
print(features)

# Build the tree, then print it between separator lines.
root = ID3(dataset, features)
print("----------------------------")
for _ in range(3):
    print()
print("Decision Tree is:")
printTree(root)
print("----------------------------")

The DATASET
   weather temperature humidity    wind playFootball
0    sunny         hot     high    weak           no
1    sunny         hot     high  strong           no
2   cloudy         hot     high    weak          yes
3    rainy        mild     high    weak          yes
4    rainy        cool   normal    weak          yes
5    rainy        cool   normal  strong           no
6   cloudy        cool   normal  strong          yes
7    sunny        mild     high    weak           no
8    sunny        cool   normal    weak          yes
9    rainy        mild   normal    weak          yes
10   sunny        mild   normal  strong          yes
11  cloudy        mild     high  strong          yes
12  cloudy         hot   normal    weak          yes
13   rainy        mild     high  strong           no
['weather', 'temperature', 'humidity', 'wind']
----------------------------



Decision Tree is:
weather
	cloudy ->  yes

	rainy
		wind
			strong ->  no

			weak ->  yes

	sunny
		humidity
			h