In [45]:
import pandas as pd
import math
import numpy as np

# Load the dataset; the candidate attributes are all of its columns
# except the "answer" column.
data = pd.read_csv("4-dataset.csv")
features = list(data.columns)
features.remove("answer")

class Node:
    """A single vertex of the decision tree.

    Every field starts out with a neutral default and is filled in by the
    code that builds the tree.
    """

    def __init__(self):
        # Child nodes; each instance gets its own fresh list.
        self.children: list = []
        # Label carried by this node (empty string until assigned).
        self.value = ''
        # Set to True when the node is marked as a leaf.
        self.isLeaf: bool = False
        # Prediction stored on the node (empty string until assigned).
        self.pred = ''
        
        
def entropy(examples):
    """Return the binary entropy, in bits, of the 'answer' column.

    Rows whose 'answer' equals 'yes' count as positive; every other row
    counts as negative.

    Args:
        examples: pandas DataFrame containing an 'answer' column.

    Returns:
        A Python float between 0.0 and 1.0. The result is 0.0 when the
        frame has no rows or when all rows fall on the same side.
    """
    total = len(examples)
    # Vectorised count of positives instead of a row-by-row iterrows() loop.
    pos = int((examples['answer'] == 'yes').sum())
    neg = total - pos

    # A pure (or empty) set carries no uncertainty; this also keeps
    # log2(0) from ever being evaluated.
    if pos == 0 or neg == 0:
        return 0.0

    p = pos / total
    n = neg / total
    # log2 is exact for powers of two, so an even split yields exactly 1.0.
    return -(p * math.log2(p) + n * math.log2(n))
    
    
def info(examples, attr):
    """Return the information gain of splitting `examples` on column `attr`.

    The gain is the entropy of the whole set minus the size-weighted
    entropy of each subset that shares one value of `attr`.

    Args:
        examples: pandas DataFrame with the `attr` column and an 'answer' column.
        attr: name of the column to split on.

    Returns:
        The gain as a float.
    """
    levels = np.unique(examples[attr])
    total = len(examples)
    remaining = entropy(examples)
    for level in levels:
        subset = examples[examples[attr] == level]
        weight = len(subset) / total
        remaining -= weight * entropy(subset)
    return remaining


def _majority_answer(examples):
    """Return the most frequent 'answer' as a one-element array.

    Ties go to the label that sorts first; an empty frame yields an empty
    array.
    """
    labels, counts = np.unique(examples['answer'], return_counts=True)
    if len(labels) == 0:
        return labels
    return labels[[np.argmax(counts)]]


def ID3(examples, attrs):
    """Build a decision tree for `examples` using the ID3 algorithm.

    The tree alternates between attribute nodes (``value`` is a column name,
    children are value nodes) and value nodes (``value`` is one value of that
    column). A value node is either a leaf carrying ``pred`` or has exactly
    one child: the attribute node for the next split.

    Args:
        examples: pandas DataFrame with the attribute columns and an
            'answer' column.
        attrs: list of column names still available for splitting; it is
            not modified.

    Returns:
        The root Node. When no split is possible (no attributes left, no
        rows, or all rows share one answer) the root itself is a leaf whose
        ``pred`` holds the majority answer.
    """
    root = Node()

    # Nothing to split on or nothing left to separate: answer with the
    # majority label instead of indexing a non-existent '' column.
    if len(attrs) == 0 or entropy(examples) == 0.0:
        root.isLeaf = True
        root.pred = _majority_answer(examples)
        return root

    # max() keeps the first attribute among equal gains, and still picks one
    # when every gain is zero so the remaining attributes get a chance.
    best = max(attrs, key=lambda feature: info(examples, feature))
    root.value = best
    remaining = [feature for feature in attrs if feature != best]

    for un in np.unique(examples[best]):
        subdata = examples[examples[best] == un]
        branch = Node()
        branch.value = un
        if entropy(subdata) == 0.0:
            branch.isLeaf = True
            branch.pred = np.unique(subdata['answer'])
        else:
            subtree = ID3(subdata, remaining)
            if subtree.isLeaf:
                # The mixed subset could not be split further; fold the
                # majority-vote leaf into this value node so every
                # non-leaf value node keeps an attribute node as child.
                branch.isLeaf = True
                branch.pred = subtree.pred
            else:
                branch.children.append(subtree)
        root.children.append(branch)
    return root


def printTree(root: "Node", depth=0):
    """Print the subtree rooted at `root`, one node per line.

    Each node is indented by one tab per level. Leaf nodes are printed as
    ``<value>->  <pred>``; other nodes print only their value.

    Args:
        root: node to start printing from.
        depth: indentation level of `root` (0 for the top of the tree).
    """
    indent = "\t" * depth
    if root.isLeaf:
        # One print call per leaf: an unconditional trailing print() after
        # the leaf text would leave a blank line after every leaf.
        print(indent + str(root.value) + "-> ", root.pred)
    else:
        print(indent + str(root.value))
    for child in root.children:
        printTree(child, depth + 1)
        
        
def classify(root: "Node", new):
    """Walk the tree for the example `new`, then print and return its label.

    Args:
        root: attribute node at the top of the (sub)tree, or a lone leaf.
        new: mapping from attribute name to the example's value.

    Returns:
        The ``pred`` of the leaf that was reached, or None when no branch
        matches the example's value (nothing is printed in that case).

    Raises:
        KeyError: if `new` lacks the attribute tested by a visited node.
    """
    if root.isLeaf:
        # Tree consisting of a single leaf: there is nothing to test.
        print("Predicted label for new example\n", new, "is", root.pred)
        return root.pred

    for child in root.children:
        if child.value != new[root.value]:
            continue
        if child.isLeaf:
            print("Predicted label for new example\n", new, "is", child.pred)
            return child.pred
        # A non-leaf value node holds the next attribute node as its
        # only child.
        return classify(child.children[0], new)
    return None
                
                
# Build the tree from the full dataset and display it.
root = ID3(data, features)
print("Decision Tree is:")
printTree(root)
print("____________________________________________________________________________")


# Classify two hand-written examples, one after the other.
for new in (
    {"outlook":"overcast", "temperature":"hot", "humidity":"normal", "wind":"strong"},
    {"outlook":"sunny", "temperature":"hot", "humidity":"high", "wind":"strong"},
):
    classify(root, new)

Decision Tree is:
outlook
	overcast->  ['yes']

	rain
		wind
			strong->  ['no']

			weak->  ['yes']

	sunny
		humidity
			high->  ['no']

			normal->  ['yes']

____________________________________________________________________________
Predicted label for new example
 {'outlook': 'overcast', 'temperature': 'hot', 'humidity': 'normal', 'wind': 'strong'} is ['yes']
Predicted label for new example
 {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'high', 'wind': 'strong'} is ['no']
