## Code and Output

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import log

In [2]:
# Row labels are 1-based so they match the example numbering in the table.
index = list(range(1, 11))

# Toy "weekend activity" training set: three categorical features plus the
# target column 'Decision'. Column order here is the column order of the frame.
data = pd.DataFrame(
    {
        'Weather': ['Sunny', 'Sunny', 'Windy', 'Rainy', 'Rainy',
                    'Rainy', 'Windy', 'Windy', 'Windy', 'Sunny'],
        'Parents': ['Yes', 'No', 'Yes', 'Yes', 'No',
                    'Yes', 'No', 'No', 'Yes', 'No'],
        'Money': ['Rich', 'Rich', 'Rich', 'Poor', 'Rich',
                  'Poor', 'Poor', 'Rich', 'Rich', 'Rich'],
        'Decision': ['Cinema', 'Tennis', 'Cinema', 'Cinema', 'Stay-In',
                     'Cinema', 'Cinema', 'Shopping', 'Cinema', 'Tennis'],
    },
    index=index,
)

In [3]:
# Show the whole training table; it has only ten rows, so no .head() is needed.
data

Unnamed: 0,Weather,Parents,Money,Decision
1,Sunny,Yes,Rich,Cinema
2,Sunny,No,Rich,Tennis
3,Windy,Yes,Rich,Cinema
4,Rainy,Yes,Poor,Cinema
5,Rainy,No,Rich,Stay-In
6,Rainy,Yes,Poor,Cinema
7,Windy,No,Poor,Cinema
8,Windy,No,Rich,Shopping
9,Windy,Yes,Rich,Cinema
10,Sunny,No,Rich,Tennis


In [4]:
class Node:
    """A single node of the decision tree.

    Parameters
    ----------
    attribute :
        Stored as ``label``. DecisionTree stores the attribute name to
        split on here for internal nodes (``end`` is False) and the
        predicted outcome for leaves (``end`` is True).
    end : bool
        True when this node is a leaf.
    """

    def __init__(self, attribute, end):
        # Split attribute (internal node) or predicted outcome (leaf).
        self.label = attribute
        # Maps each observed value of the split attribute to a child Node;
        # stays empty for leaves.
        self.children = dict()
        # Leaf marker checked by DecisionTree.predict to stop descending.
        self.end = end


In [5]:
class DecisionTree:
    """ID3-style decision tree over categorical attributes.

    Every frame passed to this class must contain a 'Decision' column with
    the target label. The tree is made of ``Node`` objects; ``fit`` returns
    the root instead of storing it, and ``predict`` takes that root back.
    """

    # Information gains at or below this value are treated as "no gain".
    # The gain is an accumulated float sum, so an exact ``== 0`` test can be
    # defeated by rounding noise and force a split on a useless attribute.
    _GAIN_EPS = 1e-12

    def __init__(self):
        pass

    def p_log_p(self, p):
        """Return ``-p * log2(p)``, using the convention that 0 * log(0) = 0."""
        if p == 0:
            return 0
        return -p * log(p, 2)

    def entropy(self, df):
        """Return the Shannon entropy (in bits) of ``df['Decision']``.

        An empty frame has entropy 0.
        """
        den = df.shape[0]
        # value_counts gives one count per distinct label in a reproducible
        # order, unlike iterating a set of strings (hash-randomised order).
        counts = df['Decision'].value_counts().tolist()
        return sum(self.p_log_p(num / den) for num in counts)

    def info_gain(self, df, attribute):
        """Return the entropy reduction obtained by splitting ``df`` on ``attribute``."""
        den = df.shape[0]
        ig = self.entropy(df)
        column = df[attribute]
        for val in column.unique():
            # Filter once and reuse the subset for both weight and entropy.
            subset = df[column == val]
            ig -= (subset.shape[0] / den) * self.entropy(subset)
        return ig

    def mode(self, df):
        """Return the most frequent value of ``df['Decision']``.

        Ties go to the label that appears first in the frame. Returns None
        for an empty frame.
        """
        decisions = df['Decision']
        max_vote = -np.inf
        label = None
        # dict.fromkeys de-duplicates while keeping first-appearance order,
        # so each distinct label is counted once instead of once per row.
        for outcome in dict.fromkeys(decisions):
            instances = int((decisions == outcome).sum())
            if instances > max_vote:
                max_vote = instances
                label = outcome
        return label

    def predict(self, X, root):
        """Walk the tree from ``root`` and return the predicted decision for ``X``.

        Parameters
        ----------
        X :
            One sample supporting ``X[attribute_name]`` lookup (for example a
            DataFrame row or a dict).
        root : Node
            Root of a tree returned by ``fit``.

        Raises
        ------
        KeyError
            If ``X`` holds an attribute value with no branch in the tree,
            i.e. a value that was not present in the training data.
        """
        node = root
        while not node.end:
            value = X[node.label]
            if value not in node.children:
                raise KeyError(
                    "no branch for %s=%r; the value was not seen during training"
                    % (node.label, value)
                )
            node = node.children[value]
        return node.label

    def fit(self, df, attributes):
        """Recursively build a decision tree and return its root ``Node``.

        Parameters
        ----------
        df : pandas.DataFrame
            Training rows, including the 'Decision' column.
        attributes : sequence of str
            Column names still available for splitting. This sequence is
            NOT modified; callers do not need to pass a copy.

        Returns
        -------
        Node
            A leaf holding the majority decision when no attribute is left
            or no attribute yields a positive information gain; otherwise an
            internal node with one child per observed value of the chosen
            attribute.
        """
        if len(attributes) == 0:
            return Node(self.mode(df), True)

        # Pick the attribute with the highest gain; the strict '>' keeps the
        # earliest attribute on ties.
        max_ig = -np.inf
        chosen = None
        for attribute in attributes:
            ig = self.info_gain(df, attribute)
            if ig > max_ig:
                max_ig = ig
                chosen = attribute

        # No attribute separates the classes (this includes pure nodes).
        if max_ig <= self._GAIN_EPS:
            return Node(self.mode(df), True)

        # Build a fresh list rather than calling attributes.remove(), which
        # would mutate the caller's list.
        remaining = [attribute for attribute in attributes if attribute != chosen]

        root = Node(chosen, False)
        column = df[chosen]
        for val in column.unique():
            root.children[val] = self.fit(df[column == val], remaining)
        return root
    
        

In [6]:
# Feature columns the tree may split on ('Decision' is the target, not a feature).
attributes = ['Weather', 'Parents', 'Money']

decision_tree = DecisionTree()
# Hand fit() its own list so `attributes` keeps all three names afterwards.
root = decision_tree.fit(data, list(attributes))

In [7]:
# Re-use the training rows (minus the target column) as a quick sanity check.
X_test = data.drop(columns=['Decision'])

# Preview the first five rows together with their predictions.
for i in range(5):
    sample = X_test.iloc[i]
    y_pred = decision_tree.predict(sample, root)
    # Double brackets keep the row as a one-line DataFrame for printing.
    print(X_test.iloc[[i]])
    print('\nThe prediction is : %s\n' % y_pred)

  Weather Parents Money
1   Sunny     Yes  Rich

The prediction is : Cinema

  Weather Parents Money
2   Sunny      No  Rich

The prediction is : Tennis

  Weather Parents Money
3   Windy     Yes  Rich

The prediction is : Cinema

  Weather Parents Money
4   Rainy     Yes  Poor

The prediction is : Cinema

  Weather Parents Money
5   Rainy      No  Rich

The prediction is : Stay-In

