In [1]:
"""
performs the id3 algorithm on the dataset

Attributes:
Attend;Hear;Coverage;Uncomfortable;TakeNotes

values:
no;no;no;yes;no
yes;no;no;no;no
yes;yes;yes;yes;yes
yes;yes;no;yes;yes
yes;no;yes;no;yes
no;no;yes;no;yes
no;yes;no;yes;no
yes;yes;yes;no;yes
"""

'\nperforms the id3 algorithm on the dataset\n\nAttributes:\nAttend;Hear;Coverage;Uncomfortable;TakeNotes\n\nvalues:\nno;no;no;yes;no\nyes;no;no;no;no\nyes;yes;yes;yes;yes\nyes;yes;no;yes;yes\nyes;no;yes;no;yes\nno;no;yes;no;yes\nno;yes;no;yes;no\nyes;yes;yes;no;yes\n'

In [2]:
from math import log2
from collections import defaultdict

def B(q):
    """Binary entropy, in bits, of a two-outcome variable with probability ``q``.

    The degenerate probabilities 0 and 1 short-circuit to the integer ``0`` so
    that ``log2(0)`` is never evaluated.
    """
    if q == 0 or q == 1:
        return 0
    complement = 1 - q
    return -(q * log2(q) + complement * log2(complement))

def entropy(examples, target_attr):
    """Shannon entropy (base 2) of the ``target_attr`` values in ``examples``.

    ``examples`` is a sequence of records indexable by ``target_attr``.
    An empty sequence yields ``0.0``.
    """
    # Tally how often each target value occurs; counts are kept as floats.
    tallies = {}
    for row in examples:
        label = row[target_attr]
        tallies[label] = tallies.get(label, 0.0) + 1.0

    # Accumulate -p * log2(p) over the observed values, in first-seen order.
    result = 0.0
    for tally in tallies.values():
        share = tally / len(examples)
        result += -share * log2(share)

    return result

def information_gain(examples, attr, target_attr):
    """Drop in ``target_attr`` entropy obtained by partitioning on ``attr``.

    The result is the entropy of all ``examples`` minus the average entropy of
    the partitions induced by each distinct value of ``attr``, each partition
    weighted by the fraction of examples it contains.
    """
    # Count the occurrences of every value taken by ``attr`` (as floats).
    counts = {}
    for row in examples:
        counts[row[attr]] = counts.get(row[attr], 0.0) + 1.0

    total = sum(counts.values())

    # Weighted sum of the entropies of the per-value partitions.
    remainder = 0.0
    for value, count in counts.items():
        matching = [row for row in examples if row[attr] == value]
        remainder += (count / total) * entropy(matching, target_attr)

    return entropy(examples, target_attr) - remainder


def id3(examples, attributes, target_attr, default_class=None, min_gain=1e-12):
    """Build a decision tree with the ID3 algorithm.

    Parameters
    ----------
    examples : sequence of records indexable by attribute name.
    attributes : sequence of attribute names that may still be split on.
    target_attr : name of the attribute holding the class label.
    default_class : value returned when ``examples`` is empty.
    min_gain : largest information gain that is still treated as "no gain".
        The default only absorbs floating-point rounding noise; a larger value
        stops splitting earlier.

    Returns
    -------
    Either a class label (a leaf) or a nested dict of the form
    ``{attribute: {attribute_value: subtree, ...}}``.
    """
    # Nothing to learn from: fall back to the caller-supplied default.
    if not examples:
        return default_class
    # No attribute left to split on: predict the most common label.
    if len(attributes) == 0:
        return majority_value(examples, target_attr)

    # Find the attribute that maximizes the information gain; on equal gains
    # the attribute listed first in ``attributes`` is kept.
    gains = [(information_gain(examples, attr, target_attr), attr) for attr in attributes]
    max_gain, best_attr = max(gains, key=lambda x: x[0])

    # The gain is a difference of floating-point sums, so an uninformative
    # attribute can score a tiny non-zero (even negative) value instead of an
    # exact 0. Comparing against a threshold avoids splitting on such noise.
    if max_gain <= min_gain:
        return majority_value(examples, target_attr)

    tree = {best_attr: {}}
    # Each attribute is used at most once along any root-to-leaf path.
    new_attrs = [attr for attr in attributes if attr != best_attr]

    # Grow one subtree per value of the chosen attribute seen in ``examples``.
    for attr_val, subset in split_data(examples, best_attr):
        tree[best_attr][attr_val] = id3(subset, new_attrs, target_attr, default_class, min_gain)

    return tree
    
    
def majority_value(examples, target_attr):
    """Return the most common ``target_attr`` value among ``examples``.

    When several values are equally common, the one that occurs first in
    ``examples`` is returned, so the result does not depend on set/hash
    ordering and is the same on every run.

    Raises ``ValueError`` if ``examples`` is empty.
    """
    # Single pass; the dict keeps labels in order of first appearance.
    counts = {}
    for record in examples:
        label = record[target_attr]
        counts[label] = counts.get(label, 0) + 1
    # max() keeps the first maximal key it meets, i.e. the earliest-seen label.
    return max(counts, key=counts.get)

def split_data(examples, attr):
    """Partition ``examples`` according to their value of ``attr``.

    Returns the ``items()`` view of a mapping from each distinct value, in
    order of first appearance, to the list of records that carry it.
    """
    partitions = defaultdict(list)
    for row in examples:
        key = row[attr]
        partitions[key].append(row)
    return partitions.items()



In [3]:
"""Attributes:
Attend;Hear;Coverage;Uncomfortable;TakeNotes

values:
no;no;no;yes;no
yes;no;no;no;no
yes;yes;yes;yes;yes
yes;yes;no;yes;yes
yes;no;yes;no;yes
no;no;yes;no;yes
no;yes;no;yes;no
yes;yes;yes;no;yes"""

# Column names: the four candidate split attributes, then the class label.
attributes = ['Attend', 'Hear', 'Coverage', 'Uncomfortable']
target_attr = 'TakeNotes'

# One dict per example, keyed by column name. The rows are listed in the same
# order as the semicolon-separated table above.
data = [
    dict(zip(attributes + [target_attr], row))
    for row in (
        ('no', 'no', 'no', 'yes', 'no'),
        ('yes', 'no', 'no', 'no', 'no'),
        ('yes', 'yes', 'yes', 'yes', 'yes'),
        ('yes', 'yes', 'no', 'yes', 'yes'),
        ('yes', 'no', 'yes', 'no', 'yes'),
        ('no', 'no', 'yes', 'no', 'yes'),
        ('no', 'yes', 'no', 'yes', 'no'),
        ('yes', 'yes', 'yes', 'no', 'yes'),
    )
]

# Induce the tree and show its nested-dict form.
decision_tree = id3(data, attributes, target_attr)
print(decision_tree)

{'Coverage': {'no': {'Attend': {'no': 'no', 'yes': {'Hear': {'no': 'no', 'yes': 'yes'}}}}, 'yes': 'yes'}}


In [4]:
def draw_tree(tree, indent=''):
    """Print a decision tree as an indented outline, one node per line.

    Parameters
    ----------
    tree : nested dict of the form ``{attribute: {value: subtree_or_label}}``
        as produced by ``id3``, or a bare label. ``id3`` returns a bare label
        (possibly ``None``) when it has nothing to split on.
    indent : prefix printed before every line; deepened by two spaces per level.

    Keys and labels are converted with ``str()`` before printing, so trees
    whose attribute values or class labels are not strings (e.g. 0/1) can be
    drawn as well.
    """
    # A tree that is just a leaf: print the label instead of iterating over it.
    if not isinstance(tree, dict):
        print(indent + str(tree))
        return

    for node, child in tree.items():
        print(indent + str(node))
        if isinstance(child, dict):
            draw_tree(child, indent + '  ')
        else:
            print(indent + '  ' + str(child))
            
# Render the most recently built ``decision_tree`` as an indented outline.
draw_tree(decision_tree)

Coverage
  no
    Attend
      no
        no
      yes
        Hear
          no
            no
          yes
            yes
  yes
    yes


In [9]:
""" 
data:
0,0,0,0,0
0,1,1,1,0
0,0,1,0,0
1,0,1,1,1
1,1,1,0,1
0,0,1,0,0
1,1,1,1,1
0,0,0,0,0
0,0,1,0,0
1,0,0,0,0
"""

# Columns of the 0/1 table above, left to right (order inferred from the
# original hand-written records); 0 encodes 'no' and 1 encodes 'yes'.
attributes = ['Class', 'Food', 'Busy', 'Motivated']
target_attr = 'Go to Gym'

# The records are generated from the 0/1 rows instead of being retyped as
# dicts: the hand-written version had drifted from the table (its second
# record read Busy='no', Motivated='no' where the table row is 0,1,1,1,0).
data = [
    dict(zip(attributes + [target_attr], ['yes' if flag else 'no' for flag in row]))
    for row in (
        (0, 0, 0, 0, 0),
        (0, 1, 1, 1, 0),
        (0, 0, 1, 0, 0),
        (1, 0, 1, 1, 1),
        (1, 1, 1, 0, 1),
        (0, 0, 1, 0, 0),
        (1, 1, 1, 1, 1),
        (0, 0, 0, 0, 0),
        (0, 0, 1, 0, 0),
        (1, 0, 0, 0, 0),
    )
]

# Build the tree
decision_tree = id3(data, attributes, target_attr)

print(decision_tree)

draw_tree(decision_tree)

{'Class': {'no': 'no', 'yes': {'Busy': {'yes': 'yes', 'no': 'no'}}}}
Class
  no
    no
  yes
    Busy
      yes
        yes
      no
        no
