In [8]:
import pandas as pd
import math
from collections import Counter


def load_data(fp):
    return pd.read_excel(fp)


def entropy(data):
    n = len(data)
    if n == 0:
        return 0
    cc = Counter(data)
    probs = [count / n for count in cc.values()]
    return -sum(p * math.log2(p) for p in probs if p > 0)


def info_gain(data, att, tar):
    oe = entropy(data[tar])  
    values = data[att].unique()
    we = 0
    for v in values:
        subset = data[data[att] == v]
        we += (len(subset) / len(data)) * entropy(subset[tar])
    return oe - we


def gini_index(data, tar):
    n = len(data)
    if n == 0:
        return 0
    cc = Counter(data[tar])
    probs = [count / n for count in cc.values()]
    return 1 - sum(p**2 for p in probs)


def gini_gain(data, att, tar):
    oe = gini_index(data, tar)
    values = data[att].unique()
    we = 0
    for v in values:
        subset = data[data[att] == v]
        we += (len(subset) / len(data)) * gini_index(subset, tar)
    return oe - we


def best_att_id3(data, att, tar):
    bg = -1
    ba = None
    for a in att:
        gain = info_gain(data, a, tar) 
        if gain > bg:
            bg = gain
            ba = a
    return ba


def best_att_cart(data, att, tar):
    bg = -1
    ba = None
    for a in att:
        gain = gini_gain(data, a, tar)  
        if gain > bg:
            bg = gain
            ba = a
    return ba


def build_dt_id3(data, att, tar):
    if len(data) == 0:
        return None
    if len(set(data[tar])) == 1:
        return data[tar].iloc[0] 
   
    ba = best_att_id3(data, att, tar)  
    if ba is None:
        raise ValueError("No valid attribute selected.")
   
    tree = {ba: {}}
    att_val = data[ba].unique()
   
    for v in att_val:
        s_data = data[data[ba] == v]
        s_att = [a for a in att if a != ba]
        subtree = build_dt_id3(s_data, s_att, tar)  
        tree[ba][v] = subtree
   
    return tree


def build_dt_cart(data, att, tar):
    if len(data) == 0:
        return None
    if len(set(data[tar])) == 1:
        return data[tar].iloc[0]  
   
    ba = best_att_cart(data, att, tar)  
    if ba is None:
        raise ValueError("No valid attribute selected.")
   
    tree = {ba: {}}
    att_val = data[ba].unique()
   
    if len(att_val) > 2:  
        att_val = [att_val[0], att_val[1]]  

    for v in att_val:
        s_data = data[data[ba] == v]
        s_att = [a for a in att if a != ba]
        subtree = build_dt_cart(s_data, s_att, tar)  
        tree[ba][v] = subtree
   
    return tree


dataset = pd.read_csv('C:\\Users\\2022503013\\Downloads\\data.csv')

attr = list(dataset.columns)
target = 'buys_computer'
attr.remove(target)



tree_id3 = build_dt_id3(df, attr, target)
print("Decision Tree Using ID3:")
pprint(tree_id3)


tree_cart = build_dt_cart(df, att, tar)
print("Decision Tree Using CART:")
pprint(tree_cart)

Decision Tree Using ID3:
{'age': {'middle_aged': 'yes',
         'senior': {'credit_rating': {'excellent': 'no', 'fair': 'yes'}},
         'youth': {'student': {'no': 'no', 'yes': 'yes'}}}}
Decision Tree Using CART:
{'age': {'middle_aged': 'yes',
         'youth': {'student': {'no': 'no', 'yes': 'yes'}}}}
