In [1]:
import pandas as pd
import numpy as np
data = pd.read_csv("dataset.csv")
features = [feat for feat in data]
features.remove("answer")

class Node:
    def __init__(self):
        self.children = []
        self.value = ""
        self.isLeaf = False
        self.pred = ""

def gini_index(examples):
    pos = len(examples[examples["answer"] == "yes"])
    neg = len(examples[examples["answer"] == "no"])
    total = len(examples)
    
    if total == 0:
        return 0.0
    
    p_pos = pos / total
    p_neg = neg / total
    
    return 1 - (p_pos**2 + p_neg**2)

def info_gain_c45(examples, attr):
    uniq = np.unique(examples[attr])
    gain = gini_index(examples)
    
    for u in uniq:
        subdata = examples[examples[attr] == u]
        sub_gini = gini_index(subdata)
        gain -= (len(subdata) / len(examples)) * sub_gini
    
    return gain

def CART(examples, attrs):
    root = Node()

    min_gini = float('inf')
    best_attr = ""
    
    for feature in attrs:
        gain = info_gain_c45(examples, feature)
        
        if gain < min_gini:
            min_gini = gain
            best_attr = feature
    
    root.value = best_attr
    
    uniq = np.unique(examples[best_attr])
    
    for u in uniq:
        subdata = examples[examples[best_attr] == u]
        
        if gini_index(subdata) == 0.0:
            newNode = Node()
            newNode.isLeaf = True
            newNode.value = u
            newNode.pred = np.unique(subdata["answer"])
            root.children.append(newNode)
        else:
            dummyNode = Node()
            dummyNode.value = u
            new_attrs = attrs.copy()
            new_attrs.remove(best_attr)
            child = CART(subdata, new_attrs)
            dummyNode.children.append(child)
            root.children.append(dummyNode)

    return root

def printTreeCART(root: Node, depth=0):
    for i in range(depth):
        print("\t", end="")
    print(root.value, end="")
    if root.isLeaf:
        print(" -> ", root.pred)
    print()
    for child in root.children:
        printTreeCART(child, depth + 1)

def classifyCART(root: Node, new):
    for child in root.children:
        if child.value == new[root.value]:
            if child.isLeaf:
                print ("Predicted Label for new example", new," is:", child.pred)
                return
            else:
                classifyCART(child.children[0], new)

# Assuming "dataset.csv" is your dataset file
data = pd.read_csv("dataset.csv")
features = [feat for feat in data]
features.remove("answer")

root_cart = CART(data, features)
print("CART Decision Tree is:")
printTreeCART(root_cart)
print ("------------------")

new_example = {"outlook": "sunny", "temperature": "hot", "humidity": "normal", "wind": "strong"}
classifyCART(root_cart, new_example)


CART Decision Tree is:
temperature
	cool
		humidity
			normal
				outlook
					overcast ->  ['yes']

					rain
						wind
							strong ->  ['no']

							weak ->  ['yes']

					sunny ->  ['yes']

	hot
		humidity
			high
				wind
					strong ->  ['no']

					weak
						outlook
							overcast ->  ['yes']

							sunny ->  ['no']

			normal ->  ['yes']

	mild
		wind
			strong
				humidity
					high
						outlook
							overcast ->  ['yes']

							rain ->  ['no']

					normal ->  ['yes']

			weak
				humidity
					high
						outlook
							rain ->  ['yes']

							sunny ->  ['no']

					normal ->  ['yes']

------------------
Predicted Label for new example {'outlook': 'sunny', 'temperature': 'hot', 'humidity': 'normal', 'wind': 'strong'}  is: ['yes']
