In [1]:
import math
from collections import Counter

import numpy as np
import pandas as pd

In [2]:
def gini_impurity(data, target_attr):
    """Compute the Gini impurity of the target labels in ``data``.

    The impurity is ``1 - sum(p_k ** 2)`` where ``p_k`` is the fraction of rows
    carrying label ``k``.

    Args:
        data: Sized collection of rows; each row is indexed with ``target_attr``.
        target_attr: Key of the label inside each row.

    Returns:
        The impurity as a float: 0.0 when every row has the same label, and
        0.0 for an empty ``data`` (there is nothing to be impure about).
    """
    total = len(data)
    if total == 0:
        # Without this guard the loop below never runs and 1.0 ("maximally
        # impure") would be reported for a dataset with no rows at all.
        return 0.0

    # Single pass over the rows; Counter iterates in first-seen order, so the
    # floating-point accumulation order does not depend on string hashing.
    label_counts = Counter(row[target_attr] for row in data)

    gini = 1.0
    for count in label_counts.values():
        prob = count / total
        gini -= prob ** 2

    return gini

In [3]:
def gini_gain(data, target_attr, feature):
    """Return the reduction in Gini impurity obtained by splitting on ``feature``.

    The rows are partitioned by each distinct value of ``feature``; the gain is
    the impurity of the whole dataset minus the size-weighted impurity of the
    partitions.

    Args:
        data: Sized collection of rows indexed by ``feature`` and ``target_attr``.
        target_attr: Key of the label inside each row.
        feature: Key of the attribute to split on.

    Returns:
        Parent impurity minus the weighted impurity of the partitions.
    """
    n_rows = len(data)
    parent_impurity = gini_impurity(data, target_attr)

    children_impurity = 0.0
    for branch_value in {row[feature] for row in data}:
        branch_rows = [row for row in data if row[feature] == branch_value]
        # Each partition contributes in proportion to its share of the rows.
        weight = len(branch_rows) / n_rows
        children_impurity += weight * gini_impurity(branch_rows, target_attr)

    return parent_impurity - children_impurity
    

In [4]:
def best_feature(data, target_attr, features):
    """Pick the feature whose split yields the largest Gini gain.

    Args:
        data: Rows passed through to ``gini_gain``.
        target_attr: Key of the label inside each row.
        features: Candidate feature keys; must be non-empty.

    Returns:
        The candidate with the highest gain. When several candidates tie, the
        one that appears first in ``features`` is returned.

    Raises:
        ValueError: If ``features`` is empty (raised by ``max``).
    """
    def gain_of(candidate):
        return gini_gain(data, target_attr, candidate)

    return max(features, key=gain_of)

In [5]:
def cart_tree(data, target_attr, features):
    """Recursively build a decision tree as nested dicts, splitting by Gini gain.

    An internal node has the form ``{feature: {value: subtree, ...}}`` with one
    branch per distinct value of the chosen feature present in ``data``. Every
    distinct value gets its own branch (numeric values included); no threshold
    splits are made. A leaf is a target label.

    Args:
        data: List of rows indexed by the feature keys and ``target_attr``.
        target_attr: Key of the label inside each row.
        features: Candidate feature keys. The list is not modified.

    Returns:
        * ``None`` when ``data`` is empty;
        * the common label when all rows share one label;
        * the majority label when no features remain (ties go to the label
          that appears first in ``data``);
        * otherwise a nested dict as described above.
    """
    if len(data) == 0:
        # Nothing to learn from; None is the "no prediction" leaf.
        return None

    target_values = [row[target_attr] for row in data]
    if target_values.count(target_values[0]) == len(target_values):
        return target_values[0]

    if not features:
        # dict.fromkeys keeps first-seen order, so a tie is broken the same way
        # on every run instead of depending on set/hash ordering.
        return max(dict.fromkeys(target_values), key=target_values.count)

    best = best_feature(data, target_attr, features)

    # Build a fresh list rather than calling features.remove(best): removing in
    # place would alter the caller's list and, because sibling branches share
    # it, would strip features from branches that never used them.
    remaining = [feature for feature in features if feature != best]

    tree = {best: {}}
    for value in set(row[best] for row in data):
        subset = [row for row in data if row[best] == value]
        tree[best][value] = cart_tree(subset, target_attr, remaining)

    return tree

In [6]:
def predict(tree, sample):
    """Classify ``sample`` by walking a nested-dict decision tree.

    Args:
        tree: Either a leaf (any non-dict value, returned as the prediction) or
            a node of the form ``{feature: {value: subtree, ...}}``.
        sample: Mapping from feature key to the sample's value for it.

    Returns:
        The leaf reached by following the sample's feature values, or ``None``
        when the sample has a value for which the tree has no branch.

    Raises:
        KeyError: If ``sample`` lacks a feature that the tree tests.
    """
    node = tree
    while isinstance(node, dict):
        feature = next(iter(node))
        # .get() instead of indexing: a value with no matching branch yields
        # None (no prediction) rather than raising KeyError.
        node = node[feature].get(sample[feature])
    return node

In [7]:
# Toy "play tennis" dataset: one dict per observation, 'Decision' is the label.
data = [
    {'Outlook': 'Sunny', 'Temp.': 85, 'Humidity': 85, 'Wind': 'Weak', 'Decision': 'No'},
    {'Outlook': 'Sunny', 'Temp.': 80, 'Humidity': 90, 'Wind': 'Strong', 'Decision': 'No'},
    {'Outlook': 'Overcast', 'Temp.': 83, 'Humidity': 78, 'Wind': 'Weak', 'Decision': 'Yes'},
    {'Outlook': 'Rain', 'Temp.': 70, 'Humidity': 96, 'Wind': 'Weak', 'Decision': 'Yes'},
    {'Outlook': 'Rain', 'Temp.': 68, 'Humidity': 80, 'Wind': 'Weak', 'Decision': 'Yes'},
    {'Outlook': 'Rain', 'Temp.': 65, 'Humidity': 70, 'Wind': 'Strong', 'Decision': 'No'},
    {'Outlook': 'Overcast', 'Temp.': 64, 'Humidity': 65, 'Wind': 'Strong', 'Decision': 'Yes'},
    {'Outlook': 'Sunny', 'Temp.': 72, 'Humidity': 95, 'Wind': 'Weak', 'Decision': 'No'},
    {'Outlook': 'Sunny', 'Temp.': 69, 'Humidity': 70, 'Wind': 'Weak', 'Decision': 'Yes'},
    {'Outlook': 'Rain', 'Temp.': 75, 'Humidity': 80, 'Wind': 'Weak', 'Decision': 'Yes'},
    {'Outlook': 'Sunny', 'Temp.': 75, 'Humidity': 70, 'Wind': 'Strong', 'Decision': 'Yes'},
    {'Outlook': 'Overcast', 'Temp.': 72, 'Humidity': 90, 'Wind': 'Strong', 'Decision': 'Yes'},
    {'Outlook': 'Overcast', 'Temp.': 81, 'Humidity': 75, 'Wind': 'Weak', 'Decision': 'Yes'},
    {'Outlook': 'Rain', 'Temp.': 71, 'Humidity': 80, 'Wind': 'Strong', 'Decision': 'No'}
]

target_attr = 'Decision'
features = ['Outlook', 'Temp.', 'Humidity', 'Wind']

# Pass a copy so the module-level feature list stays intact for later cells,
# whatever the tree builder does with its argument.
cart_model = cart_tree(data, target_attr, list(features))
print(f'Decision Tree : {cart_model}')

# Feature values must match the training data exactly (including case):
# the dataset spells this value 'Weak', not 'weak'.
new_sample = {'Outlook': 'Sunny', 'Temp.': 75, 'Humidity': 70, 'Wind': 'Weak'}
preds = predict(cart_model, new_sample)

print(f'Prediction: {preds}')

Decision Tree : {'Temp.': {64: 'Yes', 65: 'No', 68: 'Yes', 69: 'Yes', 70: 'Yes', 71: 'No', 72: {'Outlook': {'Sunny': 'No', 'Overcast': 'Yes'}}, 75: 'Yes', 80: 'No', 81: 'Yes', 83: 'Yes', 85: 'No'}}
Prediction: Yes
