<a href="https://colab.research.google.com/github/tejasmaskar09/bml-exp/blob/main/decison_tree_exp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
from collections import Counter

# Entropy Calculation
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Args:
        y: A sized iterable of hashable class labels (e.g. a list or a
            pandas Series).

    Returns:
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. An empty ``y`` has no labels to sum over and
        yields ``0``.
    """
    n_labels = len(y)
    accumulated = 0
    for occurrences in Counter(y).values():
        fraction = occurrences / n_labels
        accumulated += fraction * np.log2(fraction)
    return -accumulated

# Split Dataset
def split_dataset(X, y, feature, value):
    """Select the rows where ``X[feature] == value``.

    Args:
        X: Feature table (pandas DataFrame) containing column ``feature``.
        y: Labels, indexable by the boolean row mask built from ``X``.
        feature: Name of the column to test.
        value: Value the column must equal for a row to be kept.

    Returns:
        A ``(sub_X, sub_y)`` tuple: the matching rows of ``X`` with the
        ``feature`` column removed, and the matching entries of ``y``.
    """
    matches = X[feature] == value
    # The tested column is constant within the subset, so it is dropped.
    remaining_columns = X[matches].drop(columns=[feature])
    matching_labels = y[matches]
    return remaining_columns, matching_labels

# ID3 Recursive Algorithm
def id3(X, y, features):
    """Recursively build an ID3 decision tree over categorical features.

    Args:
        X: pandas DataFrame holding (at least) the columns named in
            ``features``.
        y: pandas Series of class labels, row-aligned with ``X``.
        features: Names of the columns still available for splitting.

    Returns:
        Either a bare class label (a leaf) or a nested dict of the form
        ``{feature: {value: subtree_or_label, ...}}``.

    Raises:
        ValueError: If ``y`` is empty, since there is nothing to learn from.
    """
    if len(y) == 0:
        raise ValueError("id3() requires at least one training example")

    def majority_label():
        # Ties go to the label seen first (Counter.most_common ordering).
        return Counter(y).most_common(1)[0][0]

    # Pure node: every example carries the same label.
    if len(set(y)) == 1:
        return y.iloc[0]
    # Nothing left to split on: fall back to the most frequent label.
    if len(features) == 0:
        return majority_label()

    base_entropy = entropy(y)
    n_samples = len(y)

    def info_gain(feature):
        # Only the label subsets are needed to score a feature, so mask
        # ``y`` directly rather than building throw-away DataFrame copies.
        column = X[feature]
        weighted_entropy = sum(
            (len(subset) / n_samples) * entropy(subset)
            for subset in (y[column == value] for value in column.unique())
        )
        return base_entropy - weighted_entropy

    # max() keeps the first feature among equal gains, which matches a
    # strict ``>`` scan. A zero gain is still split on: features that are
    # uninformative alone can be informative together (XOR-like data).
    best_feature = max(features, key=info_gain)
    remaining = [f for f in features if f != best_feature]

    branches = {}
    for value in X[best_feature].unique():
        sub_X, sub_y = split_dataset(X, y, best_feature, value)
        if len(sub_y) == 0:
            # Happens for NaN: unique() reports it but ``== NaN`` matches no
            # row, so there is no data to build a subtree from.
            continue
        branches[value] = id3(sub_X, sub_y, remaining)

    if not branches:
        # Every value of the chosen feature was missing; emit a leaf
        # instead of a node with no children.
        return majority_label()
    return {best_feature: branches}

# Sample Dataset
# Eight-row toy weather table; 'PlayTennis' is the class label.
data = {
    'Outlook': ['Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast', 'Sunny'],
    'Temperature': ['Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Mild', 'Mild'],
    'Humidity': ['High', 'High', 'High', 'High', 'Normal', 'Normal', 'High', 'Normal'],
    'Wind': ['Weak', 'Strong', 'Weak', 'Weak', 'Weak', 'Strong', 'Strong', 'Weak'],
    'PlayTennis': ['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes'],
}

# Separate the label column from the feature columns.
df = pd.DataFrame(data)
y = df['PlayTennis']
X = df.drop(columns=['PlayTennis'])

# Fit the tree on every feature column and show the nested-dict result.
decision_tree = id3(X, y, X.columns.tolist())
print("Decision Tree (Custom):", decision_tree, sep="\n")


Decision Tree (Custom):
{'Temperature': {'Hot': {'Outlook': {'Sunny': 'No', 'Overcast': 'Yes'}}, 'Mild': 'Yes', 'Cool': {'Wind': {'Weak': 'Yes', 'Strong': 'No'}}}}
