<a href="https://colab.research.google.com/github/shr968/marvel/blob/main/ID3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
from collections import Counter

def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    Args:
        y: Array-like of labels; each distinct value is treated as one class.

    Returns:
        The entropy ``-sum(p * log2(p))`` over the relative frequency ``p``
        of every distinct label.
    """
    # Only the per-class frequencies matter, not the labels themselves.
    _, freq = np.unique(y, return_counts=True)
    p = freq / freq.sum()
    return -(p * np.log2(p)).sum()

def split_dataset(X, y, feature, value):
    """Select the samples whose ``feature`` column equals ``value``.

    Args:
        X: 2-D NumPy array of feature values, one row per sample.
        y: NumPy array of labels aligned with the rows of ``X``.
        feature: Column index of ``X`` to test.
        value: Value the column must equal for a row to be kept.

    Returns:
        A ``(X_subset, y_subset)`` tuple containing only the matching rows.
    """
    column = X[:, feature]
    keep = column == value  # boolean row selector
    return X[keep], y[keep]

def best_attribute(X, y):
    """Return the column of ``X`` whose split yields the highest information gain.

    For every column, the samples are partitioned by the column's distinct
    values; the size-weighted entropy of the resulting label subsets is
    subtracted from the entropy of ``y`` to obtain the gain.

    Args:
        X: 2-D NumPy array of feature values, one row per sample.
        y: NumPy array of labels aligned with the rows of ``X``.

    Returns:
        The index of the first column that attains the largest gain, or ``-1``
        when no column has a strictly positive gain (which includes ``X``
        having no columns). ``-1`` is a sentinel, not a usable column index;
        callers should check for it before indexing with the result.
    """
    base_entropy = entropy(y)
    n_samples = len(y)
    best_gain = 0
    best_feature = -1

    for feature in range(X.shape[1]):
        new_entropy = 0
        for v in np.unique(X[:, feature]):
            # Split once per value and reuse the labels for both the weight
            # and the entropy term.
            _, y_sub = split_dataset(X, y, feature, v)
            new_entropy += (len(y_sub) / n_samples) * entropy(y_sub)

        info_gain = base_entropy - new_entropy
        # Strict comparison: ties keep the earliest column, and a column with
        # zero gain never replaces the -1 sentinel.
        if info_gain > best_gain:
            best_gain = info_gain
            best_feature = feature

    return best_feature

class DecisionTree:
    """ID3-style decision tree over categorical features.

    After ``fit`` the learned tree is stored in ``self.tree``. A tree is
    either a bare class label (a leaf) or a nested dict of the form
    ``{feature_index: {feature_value: subtree}}``.

    NOTE: the chosen column is removed before recursing, so a
    ``feature_index`` recorded at a given depth is the column position in the
    matrix seen at that depth, not necessarily its position in the matrix
    passed to ``fit``.
    """

    def __init__(self):
        # Populated by fit(); None until then.
        self.tree = None

    def fit(self, X, y):
        """Build the tree from ``X`` and ``y`` and store it in ``self.tree``.

        Args:
            X: 2-D NumPy array of feature values, one row per sample.
            y: 1-D NumPy array of labels aligned with the rows of ``X``.
        """
        self.tree = self._build_tree(X, y)

    def _build_tree(self, X, y):
        """Recursively build the subtree for the samples ``(X, y)``.

        Returns:
            The single label when all samples agree, the most common label
            when no columns are left, otherwise a one-key dict mapping the
            chosen column index to ``{value: subtree}``.
        """
        if len(set(y)) == 1:
            return y[0]
        if X.shape[1] == 0:
            return Counter(y).most_common(1)[0][0]

        best_feature = best_attribute(X, y)
        if best_feature < 0:
            # best_attribute signals "no column has positive gain" with -1.
            # Used as-is, -1 acts as a negative index: the split happens on
            # the last column but the tree records the meaningless key -1.
            # Resolve it to that column's real index so the split is the same
            # and the recorded key is a valid column position.
            best_feature = X.shape[1] - 1
        tree = {best_feature: {}}

        for value in np.unique(X[:, best_feature]):
            X_sub, y_sub = split_dataset(X, y, best_feature, value)
            # Drop the column just used so it cannot be chosen again below.
            tree[best_feature][value] = self._build_tree(
                np.delete(X_sub, best_feature, axis=1), y_sub
            )

        return tree

# Toy "play tennis" dataset: two categorical features and a target column.
data = pd.DataFrame(
    {
        "Outlook": ["Sunny", "Overcast", "Rain", "Rain", "Overcast"],
        "Temperature": ["Hot", "Hot", "Mild", "Cool", "Cool"],
        "PlayTennis": ["No", "Yes", "Yes", "No", "Yes"],
    }
)

# Replace every category with an integer code, column by column.
data = data.apply(lambda column: pd.factorize(column)[0])

# The last column holds the labels; every column before it is a feature.
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

dt = DecisionTree()
dt.fit(X, y)
import json

def convert_to_python_types(obj):
    """Recursively replace NumPy integers in a nested dict with Python ints.

    Args:
        obj: A dict (possibly nested), a NumPy integer, or any other value.

    Returns:
        For a dict, a new dict whose keys are passed through ``int()`` and
        whose values are converted recursively. For a NumPy integer, the
        equivalent Python ``int``. Anything else is returned unchanged.
    """
    if isinstance(obj, np.integer):
        return int(obj)
    if not isinstance(obj, dict):
        return obj

    converted = {}
    for key, child in obj.items():
        converted[int(key)] = convert_to_python_types(child)
    return converted

# Convert NumPy integers to plain ints, then pretty-print the tree as JSON.
tree_python = convert_to_python_types(dt.tree)
tree_json = json.dumps(tree_python, indent=2)
print("Decision Tree:", tree_json)



Decision Tree: {
  "0": {
    "0": 0,
    "1": 1,
    "2": {
      "0": {
        "1": 1,
        "2": 0
      }
    }
  }
}
