# In[3]:
import numpy as np
from collections import Counter

# In[25]:
class Node:
    """A single node of a binary decision tree.

    An internal node stores the split (``feature`` index and ``threshold``)
    together with its ``left`` and ``right`` children.  A leaf stores only
    ``value``; the other attributes stay ``None``.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Split description (meaningful for internal nodes only).
        self.feature, self.threshold = feature, threshold
        # Child subtrees (meaningful for internal nodes only).
        self.left, self.right = left, right
        # Stored payload; keyword-only, and its presence is what marks a leaf.
        self.value = value

    def is_leaf(self):
        """Return True when this node carries a value, i.e. it is a leaf."""
        has_no_value = self.value is None
        return not has_no_value

class dt:
    """Binary decision-tree classifier grown greedily with Gini impurity.

    Every internal node tests ``x[feature] <= threshold``: samples for which
    the test holds go to the left child, all others to the right child.  The
    same rule is used when scoring candidate splits, when partitioning the
    training data and when predicting.

    Parameters
    ----------
    max_depth : int
        Depth at which growth stops and a leaf is created.
    min_splits : int
        A node holding fewer samples than this becomes a leaf.
    """

    def __init__(self, max_depth=10, min_splits=2):
        self.max_depth = max_depth
        self.min_splits = min_splits
        self.root = None  # set by fit()

    def fit(self, X, y):
        """Grow the tree from feature matrix ``X`` and label vector ``y``.

        ``X`` must be two-dimensional (samples x features); both arguments
        may be any array-like accepted by ``np.asarray``.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` disagree in
            length.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("cannot fit a decision tree on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y have inconsistent lengths: {len(X)} != {len(y)}"
            )
        self.root = self._grow_tree(X, y)

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``(X, y)``."""
        num_samples, num_features = X.shape
        num_labels = len(np.unique(y))

        # Stopping rules: depth limit reached, pure node, or too few samples.
        if depth >= self.max_depth or num_labels == 1 or num_samples < self.min_splits:
            return Node(value=self._majority_class(y))

        best_feat, best_thresh = self._best_split(X, y, num_features)
        if best_feat is None:
            # No threshold separates the samples (e.g. all rows identical).
            return Node(value=self._majority_class(y))

        goes_left = X[:, best_feat] <= best_thresh
        X_left, y_left = X[goes_left], y[goes_left]
        X_right, y_right = X[~goes_left], y[~goes_left]

        # Defensive guard: never create a child without samples.
        if len(y_left) == 0 or len(y_right) == 0:
            return Node(value=self._majority_class(y))

        left_child = self._grow_tree(X_left, y_left, depth + 1)
        right_child = self._grow_tree(X_right, y_right, depth + 1)
        return Node(feature=best_feat, threshold=best_thresh, left=left_child, right=right_child)

    def _best_split(self, X, y, num_feat):
        """Return ``(feature_index, threshold)`` with the lowest weighted Gini.

        Candidate thresholds are the distinct values of each feature column.
        Returns ``(None, None)`` when no candidate yields two non-empty
        sides.
        """
        best_gini = float('inf')
        best_feature, best_threshold = None, None

        for feat_index in range(num_feat):
            column = X[:, feat_index]  # hoisted: invariant across thresholds
            for threshold in np.unique(column):
                # Must be `<=`, the same test _grow_tree and _predict apply;
                # otherwise the scored partition is not the one actually used.
                left_mask = column <= threshold
                y_left, y_right = y[left_mask], y[~left_mask]

                if len(y_left) == 0 or len(y_right) == 0:
                    continue

                gini = self._gini_split(y_left, y_right)
                # Strict `<` keeps the first candidate found on ties.
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feat_index
                    best_threshold = threshold

        return best_feature, best_threshold

    def _gini_split(self, y_left, y_right):
        """Return the size-weighted Gini impurity of a two-way split.

        Both sides must be non-empty.
        """
        def gini(labels):
            # np.unique counts any label type (strings, negative ints, ...),
            # whereas np.bincount only accepts non-negative integers.
            _, class_counts = np.unique(labels, return_counts=True)
            probabilities = class_counts / len(labels)
            return 1.0 - np.sum(probabilities ** 2)

        n_left, n_right = len(y_left), len(y_right)
        n = n_left + n_right
        return (n_left / n) * gini(y_left) + (n_right / n) * gini(y_right)

    def _majority_class(self, y):
        """Return the most frequent label in ``y`` (must be non-empty)."""
        return Counter(y).most_common(1)[0][0]

    def predict(self, X):
        """Return an array with one predicted label per row of ``X``."""
        return np.array([self._predict(inputs, self.root) for inputs in np.asarray(X)])

    def _predict(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its value."""
        while not node.is_leaf():
            if x[node.feature] <= node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

# In[27]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset as a feature matrix X and a label vector y.
X, y = load_iris(return_X_y=True)
# Hold out 20% of the samples for testing; random_state pins the shuffle so
# the split is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a tree capped at depth 5 on the training portion only.
tree = dt(max_depth=5)
tree.fit(X_train, y_train)

# Evaluate: fraction of held-out samples whose predicted class matches the
# true label.
preds = tree.predict(X_test)
print("Accuracy:", accuracy_score(y_test, preds))

# Recorded output: Accuracy: 1.0
