In [None]:
import numpy as np

class DecisionTreeClassifier:
    """Binary-split decision tree classifier trained with Gini impurity.

    The fitted tree is stored in ``self.tree`` as nested tuples
    ``(feature_idx, threshold, left_subtree, right_subtree)``; a leaf is the
    predicted class label itself (a NumPy integer). Samples with
    ``x[feature_idx] <= threshold`` go left, all others go right.

    Labels must be non-negative integers because leaves are computed with
    ``np.bincount``.
    """

    def __init__(self, max_depth=None):
        """Create an unfitted tree.

        Args:
            max_depth: Maximum number of split levels. ``None`` grows the tree
                until every node is pure or cannot be split any further.
        """
        self.max_depth = max_depth

    def fit(self, X, y):
        """Build the tree from training data and store it in ``self.tree``.

        Args:
            X: Array-like of shape (num_samples, num_features).
            y: Array-like of non-negative integer class labels, one per row
                of ``X``.

        Raises:
            ValueError: If no training samples are given.
        """
        # Accept plain lists as well as arrays; boolean-mask indexing below
        # needs real ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset.")
        self.tree = self._build_tree(X, y, depth=0)

    def _build_tree(self, X, y, depth):
        """Recursively grow a subtree for the samples ``X`` / labels ``y``.

        Returns either a leaf (the majority class label) or a tuple
        ``(feature_idx, threshold, left_subtree, right_subtree)``.
        """
        num_classes = len(np.unique(y))

        # Stopping criteria: depth budget exhausted or node already pure.
        if (depth == self.max_depth) or (num_classes == 1):
            return np.bincount(y).argmax()

        best_split = self._find_best_split(X, y)

        # No threshold separates the samples (e.g. identical feature rows
        # with different labels): fall back to a majority-vote leaf.
        if best_split is None:
            return np.bincount(y).argmax()

        feature_idx, threshold = best_split
        left_mask = X[:, feature_idx] <= threshold
        right_mask = ~left_mask

        left_tree = self._build_tree(X[left_mask], y[left_mask], depth + 1)
        right_tree = self._build_tree(X[right_mask], y[right_mask], depth + 1)

        return (feature_idx, threshold, left_tree, right_tree)

    def _find_best_split(self, X, y):
        """Return the ``(feature_idx, threshold)`` with the lowest weighted Gini.

        Only splits that put at least one sample on each side are considered.
        Returns ``None`` when no such split exists. Ties are resolved in
        favour of the first candidate encountered (lowest feature index, then
        lowest threshold).
        """
        best_split = None
        best_gini = float('inf')
        num_samples, num_features = X.shape

        for feature_idx in range(num_features):
            column = X[:, feature_idx]
            for threshold in np.unique(column):
                left_mask = column <= threshold
                num_left = np.count_nonzero(left_mask)

                # A split with an empty side does not partition the data;
                # accepting it would recurse on the same samples forever or
                # create an empty child node.
                if num_left == 0 or num_left == num_samples:
                    continue

                gini = self._calculate_gini_index(y[left_mask], y[~left_mask])
                if gini < best_gini:
                    best_gini = gini
                    best_split = (feature_idx, threshold)

        return best_split

    @staticmethod
    def _gini_impurity(labels):
        """Return the Gini impurity of ``labels`` (0.0 for an empty group)."""
        if len(labels) == 0:
            return 0.0
        _, counts = np.unique(labels, return_counts=True)
        proportions = counts / len(labels)
        return 1 - sum(proportions ** 2)

    def _calculate_gini_index(self, left_labels, right_labels):
        """Return the size-weighted Gini impurity of a two-way split.

        At least one of the two label groups must be non-empty.
        """
        total_samples = len(left_labels) + len(right_labels)
        p_left = len(left_labels) / total_samples
        p_right = len(right_labels) / total_samples

        return (
            p_left * self._gini_impurity(left_labels)
            + p_right * self._gini_impurity(right_labels)
        )

    def predict(self, X):
        """Return an array with the predicted class label for each row of ``X``."""
        X = np.asarray(X)
        return np.array([self._traverse_tree(x, self.tree) for x in X])

    def _traverse_tree(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x`` and return its label."""
        # Internal nodes are tuples; anything else is a leaf label. Testing for
        # the tuple avoids depending on the exact NumPy integer type of leaves.
        while isinstance(node, tuple):
            feature_idx, threshold, left_tree, right_tree = node
            node = left_tree if x[feature_idx] <= threshold else right_tree
        return node

    def print_tree(self):
        """Print an indented, human-readable view of the fitted tree."""
        self._print_node(self.tree)

    def _print_node(self, node, depth=0):
        """Print ``node`` and its descendants, indenting by ``depth`` levels."""
        if not isinstance(node, tuple):
            print("  " * depth, "Class:", node)
            return

        feature_idx, threshold, left_tree, right_tree = node
        print("  " * depth, f"Feature {feature_idx} <= {threshold}")
        print("  " * (depth + 1), "Left:")
        self._print_node(left_tree, depth + 1)
        print("  " * (depth + 1), "Right:")
        self._print_node(right_tree, depth + 1)

# Example usage: a tiny two-feature dataset in which the label switches
# from 0 to 1 after the second row.
X_train = np.array([[1, 2], [2, 3], [3, 4], [4, 5]])
y_train = np.array([0, 0, 1, 1])

# Initialize and train the decision tree classifier (at most two split levels)
clf = DecisionTreeClassifier(max_depth=2)
clf.fit(X_train, y_train)

# Print the learned decision tree structure
print("Decision Tree Structure:")
clf.print_tree()

# Predict class labels for two samples that are not in the training set
X_test = np.array([[2, 2], [3, 3]])
predictions = clf.predict(X_test)
print("Predictions:", predictions)


Decision Tree Structure:
 Feature 0 <= 2
   Left:
   Class: 0
   Right:
   Class: 1
Predictions: [0 1]
