In [1]:
import numpy as np

In [None]:
class DecisionTreeClassifier:
    """Binary decision tree classifier grown greedily on weighted Gini impurity.

    Every internal node tests ``x[feature_idx] <= threshold``; samples that
    satisfy the test go to the ``'left'`` child, the others to ``'right'``.
    The fitted tree is stored in ``self.tree`` as nested dicts. Each node has
    a ``'predicted_class'`` key (majority label of the training samples that
    reached it); internal nodes additionally carry ``'feature_idx'``,
    ``'threshold'``, ``'left'`` and ``'right'``.

    Parameters
    ----------
    max_depth : int or None, default=None
        Maximum number of split levels. ``None`` means the tree grows until
        every leaf is pure or no split with two non-empty sides exists.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        self.tree = None

    def fit(self, X_train, y_train):
        """Grow the tree from training data.

        Parameters
        ----------
        X_train : array-like, indexed as ``X[:, feature_idx]`` (2-D)
            Feature matrix, one row per sample.
        y_train : array-like
            Class label for each row of ``X_train``.

        Raises
        ------
        ValueError
            If ``y_train`` is empty (there is no majority class to predict).
        """
        # Accept plain lists as well as arrays; the masks below need ndarrays.
        X = np.asarray(X_train)
        y = np.asarray(y_train)
        if y.size == 0:
            raise ValueError("Cannot fit a decision tree on an empty training set.")
        self.tree = self._build_tree(X, y, 0)

    def predict(self, X_test):
        """Return an array with the predicted label for each row of ``X_test``."""
        return np.array([self._predict_one(x, self.tree) for x in X_test])

    def _best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted Gini.

        Candidate thresholds are the distinct values of each column. Splits
        that leave one side empty are ignored, because they do not partition
        the node and would lead to building a child from zero samples.

        Returns
        -------
        dict or None
            Keys ``'feature_idx'``, ``'threshold'``, ``'left_y'``,
            ``'right_y'``, ``'left_mask'``, ``'right_mask'`` for the best
            split, or ``None`` when no split with two non-empty sides exists.
        """
        best_gini = float('inf')
        best_split = None
        _, n_features = X.shape

        for feature_idx in range(n_features):
            column = X[:, feature_idx]  # hoisted: invariant over thresholds
            for threshold in np.unique(column):
                left_mask = column <= threshold
                right_mask = column > threshold
                # Skip degenerate partitions (e.g. threshold == column max).
                if not left_mask.any() or not right_mask.any():
                    continue

                left_y, right_y = y[left_mask], y[right_mask]
                gini = self._gini_split(left_y, right_y)

                # Strict '<' keeps the first candidate on ties.
                if gini < best_gini:
                    best_gini = gini
                    best_split = {
                        'feature_idx': feature_idx,
                        'threshold': threshold,
                        'left_y': left_y,
                        'right_y': right_y,
                        'left_mask': left_mask,
                        'right_mask': right_mask
                    }

        return best_split

    def _gini(self, y):
        """Gini impurity ``1 - sum(p_c ** 2)`` of the labels ``y`` (0 if empty)."""
        m = len(y)
        if m == 0:
            return 0.0
        # One pass over y instead of one scan per distinct class.
        _, counts = np.unique(y, return_counts=True)
        proportions = counts / m
        return 1.0 - np.sum(proportions ** 2)

    def _gini_split(self, left_y, right_y):
        """Size-weighted average of the Gini impurity of the two sides."""
        left_size = len(left_y)
        right_size = len(right_y)
        m = left_size + right_size
        if m == 0:
            # Nothing on either side: avoid dividing by zero.
            return 0.0
        return (left_size / m) * self._gini(left_y) + (right_size / m) * self._gini(right_y)

    def _build_tree(self, X, y, depth):
        """Recursively build the node dict for the samples ``(X, y)``.

        ``depth`` is the number of splits above this node; ``y`` must be
        non-empty.
        """
        classes, counts = np.unique(y, return_counts=True)
        # argmax yields a position in `classes`; map it back to the real label
        # so nodes missing some class (or non 0..k-1 labels) predict correctly.
        predicted_class = classes[np.argmax(counts)]

        node = {
            'predicted_class': predicted_class
        }

        depth_allows_split = self.max_depth is None or depth < self.max_depth
        is_pure = len(classes) <= 1  # a pure node cannot be improved by splitting

        if depth_allows_split and not is_pure:
            split = self._best_split(X, y)
            if split is not None:
                node['feature_idx'] = split['feature_idx']
                node['threshold'] = split['threshold']
                node['left'] = self._build_tree(X[split['left_mask']], split['left_y'], depth + 1)
                node['right'] = self._build_tree(X[split['right_mask']], split['right_y'], depth + 1)

        return node

    def _predict_one(self, x, tree):
        """Walk ``tree`` from the given node down to a leaf for sample ``x``."""
        node = tree
        # Internal nodes are exactly those carrying a 'feature_idx' key.
        while 'feature_idx' in node:
            feature_val = x[node['feature_idx']]
            node = node['left'] if feature_val <= node['threshold'] else node['right']
        return node['predicted_class']

In [None]:
# Seed NumPy's global RNG up front so the synthetic data below is reproducible.
np.random.seed(240)

# Synthetic training set: two uniform features per sample, with binary labels
# drawn from a separate random vector (independent of the features).
num_samples = 100
X_train = np.random.rand(num_samples, 2)
y_train = (np.random.rand(num_samples) > 0.5).astype(int)

# Single test instance
X_test = np.array([[0.5, 0.5]])

# Fit a depth-limited tree and classify the test instance.
tree = DecisionTreeClassifier(max_depth=3)
tree.fit(X_train, y_train)
predictions = tree.predict(X_test)

print(f"Predicted class label for X_test: {predictions[0]}")