# In [1]:
import numpy as np

class DecisionTreeClassifier:
    """Binary decision tree classifier grown greedily on weighted Gini impurity.

    The fitted tree is stored in ``self.tree`` as nested dictionaries:

    * leaf node: ``{'leaf': True, 'class': label}``
    * internal node: ``{'leaf': False, 'feature': column_index,
      'split_value': threshold, 'left': node, 'right': node}``

    A sample goes to the ``'left'`` child when its value in column
    ``'feature'`` is strictly smaller than ``'split_value'``, otherwise to
    the ``'right'`` child.

    Parameters
    ----------
    max_depth : int or None, default None
        Depth at which growth stops and a leaf is created. ``None`` means the
        tree grows until every node is pure or cannot be split any further.
    """

    def __init__(self, max_depth=None):
        self.max_depth = max_depth
        # Root node of the fitted tree; stays None until fit() is called.
        self.tree = None

    def fit(self, X, y):
        """Build the tree from training data and store it in ``self.tree``.

        Parameters
        ----------
        X : array-like, indexed as ``X[:, feature]``
            Training samples, one row per sample.
        y : array-like
            Class label of each row of ``X``. Any labels that ``np.unique``
            can sort are accepted.

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """
        # Accept plain lists as well as arrays; the helpers rely on NumPy
        # boolean-mask indexing.
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("cannot fit a decision tree on an empty training set")
        self.tree = self.build_tree(X, y)

    def predict(self, X):
        """Return the predicted class label for every row of ``X``.

        Parameters
        ----------
        X : array-like
            Samples to classify, one row per sample.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample.

        Raises
        ------
        AttributeError
            If called before ``fit``.
        """
        if self.tree is None:
            raise AttributeError(
                "this DecisionTreeClassifier is not fitted yet; call fit() first"
            )
        return np.array(
            [self.traverse_tree(sample, self.tree) for sample in np.asarray(X)]
        )

    def build_tree(self, X, y, depth=0):
        """Recursively grow the (sub)tree for the samples ``X`` / labels ``y``.

        Parameters
        ----------
        X, y : numpy.ndarray
            Samples and labels that reached this node.
        depth : int, default 0
            Depth of the node being built (the root is depth 0).

        Returns
        -------
        dict
            A leaf or internal node (see the class docstring for the layout).
        """
        # Stop at the depth limit or when the node is already pure.
        if depth == self.max_depth or np.unique(y).size <= 1:
            return self.create_leaf_node(y)

        best_feature, best_split_value = self.find_best_split(X, y)
        if best_feature is None or best_split_value is None:
            # No split separates the samples (e.g. identical rows).
            return self.create_leaf_node(y)

        goes_left = X[:, best_feature] < best_split_value
        goes_right = ~goes_left

        return {
            # Explicit flag so traverse_tree can test every node the same way.
            'leaf': False,
            'feature': best_feature,
            'split_value': best_split_value,
            'left': self.build_tree(X[goes_left], y[goes_left], depth + 1),
            'right': self.build_tree(X[goes_right], y[goes_right], depth + 1),
        }

    def find_best_split(self, X, y):
        """Find the (feature, threshold) pair with the lowest weighted Gini.

        Every unique value of every column is tried as a threshold. Splits
        that would leave one side empty are ignored, because they do not
        partition the data and would make the tree recurse on an empty node.

        Returns
        -------
        tuple
            ``(best_feature, best_split_value)``, or ``(None, None)`` when no
            threshold puts at least one sample on each side.
        """
        best_feature = None
        best_split_value = None
        best_gini = float('inf')

        for feature in range(X.shape[1]):
            column = X[:, feature]
            for value in np.unique(column):
                goes_left = column < value
                # Skip degenerate splits (all samples on one side).
                if not goes_left.any() or goes_left.all():
                    continue

                gini = self.calculate_gini_index(y[goes_left], y[~goes_left])
                if gini < best_gini:
                    best_gini = gini
                    best_feature = feature
                    best_split_value = value

        return best_feature, best_split_value

    @staticmethod
    def _gini_impurity(labels):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of one label set."""
        n_labels = len(labels)
        if n_labels == 0:
            # An empty side carries no weight; avoid dividing by zero.
            return 0.0
        _, counts = np.unique(labels, return_counts=True)
        return 1 - np.sum(np.square(counts / n_labels))

    def calculate_gini_index(self, left_labels, right_labels):
        """Return the size-weighted Gini impurity of a two-way split.

        Parameters
        ----------
        left_labels, right_labels : array-like
            Labels that fall on each side of the split.

        Returns
        -------
        float
            ``w_left * gini(left) + w_right * gini(right)`` where the weights
            are the fraction of samples on each side; ``0.0`` when both sides
            are empty.
        """
        n_left = len(left_labels)
        n_right = len(right_labels)
        total_samples = n_left + n_right
        if total_samples == 0:
            return 0.0

        left_weight = n_left / total_samples
        right_weight = n_right / total_samples
        return (
            left_weight * self._gini_impurity(left_labels)
            + right_weight * self._gini_impurity(right_labels)
        )

    def create_leaf_node(self, labels):
        """Create a leaf predicting the most frequent label in ``labels``.

        Ties are resolved in favour of the smallest label, because
        ``np.unique`` returns labels sorted and ``argmax`` returns the first
        maximum.

        Raises
        ------
        ValueError
            If ``labels`` is empty.
        """
        values, counts = np.unique(labels, return_counts=True)
        if values.size == 0:
            raise ValueError("cannot create a leaf node from an empty label set")
        return {'leaf': True, 'class': values[counts.argmax()]}

    def traverse_tree(self, sample, node):
        """Walk from ``node`` down to a leaf and return that leaf's class.

        Parameters
        ----------
        sample : sequence
            Feature values of one sample, indexable by feature number.
        node : dict
            Node to start from (normally ``self.tree``).
        """
        # .get() keeps this working for internal nodes that carry no 'leaf' key.
        while not node.get('leaf', False):
            if sample[node['feature']] < node['split_value']:
                node = node['left']
            else:
                node = node['right']
        return node['class']
