# In [None]:
import numpy as np

# Define the Gini impurity calculation
def gini_impurity(groups, classes):
    """Return the size-weighted Gini impurity of a candidate split.

    Args:
        groups: Collection of row groups (e.g. the ``(left, right)`` pair
            produced by a split). It is traversed twice, so it must be
            re-iterable. The last element of every row is its class label.
        classes: Collection of class labels to score against; it is
            traversed once per non-empty group.

    Returns:
        The sum over non-empty groups of ``(1 - sum(p_c ** 2))`` weighted by
        the group's share of all rows, as a float. ``0.0`` when every group
        is empty.
    """
    n_instances = float(sum(len(group) for group in groups))
    gini = 0.0
    for group in groups:
        size = float(len(group))
        if size == 0:
            # An empty group contributes nothing and would divide by zero.
            continue
        # Extract the labels once per group instead of once per class.
        labels = [row[-1] for row in group]
        score = 0.0
        for class_val in classes:
            proportion = labels.count(class_val) / size
            score += proportion * proportion
        gini += (1.0 - score) * (size / n_instances)
    return gini

# Split dataset
def test_split(index, value, dataset):
    """Partition ``dataset`` on a single feature threshold.

    Args:
        index: Position of the feature within each row.
        value: Threshold compared against ``row[index]``.
        dataset: Iterable of rows.

    Returns:
        A ``(left, right)`` tuple of lists. Rows with ``row[index] < value``
        go to ``left``; all other rows go to ``right``. Row order is
        preserved within each list.
    """
    left, right = [], []
    for row in dataset:
        bucket = left if row[index] < value else right
        bucket.append(row)
    return left, right


# The ``test_`` prefix matches pytest's default collection pattern. Opt the
# helper out so it is not collected as a test (and then failing on unknown
# fixtures) when this module is imported into a test file.
test_split.__test__ = False

# Select the best split
def get_split(dataset):
    """Search every feature/value pair for the lowest-impurity split.

    Each value found in each feature column (every column except the last,
    which holds the class label) is tried as a threshold. A candidate only
    replaces the current best when its Gini impurity is strictly lower, so
    the first candidate to reach the minimum wins.

    Returns:
        A dict with keys ``'index'`` (feature position), ``'value'``
        (threshold) and ``'groups'`` (the ``(left, right)`` partition).
    """
    labels = list({row[-1] for row in dataset})
    n_features = len(dataset[0]) - 1
    # Placeholders; returned unchanged if no candidate is ever evaluated.
    best = {'index': 999, 'value': 999, 'groups': None}
    lowest = 999
    for feature in range(n_features):
        for candidate in dataset:
            threshold = candidate[feature]
            partition = test_split(feature, threshold, dataset)
            impurity = gini_impurity(partition, labels)
            if impurity < lowest:
                lowest = impurity
                best = {'index': feature, 'value': threshold, 'groups': partition}
    return best

# Create a terminal node
def to_terminal(group):
    """Return the class label that occurs most often in ``group``.

    The label is the last element of each row. An empty ``group`` makes
    ``max`` raise ``ValueError``.
    """
    labels = [record[-1] for record in group]
    distinct = set(labels)
    return max(distinct, key=lambda label: labels.count(label))

# Recursive splitting
def split(node, max_depth, min_size, depth):
    """Grow the subtree beneath ``node`` in place.

    ``node`` must carry a ``'groups'`` entry holding its ``(left, right)``
    partition; that entry is removed and replaced by ``'left'`` and
    ``'right'`` entries, each either a class label (leaf) or a nested node
    dict.

    Args:
        node: Node dict as returned by ``get_split``.
        max_depth: Depth at which both branches are forced to be leaves.
        min_size: A branch with this many rows or fewer becomes a leaf.
        depth: Depth of ``node`` itself.
    """
    left, right = node['groups']
    del node['groups']

    # A one-sided partition separates nothing: both branches share one leaf.
    if not (left and right):
        leaf = to_terminal(left + right)
        node['left'] = leaf
        node['right'] = leaf
        return

    # Depth budget exhausted: close both branches with majority labels.
    if depth >= max_depth:
        left_leaf, right_leaf = to_terminal(left), to_terminal(right)
        node['left'] = left_leaf
        node['right'] = right_leaf
        return

    # Left is handled (including its whole subtree) before right.
    for side, rows in (('left', left), ('right', right)):
        if len(rows) <= min_size:
            node[side] = to_terminal(rows)
            continue
        node[side] = get_split(rows)
        split(node[side], max_depth, min_size, depth + 1)

# Build a decision tree
def build_tree(train, max_depth, min_size):
    """Fit a decision tree to ``train`` and return its root node dict.

    The root split is chosen with ``get_split`` and the tree is then grown
    in place by ``split``, with the root counted as depth 1.
    """
    tree = get_split(train)
    split(tree, max_depth=max_depth, min_size=min_size, depth=1)
    return tree

# Make a prediction
def predict(node, row):
    """Return the class label the tree rooted at ``node`` assigns to ``row``.

    At each node the row goes left when ``row[node['index']]`` is strictly
    below ``node['value']`` and right otherwise. Descent stops at the first
    branch that is not a dict, and that branch is returned as the label.
    """
    current = node
    while True:
        side = 'left' if row[current['index']] < current['value'] else 'right'
        branch = current[side]
        if not isinstance(branch, dict):
            return branch
        current = branch

# Example usage
if __name__ == "__main__":
    # Toy dataset: each row is two numeric features followed by a 0/1 label.
    dataset = [
        [2.771244718, 1.784783929, 0],
        [1.728571309, 1.169761413, 0],
        [3.678319846, 2.81281357, 0],
        [3.961043357, 2.61995032, 0],
        [2.999208922, 2.209014212, 0],
        [7.497545867, 3.162953546, 1],
        [9.00220326, 3.339047188, 1],
        [7.444542326, 0.476683375, 1],
        [10.12493903, 3.234550982, 1],
        [6.642287351, 3.319983761, 1]
    ]
    # Stopping rules handed to build_tree.
    max_depth = 3
    min_size = 1
    tree = build_tree(dataset, max_depth, min_size)
    # Predict on the training rows themselves and print label vs. prediction.
    for row in dataset:
        prediction = predict(tree, row)
        print(f"Expected={row[-1]}, Predicted={prediction}")


# In [None]:
class DecisionTree:
    """Classification tree grown by exhaustive Gini-impurity search.

    Rows are sequences whose last element is the class label; every other
    element is a feature value compared against thresholds with ``<``.

    After ``fit`` the tree is stored in ``self.tree`` as nested dicts with
    keys ``'index'``, ``'value'``, ``'left'`` and ``'right'``. A branch is
    either another node dict or a class label (a leaf).
    """

    def __init__(self, max_depth=None, min_size=0):
        """Configure the stopping rules.

        Args:
            max_depth: Depth (root is depth 1) at which both branches of a
                node are forced to be leaves. ``None`` disables the limit.
            min_size: A branch holding this many rows or fewer becomes a
                leaf instead of being split again. The default ``0`` never
                triggers, so growth is limited only by ``max_depth`` and by
                splits that fail to separate any rows.
        """
        self.max_depth = max_depth
        self.min_size = min_size
        self.tree = None

    def gini_impurity(self, groups, classes):
        """Return the size-weighted Gini impurity of a candidate split.

        Args:
            groups: Collection of row groups; traversed twice.
            classes: Collection of class labels to score against.

        Returns:
            The weighted impurity as a float; ``0.0`` when every group is
            empty.
        """
        total_samples = sum(len(group) for group in groups)
        gini = 0.0
        for group in groups:
            size = len(group)
            if size == 0:  # Avoid division by zero
                continue
            # Extract the labels once per group instead of once per class.
            labels = [row[-1] for row in group]
            score = 0.0
            for class_val in classes:
                proportion = labels.count(class_val) / size
                score += proportion * proportion
            gini += (1.0 - score) * (size / total_samples)
        return gini

    def split(self, index, value, dataset):
        """Partition ``dataset`` into ``(left, right)`` on one threshold.

        Rows with ``row[index] < value`` go left; all other rows go right.
        """
        left, right = [], []
        for row in dataset:
            if row[index] < value:
                left.append(row)
            else:
                right.append(row)
        return left, right

    def best_split(self, dataset):
        """Return the lowest-impurity split of ``dataset`` as a node dict.

        Every value in every feature column is tried as a threshold. Only a
        strictly lower impurity replaces the current best, so the first
        candidate to reach the minimum wins. The returned dict has keys
        ``'index'``, ``'value'`` and ``'groups'``.
        """
        class_values = list(set(row[-1] for row in dataset))
        # Placeholders; returned unchanged if no candidate is evaluated.
        best_index, best_value, best_score, best_groups = 999, 999, 999, None
        for index in range(len(dataset[0]) - 1):
            for row in dataset:
                groups = self.split(index, row[index], dataset)
                gini = self.gini_impurity(groups, class_values)
                if gini < best_score:
                    best_index, best_value, best_score, best_groups = index, row[index], gini, groups
        return {'index': best_index, 'value': best_value, 'groups': best_groups}

    def terminal_node(self, group):
        """Return the most common class label in ``group``."""
        outcomes = [row[-1] for row in group]
        return max(set(outcomes), key=outcomes.count)

    def build_tree(self, node, depth):
        """Grow the subtree beneath ``node`` in place.

        ``node['groups']`` is consumed and replaced by ``'left'`` and
        ``'right'`` entries.

        Args:
            node: Node dict as returned by ``best_split``.
            depth: Depth of ``node`` itself (the root is 1).
        """
        left, right = node['groups']
        del node['groups']
        if not left or not right:
            # The split separated nothing: both branches share one leaf.
            node['left'] = node['right'] = self.terminal_node(left + right)
            return
        if self.max_depth is not None and depth >= self.max_depth:
            node['left'], node['right'] = self.terminal_node(left), self.terminal_node(right)
            return
        # Left is grown (including its whole subtree) before right.
        for side, rows in (('left', left), ('right', right)):
            if len(rows) <= self.min_size:
                node[side] = self.terminal_node(rows)
            else:
                node[side] = self.best_split(rows)
                self.build_tree(node[side], depth + 1)

    def fit(self, dataset):
        """Build the tree from ``dataset`` and store it in ``self.tree``."""
        root = self.best_split(dataset)
        self.build_tree(root, 1)
        self.tree = root

    def predict_row(self, node, row):
        """Return the label the subtree rooted at ``node`` assigns to ``row``.

        The row goes left when ``row[node['index']] < node['value']`` and
        right otherwise; a non-dict branch is returned as the label.
        """
        if row[node['index']] < node['value']:
            if isinstance(node['left'], dict):
                return self.predict_row(node['left'], row)
            else:
                return node['left']
        else:
            if isinstance(node['right'], dict):
                return self.predict_row(node['right'], row)
            else:
                return node['right']

    def predict(self, dataset):
        """Return a list with one predicted label per row of ``dataset``."""
        return [self.predict_row(self.tree, row) for row in dataset]


# Example Usage
if __name__ == "__main__":
    # Toy dataset: each row is two numeric features followed by a 0/1 label.
    dataset = [
        [2.771244718, 1.784783929, 0],
        [1.728571309, 1.169761413, 0],
        [3.678319846, 2.81281357, 0],
        [3.961043357, 2.61995032, 0],
        [2.999208922, 2.209014212, 0],
        [7.497545867, 3.162953546, 1],
        [9.00220326, 3.339047188, 1],
        [7.444542326, 0.476683375, 1],
        [10.12493903, 3.234550982, 1],
        [6.642287351, 3.319983761, 1]
    ]

    tree = DecisionTree(max_depth=3)
    tree.fit(dataset)

    # Predict on the training rows themselves, one row per call; predict()
    # takes a list of rows and returns a list, hence the [row] / [0] pair.
    for row in dataset:
        prediction = tree.predict([row])
        print(f"Expected: {row[-1]}, Predicted: {prediction[0]}")
