In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset and pull out its feature matrix and target labels
data = load_iris()
X, y = data.data, data.target

# Hold out 30% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Decision Tree class (already provided in the prompt)
class DecisionTree:
    """Minimal CART-style classification tree using Gini impurity.

    The fitted tree is stored in ``self.tree`` as nested dictionaries with the
    keys ``"feature"``, ``"threshold"``, ``"left"`` and ``"right"``; a leaf is
    stored directly as the predicted label.

    Parameters
    ----------
    max_depth : int, default 3
        Maximum number of splits along any root-to-leaf path.
    """

    def __init__(self, max_depth=3):
        self.max_depth = max_depth
        self.tree = None  # populated by fit(); None means "not fitted yet"

    @staticmethod
    def _majority_label(y):
        """Return the most frequent label in *y* (smallest label wins ties).

        ``np.unique`` is used instead of ``np.bincount`` so that labels are not
        restricted to non-negative integers (strings, negative ints, ... work).
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def gini_impurity(self, y):
        """Return the Gini impurity ``1 - sum(p_k ** 2)`` of the labels *y*."""
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return 1 - np.sum(probabilities ** 2)

    def split_data(self, X, y, feature_index, threshold):
        """Partition ``(X, y)`` on ``X[:, feature_index] <= threshold``.

        Returns
        -------
        ((left_X, left_y), (right_X, right_y))
            Rows whose feature value is ``<= threshold`` go left, rows whose
            value is ``> threshold`` go right.
        """
        column = X[:, feature_index]
        left_mask = column <= threshold
        right_mask = column > threshold
        return (X[left_mask], y[left_mask]), (X[right_mask], y[right_mask])

    def find_best_split(self, X, y):
        """Search every feature/threshold pair for the lowest weighted Gini.

        Candidate thresholds are the unique values observed in each feature.

        Returns
        -------
        (feature_index, threshold)
            The best split found, or ``(None, None)`` when no threshold
            separates the samples into two non-empty groups.
        """
        best_feature = None
        best_threshold = None
        best_impurity = float("inf")
        n_samples = len(y)
        n_features = X.shape[1]

        for feature_index in range(n_features):
            # np.unique returns sorted values; the largest one can never leave
            # anything on the right-hand side, so it is not worth evaluating.
            thresholds = np.unique(X[:, feature_index])[:-1]
            for threshold in thresholds:
                (_, left_y), (_, right_y) = self.split_data(
                    X, y, feature_index, threshold
                )

                # A split that leaves one side empty does not partition the data.
                if len(left_y) == 0 or len(right_y) == 0:
                    continue

                weighted_impurity = (
                    len(left_y) / n_samples * self.gini_impurity(left_y)
                    + len(right_y) / n_samples * self.gini_impurity(right_y)
                )

                # Strict "<" keeps the first best split encountered on ties.
                if weighted_impurity < best_impurity:
                    best_feature = feature_index
                    best_threshold = threshold
                    best_impurity = weighted_impurity

        return best_feature, best_threshold

    def build_tree(self, X, y, depth=0):
        """Recursively build the tree for the samples ``(X, y)``.

        Recursion stops, and a leaf holding the majority label is returned,
        when ``depth`` reaches ``max_depth``, when the node is pure, or when
        no valid split exists. Otherwise a dict describing the split and its
        two subtrees is returned.
        """
        if depth >= self.max_depth or len(np.unique(y)) == 1:
            return self._majority_label(y)

        feature, threshold = self.find_best_split(X, y)
        if feature is None:
            return self._majority_label(y)

        (left_X, left_y), (right_X, right_y) = self.split_data(
            X, y, feature, threshold
        )
        return {
            "feature": feature,
            "threshold": threshold,
            "left": self.build_tree(left_X, left_y, depth + 1),
            "right": self.build_tree(right_X, right_y, depth + 1),
        }

    def fit(self, X, y):
        """Fit the tree on feature matrix *X* and labels *y*.

        Array-likes (e.g. nested lists) are accepted and converted with
        ``np.asarray``.

        Returns
        -------
        self
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If *y* is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if y.size == 0:
            raise ValueError("cannot fit a decision tree on an empty training set")
        self.tree = self.build_tree(X, y)
        return self

    def predict(self, X):
        """Return an array with the predicted label for each row of *X*.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.tree is None:
            raise RuntimeError("DecisionTree.predict() called before fit()")

        def traverse_tree(x, node):
            # Internal nodes are dicts; anything else is a leaf label.
            while isinstance(node, dict):
                if x[node["feature"]] <= node["threshold"]:
                    node = node["left"]
                else:
                    node = node["right"]
            return node

        return np.array([traverse_tree(x, self.tree) for x in np.asarray(X)])

# Instantiate the tree with a depth limit of 3 and train it on the training split
model = DecisionTree(max_depth=3)
model.fit(X_train, y_train)

# Predict a label for every row of the held-out test split
predictions = model.predict(X_test)

# Score the predictions against the true test labels and print both
accuracy = accuracy_score(y_test, predictions)
print("Predictions:", predictions)
print("Accuracy:", accuracy)

Predictions: [1 0 2 1 1 0 1 2 1 1 2 0 0 0 0 2 2 1 1 2 0 2 0 2 2 2 2 2 0 0 0 0 1 0 0 2 1
 0 0 0 2 2 1 0 0]
Accuracy: 0.9555555555555556
