In [61]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
# Convert categorical data to numerical labels
from sklearn.preprocessing import LabelEncoder


In [62]:
class DecisionTree:
    """Binary decision-tree classifier that splits on ``feature <= threshold``.

    Splits are chosen greedily by maximum information gain (entropy
    reduction). The fitted tree is stored in ``self.tree`` as nested dicts:
    internal nodes carry the keys ``'feature_index'``, ``'threshold'``,
    ``'left'`` and ``'right'``; leaves carry the single key ``'class'``.
    """

    def __init__(self):
        # Root node of the fitted tree; stays None until fit() is called.
        self.tree = None

    def entropy(self, y):
        """Return the Shannon entropy (base 2) of the labels in ``y``.

        An empty ``y`` contributes no terms, so the result is zero.
        """
        _, counts = np.unique(y, return_counts=True)
        probabilities = counts / len(y)
        return -np.sum(probabilities * np.log2(probabilities))

    def information_gain(self, X, y, feature_index, threshold):
        """Return the entropy reduction obtained by one candidate split.

        Rows with ``X[:, feature_index] <= threshold`` go to the left side,
        all other rows to the right side. The gain is the entropy of ``y``
        minus the size-weighted entropies of the two sides.
        """
        left_indices = X[:, feature_index] <= threshold
        right_indices = ~left_indices
        n = len(y)

        left_entropy = self.entropy(y[left_indices])
        right_entropy = self.entropy(y[right_indices])
        total_entropy = self.entropy(y)

        left_weight = np.sum(left_indices) / n
        right_weight = np.sum(right_indices) / n

        return total_entropy - (left_weight * left_entropy + right_weight * right_entropy)

    def find_best_split(self, X, y):
        """Return ``(feature_index, threshold)`` of the highest-gain split.

        Candidate thresholds are the distinct values observed in each
        feature column. ``(None, None)`` is returned when no candidate has a
        strictly positive information gain.
        """
        best_information_gain = 0
        best_feature_index = None
        best_threshold = None

        for feature_index in range(X.shape[1]):
            # np.unique returns sorted values; the largest one sends every
            # row to the left side, so its gain is exactly zero and it can
            # never be selected. Skipping it saves one evaluation per feature.
            thresholds = np.unique(X[:, feature_index])[:-1]
            for threshold in thresholds:
                ig = self.information_gain(X, y, feature_index, threshold)
                if ig > best_information_gain:
                    best_information_gain = ig
                    best_feature_index = feature_index
                    best_threshold = threshold

        return best_feature_index, best_threshold

    @staticmethod
    def _majority_class(y):
        """Return the most frequent label in ``y`` (smallest label on ties).

        Uses ``np.unique`` rather than ``np.bincount`` so that string labels
        (or any other sortable labels) work, not only non-negative integers.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _build_tree(self, X, y):
        """Recursively build the nested-dict tree for the samples ``X, y``."""
        # Pure node: every sample has the same label.
        if len(np.unique(y)) == 1:
            return {'class': y[0]}

        feature_index, threshold = self.find_best_split(X, y)
        if feature_index is None:
            # No split improves purity; fall back to a majority-vote leaf.
            return {'class': self._majority_class(y)}

        # A positive-gain split always leaves both sides non-empty, so each
        # recursive call works on strictly fewer samples.
        left_indices = X[:, feature_index] <= threshold
        right_indices = ~left_indices

        return {'feature_index': feature_index, 'threshold': threshold,
                'left': self._build_tree(X[left_indices], y[left_indices]),
                'right': self._build_tree(X[right_indices], y[right_indices])}

    def fit(self, X, y):
        """Fit the tree on samples ``X`` (2-D) and labels ``y`` (1-D).

        Array-likes are accepted and converted with ``np.asarray``. The
        result is stored in ``self.tree``.

        Raises:
            ValueError: if ``y`` is empty.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(y) == 0:
            raise ValueError("Cannot fit a decision tree on an empty dataset")

        self.tree = self._build_tree(X, y)

    def predict_instance(self, x, tree):
        """Return the predicted label for one sample ``x``.

        Walks ``tree`` from the given node down to a leaf, going left when
        ``x[feature_index] <= threshold`` and right otherwise.
        """
        node = tree
        while 'class' not in node:
            threshold = node['threshold']
            # Thresholds stored as strings are compared numerically. The
            # conversion is kept local so prediction never modifies the tree.
            if isinstance(threshold, str):
                threshold = float(threshold)
            if x[node['feature_index']] <= threshold:
                node = node['left']
            else:
                node = node['right']
        return node['class']

    def predict(self, X):
        """Return an array with the predicted label for every row of ``X``."""
        return np.array([self.predict_instance(x, self.tree) for x in X])

In [63]:
# Example usage with tennis dataset: 14 samples, each described by four
# categorical features given as strings (so X is a string-typed array here).
# NOTE(review): the columns look like outlook, temperature, humidity and wind
# (the classic "play tennis" layout) — the code never names them; confirm.
X = np.array([
    ['Sunny', 'Hot', 'High', 'Weak'],
    ['Sunny', 'Hot', 'High', 'Strong'],
    ['Overcast', 'Hot', 'High', 'Weak'],
    ['Rain', 'Mild', 'High', 'Weak'],
    ['Rain', 'Cool', 'Normal', 'Weak'],
    ['Rain', 'Cool', 'Normal', 'Strong'],
    ['Overcast', 'Cool', 'Normal', 'Strong'],
    ['Sunny', 'Mild', 'High', 'Weak'],
    ['Sunny', 'Cool', 'Normal', 'Weak'],
    ['Rain', 'Mild', 'Normal', 'Weak'],
    ['Sunny', 'Mild', 'Normal', 'Strong'],
    ['Overcast', 'Mild', 'High', 'Strong'],
    ['Overcast', 'Hot', 'Normal', 'Weak'],
    ['Rain', 'Mild', 'High', 'Strong']
])


# Target label ('Yes' / 'No') for each row of X, in the same row order.
y = np.array(['No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No'])


# Label encode categorical features.
# One LabelEncoder is fitted per column and kept in `label_encoders`, in column
# order. The integer codes are collected separately and stacked into a new
# array rather than written back into the string-typed X: assigning integers
# into a fixed-width unicode array stores them as text and silently truncates
# any code with more characters than the array's item width.
label_encoders = []
encoded_columns = []
for i in range(X.shape[1]):
    encoder = LabelEncoder()
    encoded_columns.append(encoder.fit_transform(X[:, i]))
    label_encoders.append(encoder)

# Ensure all features are of numeric data types
X = np.column_stack(encoded_columns).astype(float)

# Build a 100 x 100 evaluation grid over the first two feature columns,
# padded by one unit beyond the observed minimum and maximum on each axis.
x_min = X[:, 0].min() - 1
x_max = X[:, 0].max() + 1
y_min = X[:, 1].min() - 1
y_max = X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.linspace(x_min, x_max, num=100),
    np.linspace(y_min, y_max, num=100),
)
