In [2]:
import numpy as np
import pandas as pd

# Function to calculate entropy
def entropy(y):
    """Return the Shannon entropy, in bits, of the labels in ``y``.

    The relative frequency of each distinct value is its count divided by
    ``len(y)``; the result is ``-sum(p * log2(p))`` over those frequencies.
    """
    _, label_counts = np.unique(y, return_counts=True)
    p = label_counts / len(y)
    return -np.sum(p * np.log2(p))

# Function to calculate information gain
def information_gain(X, y, feature_index):
    """Return the entropy reduction from splitting ``y`` on one column of ``X``.

    The rows are grouped by the distinct values found in column
    ``feature_index``. The gain is the entropy of ``y`` minus the
    size-weighted average of the entropies of the label subsets in each group.
    """
    column = X[:, feature_index]
    distinct, sizes = np.unique(column, return_counts=True)
    total = np.sum(sizes)

    children_entropy = 0
    for value, size in zip(distinct, sizes):
        # Weight each child's entropy by the fraction of rows it holds.
        children_entropy += (size / total) * entropy(y[column == value])

    return entropy(y) - children_entropy

# Function to build the decision tree using ID3
def _majority_label(y):
    """Return the most frequent label in ``y``; the smallest label wins ties."""
    # np.bincount only accepts non-negative integers, so it fails on string
    # labels such as 'Yes'/'No'. np.unique works for any sortable labels and,
    # because it returns them sorted, keeps bincount's tie-break for integers.
    labels, counts = np.unique(y, return_counts=True)
    return labels[np.argmax(counts)]


def id3(X, y, features, depth=0):
    """Build a decision tree with the ID3 algorithm.

    Args:
        X: 2-D array with one row per sample and one column per entry of
            ``features`` (column ``i`` corresponds to ``features[i]``).
        y: 1-D array of class labels aligned with the rows of ``X``.
        features: Sequence of feature names for the columns of ``X``.
        depth: Current recursion depth. It is forwarded to recursive calls
            and not otherwise used.

    Returns:
        A single label when the node is a leaf, otherwise a nested dict of
        the form ``{feature_name: {feature_value: subtree_or_label}}``.
    """
    # Base case: every sample carries the same label.
    distinct_labels = np.unique(y)
    if len(distinct_labels) == 1:
        return distinct_labels[0]

    # Accept any sequence of names (e.g. a pandas Index); list slicing and
    # concatenation below need a real list.
    features = list(features)

    # Base case: nothing left to split on, so fall back to a majority vote.
    if not features:
        return _majority_label(y)

    # Pick the feature with the highest information gain.
    gains = [information_gain(X, y, i) for i in range(len(features))]
    best_feature_index = int(np.argmax(gains))
    best_feature = features[best_feature_index]

    # These do not depend on the split value, so compute them once.
    remaining_features = (
        features[:best_feature_index] + features[best_feature_index + 1:]
    )
    remaining_columns = [
        col for col in range(X.shape[1]) if col != best_feature_index
    ]

    best_column = X[:, best_feature_index]
    branches = {}
    # Every value comes from the column itself, so each subset is non-empty
    # and no empty-subset fallback is needed.
    for value in np.unique(best_column):
        mask = best_column == value
        branches[value] = id3(
            X[mask][:, remaining_columns],
            y[mask],
            remaining_features,
            depth + 1,
        )

    return {best_feature: branches}

# Example usage
data = {
    'Outlook': [
        'Sunny', 'Sunny', 'Overcast', 'Rain', 'Rain', 'Rain', 'Overcast',
        'Sunny', 'Sunny', 'Rain', 'Sunny', 'Overcast', 'Overcast', 'Rain',
    ],
    'Temperature': [
        'Hot', 'Hot', 'Hot', 'Mild', 'Cool', 'Cool', 'Cool',
        'Mild', 'Cool', 'Mild', 'Mild', 'Mild', 'Hot', 'Mild',
    ],
    'Humidity': [
        'High', 'High', 'High', 'High', 'Normal', 'Normal', 'Normal',
        'High', 'Normal', 'Normal', 'Normal', 'High', 'Normal', 'High',
    ],
    'Windy': [
        False, True, False, False, False, True, True,
        False, False, False, True, True, False, True,
    ],
    'PlayTennis': [
        'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes',
        'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No',
    ],
}

# The last column is the target; every column before it is a feature.
df = pd.DataFrame(data)
features = list(df.columns[:-1])
X = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

# Fit the tree on the full table and show the nested-dict result.
decision_tree = id3(X, y, features)
print("Decision Tree:\n", decision_tree)


Decision Tree:
 {'Outlook': {'Overcast': 'Yes', 'Rain': {'Windy': {False: 'Yes', True: 'No'}}, 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}
