In [1]:
import numpy as np
import pandas as pd

# Function to calculate entropy
def calculate_entropy(y):
    values, counts = np.unique(y, return_counts=True)
    probabilities = counts / len(y)
    return -np.sum(probabilities * np.log2(probabilities))

# Function to calculate information gain for a specific feature
def compute_info_gain(X, y, feature_index):
    unique_values = np.unique(X[:, feature_index])
    weighted_entropy = 0
    for value in unique_values:
        subset_y = y[X[:, feature_index] == value]
        weighted_entropy += (len(subset_y) / len(y)) * calculate_entropy(subset_y)
    return calculate_entropy(y) - weighted_entropy

# Function to find the best feature to split on
def find_best_feature(X, y):
    information_gains = [compute_info_gain(X, y, i) for i in range(X.shape[1])]
    return np.argmax(information_gains)

# Recursive function to build the decision tree
def build_decision_tree(X, y):
    if len(np.unique(y)) == 1:
        return np.unique(y)[0]
    if X.shape[1] == 0:
        return np.bincount(y).argmax()

    best_feature_index = find_best_feature(X, y)
    decision_tree_structure = {best_feature_index: {}}

    for value in np.unique(X[:, best_feature_index]):
        sub_X = X[X[:, best_feature_index] == value]
        sub_y = y[X[:, best_feature_index] == value]
        decision_tree_structure[best_feature_index][value] = build_decision_tree(np.delete(sub_X, best_feature_index, axis=1), sub_y)

    return decision_tree_structure

# Load the dataset
data = pd.read_csv('/content/sample_data/play_tennis.csv')
features = data.iloc[:, :-1].values
target = data.iloc[:, -1].values

# Build the decision tree
constructed_tree = build_decision_tree(features, target)

# Print the constructed decision tree
print("\n--- Constructed Decision Tree ---")
print(constructed_tree)



--- Constructed Decision Tree ---
{0: {'D1': 'No', 'D10': 'Yes', 'D11': 'Yes', 'D12': 'Yes', 'D13': 'Yes', 'D14': 'No', 'D2': 'No', 'D3': 'Yes', 'D4': 'Yes', 'D5': 'Yes', 'D6': 'No', 'D7': 'Yes', 'D8': 'No', 'D9': 'Yes'}}
