In [10]:
import numpy as np
import pandas as pd
import math

# Training set, written one customer record per row. The column order of each
# tuple is given by _COLUMNS; the last column is the class label.
_COLUMNS = ('age', 'income', 'student', 'credit_rating', 'buys_computer')
_RECORDS = [
    ('<=30', 'high', 'no', 'fair', 'no'),
    ('<=30', 'high', 'no', 'excellent', 'no'),
    ('31...40', 'high', 'no', 'fair', 'yes'),
    ('>40', 'medium', 'no', 'fair', 'yes'),
    ('>40', 'low', 'yes', 'fair', 'yes'),
    ('>40', 'low', 'yes', 'excellent', 'no'),
    ('31...40', 'low', 'yes', 'excellent', 'yes'),
    ('<=30', 'medium', 'no', 'fair', 'no'),
    ('<=30', 'low', 'yes', 'fair', 'yes'),
    ('>40', 'medium', 'yes', 'fair', 'yes'),
    ('<=30', 'medium', 'yes', 'excellent', 'yes'),
    ('31...40', 'medium', 'no', 'excellent', 'yes'),
    ('31...40', 'high', 'yes', 'fair', 'yes'),
    ('>40', 'medium', 'no', 'excellent', 'no'),
]

# Transpose the records into the column-name -> list-of-values mapping.
data = {
    column: list(values)
    for column, values in zip(_COLUMNS, zip(*_RECORDS))
}

# Tabular view of the same data, one DataFrame column per dict key.
df = pd.DataFrame(data)

# Calculate the entropy of the target variable
def entropy(data, target='buys_computer'):
    """Return the Shannon entropy, in bits, of a label column.

    Args:
        data: DataFrame that holds the label column.
        target: Name of the label column. Defaults to 'buys_computer', so
            existing single-argument calls keep their behavior.

    Returns:
        -sum(p * log2(p)) over the distinct labels as a float, or 0.0 when
        ``data`` has no rows or no labelled rows.

    Raises:
        KeyError: If ``data`` is non-empty and has no ``target`` column.
    """
    if len(data) == 0:
        return 0.0

    # Count every label in one pass instead of filtering the frame once per
    # label. sort=False skips the frequency sort, which entropy does not need.
    # value_counts leaves out missing labels, so they are excluded from the
    # probabilities rather than reaching log2(0).
    label_counts = data[target].value_counts(sort=False).tolist()
    total_samples = sum(label_counts)
    if total_samples == 0:
        return 0.0

    entropy_value = 0.0
    for label_count in label_counts:
        if label_count == 0:
            # Unused categories of a categorical column contribute nothing.
            continue
        probability = label_count / total_samples
        entropy_value -= probability * math.log2(probability)

    return entropy_value

# Entropy of the class label over the full dataset (before any split).
entropy_root = entropy(df)

# Information gain of a feature = root entropy minus the size-weighted
# average entropy of the partitions that the feature induces.
features = ['age', 'income', 'student', 'credit_rating']
information_gains = {}
total_rows = len(df)

for feature in features:
    conditional_entropy = 0

    # sort=False visits the partitions in order of first appearance, which
    # keeps the floating-point accumulation order stable.
    for _, partition in df.groupby(feature, sort=False):
        share = len(partition) / total_rows
        conditional_entropy += share * entropy(partition)

    information_gains[feature] = entropy_root - conditional_entropy

# The root of the tree is the feature with the largest gain; on a tie the
# feature listed first wins.
root_node, _ = max(information_gains.items(), key=lambda item: item[1])

print("Information Gains:")
for feature in information_gains:
    gain = information_gains[feature]
    print(f"{feature}: {gain}")

print(f"\nThe root node for the decision tree is '{root_node}' with an information gain of {information_gains[root_node]}")


Information Gains:
age: 0.24674981977443933
income: 0.02922256565895487
student: 0.15183550136234159
credit_rating: 0.04812703040826949

The root node for the decision tree is 'age' with an information gain of 0.24674981977443933
