In [1]:
# Import necessary libraries
import numpy as np
from collections import Counter
from math import log2

# Function to calculate entropy
def calculate_entropy(values):
    """Return the Shannon entropy, in bits, of the labels in *values*.

    Args:
        values: Iterable of hashable class labels. Any iterable is accepted,
            including one-shot generators.

    Returns:
        The entropy as a non-negative float. Empty input and input holding a
        single distinct label both yield exactly ``0.0``.
    """
    value_counts = Counter(values)
    # Derive the total from the counts instead of len(values) so that
    # iterables without a length (e.g. generators) are supported.
    total_count = sum(value_counts.values())
    if total_count == 0:
        return 0.0

    prob_dist = [count / total_count for count in value_counts.values()]
    entropy = -sum(p * log2(p) for p in prob_dist if p > 0)
    # Negating a zero sum produces -0.0 for a pure node; clamp it to +0.0 so
    # callers never see (or print) a negative zero.
    return max(0.0, entropy)

# Function to calculate information gain
def information_gain(parent, left_child, right_child):
    """Return the information gain, in bits, of splitting *parent* in two.

    The gain is the entropy of ``parent`` minus the entropy of the two
    children, each weighted by ``len(child) / len(parent)``.

    Args:
        parent: Sequence of class labels before the split.
        left_child: Sequence of class labels routed to the left branch.
        right_child: Sequence of class labels routed to the right branch.

    Returns:
        The information gain as a float. An empty ``parent`` yields ``0.0``.
    """
    n = len(parent)
    if n == 0:
        # Nothing to split: avoid dividing by zero when computing the weights.
        return 0.0

    # Entropy of the node before the split.
    parent_entropy = calculate_entropy(parent)

    # Each child's entropy contributes in proportion to its share of samples.
    n_left = len(left_child)
    n_right = len(right_child)
    weighted_entropy = (
        (n_left / n) * calculate_entropy(left_child)
        + (n_right / n) * calculate_entropy(right_child)
    )

    # Information gain is the drop from parent entropy to weighted child entropy.
    return parent_entropy - weighted_entropy

# Toy dataset: one binary feature and one binary target per row.
#   Weather:       0 = Rainy, 1 = Sunny
#   Play Outside?: 0 = No,    1 = Yes
data = [
    {'Weather': weather, 'Play Outside?': plays}
    for weather, plays in [(1, 1), (1, 1), (0, 0), (0, 0), (1, 1)]
]

# Target labels for the whole dataset, then for each branch of the Weather split.
parent = [record['Play Outside?'] for record in data]
left_child = [
    record['Play Outside?'] for record in data if record['Weather'] == 0
]  # Rainy rows
right_child = [
    record['Play Outside?'] for record in data if record['Weather'] == 1
]  # Sunny rows

# Measure how much splitting on "Weather" reduces uncertainty about the target.
info_gain = information_gain(parent, left_child, right_child)

# Report the gain to four decimal places.
print(f"Information Gain from splitting on 'Weather': {info_gain:.4f} bits")

Information Gain from splitting on 'Weather': 0.9710 bits
