In [1]:
# Import necessary libraries
from collections import Counter

# Function to calculate Gini Index
def gini_index(values):
    """Return the Gini impurity of a collection of class labels.

    The impurity is ``1 - sum(p_k ** 2)``, where ``p_k`` is the fraction of
    items carrying label ``k``. It is 0.0 when every item has the same label
    and grows as the labels become more evenly mixed.

    Args:
        values: Iterable of hashable class labels. Any iterable works,
            including generators, because the items are counted in a
            single pass instead of relying on ``len()``.

    Returns:
        The Gini impurity as a float. An empty collection yields 0.0.
    """
    value_counts = Counter(values)
    total_count = sum(value_counts.values())
    if total_count == 0:
        # An empty node (e.g. one side of a split that received no rows)
        # has nothing to be impure about; returning 0.0 avoids a
        # ZeroDivisionError when computing the proportions below.
        return 0.0
    return 1 - sum((count / total_count) ** 2 for count in value_counts.values())

# Toy dataset: one record per customer.
#   Gender    - feature used for the split (0 = Female, 1 = Male)
#   Purchased - target label (0 = No, 1 = Yes)
data = [
    {'Gender': gender, 'Purchased': purchased}
    for gender, purchased in (
        (1, 1), (1, 1), (1, 0),
        (0, 0), (0, 1), (0, 0),
        (1, 0), (0, 1), (1, 1),
    )
]

# Target labels for the whole dataset (parent node) and for each branch
# produced by splitting on Gender.
parent = [record['Purchased'] for record in data]
left_child = [
    record['Purchased'] for record in data if record['Gender'] == 0
]  # Female
right_child = [
    record['Purchased'] for record in data if record['Gender'] == 1
]  # Male

# Impurity of the parent node and of each child node.
parent_gini, left_child_gini, right_child_gini = (
    gini_index(labels) for labels in (parent, left_child, right_child)
)

# Impurity of the split: each child's Gini index weighted by the share of
# the parent's rows that ended up in that child.
n, n_left, n_right = len(parent), len(left_child), len(right_child)
weighted_gini = sum(
    (size / n) * impurity
    for size, impurity in (
        (n_left, left_child_gini),
        (n_right, right_child_gini),
    )
)

# Report every value with four decimal places.
print(f"Gini Index of the parent: {parent_gini:.4f}")
print(f"Gini Index of the left child (Female): {left_child_gini:.4f}")
print(f"Gini Index of the right child (Male): {right_child_gini:.4f}")
print(f"Weighted Gini Index after the split: {weighted_gini:.4f}")

Gini Index of the parent: 0.4938
Gini Index of the left child (Female): 0.5000
Gini Index of the right child (Male): 0.4800
Weighted Gini Index after the split: 0.4889
