# In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from collections import Counter
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Fetch the Iris bunch and wrap its arrays in pandas containers:
# y carries the targets, X the feature matrix with one named column per
# entry of iris.feature_names.
iris = load_iris()
y = pd.Series(iris.target)
X = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Function to calculate entropy
def entropy(y):
    """Return the Shannon entropy (in bits) of the labels in *y*.

    Parameters
    ----------
    y : array-like
        One-dimensional collection of class labels. Labels are counted
        with ``np.unique``, so they are not restricted to non-negative
        integers (strings or negative codes work as well).

    Returns
    -------
    float
        ``-sum(p * log2(p))`` over the relative frequency ``p`` of each
        distinct label. An empty input or a single-class input yields
        ``0.0``.
    """
    _, counts = np.unique(np.asarray(y), return_counts=True)
    if counts.size == 0:
        # No samples means no uncertainty; also avoids dividing by zero.
        return 0.0

    # np.unique only reports labels that actually occur, so every
    # probability is strictly positive and log2 is always defined.
    probabilities = counts / counts.sum()
    result = -np.sum(probabilities * np.log2(probabilities))

    # Adding 0.0 turns the -0.0 produced for a pure node into +0.0 so it
    # does not print as "-0.0000".
    return result + 0.0

# Function to calculate information gain
def information_gain(X_column, y):
    """Return the entropy reduction from splitting *y* on *X_column*.

    Every distinct value found in ``X_column`` forms its own child
    partition (a multiway split). The gain is the entropy of ``y`` minus
    the sum of the child entropies, each weighted by the fraction of rows
    that fall into that child.

    ``y`` is indexed with the boolean mask ``X_column == value``, so both
    arguments must support that kind of indexing.
    """
    entropy_before = entropy(y)

    distinct_values, occurrences = np.unique(X_column, return_counts=True)
    n_rows = np.sum(occurrences)

    # One weighted entropy term per distinct feature value.
    child_terms = []
    for value, n_in_child in zip(distinct_values, occurrences):
        child_labels = y[X_column == value]
        child_terms.append((n_in_child / n_rows) * entropy(child_labels))

    entropy_after = np.sum(child_terms)
    return entropy_before - entropy_after

# Entropy of the target before any split (the root of the tree)
root_entropy = entropy(y)
print(f"Entropy of the entire dataset: {root_entropy:.4f}")

# Score every feature column first, then report the gains in column order
gains = {feature: information_gain(X[feature], y) for feature in X.columns}
for feature, gain in gains.items():
    print(f"Information Gain for {feature}: {gain:.4f}")

# Compare with scikit-learn's DecisionTreeClassifier.
# NOTE: scikit-learn grows binary threshold splits, whereas information_gain
# above creates one child per distinct feature value, so the manual gains
# are not directly comparable with the tree's choice.
clf = DecisionTreeClassifier(criterion='entropy', random_state=42)
clf.fit(X, y)

# clf.tree_.feature[0] is the index of the feature tested at node 0 (the root)
tree_root_feature = iris.feature_names[clf.tree_.feature[0]]
print(f"\nRoot node feature selected by scikit-learn: {tree_root_feature}")

# Visualize the decision tree
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 8))
tree.plot_tree(
    clf,
    feature_names=list(iris.feature_names),
    # load_iris exposes target_names as a NumPy array; pass a plain list
    # because some scikit-learn releases reject non-list values here.
    class_names=list(iris.target_names),
    filled=True,
)
plt.show()
