# 🌳 Decision Trees & Random Forest Example

This notebook demonstrates:
- Gini & Entropy metrics
- Overfitting
- Building & Visualizing a Decision Tree
- Building a Random Forest Classifier

In [None]:
!pip install graphviz scikit-learn matplotlib seaborn

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [None]:
# Load the Iris dataset and wrap it in labelled pandas containers
data = load_iris()

# Feature table: one column per entry in data.feature_names
X = pd.DataFrame(
    data=data.data,
    columns=data.feature_names,
)
# Target values as a named Series
y = pd.Series(data=data.target, name='species')

In [None]:
# Train/test split: hold out 30% of the rows for evaluation.
# The fixed random_state makes the shuffle (and so the split) repeatable.
split_options = dict(test_size=0.3, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Decision Tree (Gini)
# Fit a shallow tree (depth capped at 3) using the Gini criterion; the fixed
# random_state keeps the fitted tree reproducible across runs.
dtree = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=42)
dtree.fit(X_train, y_train)

# Draw on an explicit figure/axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(12, 8))
plot_tree(
    dtree,
    # Pass plain lists: some scikit-learn releases validate these parameters
    # as `list` and raise InvalidParameterError for other array types.
    feature_names=list(data.feature_names),
    class_names=list(data.target_names),
    filled=True,
    ax=ax,
)
ax.set_title("Decision Tree (Gini Index, Depth=3)")
plt.show()

In [None]:
# Evaluate Decision Tree on the held-out test split
y_pred_dt = dtree.predict(X_test)

# Compute both summaries first, then print them
dt_accuracy = accuracy_score(y_test, y_pred_dt)
dt_report = classification_report(
    y_test,
    y_pred_dt,
    target_names=data.target_names,
)

print("Decision Tree Accuracy:", dt_accuracy)
print(dt_report)

In [None]:
# Random Forest: an ensemble of 100 trees, seeded for repeatable results
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Score the forest on the held-out test split
y_pred_rf = rf.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
rf_report = classification_report(
    y_test,
    y_pred_rf,
    target_names=data.target_names,
)

print("Random Forest Accuracy:", rf_accuracy)
print(rf_report)

In [None]:
# Gini vs Entropy Example
def gini(p):
    """Return the Gini impurity of the probabilities in ``p``.

    Computed as ``1 - sum(p_i ** 2)`` over every entry of ``p``.
    """
    squared_total = sum(prob ** 2 for prob in p)
    return 1 - squared_total

def entropy(p):
    """Return the entropy, in bits (log base 2), of the probabilities in ``p``.

    Computed as ``sum(-p_i * log2(p_i))``. Entries that are not strictly
    positive are skipped, so a zero probability contributes nothing instead
    of producing ``log2(0)``.
    """
    # Negate each term rather than the final total: negating a zero sum yields
    # -0.0, which would print as "-0.0" for a pure distribution such as [1.0].
    return sum(-q * np.log2(q) for q in p if q > 0)

# Evaluate both impurity measures on an evenly split two-class distribution
probs = [0.5, 0.5]
for label, impurity in (("Gini:", gini), ("Entropy:", entropy)):
    print(label, impurity(probs))

In [None]:
# Overfitting Demo
# max_depth=None places no depth limit on the tree. Comparing the train and
# test accuracy below shows how much of the training fit carries over to
# unseen data.
# random_state is fixed, as for the other models in this notebook, so that
# rerunning the cell rebuilds the same tree and reports the same scores.
dtree_overfit = DecisionTreeClassifier(criterion='entropy', max_depth=None, random_state=42)
dtree_overfit.fit(X_train, y_train)

print("Overfit Tree Train Accuracy:", dtree_overfit.score(X_train, y_train))
print("Overfit Tree Test Accuracy:", dtree_overfit.score(X_test, y_test))