In [1]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Compare a single decision tree against a small random forest on the iris
# dataset, reporting train/test accuracy for both and the forest's feature
# importances.

# One seed shared by the split and both models so the run is reproducible.
RANDOM_STATE = 42
# Number of trees in the forest; also interpolated into the report header so
# the printed label can never disagree with the model configuration.
N_ESTIMATORS = 10

# Load the iris dataset (feature matrix and class labels)
data = load_iris()
X, y = data.data, data.target

# Split into training and test sets (20% of the samples held out for testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# Single Decision Tree (baseline for comparison)
dt = DecisionTreeClassifier(max_depth=3, random_state=RANDOM_STATE)
dt.fit(X_train, y_train)
dt_train_pred = dt.predict(X_train)
dt_test_pred = dt.predict(X_test)

# Random Forest
#   n_estimators: number of decision trees to build
#   max_depth:    maximum depth of each individual tree (not a node count)
rf = RandomForestClassifier(
    n_estimators=N_ESTIMATORS, max_depth=4, random_state=RANDOM_STATE
)
rf.fit(X_train, y_train)
rf_train_pred = rf.predict(X_train)
rf_test_pred = rf.predict(X_test)

# Accuracy: fraction of samples whose predicted label equals the true label
dt_train_acc = accuracy_score(y_train, dt_train_pred)
dt_test_acc = accuracy_score(y_test, dt_test_pred)
rf_train_acc = accuracy_score(y_train, rf_train_pred)
rf_test_acc = accuracy_score(y_test, rf_test_pred)

# Print results
print("Single Decision Tree:")
print(f"Training Accuracy: {dt_train_acc:.4f}")
print(f"Test Accuracy: {dt_test_acc:.4f}\n")

print(f"Random Forest ({N_ESTIMATORS} trees):")
print(f"Training Accuracy: {rf_train_acc:.4f}")
print(f"Test Accuracy: {rf_test_acc:.4f}")

# Feature importance from Random Forest, paired with the dataset's feature
# names in column order
print("\nFeature Importances:")
for name, importance in zip(data.feature_names, rf.feature_importances_):
    print(f"{name}: {importance:.4f}")

Single Decision Tree:
Training Accuracy: 0.9583
Test Accuracy: 1.0000

Random Forest (10 trees):
Training Accuracy: 0.9833
Test Accuracy: 1.0000

Feature Importances:
sepal length (cm): 0.1434
sepal width (cm): 0.0230
petal length (cm): 0.4207
petal width (cm): 0.4129
