In [3]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Load the breast cancer dataset bundled with scikit-learn
data = load_breast_cancer()
X, y = data.data, data.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Single decision tree (baseline for comparison with the forest)
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)
dt_train_pred = dt.predict(X_train)
dt_test_pred = dt.predict(X_test)

# Random forest
#   n_estimators: number of decision trees to build
#   max_depth:    maximum depth each individual tree is allowed to grow to
rf = RandomForestClassifier(n_estimators=10, max_depth=4, random_state=42)
rf.fit(X_train, y_train)
rf_train_pred = rf.predict(X_train)
rf_test_pred = rf.predict(X_test)

# Accuracy on the training split and on the held-out test split
dt_train_acc = accuracy_score(y_train, dt_train_pred)
dt_test_acc = accuracy_score(y_test, dt_test_pred)
rf_train_acc = accuracy_score(y_train, rf_train_pred)
rf_test_acc = accuracy_score(y_test, rf_test_pred)

# Print results
print("Single Decision Tree:")
print(f"Training Accuracy: {dt_train_acc:.4f}")
print(f"Test Accuracy: {dt_test_acc:.4f}\n")

# Read the tree count from the fitted model so the header cannot drift from
# the n_estimators value actually used above.
print(f"Random Forest ({rf.n_estimators} trees):")
print(f"Training Accuracy: {rf_train_acc:.4f}")
print(f"Test Accuracy: {rf_test_acc:.4f}")

# Feature importance from the random forest, listed in the dataset's own
# feature order (one line per feature name).
print("\nFeature Importances:")
for name, importance in zip(data.feature_names, rf.feature_importances_):
    print(f"{name}: {importance:.4f}")

Single Decision Tree:
Training Accuracy: 0.9780
Test Accuracy: 0.9474

Random Forest (10 trees):
Training Accuracy: 0.9890
Test Accuracy: 0.9561

Feature Importances:
mean radius: 0.0649
mean texture: 0.0050
mean perimeter: 0.0602
mean area: 0.0159
mean smoothness: 0.0047
mean compactness: 0.0040
mean concavity: 0.0061
mean concave points: 0.1276
mean symmetry: 0.0000
mean fractal dimension: 0.0041
radius error: 0.0657
texture error: 0.0000
perimeter error: 0.0016
area error: 0.0677
smoothness error: 0.0063
compactness error: 0.0010
concavity error: 0.0122
concave points error: 0.0015
symmetry error: 0.0001
fractal dimension error: 0.0044
worst radius: 0.0987
worst texture: 0.0217
worst perimeter: 0.0272
worst area: 0.1052
worst smoothness: 0.0083
worst compactness: 0.0364
worst concavity: 0.0227
worst concave points: 0.2222
worst symmetry: 0.0036
worst fractal dimension: 0.0010
