In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


In [2]:
# Load scikit-learn's breast-cancer dataset and pull out the feature
# matrix (X) and the label vector (y) from the returned Bunch.
dataset = load_breast_cancer()
X, y = dataset.data, dataset.target


In [3]:
# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [5]:
# Baseline forest: 100 trees, no depth limit, fixed seed so training is repeatable.
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=100, max_depth=None, random_state=42).fit(
    X_train, y_train
)

# Score the fitted forest on the held-out split.
preds = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, preds))


Accuracy: 0.9649122807017544


In [20]:
# Small forest: only 5 trees (depth and seed unchanged) to see how the tree count
# affects held-out accuracy.
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=5, max_depth=None, random_state=42).fit(
    X_train, y_train
)

# Score the fitted forest on the held-out split.
preds = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, preds))

Accuracy: 0.9473684210526315


In [22]:
# Larger, depth-limited forest: 200 trees, each capped at depth 10, same seed.
# This is the last assignment to `model`, so the feature-importance cell further
# down reads this forest when the notebook is run top to bottom.
# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42).fit(
    X_train, y_train
)

# Score the fitted forest on the held-out split.
preds = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, preds))

Accuracy: 0.9649122807017544


In [24]:
# Rank the input features by the fitted forest's impurity-based importance.
# NOTE: `model` is whichever forest was fitted most recently, so run the training
# cell you want to inspect immediately before this one.
# (pandas is imported as `pd` in the notebook's top import cell.)

# The arrays used for training carry no column names, so take the labels from
# the dataset's own metadata.
feature_names = load_breast_cancer().feature_names

feature_importance = pd.Series(
    model.feature_importances_,
    index=feature_names,
).sort_values(ascending=False)

print(feature_importance)

worst area                 0.128510
worst concave points       0.128365
worst perimeter            0.127080
mean concave points        0.119750
worst radius               0.069274
mean concavity             0.055773
mean radius                0.050440
mean area                  0.042591
worst concavity            0.040713
mean perimeter             0.038352
area error                 0.023449
worst texture              0.020966
worst compactness          0.017708
radius error               0.016454
mean compactness           0.014870
mean texture               0.014745
worst smoothness           0.013092
concavity error            0.009888
worst symmetry             0.009794
mean smoothness            0.008586
perimeter error            0.007893
fractal dimension error    0.006160
worst fractal dimension    0.005221
compactness error          0.005046
symmetry error             0.004703
mean fractal dimension     0.004557
smoothness error           0.004261
texture error              0

Interview-Ready Explanations ⭐
❓ Why does Random Forest work well here?

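➡ It combines the votes of many decision trees, each trained on a bootstrap sample with a random subset of features considered at each split. Averaging these decorrelated trees reduces overfitting (variance) while still capturing non-linear feature interactions.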

❓ Effect of n_estimators?

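➡ More trees make the predictions more stable, with diminishing returns, but increase training and prediction time. In this notebook, 5 trees reach about 0.947 test accuracy versus about 0.965 with 100 trees.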

❓ Effect of max_depth?

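➡ It caps how deep each tree may grow, which limits tree complexity and therefore controls overfitting. With `max_depth=None`, nodes keep splitting until the leaves are pure or contain fewer than `min_samples_split` samples.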

❓ How does Random Forest compute feature importance?

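➡ By the mean decrease in impurity (also called Gini importance): the total impurity reduction contributed by each feature's splits, averaged across all trees and normalized so the importances sum to 1.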