In [1]:
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification
import pandas as pd

In [2]:
# Synthetic binary classification data: 1000 samples, classes weighted 0.7 / 0.3.
# One seed is shared by data generation and the split so that a fresh-kernel
# run reproduces the same arrays.
RANDOM_STATE = 42

x, y = make_classification(
    n_samples=1000,
    n_classes=2,
    weights=[0.7, 0.3],
    random_state=RANDOM_STATE,
)

# Hold out 20% of the rows for evaluation.
# NOTE(review): the split is not stratified even though the classes are
# imbalanced; consider passing stratify=y so both parts keep the same ratio.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=RANDOM_STATE,
)

**Random Forest (bagged decision trees with a random feature subset at each split)**

In [3]:
# Baseline forest: 100 bootstrap-trained trees, each capped at depth 10 and
# evaluating sqrt(n_features) candidate features per split.
rf_params = {
    "n_estimators": 100,
    "max_depth": 10,
    "min_samples_split": 5,
    "min_samples_leaf": 2,
    "max_features": "sqrt",
    "bootstrap": True,
    "random_state": 42,
}
# fit() returns the estimator itself, so construction and training can be chained.
rf = RandomForestClassifier(**rf_params).fit(x_train, y_train)

# Evaluate on the held-out split.
y_pred = rf.predict(x_test)
rf_accuracy = accuracy_score(y_test, y_pred)
print(f"Random Forest Accuracy: {rf_accuracy:.3f}")

Random Forest Accuracy: 0.880


In [9]:
# Rank the inputs by the fitted forest's feature_importances_ scores.
# Columns get positional names (feature_0, feature_1, ...), one per column
# of the training matrix.
feature_names = [f"feature_{i}" for i in range(x_train.shape[1])]
importance = pd.DataFrame(
    {"feature": feature_names, "importance": rf.feature_importances_}
)
# Most important feature first; the original row index is kept.
importance = importance.sort_values(by="importance", ascending=False)

print("\nFeature Importance:")
print(importance)


Feature Importance:
       feature  importance
5    feature_5    0.356490
18  feature_18    0.218953
1    feature_1    0.113235
14  feature_14    0.080738
6    feature_6    0.019518
11  feature_11    0.018939
7    feature_7    0.017605
3    feature_3    0.017319
10  feature_10    0.016091
16  feature_16    0.015471
15  feature_15    0.014006
2    feature_2    0.013861
8    feature_8    0.013257
0    feature_0    0.013138
12  feature_12    0.012983
19  feature_19    0.012618
13  feature_13    0.011917
4    feature_4    0.011848
9    feature_9    0.011848
17  feature_17    0.010166


**Tuned Random Forest**

In [14]:
# Forest with hand-set hyperparameters and out-of-bag (OOB) scoring enabled.
rf_tuned = RandomForestClassifier(
    n_estimators=200,           # More trees reduce variance (diminishing returns) but train slower
    max_depth=15,               # Upper bound on tree depth
    min_samples_split=10,       # A node needs >= 10 samples to be split (limits overfitting)
    min_samples_leaf=4,         # Every leaf must keep >= 4 samples
    max_features='sqrt',        # Features tried per split: 'sqrt', 'log2', or a number
    max_samples=0.8,            # Each tree is fit on a bootstrap draw of 80% of the training rows
    bootstrap=True,             # Required for both max_samples and oob_score
    oob_score=True,             # Score each training row using only trees that did not see it
    random_state=42
)
rf_tuned.fit(x_train, y_train)
print(f"Tuned RF Accuracy: {accuracy_score(y_test, rf_tuned.predict(x_test)):.3f}")
# The OOB estimate is computed during fit() because oob_score=True; report it
# so the extra work is not wasted. It gives a generalisation estimate that does
# not touch the test split.
print(f"Tuned RF OOB Score: {rf_tuned.oob_score_:.3f}")

Tuned RF Accuracy: 0.865
