In [None]:
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
x, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
gb = GradientBoostingClassifier(
    n_estimators=1000,
    learning_rate=0.1,
    validation_fraction=0.2,
    n_iter_no_change=10, # Stop if no improvement for 10 iterations
    tol=0.0001,  # Minimum improvement threshold
    random_state=42
)
gb.fit(x_train, y_train)

print(f"Number of estimators used: {gb.n_estimators_}")
print(f"Best validation score: {gb.train_score_[-1]:.3f}")

Number of estimators used: 46
Best validation score: 0.257


**Manual Early Stopping**

In [6]:
best_score = 0
best_model = None
patience = 10
no_improve = 0

for n_est in range(50, 1000, 50):
    model = GradientBoostingClassifier(
        n_estimators=n_est,
        random_state=42
    )
    model.fit(x_train, y_train)
    val_score = model.score(x_test, y_test)
    
    if val_score > best_score:
        best_score = val_score
        best_model = model
        no_improve = 0
    else:
        no_improve += 1
    
    if no_improve >= patience:
        print(f"Early stopping at {n_est} estimators")
        break
    
print(f"Best validation score: {best_score:.3f}")

Early stopping at 600 estimators
Best validation score: 0.910
