In [1]:
# Author: Roi Yehoshua <roiyeho@gmail.com>
# July 2024
# License: MIT

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Fetch the breast cancer dataset directly as a (features, labels) pair
X, y = load_breast_cancer(return_X_y=True)

# Hold out a test set; the fixed seed keeps the partition reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
)

In [3]:
from xgboost import XGBClassifier

# Baseline model: only the random seed is set explicitly, every other
# hyperparameter is left at the library default.
clf = XGBClassifier(random_state=42)
# Fit on the training split. This call is the cell's final expression, so the
# notebook will display whatever fit() returns.
clf.fit(X_train, y_train)

In [4]:
# Score the fitted classifier on both splits, then report to 4 decimals
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)

print(f'Train accuracy: {train_acc:.4f}')
print(f'Test accuracy: {test_acc:.4f}')

Train accuracy: 1.0000
Test accuracy: 0.9650


Early Stopping

In [5]:
# Rebuild the train/test partition from the full dataset (same seed as the
# initial split, so the partitions are identical) rather than splitting
# X_train in place. This keeps the cell idempotent: re-running it no longer
# carves another 10% off an already-reduced training set.
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X, y, random_state=42
)

# Split the training data into 90% training and 10% validation
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.1, random_state=42
)

# Initialize the classifier with early stopping: boosting halts once the
# validation metric has failed to improve for 5 consecutive rounds
clf = XGBClassifier(early_stopping_rounds=5, random_state=42)

# Train the model; the validation set passed via eval_set is the one
# monitored for early stopping (its metric is logged each round)
clf.fit(X_train, y_train, eval_set=[(X_val, y_val)])

[0]	validation_0-logloss:0.42570
[1]	validation_0-logloss:0.32638
[2]	validation_0-logloss:0.27454
[3]	validation_0-logloss:0.23944
[4]	validation_0-logloss:0.21755
[5]	validation_0-logloss:0.20466
[6]	validation_0-logloss:0.18016
[7]	validation_0-logloss:0.17542
[8]	validation_0-logloss:0.17582
[9]	validation_0-logloss:0.16546
[10]	validation_0-logloss:0.16362
[11]	validation_0-logloss:0.15749
[12]	validation_0-logloss:0.14579
[13]	validation_0-logloss:0.14345
[14]	validation_0-logloss:0.13468
[15]	validation_0-logloss:0.13026
[16]	validation_0-logloss:0.12127
[17]	validation_0-logloss:0.12079
[18]	validation_0-logloss:0.11741
[19]	validation_0-logloss:0.11326
[20]	validation_0-logloss:0.11209
[21]	validation_0-logloss:0.11141
[22]	validation_0-logloss:0.10687
[23]	validation_0-logloss:0.10298
[24]	validation_0-logloss:0.10520
[25]	validation_0-logloss:0.10359
[26]	validation_0-logloss:0.09914
[27]	validation_0-logloss:0.10062
[28]	validation_0-logloss:0.10339
[29]	validation_0-loglos

In [6]:
# Evaluate the current classifier on the training and test sets,
# then report both accuracies to 4 decimals
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)

print(f'Train accuracy: {train_acc:.4f}')
print(f'Test accuracy: {test_acc:.4f}')

Train accuracy: 1.0000
Test accuracy: 0.9650


In [7]:
# Read the best_iteration attribute recorded on the fitted classifier and show it
best_iter = clf.best_iteration
print('Best iteration:', best_iter)

Best iteration: 26
