In [2]:
# Import necessary libraries
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt


In [3]:
# Load a sample dataset (e.g., Breast Cancer dataset)
data = load_breast_cancer()
X = data.data
y = data.target

# Split the dataset into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)


In [4]:
# Initialize the XGBClassifier
xgb_clf = xgb.XGBClassifier(
    objective='binary:logistic',  # For binary classification
    eval_metric='logloss',        # Evaluation metric
    use_label_encoder=False       # Disable label encoder warning
)

# Train the model using early stopping (with 10 rounds without improvement)
evals = [(X_train, y_train), (X_val, y_val)]  # Define validation set
xgb_clf.fit(X_train, y_train, 
            eval_set=evals, 
            early_stopping_rounds=10, 
            verbose=True)


TypeError: XGBClassifier.fit() got an unexpected keyword argument 'early_stopping_rounds'

In [None]:
# Plot training vs validation error
results = xgb_clf.evals_result()

# Extract training and validation error
epochs = len(results['validation_0']['logloss'])
x_axis = range(0, epochs)

# Plot log loss for training and validation
plt.figure(figsize=(10, 6))
plt.plot(x_axis, results['validation_0']['logloss'], label='Train')
plt.plot(x_axis, results['validation_1']['logloss'], label='Validation')
plt.xlabel('Iterations')
plt.ylabel('Log Loss')
plt.title('Training vs Validation Log Loss')
plt.legend()
plt.grid(True)
plt.show()


In [None]:
# Evaluate the model on the validation set
y_pred = xgb_clf.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy:.4f}")
