# Quick XGBoost Demo
This demo uses the Wisconsin breast cancer diagnostic dataset to build XGBoost models,
with hyperparameters selected by `GridSearchCV`.

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import roc_auc_score
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier
import pandas as pd

## Load and Split Data

In [None]:
# Pull the Wisconsin breast cancer dataset that ships with scikit-learn.
data = load_breast_cancer()

# Wrap the raw arrays in pandas objects; the feature frame is labelled with
# the dataset's own feature names.
y = pd.Series(data.target)
X = pd.DataFrame(data.data, columns=data.feature_names)

# Reserve 20% of the rows for the final test. Stratifying on y keeps the
# class balance comparable in both splits, and the fixed seed makes the
# split repeatable across runs.
split_kwargs = {"test_size": 0.2, "random_state": 42, "stratify": y}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_kwargs)

## Model Pipeline and Grid Search

In [None]:
# Gradient-boosted classifier for a binary target, seeded for repeatability.
classifier = XGBClassifier(
    random_state=42,
    eval_metric="auc",
    objective="binary:logistic",
)

# Single-step pipeline. The step name "model" is what gives the grid keys
# below their "model__" prefix.
xgb_model = Pipeline(steps=[("model", classifier)])

# Candidate values: 3 tree depths x 4 ensemble sizes = 12 combinations.
param_grid = dict(
    model__max_depth=[1, 2, 3],
    model__n_estimators=[2, 3, 4, 5],
)

# Exhaustive search over the grid, ranking candidates by ROC AUC under
# 5-fold cross-validation; n_jobs=-1 asks for all available cores.
gs = GridSearchCV(
    xgb_model,
    param_grid,
    scoring="roc_auc",
    cv=5,
    n_jobs=-1,
    verbose=1,
)

## Fit and Evaluate Model
Grid search for the best hyperparameters and fit the model.
Evaluate model performance with AUC.

In [None]:
# Run the cross-validated search on the training split only, then report
# the winning parameter combination and its mean cross-validation AUC.
gs.fit(X_train, y_train)
print(f"Best parameters:{gs.best_params_}, best cross-val AUC {gs.best_score_}")

# best_estimator_ is the pipeline configured with the winning parameters
# (GridSearchCV refits it on the full training data when refit is left at
# its default).
best_model = gs.best_estimator_

# predict_proba returns one column per class; column 1 holds the score for
# class label 1, which is what the AUC is computed from.
class_probabilities = best_model.predict_proba(X_test)
proba = class_probabilities[:, 1]

# Score the held-out test split, which the search never saw.
auc = roc_auc_score(y_true=y_test, y_score=proba)
print(f"Test AUC: {auc:.3f}")

Fitting 5 folds for each of 12 candidates, totalling 60 fits
Best parameters:{'model__max_depth': 3, 'model__n_estimators': 5}, best cross-val AUC 0.9795665634674924
Best cross-val AUC: 0.9795665634674924
Test AUC: 0.985
