In [None]:
import joblib
import pandas as pd
import numpy as np
from xgboost import XGBClassifier
from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import roc_auc_score, classification_report, confusion_matrix

In [None]:
# Load the persisted development split and unpack it into the train and
# validation features/targets. The file must hold exactly four objects, in
# this order, or the unpacking below fails.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts that come from a trusted source.
dev_splits = joblib.load('dados_processados_dev.joblib')
X_train, y_train, X_val, y_val = dev_splits

In [None]:
# Class-balance ratio of the training target: occurrences of label 0 divided
# by occurrences of label 1. `estimate` is later passed to XGBoost as
# scale_pos_weight.
counter = y_train.value_counts()
n_label_0 = counter[0]
n_label_1 = counter[1]
estimate = n_label_0 / n_label_1

In [None]:
# Base XGBoost classifier handed to the hyper-parameter search.
# - scale_pos_weight: label-0 / label-1 count ratio computed from y_train
#   (`estimate`), used to re-weight the label-1 class.
# - eval_metric='auc': metric XGBoost tracks internally during training.
# - random_state: fixed seed for reproducible runs.
# `use_label_encoder=False` was removed: the installed XGBoost ignores it and
# warns on fit ('Parameters: { "use_label_encoder" } are not used.').
xgb = XGBClassifier(
    scale_pos_weight=estimate,
    eval_metric='auc',
    random_state=42
)

In [None]:
# Discrete search space sampled by the randomized search: every key is an
# XGBClassifier constructor argument and every value lists its candidates.
param_dist = dict(
    n_estimators=[100, 300, 500],       # number of boosting rounds
    max_depth=[3, 6, 10],               # maximum tree depth
    learning_rate=[0.01, 0.05, 0.1],    # shrinkage applied per boosting step
    subsample=[0.7, 0.8, 0.9],          # row fraction sampled per tree
    colsample_bytree=[0.7, 0.8, 0.9],   # column fraction sampled per tree
)

In [None]:
# Cross-validation scheme for the search: 5 stratified folds, shuffled with a
# fixed seed so the fold assignment is reproducible across runs.
cv_strategy = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

In [None]:
# Randomized hyper-parameter search over `param_dist` for the base `xgb`
# model, ranking candidates by cross-validated ROC-AUC.
random_search_xgb = RandomizedSearchCV(
    xgb,
    param_dist,
    n_iter=10,           # try 10 randomly sampled parameter combinations
    scoring='roc_auc',
    cv=cv_strategy,
    n_jobs=-1,           # run the fits in parallel on all available cores
    verbose=2,
    random_state=42,     # fixed seed so the same combinations are drawn
)

In [None]:
# Run the search on the training split (5 folds x 10 candidates = 50 fits).
# Left as the cell's final expression so the fitted search object is displayed.
random_search_xgb.fit(X=X_train, y=y_train)

Fitting 5 folds for each of 10 candidates, totalling 50 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [None]:
# Take the best estimator found by the search and score the validation split.
best_xgb = random_search_xgb.best_estimator_

# Column 1 of predict_proba is kept as the score used for ROC-AUC.
val_class_probs = best_xgb.predict_proba(X_val)
y_val_probs = val_class_probs[:, 1]

# Hard class predictions from the model's predict().
y_val_pred = best_xgb.predict(X_val)

In [None]:
# Report the winning hyper-parameters, then the ROC-AUC on the validation
# split computed from the predicted scores in y_val_probs.
best_params = random_search_xgb.best_params_
print(f"\nMelhores Parâmetros: {best_params}")

val_roc_auc = roc_auc_score(y_val, y_val_probs)
print(f"ROC-AUC de Validação: {val_roc_auc:.4f}")


Melhores Parâmetros: {'subsample': 0.7, 'n_estimators': 100, 'max_depth': 6, 'learning_rate': 0.01, 'colsample_bytree': 0.7}
ROC-AUC de Validação: 0.9625
