In [None]:
!pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.7.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.7.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.7.0 scikit-optimize-0.10.2


In [None]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from skopt import gp_minimize
from skopt.space import Integer, Real, Categorical
from skopt.utils import use_named_args
from xgboost import XGBClassifier

# Dataset: load the breast cancer data as a (features, target) pair
X, y = load_breast_cancer(return_X_y=True)

# Search space, declared per model and then concatenated in a fixed order.
rf_dims = [
    Integer(low=50, high=200, name="n_estimators_rf"),
    Integer(low=2, high=15, name="max_depth_rf"),
]

svm_dims = [
    Real(low=0.01, high=10.0, name="C_svm"),
    Categorical(categories=["linear", "rbf"], name="kernel_svm"),
]

xgb_dims = [
    Integer(low=50, high=200, name="n_estimators_xgb"),
    Real(low=0.01, high=0.3, name="learning_rate_xgb"),
    Integer(low=2, high=10, name="max_depth_xgb"),
]

# How the VotingClassifier combines its members
voting_dims = [
    Categorical(categories=["soft", "hard"], name="voting_type"),
]

space = [*rf_dims, *svm_dims, *xgb_dims, *voting_dims]

# Objective function
@use_named_args(space)
def objective(**params):
    """Score one hyperparameter configuration for the stacking and voting ensembles.

    Builds a RandomForest, an SVC and an XGBoost classifier from ``params``,
    combines them into a StackingClassifier (LogisticRegression meta-model)
    and a VotingClassifier, and evaluates both with 5-fold cross-validated
    accuracy on the module-level ``X`` and ``y``.

    Parameters
    ----------
    **params
        One value per dimension declared in ``space``, keyed by dimension
        name: ``n_estimators_rf``, ``max_depth_rf``, ``C_svm``,
        ``kernel_svm``, ``n_estimators_xgb``, ``learning_rate_xgb``,
        ``max_depth_xgb`` and ``voting_type``.

    Returns
    -------
    float
        The negated equal-weight average of the two mean CV accuracies.
        It is negated because the optimizer minimizes the objective.
    """
    rf = RandomForestClassifier(
        n_estimators=params["n_estimators_rf"],
        max_depth=params["max_depth_rf"],
        random_state=42
    )

    # SVMs are sensitive to feature scale, so standardize inside a pipeline.
    # The scaler is then re-fit on each training fold only (no leakage).
    svc = make_pipeline(
        StandardScaler(),
        SVC(
            C=params["C_svm"],
            kernel=params["kernel_svm"],
            probability=True,  # required for voting="soft"
            random_state=42
        )
    )

    xgb = XGBClassifier(
        n_estimators=params["n_estimators_xgb"],
        learning_rate=params["learning_rate_xgb"],
        max_depth=params["max_depth_xgb"],
        eval_metric="logloss",
        random_state=42
    )

    # Meta-model: Logistic Regression. With passthrough=True it also receives
    # the raw features, so standardize its input; on unscaled data the solver
    # hit the iteration limit and emitted a convergence warning.
    meta_lr = make_pipeline(
        StandardScaler(),
        LogisticRegression(max_iter=1000, random_state=42)
    )

    # Stacking (RF + SVM + XGB as base learners, LogisticRegression as meta-model)
    stacking = StackingClassifier(
        estimators=[("rf", rf), ("svc", svc), ("xgb", xgb)],
        final_estimator=meta_lr,
        passthrough=True
    )

    # Voting (RF + SVM + XGB)
    voting = VotingClassifier(
        estimators=[("rf", rf), ("svc", svc), ("xgb", xgb)],
        voting=params["voting_type"]
    )

    # Evaluate both ensembles
    scores_stack = cross_val_score(stacking, X, y, cv=5, scoring="accuracy")
    scores_vote = cross_val_score(voting, X, y, cv=5, scoring="accuracy")

    # Combined score (50% stacking, 50% voting)
    return -(0.5 * np.mean(scores_stack) + 0.5 * np.mean(scores_vote))

# Run the optimization over the declared search space
res = gp_minimize(objective, space, n_calls=40, random_state=42)

# The objective returns a negated score, so flip the sign back for reporting.
best_score = -res.fun
# Pair each dimension name with the corresponding best value.
named_best = {dim.name: value for dim, value in zip(space, res.x)}

print("Melhor score combinado:", best_score)
print("Melhores hiperparâmetros:", res.x)
print("Nomeados:", named_best)


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bs