In [11]:
import copy

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils.validation import check_X_y, check_array, check_is_fitted

class SimpleBoostingClassifier(BaseEstimator, ClassifierMixin):
    """SAMME-style boosting that picks the best of several weak learners per round.

    In every round an independent copy of each estimator in ``base_models`` is
    fitted on the current sample weights; the copy with the lowest weighted
    training error joins the ensemble with weight
    ``learning_rate * (log((1 - err) / err) + log(n_classes - 1))``.
    Prediction is an alpha-weighted vote over the stored models.

    Parameters
    ----------
    base_models : sequence of estimators
        Templates for the weak learners. Each must provide
        ``fit(X, y, sample_weight=...)`` and ``predict(X)``. The templates
        themselves are never fitted; deep copies are.
    num_rounds : int, default=50
        Maximum number of boosting rounds.
    learning_rate : float, default=0.1
        Multiplier applied to every model weight (alpha).

    Attributes
    ----------
    classes_ : ndarray
        Sorted unique labels seen in ``fit``.
    models : list
        Fitted weak learners, one per completed round.
    alphas : ndarray of float (an empty list before the first ``fit``)
        Vote weight of each entry in ``models``.
    """

    # Bound used to keep the weighted error strictly inside (0, 1) so that the
    # log-odds in the alpha formula stay finite.
    _ERROR_EPS = 1e-10

    def __init__(self, base_models, num_rounds=50, learning_rate=0.1):
        self.base_models = base_models
        self.num_rounds = num_rounds
        self.learning_rate = learning_rate
        # Kept so the attributes exist before fitting; fit() always rebuilds them.
        self.models = []
        self.alphas = []

    def fit(self, X, y):
        """Fit the ensemble on ``X`` / ``y``.

        Any state from a previous call is discarded, so refitting is safe.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``base_models`` is empty, or if ``check_X_y`` rejects the input.
        """
        X, y = check_X_y(X, y)
        if len(self.base_models) == 0:
            raise ValueError("base_models must contain at least one estimator")

        self.classes_ = np.unique(y)
        # max(..., 1) avoids log(0) when only one class is present.
        class_term = np.log(max(len(self.classes_) - 1, 1))

        n_samples = len(y)
        weights = np.full(n_samples, 1.0 / n_samples)

        # Build into locals so a failed or repeated fit never leaves mixed state.
        fitted_models = []
        fitted_alphas = []

        for _ in range(self.num_rounds):
            best_error = None
            best_model = None
            best_missed = None

            for template in self.base_models:
                # Fit a private copy: the stored model must not be overwritten
                # when the same template is refitted in a later round.
                candidate = copy.deepcopy(template)
                candidate.fit(X, y, sample_weight=weights)
                missed = candidate.predict(X) != y
                weighted_error = float(np.sum(weights * missed))
                # Strict '<' keeps the first template on ties, like np.argmin.
                if best_error is None or weighted_error < best_error:
                    best_error = weighted_error
                    best_model = candidate
                    best_missed = missed

            # Clip so an error of exactly 0 or 1 yields a large finite alpha
            # instead of +/-inf (which would turn the weights into NaN).
            clipped = min(max(best_error, self._ERROR_EPS), 1.0 - self._ERROR_EPS)
            alpha = self.learning_rate * (np.log((1.0 - clipped) / clipped) + class_term)

            fitted_models.append(best_model)
            fitted_alphas.append(alpha)

            if best_error <= 0.0:
                # Nothing is misclassified, so the weights would not change and
                # every further round would reproduce this same model.
                break

            # Up-weight the samples the chosen model got wrong, then renormalise.
            weights = weights * np.exp(alpha * best_missed)
            weights /= np.sum(weights)

        self.models = fitted_models
        self.alphas = np.asarray(fitted_alphas, dtype=float)

        return self

    def predict(self, X):
        """Predict labels by alpha-weighted voting over the stored models.

        Works for any number of classes and any label dtype. Ties go to the
        class that comes first in ``classes_``; with an empty ensemble
        (``num_rounds == 0``) every sample is assigned ``classes_[0]``.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels, drawn from ``classes_``.
        """
        check_is_fitted(self)
        X = check_array(X)

        n_samples = X.shape[0]
        votes = np.zeros((n_samples, len(self.classes_)))
        rows = np.arange(n_samples)

        for alpha, model in zip(self.alphas, self.models):
            # classes_ is sorted (np.unique), so searchsorted maps each
            # predicted label to its column in the vote table.
            cols = np.searchsorted(self.classes_, model.predict(X))
            votes[rows, cols] += alpha

        return self.classes_[np.argmax(votes, axis=1)]

In [12]:
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
import numpy as np

# Build a synthetic binary classification data set
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)

# Candidate values for the number of boosting rounds
round_numbers = [10, 50, 100, 200, 500]

# Mean cross-validation score per candidate, in the same order as round_numbers
mean_scores = []

for num_rounds in round_numbers:
    # Pool of ten depth-1 trees used as weak learners
    base_models = [DecisionTreeClassifier(max_depth=1) for _ in range(10)]

    # Boosting ensemble configured for this number of rounds
    boosting_model = SimpleBoostingClassifier(
        base_models,
        num_rounds=num_rounds,
        learning_rate=0.1,
    )

    # 5-fold cross-validation; keep only the average score
    scores = cross_val_score(boosting_model, X, y, cv=5)
    mean_scores.append(scores.mean())

# Candidate with the highest mean score (first one wins on ties)
best_rounds = round_numbers[int(np.argmax(mean_scores))]

print("Resultados:")
for rounds, score in zip(round_numbers, mean_scores):
    print(f"Rondas: {rounds}, Puntuación media: {score:.4f}")
print(f"\nMejor número de rondas: {best_rounds}")