# 🧠 AdaBoost do Zero
Neste notebook, vamos implementar o algoritmo **AdaBoost** com base em árvores de decisão rasas (stumps), do zero.

Etapas:
- Inicialização dos pesos
- Treinamento sequencial de stumps
- Atualização de pesos
- Cálculo de alphas (pesos dos modelos)
- Votação final ponderada

In [15]:
# Dataset: Diabetes do OpenML
from sklearn.datasets import fetch_openml
import pandas as pd
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix

data = fetch_openml(name='diabetes', version=1, as_frame=True)
df = data.frame.copy()
X = df.drop(columns='class')
X = pd.get_dummies(X, drop_first=True)  # evita multicolinearidade
y = df['class'].map({'tested_negative': 0, 'tested_positive': 1}).astype(float)

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.3, random_state=42)

In [16]:
# Função AdaBoost do zero
def adaboost_train(X, y, n_estimators=10):
    n = len(y)
    w = np.ones(n) / n  # pesos iniciais
    models = []
    alphas = []

    for m in range(n_estimators):
        stump = DecisionTreeClassifier(max_depth=1)
        stump.fit(X, y, sample_weight=w)
        pred = stump.predict(X)

        err = np.sum(w * (pred != y)) / np.sum(w)
        alpha = 0.5 * np.log((1 - err) / (err + 1e-10))

        # Atualizar pesos
        w *= np.exp(-alpha * y * pred)
        w /= np.sum(w)

        models.append(stump)
        alphas.append(alpha)

    return models, alphas

In [17]:
# Previsão com AdaBoost
def adaboost_predict(X, models, alphas):
    final_pred = np.zeros(len(X))
    for model, alpha in zip(models, alphas):
        final_pred += alpha * model.predict(X)
    return np.sign(final_pred)

In [18]:
# Treinar AdaBoost manual
models, alphas = adaboost_train(X_train, y_train, n_estimators=20)

# Avaliação
y_pred = adaboost_predict(X_test, models, alphas)

print("Acurácia:", accuracy_score(y_test, y_pred))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))

Acurácia: 0.6926406926406926
[[118  32]
 [ 39  42]]
              precision    recall  f1-score   support

         0.0       0.75      0.79      0.77       150
         1.0       0.57      0.52      0.54        81

    accuracy                           0.69       231
   macro avg       0.66      0.65      0.66       231
weighted avg       0.69      0.69      0.69       231



In [20]:
from sklearn.ensemble import AdaBoostClassifier

# Modelo com os mesmos parâmetros do manual: base_estimator com max_depth=1
base_stump = DecisionTreeClassifier(random_state=42)
ada_sklearn = AdaBoostClassifier(estimator=base_stump, n_estimators=20, random_state=42)

# Treinar
ada_sklearn.fit(X_train, y_train)

# Prever
y_pred_sklearn = ada_sklearn.predict(X_test)

# Avaliar
print("AdaBoost (sklearn):")
print("Acurácia:", accuracy_score(y_test, y_pred_sklearn))
print(confusion_matrix(y_test, y_pred_sklearn))
print(classification_report(y_test, y_pred_sklearn))

AdaBoost (sklearn):
Acurácia: 0.7316017316017316
[[124  26]
 [ 36  45]]
              precision    recall  f1-score   support

         0.0       0.78      0.83      0.80       150
         1.0       0.63      0.56      0.59        81

    accuracy                           0.73       231
   macro avg       0.70      0.69      0.70       231
weighted avg       0.73      0.73      0.73       231

