
# Capítulo 6 — Avaliação de Modelos
**Objetivo:** comparar estratégias de validação e inspecionar *bias/variance*.
- Holdout vs. Cross-Validation
- *Learning curves* (curvas de aprendizado)
- *Calibration curve* (para probabilidades)


In [None]:

import numpy as np, matplotlib.pyplot as plt
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import learning_curve, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.calibration import CalibrationDisplay
from sklearn.model_selection import train_test_split

# Load the breast-cancer dataset bundled with scikit-learn.
ds = load_breast_cancer()
X = ds.data
y = ds.target

# Feature scaling followed by logistic regression, wrapped in a single
# Pipeline so both steps are always fitted together on the same data.
pipe = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("clf", LogisticRegression(max_iter=1000)),
    ]
)

# Shuffled, stratified 5-fold splitter with a fixed seed for repeatable folds.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# --- Learning curve -------------------------------------------------------
# Accuracy is measured at five training-set fractions, evenly spaced from
# 10% to 100%; each score array has one row per size and one column per fold.
train_sizes, train_scores, test_scores = learning_curve(
    estimator=pipe,
    X=X,
    y=y,
    train_sizes=np.linspace(0.1, 1.0, 5),
    cv=cv,
    scoring="accuracy",
    n_jobs=-1,
)

# Average across folds (axis 1) to get one point per training size.
train_mean = np.mean(train_scores, axis=1)
val_mean = np.mean(test_scores, axis=1)

fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, marker="o", label="Treino")
ax.plot(train_sizes, val_mean, marker="s", label="Validação")
ax.set_xlabel("Tamanho do treino")
ax.set_ylabel("Acurácia")
ax.set_title("Learning Curve")
ax.legend()
plt.show()

# --- Calibration curve ----------------------------------------------------
# Stratified 80/20 holdout: fit on the training part, then plot the
# calibration of the predicted probabilities on the held-out part.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
pipe.fit(X_train, y_train)

disp = CalibrationDisplay.from_estimator(pipe, X_test, y_test, n_bins=10)
disp.ax_.set_title("Calibration Curve")
plt.show()
