In [1]:
# --- SOLUCIÓN PASO 1 ---
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# --- SOLUCIÓN PASO 2 ---
# Load scikit-learn's bundled breast-cancer dataset and pull out the
# feature matrix and the label vector.
data = load_breast_cancer()
X = data.data
y = data.target

# First carve out the held-out test set: 20% of all samples, stratified on
# the labels, with a fixed seed so the split is reproducible.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Then split the remaining 80% into train and validation. Taking 0.25 of
# that remainder is 0.25 * 0.8 = 0.2 of the full dataset, which yields a
# 60/20/20 train/validation/test partition.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42
)

# --- SOLUCIÓN PASO 3 ---
# Two candidate models, each with standardization as the first pipeline
# step so the scaler is fitted only on the data handed to fit().
pipe_lr = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))
pipe_knn = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=5))

# Fit every candidate on the training split only (logistic regression
# first, then KNN); validation and test data stay unseen here.
for candidate in (pipe_lr, pipe_knn):
    candidate.fit(X_train, y_train)

# --- SOLUCIÓN PASO 4 ---
# Score both fitted pipelines on the validation split.
y_pred_val_lr = pipe_lr.predict(X_val)
lr_acc = accuracy_score(y_val, y_pred_val_lr)

y_pred_val_knn = pipe_knn.predict(X_val)
knn_acc = accuracy_score(y_val, y_pred_val_knn)

print("Precisión Regresión Logística (validation):", lr_acc)
print("Precisión KNN (validation):", knn_acc)

# Select the winner by validation accuracy. The comparison is strict, so a
# tie is resolved in favour of KNN.
best_model, winner_banner = (
    (pipe_lr, "\nMejor modelo: Regresión Logística")
    if lr_acc > knn_acc
    else (pipe_knn, "\nMejor modelo: KNN")
)
print(winner_banner)

# Only the selected model is evaluated on the held-out test split.
y_pred_test = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred_test)
print(f"\nPrecisión del mejor modelo en TEST: {test_accuracy:.4f}")

Precisión Regresión Logística (validation): 0.9912280701754386
Precisión KNN (validation): 0.956140350877193

Mejor modelo: Regresión Logística

Precisión del mejor modelo en TEST: 0.9825
