# In [None]:
import warnings

import numpy as np
import pandas as pd
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LogisticRegression, RidgeClassifier, Lasso
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Silence every warning so the printed reports stay readable.
# NOTE(review): this also hides scikit-learn convergence/deprecation warnings.
warnings.filterwarnings("ignore")

# UCI "Breast Cancer" data set; the raw file ships without a header row, so the
# column names are supplied explicitly.
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer/breast-cancer.data"
columnas = [
    "Class", "age", "menopause", "tumor-size", "inv-nodes",
    "node-caps", "deg-malig", "breast", "breast-quad", "irradiat"
]
# The UCI file marks missing values with "?". Without na_values pandas reads
# them as ordinary strings, dropna() removes nothing, and "?" ends up being
# one-hot encoded as if it were a legitimate category.
df = pd.read_csv(url, names=columnas, na_values="?").dropna()
# Encode the target as 0 (no recurrence) / 1 (recurrence).
df["Class"] = df["Class"].map({"no-recurrence-events": 0, "recurrence-events": 1})

X = df.drop("Class", axis=1)
y = df["Class"]

# Every predictor (including the ordinal "deg-malig") is treated as categorical
# and one-hot encoded; the first level of each is dropped to avoid collinearity.
categorical_features = X.columns.tolist()
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False), categorical_features)
])

class LassoClassifier(BaseEstimator, ClassifierMixin):
    """Binary classifier that thresholds an L1-regularised linear regression.

    A ``Lasso`` regressor is fitted directly on the 0/1 labels; samples whose
    predicted value is greater than 0.5 are assigned class 1, all others
    class 0. The labels passed to ``fit`` are therefore expected to be encoded
    as 0 and 1.

    Parameters
    ----------
    alpha : float, default=1.0
        L1 penalty strength forwarded to ``Lasso``.

    Attributes
    ----------
    model_ : Lasso
        Regressor fitted by ``fit`` (also reachable through the read-only
        ``model`` alias).
    classes_ : ndarray
        Sorted unique labels seen during ``fit``.
    """

    def __init__(self, alpha=1.0):
        # Store hyper-parameters only. Building the Lasso here would freeze its
        # alpha, so a later set_params(alpha=...) (e.g. from GridSearchCV)
        # would silently have no effect on the fitted model.
        self.alpha = alpha

    @property
    def model(self):
        """Return the fitted ``Lasso`` regressor (alias of ``model_``)."""
        return self.model_

    def fit(self, X, y):
        """Fit the underlying Lasso regressor on ``X`` and the 0/1 labels ``y``.

        Returns
        -------
        self : LassoClassifier
            The fitted estimator.
        """
        # Built at fit time so the current value of self.alpha is always used.
        # The trailing-underscore attributes are also what check_is_fitted()
        # looks for when deciding whether the estimator has been fitted.
        self.model_ = Lasso(alpha=self.alpha, max_iter=10000)
        self.model_.fit(X, y)
        self.classes_ = np.unique(y)
        return self

    def predict(self, X):
        """Return 1 where the regression output exceeds 0.5, otherwise 0."""
        return (self.model_.predict(X) > 0.5).astype(int)
def _build_pipeline(classifier):
    """Return an encode -> scale -> classify pipeline around ``classifier``.

    Pipeline fits its steps in place without copying them, so each pipeline
    gets its own clone of the shared ``preprocessor`` instead of all of them
    refitting the very same ColumnTransformer object.
    """
    return Pipeline([
        ("prep", clone(preprocessor)),
        ("scale", StandardScaler()),
        ("clf", classifier),
    ])


# Models under comparison, keyed by the label used in the printed reports.
models = {
    "Base": _build_pipeline(LogisticRegression(max_iter=1000, solver='liblinear')),
    "Ridge": _build_pipeline(RidgeClassifier(alpha=10)),
    # On standardised features with a 0/1 target, any alpha >= 0.5 drives every
    # Lasso coefficient to zero (|cov(x_j, y)| <= std(y) <= 0.5), which turns
    # the model into a constant "always class 0" predictor. A small penalty
    # keeps the L1 model informative.
    "Lasso": _build_pipeline(LassoClassifier(alpha=0.01)),
}

# Stratified 80/20 hold-out split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Hold-out evaluation: fit each pipeline on the training part and print a
# per-class report for the held-out part.
for name, model in models.items():
    y_pred = model.fit(X_train, y_train).predict(X_test)
    header = f"\n=== Modelo {name} ==="
    print(header)
    report = classification_report(y_test, y_pred, zero_division=0)
    print(report)

# 5-fold cross-validated accuracy on the full data set, averaged over folds.
for name, model in models.items():
    fold_scores = cross_val_score(model, X, y, cv=5, scoring='accuracy')
    acc = fold_scores.mean()
    print(f"Accuracy promedio - {name}: {acc:.4f}")
