In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split, KFold, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

import tensorflow as tf
from tensorflow import keras

SEED = 7
# Seed Python's `random`, NumPy and TensorFlow in a single call. Seeding only
# NumPy and TensorFlow leaves Python's own RNG unseeded, so runs may not repeat.
keras.utils.set_random_seed(SEED)

# 1) Load data
data = pd.read_csv("cancer.csv")
X = data.drop("diagnosis", axis=1)
y = data["diagnosis"]  # must already be encoded as 0/1

# The model ends in a single sigmoid unit trained with binary cross-entropy,
# so labels outside {0, 1} (e.g. strings, NaN, other codes) must be rejected
# here rather than surfacing later as an obscure training failure.
labels_found = set(pd.unique(y))
if not labels_found <= {0, 1}:
    raise ValueError(
        "'diagnosis' debe estar codificada como 0/1; valores encontrados: "
        f"{sorted(labels_found, key=str)}"
    )

# d = number of rows (samples), n0 = number of columns (input features)
d, n0 = X.shape
print("d =", d, " | n0 =", n0)

In [None]:
# 2) Hold-out split: the test partition is set aside and not touched again
#    until the final evaluation.
e = 0.80  # fraction of the samples kept for training
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y,
    test_size=(1 - e),
    stratify=y,  # keep the class proportions of y in both partitions
    random_state=SEED,
)

In [None]:
# 3) Definir arquitectura (candidata)
def build_model(n0, n1=16, lr=1e-3):
    """Build and compile a one-hidden-layer binary classifier.

    Parameters
    ----------
    n0 : int
        Number of input features (size of the input layer).
    n1 : int, default 16
        Number of units in the hidden ReLU layer.
    lr : float, default 1e-3
        Learning rate passed to the Adam optimizer.

    Returns
    -------
    keras.Sequential
        Model compiled with binary cross-entropy loss and an accuracy metric.
    """
    # Input -> Dense(n1, relu) -> Dense(1, sigmoid)
    stack = [
        keras.layers.Input(shape=(n0,)),
        keras.layers.Dense(n1, activation="relu"),
        keras.layers.Dense(1, activation="sigmoid"),
    ]
    net = keras.Sequential(stack)

    adam = keras.optimizers.Adam(learning_rate=lr)
    net.compile(optimizer=adam, loss="binary_crossentropy", metrics=["accuracy"])
    return net

# Training settings read by the cross-validation loop:
# B is passed as batch_size and E as epochs to model.fit.
B, E = 32, 6


In [None]:
# =========================================================
# 4) CROSS-VALIDATION (training set only)
# =========================================================
K = 5

# Stratified folds preserve the class ratio of y_train_full in every fold,
# consistent with the stratified train/test split. Plain KFold ignores the
# labels, so the class balance can drift from fold to fold and add noise to
# the per-fold scores.
kf = StratifiedKFold(n_splits=K, shuffle=True, random_state=SEED)

accs, losses = [], []

# split() yields positional indices, hence the .iloc lookups below.
for fold, (tr, val) in enumerate(kf.split(X_train_full, y_train_full), 1):
    X_tr, X_val = X_train_full.iloc[tr], X_train_full.iloc[val]
    y_tr, y_val = y_train_full.iloc[tr], y_train_full.iloc[val]

    # Fit the scaler on the training part of the fold only, so no statistics
    # from the validation part leak into training.
    scaler = StandardScaler()
    X_tr = scaler.fit_transform(X_tr).astype(np.float32)
    X_val = scaler.transform(X_val).astype(np.float32)

    # A fresh model per fold: weights must not carry over between folds.
    model = build_model(n0=n0, n1=16)

    model.fit(X_tr, y_tr, epochs=E, batch_size=B, verbose=0)

    # evaluate returns [loss, accuracy] because the model is compiled with
    # a single "accuracy" metric.
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    losses.append(val_loss)
    accs.append(val_acc)

    print(f"Fold {fold}: val_loss = {val_loss:.4f} | val_acc = {val_acc:.4f}")

losses = np.array(losses)
accs = np.array(accs)

# Summary over folds; ddof=1 gives the sample standard deviation.
print("\nResumen CV (sobre entrenamiento)")
print("val_losses:", np.round(losses, 4))
print("val_accs:  ", np.round(accs, 4))
print("LOSS media:", losses.mean().round(4), " | LOSS desv:", losses.std(ddof=1).round(4))
print("ACC  media:", accs.mean().round(4),   " | ACC  desv:", accs.std(ddof=1).round(4))

In [None]:
# =========================================================
# 5) TRAIN THE FINAL MODEL (on the whole training set)
#    The chosen architecture/hyper-parameters are fixed here.
# =========================================================

# Fit the scaler on the training data only; the test set is just transformed
# with the training statistics.
scaler_final = StandardScaler()
X_train_final = scaler_final.fit_transform(X_train_full).astype(np.float32)
X_test_final  = scaler_final.transform(X_test).astype(np.float32)

############################################################
############################################################
############################################################
eta = 1e-3
# NOTE: E and B rebind the notebook-level names that the cross-validation
# cell reads; re-running that cell after this one will use these values.
E = 20
B = 16

# Reuse build_model instead of re-typing the layers and compile step, so the
# final network cannot drift from the architecture that was cross-validated.
modelo_final = build_model(n0=n0, n1=16, lr=eta)

modelo_final.fit(X_train_final, y_train_full, epochs=E, batch_size=B, verbose=0)
############################################################
############################################################
############################################################

In [None]:
# =========================================================
# 6) FINAL EVALUATION (run once) on the held-out TEST set
# =========================================================
# Flatten the model output to a 1-D vector of predicted probabilities.
y_proba = modelo_final.predict(X_test_final, verbose=0).reshape(-1)

# Threshold at 0.5 to turn probabilities into hard 0/1 predictions.
is_positive = y_proba >= 0.5
y_pred = is_positive.astype(int)

test_accuracy = accuracy_score(y_test, y_pred)
test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("\nEvaluación final en TEST (una sola vez)")
print("Accuracy test:", test_accuracy)
print("\nMatriz de confusión:\n", test_confusion)
print("\nReporte:\n", test_report)
