In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

from sklearn.datasets import make_moons, make_classification
from sklearn.model_selection import StratifiedKFold, cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn.pipeline import Pipeline
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Seed NumPy's global RNG and make sure the "figs" output directory exists
# (figures are saved there by the plotting cells further down).
np.random.seed(42)
os.makedirs("figs", exist_ok=True)

# Plotting helper for 2D decision boundaries
def plot_decision_boundary(clf, X, y, ax, title="", step=0.02, proba_class=1):
    """Draw a classifier's decision surface and the labelled points on ``ax``.

    Parameters
    ----------
    clf : fitted estimator
        Must expose ``predict``; when it also exposes ``predict_proba`` the
        probability column ``proba_class`` is shaded and the 0.5 level is
        outlined in black.
    X : ndarray with at least two columns
        Points to overlay; columns 0 and 1 are the plotted coordinates.
    y : ndarray
        Labels; samples with label 0 are drawn in blue, label 1 in red.
    ax : matplotlib-like axes
        Target axes (only its plotting/setter methods are used).
    title : str
        Axes title.
    step : float
        Grid spacing used to sample the surface.
    proba_class : int
        Column of ``predict_proba`` to display.
    """
    margin = 0.8
    x_min, x_max = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_min, y_max = X[:, 1].min() - margin, X[:, 1].max() + margin

    # Regular grid covering the padded bounding box of the data.
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))
    grid_points = np.column_stack([xx.ravel(), yy.ravel()])

    # Prefer class probabilities; fall back to hard predictions otherwise.
    has_proba = hasattr(clf, "predict_proba")
    if has_proba:
        surface = clf.predict_proba(grid_points)[:, proba_class]
    else:
        surface = clf.predict(grid_points)
    surface = surface.reshape(xx.shape)

    # Shaded background, plus the 0.5 iso-line when probabilities are available.
    ax.contourf(xx, yy, surface, alpha=0.25, levels=25)
    if has_proba:
        ax.contour(xx, yy, surface, levels=[0.5], colors="k", linewidths=1.2)

    # Overlay the samples, one scatter call per class.
    for label, color in ((0, "tab:blue"), (1, "tab:red")):
        members = y == label
        ax.scatter(X[members, 0], X[members, 1], s=20, c=color,
                   edgecolors="k", label=f"Clase {label}")

    ax.set_title(title)
    ax.set_xlabel("$x_1$")
    ax.set_ylabel("$x_2$")
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)

In [None]:
# Synthetic two-moons dataset (1000 noisy samples), then a seeded 75/25
# train/test split stratified on the label.
X_knn, y_knn = make_moons(n_samples=1000, noise=0.25, random_state=42)
Xtr_knn, Xte_knn, ytr_knn, yte_knn = train_test_split(
    X_knn, y_knn, test_size=0.25, random_state=42, stratify=y_knn
)

# Cell output: train/test shapes and the per-class counts of the full dataset.
Xtr_knn.shape, Xte_knn.shape, np.bincount(y_knn)

In [None]:
# Inverse covariance matrix (VI) for the Mahalanobis metric, estimated on the
# standardized training split.
# NOTE(review): VI is computed once from the whole training split and then
# reused inside every CV fold — confirm this small leakage is acceptable.
scaler_for_vi = StandardScaler()
Xtr_scaled = scaler_for_vi.fit_transform(Xtr_knn)
cov = np.cov(Xtr_scaled, rowvar=False)
VI = np.linalg.inv(cov + 1e-6*np.eye(cov.shape[0]))  # small diagonal term added before inverting (regularization)

# Configurations are (label, pipeline) pairs; this factory builds the pipeline.
def knn_pipeline(metric, metric_params=None, weights="uniform", n_neighbors=7, cosine_norm=False, mahalanobis_VI=None):
    """Build a ``StandardScaler -> [Normalizer] -> KNeighborsClassifier`` pipeline.

    Parameters
    ----------
    metric : str
        Distance metric name forwarded to ``KNeighborsClassifier``.
    metric_params : dict or None
        Extra keyword arguments for the metric. The dict is copied, never
        mutated. For ``metric="minkowski"`` a ``"p"`` entry is forwarded through
        the classifier's own ``p`` argument instead, which avoids scikit-learn's
        "Parameter p is found in metric_params" warning while selecting the
        same distance.
    weights : str
        Neighbour weighting scheme (e.g. ``"uniform"`` or ``"distance"``).
    n_neighbors : int
        Number of neighbours.
    cosine_norm : bool
        Force an L2 ``Normalizer`` step even when ``metric`` is not ``"cosine"``.
    mahalanobis_VI : array-like or None
        Inverse covariance matrix for ``metric="mahalanobis"``. When given it
        overrides any ``"VI"`` entry in ``metric_params``; when omitted, a
        ``"VI"`` already present in ``metric_params`` is kept.

    Returns
    -------
    sklearn.pipeline.Pipeline
        Unfitted pipeline with steps ``"sc"``, optionally ``"norm"``, and ``"knn"``.
    """
    params = dict(metric_params) if metric_params else {}
    knn_kwargs = {}

    # Minkowski order belongs in the dedicated `p` argument, not in metric_params.
    if metric == "minkowski" and "p" in params:
        knn_kwargs["p"] = params.pop("p")

    if metric == "mahalanobis":
        # The explicit argument wins; otherwise keep a caller-supplied "VI" and
        # only fall back to the legacy {"VI": None} when nothing was provided.
        if mahalanobis_VI is not None or "VI" not in params:
            params["VI"] = mahalanobis_VI

    steps = [("sc", StandardScaler())]
    # Cosine distance compares angles, so unit-normalize the rows first.
    if metric == "cosine" or cosine_norm:
        steps.append(("norm", Normalizer(norm="l2")))

    clf = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights,
                               metric=metric, metric_params=params or None,
                               **knn_kwargs)
    steps.append(("knn", clf))
    return Pipeline(steps)

# Candidate k-NN configurations to compare, as (label, unfitted pipeline) pairs.
configs = [
    ("L2 / uniform",   knn_pipeline("minkowski", {"p":2}, weights="uniform")),
    ("L2 / distance",  knn_pipeline("minkowski", {"p":2}, weights="distance")),
    ("L1 / uniform",   knn_pipeline("minkowski", {"p":1}, weights="uniform")),
    ("L∞ / uniform",   knn_pipeline("chebyshev", None,    weights="uniform")),
    ("coseno / uniform", knn_pipeline("cosine", None, weights="uniform", cosine_norm=True)),
    ("Mahalanobis / uniform", knn_pipeline("mahalanobis", None, weights="uniform", mahalanobis_VI=VI)),
]

# Seeded 5-fold stratified splitter, reused for every configuration.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validate each configuration on the training split only; collect the
# mean and standard deviation of the fold accuracies.
means, stds = [], []
for name, pipe in configs:
    scores = cross_val_score(pipe, Xtr_knn, ytr_knn, cv=skf, scoring="accuracy", n_jobs=-1)
    means.append(scores.mean()); stds.append(scores.std())
    print(f"{name:22s}  CV acc: {scores.mean():.3f} ± {scores.std():.3f}")

# Bar chart of CV accuracy (error bars = std across folds)
plt.figure(figsize=(8,4))
x = np.arange(len(configs))
plt.bar(x, means, yerr=stds, capsize=4)
plt.xticks(x, [c[0] for c in configs], rotation=20)
plt.ylabel("Accuracy (CV)")
plt.title("k-NN: comparación de métricas y pesos (CV 5-fold)")
plt.tight_layout()
plt.savefig("figs/knn_cv_metrics_bar.png", dpi=200, bbox_inches="tight")
plt.show()

# Decision boundaries: 2x3 grid, one panel per configuration.
# Each pipeline is fitted on the full training split before plotting.
fig, axes = plt.subplots(2, 3, figsize=(12,8))
axes = axes.ravel()
for ax, (name, pipe) in zip(axes, configs):
    pipe.fit(Xtr_knn, ytr_knn)
    plot_decision_boundary(pipe, Xtr_knn, ytr_knn, ax, title=name)
plt.tight_layout()
plt.savefig("figs/knn_boundaries_grid.png", dpi=200, bbox_inches="tight")
plt.show()

# Test-set evaluation of the configuration with the highest mean CV accuracy.
# The pipeline was already fitted in the boundary loop above; it is fitted
# again here on the same training split before scoring on the held-out test set.
best_idx = int(np.argmax(means))
best_name, best_pipe = configs[best_idx]
best_pipe.fit(Xtr_knn, ytr_knn)
acc_test = accuracy_score(yte_knn, best_pipe.predict(Xte_knn))
print(f"Mejor config por CV: {best_name} | Test acc: {acc_test:.3f}")

# Perceptron

In [None]:
from sklearn.datasets import make_circles
from sklearn.preprocessing import StandardScaler

# Noisy concentric circles -> NOT linearly separable
X_perc, y_perc01 = make_circles(n_samples=600, factor=0.45, noise=0.18, random_state=12)
y_perc = np.where(y_perc01==1, 1, -1)  # remap labels from {0, 1} to {-1, +1}

# Standardize the features with a scaler fitted on the full dataset
sc_p = StandardScaler().fit(X_perc)
X_perc_s = sc_p.transform(X_perc)

# Scatter plot of the standardized points, one colour per class
plt.figure(figsize=(4.8,4.2))
plt.scatter(X_perc_s[y_perc==-1,0], X_perc_s[y_perc==-1,1], s=15, c="tab:blue", edgecolors="k", label="y=-1")
plt.scatter(X_perc_s[y_perc==+1,0], X_perc_s[y_perc==+1,1], s=15, c="tab:red",  edgecolors="k", label="y=+1")
plt.legend(); plt.title("Círculos concéntricos (no lineal)")
plt.xlabel("$x_1$ (std)"); plt.ylabel("$x_2$ (std)")
plt.tight_layout(); plt.show()

In [None]:
def perceptron_train(X, y, eta=1.0, max_epochs=50, shuffle=True, random_state=0, early_stop=False):
    """Train a perceptron with the classic mistake-driven update rule.

    Parameters
    ----------
    X : array-like of shape (n, d)
        Training samples.
    y : array-like of shape (n,)
        Labels in {-1, +1}.
    eta : float
        Learning rate applied to every update.
    max_epochs : int
        Maximum number of passes over the data.
    shuffle : bool
        Visit the samples in a freshly shuffled order on each epoch.
    random_state : int, None or numpy.random.RandomState
        Seed (or generator) for the shuffling. The default ``0`` reproduces
        the previously hard-coded seed.
    early_stop : bool
        When True, stop after the first epoch that makes no update. The
        default ``False`` always runs ``max_epochs`` epochs.

    Returns
    -------
    w : ndarray of shape (d,)
        Final weight vector.
    b : float
        Final bias.
    history : list of (ndarray, float, int)
        One ``(w copy, b, number of updates)`` entry per completed epoch.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    n, d = X.shape
    w = np.zeros(d)
    b = 0.0
    history = []  # (w, b, errors) per epoch

    if isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    for _ in range(max_epochs):
        order = np.arange(n)
        if shuffle:
            rng.shuffle(order)
        errors = 0
        for i in order:
            # A non-positive margin means the sample is misclassified or lies
            # exactly on the boundary: move the hyperplane towards it.
            if y[i] * (np.dot(w, X[i]) + b) <= 0:
                w = w + eta * y[i] * X[i]
                b = b + eta * y[i]
                errors += 1
        history.append((w.copy(), b, errors))
        # On non-separable data errors normally stays > 0, so this only
        # triggers once the data has been perfectly separated.
        if early_stop and errors == 0:
            break
    return w, b, history

def plot_separator(ax, w, b, **kwargs):
    """Draw the line ``w[0]*x + w[1]*y + b = 0`` across the current x-range of ``ax``.

    Parameters
    ----------
    ax : matplotlib-like axes
        Must provide ``get_xlim``, ``get_ylim`` and ``plot``.
    w : sequence of two floats
        Normal vector of the separating line.
    b : float
        Bias term.
    **kwargs
        Forwarded unchanged to ``ax.plot``.

    Notes
    -----
    A (near-)vertical line (``|w[1]| < 1e-9``) is drawn over the current
    y-range. If both components of ``w`` are below that tolerance there is no
    line to draw and nothing is plotted.
    """
    tol = 1e-9
    if abs(w[1]) < tol:
        if abs(w[0]) < tol:
            # Zero weight vector: the equation defines no line.
            return
        # Vertical line x = -b / w[0]; w[0] is safely away from zero here,
        # so no epsilon is needed in the denominator.
        x0 = -b / w[0]
        ax.plot([x0, x0], ax.get_ylim(), **kwargs)
        return

    x_vals = np.array(ax.get_xlim())
    # Solve for y with one common denominator so that slope and intercept
    # describe exactly the same line.
    y_vals = -(w[0] * x_vals + b) / w[1]
    ax.plot(x_vals, y_vals, **kwargs)


# Train for 200 epochs on the standardized circles data and keep the per-epoch history.
w_star, b_star, hist = perceptron_train(X_perc_s, y_perc, eta=1.0, max_epochs=200, shuffle=True)

print(f"Épocas totales: {len(hist)}")
print(f"Errores en últimas 5 épocas: {[e for (_,_,e) in hist[-5:]]}")

# ----------------- FINAL PLOT -----------------
fig, ax = plt.subplots(figsize=(5.5,4.5))
ax.scatter(X_perc_s[y_perc==-1,0], X_perc_s[y_perc==-1,1], s=15, c="tab:blue", edgecolors="k", label="y=-1")
ax.scatter(X_perc_s[y_perc==+1,0], X_perc_s[y_perc==+1,1], s=15, c="tab:red",  edgecolors="k", label="y=+1")
ax.set_title("Perceptrón en dataset no lineal (no converge)")
ax.set_xlabel("$x_1$ (std)"); ax.set_ylabel("$x_2$ (std)")

# Axis limits (data range padded by 0.8) and the learned separating line.
# The limits are set first because plot_separator reads them from the axes.
x_min, x_max = X_perc_s[:,0].min()-0.8, X_perc_s[:,0].max()+0.8
y_min, y_max = X_perc_s[:,1].min()-0.8, X_perc_s[:,1].max()+0.8
ax.set_xlim(x_min, x_max); ax.set_ylim(y_min, y_max)
plot_separator(ax, w_star, b_star, color="k", linewidth=2)

# Equation label placed on the line at the horizontal midpoint of the axes;
# the else branch handles a (near-)vertical line with a rotated label.
xm = 0.5*(x_min+x_max)
if abs(w_star[1]) > 1e-9:
    ym = -(w_star[0]/w_star[1])*xm - b_star/(w_star[1]+1e-12)
    ax.text(xm, ym, r"$w^\top x + b = 0$", fontsize=10,
            bbox=dict(facecolor='white', edgecolor='none', alpha=0.8))
else:
    ax.text(-b_star/(w_star[0]+1e-9), 0.5*(y_min+y_max), r"$w^\top x + b = 0$", fontsize=10,
            bbox=dict(facecolor='white', edgecolor='none', alpha=0.8), rotation=90)

ax.legend(loc="upper right"); plt.tight_layout()
plt.show()

# ----------------- ERROR CURVE -----------------
# Number of updates (misclassifications) made in each epoch, plotted against the epoch index.
errors = [e for (_,_,e) in hist]
plt.figure(figsize=(6,3.5))
plt.plot(np.arange(1,len(errors)+1), errors, marker="o")
plt.xlabel("Época"); plt.ylabel("# errores (actualizaciones)")
plt.title("Perceptrón: errores por época (no separable)")
plt.grid(True); plt.tight_layout(); plt.show()