# 02 — Efecto del escalado en k-means y SVM

In [None]:

# %pip install numpy pandas matplotlib scikit-learn --quiet
import numpy as np, pandas as pd, matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.svm import SVC
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.metrics import silhouette_score
# Seed NumPy's global random generator so any NumPy-level randomness is repeatable.
np.random.seed(7)


In [None]:

# Three 2-D Gaussian blobs with different spreads; fixed seed for repeatability.
X, y = make_blobs(
    n_samples=600,
    centers=[[0, 0], [10, 0], [0, 5]],
    cluster_std=[1.0, 2.0, 0.5],
    random_state=7,
)
# Stretch the first feature by 50x so the two columns live on very different scales.
X = np.column_stack((X[:, 0] * 50, X[:, 1]))


In [None]:

# K-means on the raw features vs. on z-score standardized features.
# Both models use the same hyperparameters and seed.
km_raw = KMeans(n_clusters=3, n_init=10, random_state=7)
lab_raw = km_raw.fit_predict(X)

Xs = StandardScaler().fit_transform(X)
km_s = KMeans(n_clusters=3, n_init=10, random_state=7)
lab_s = km_s.fit_predict(Xs)

# Each silhouette is evaluated in the feature space its model was fitted on.
print("silhouette sin escalar:", silhouette_score(X, lab_raw))
print("silhouette con z-score:", silhouette_score(Xs, lab_s))


In [None]:

# SVM (RBF kernel) on raw features vs. z-score standardized features.
# One stratified split is shared by both models, so they are scored on
# exactly the same held-out rows.
Xtr_raw, Xte_raw, ytr, yte = train_test_split(
    X, y, test_size=0.3, random_state=7, stratify=y
)
acc_raw = SVC(kernel="rbf").fit(Xtr_raw, ytr).score(Xte_raw, yte)

# Fit the scaler on the training rows only, then apply it to both partitions.
# Fitting it on the full dataset would leak test-set statistics (mean/std)
# into the training features and bias the reported accuracy.
scaler = StandardScaler().fit(Xtr_raw)
Xtr, Xte = scaler.transform(Xtr_raw), scaler.transform(Xte_raw)
acc_s = SVC(kernel="rbf").fit(Xtr, ytr).score(Xte, yte)

print("SVM acc sin escalar:", acc_raw, " | con z-score:", acc_s)


In [None]:

# Side-by-side view of the two k-means labelings. Both panels are drawn in the
# original (unscaled) coordinates so only the colouring differs between them.
# `plt` comes from the imports cell at the top of the notebook.
fig, ax = plt.subplots(1, 2, figsize=(10, 4))

ax[0].scatter(X[:, 0], X[:, 1], s=8, c=lab_raw)
ax[0].set_title("KMeans SIN escalar")

ax[1].scatter(X[:, 0], X[:, 1], s=8, c=lab_s)
ax[1].set_title("KMeans CON z-score")

# Label the axes so the figure stands alone; the first feature was stretched x50.
for panel in ax:
    panel.set_xlabel("x0 (escala x50)")
    panel.set_ylabel("x1")

fig.tight_layout()
plt.show()
