# Anexo - Validación Clustering

In [1]:
import os
# Limit OpenMP to a single thread. This is set before scikit-learn is imported
# below. NOTE(review): presumably this is here to silence scikit-learn's KMeans
# warning about a memory leak with MKL on Windows -- confirm it is still needed.
os.environ["OMP_NUM_THREADS"] = "1"

from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.metrics import silhouette_score, adjusted_rand_score, normalized_mutual_info_score
import numpy as np
import pandas as pd

from sklearn.datasets import make_blobs

# Synthetic benchmark data: 3000 samples drawn around 3 centers with a fixed
# seed. `y` holds the generating center of each sample and serves as ground
# truth for the external validation metrics computed later in the notebook.
X, y = make_blobs(n_samples=3000, centers=3, random_state=42)

# Preview the first three rows of the features and of the true labels.
for header, values in (("\tX\n", X), ("\ty\n", y)):
    print(header, values[:3])

	X
 [[-6.39232195 -5.80817973]
 [-2.46538615  8.86728413]
 [-3.87607632  8.78952073]]
	y
 [2 0 0]


## K-Means con 3 clústeres

In [2]:
# Build and fit a 3-cluster K-Means model on X, keeping the cluster id assigned
# to every sample.
# random_state pins the centroid initialisation so that Restart & Run All
# reproduces the same partition; 42 matches the seed used for make_blobs.
kmeans = KMeans(n_clusters=3, n_init="auto", random_state=42)
kmeans_labels = kmeans.fit_predict(X)

## Clustering Jerárquico con 3 clústeres

In [3]:
# Hierarchical (agglomerative) clustering into 3 groups, merging clusters with
# complete linkage over Manhattan distances.
agg_clustering = AgglomerativeClustering(
    n_clusters=3,
    metric="manhattan",
    linkage="complete",
)

# Fit on X and keep the cluster id assigned to every sample.
agg_labels = agg_clustering.fit_predict(X)

# Métricas de validación clustering.

### Silhouette

In [4]:
# Internal validation: silhouette score of each partition, computed from the
# feature matrix and the predicted labels only (K-Means first, then agglomerative).
silhouette_kmeans, silhouette_agg = (
    silhouette_score(X, predicted) for predicted in (kmeans_labels, agg_labels)
)

### Adjusted Rand Index

In [5]:
# External validation: adjusted Rand index of each partition against the
# reference labels y (K-Means first, then agglomerative).
rand_index_kmeans, rand_index_agg = (
    adjusted_rand_score(y, predicted) for predicted in (kmeans_labels, agg_labels)
)

### Normalized Mutual Information

In [6]:
# External validation: normalized mutual information between the reference
# labels y and each partition (K-Means first, then agglomerative).
mutual_info_kmeans, mutual_info_agg = (
    normalized_mutual_info_score(y, predicted)
    for predicted in (kmeans_labels, agg_labels)
)

### Resultados métricas

In [7]:
# Report every validation metric, one "label value" line per result, grouped
# by metric with K-Means listed before the agglomerative model.
metric_report = [
    ("K-Means Silhouette Score:", silhouette_kmeans),
    ("Agglomerative Clustering Silhouette Score:", silhouette_agg),
    ("Adjusted Rand Index (K-Means):", rand_index_kmeans),
    ("Adjusted Rand Index (Agglomerative):", rand_index_agg),
    ("Normalized Mutual Information (K-Means):", mutual_info_kmeans),
    ("Normalized Mutual Information (Agglomerative):", mutual_info_agg),
]

for label, value in metric_report:
    print(label, value)

K-Means Silhouette Score: 0.8422938197385735
Agglomerative Clustering Silhouette Score: 0.8422938197385735
Adjusted Rand Index (K-Means): 1.0
Adjusted Rand Index (Agglomerative): 1.0
Normalized Mutual Information (K-Means): 1.0
Normalized Mutual Information (Agglomerative): 1.0


## Ejemplo social media

In [8]:
# Load the social-media dataset; the path is relative to the working directory.
df = pd.read_csv("social_media.csv")

# NOTE: this cell rebinds X, y, agg_clustering, agg_labels, kmeans,
# kmeans_labels and the silhouette_* variables used by the synthetic example
# above. Re-running the earlier metric cells after this one will therefore
# operate on the social-media data instead of the blobs.

# Feature matrix built from the columns "op", "co", "ex", "ag", "ne" and
# "wordcount".
# NOTE(review): the features are clustered unscaled. If "wordcount" lives on a
# different scale than the other columns it will dominate the distances --
# consider standardizing first. TODO confirm against the data.
X = np.array(df[["op", "co", "ex", "ag", "ne", "wordcount"]])
# The "categoria" labels are extracted here but not used by the metrics below.
y = np.array(df["categoria"])

# Hierarchical clustering: 3 groups, complete linkage over Manhattan distances.
agg_clustering = AgglomerativeClustering(n_clusters=3,
                                         metric="manhattan",
                                         linkage="complete")

agg_labels = agg_clustering.fit_predict(X)

# K-Means with 3 clusters. random_state pins the centroid initialisation so the
# silhouette printed below is reproducible across Restart & Run All; 42 matches
# the seed used for make_blobs earlier in the notebook.
kmeans = KMeans(n_clusters=3, n_init="auto", random_state=42)
kmeans_labels = kmeans.fit_predict(X)

# Internal validation only: silhouette score of each partition.
silhouette_kmeans = silhouette_score(X, kmeans_labels)
silhouette_agg = silhouette_score(X, agg_labels)

print("K-Means Silhouette Score:", silhouette_kmeans)
print("Agglomerative Clustering Silhouette Score:", silhouette_agg)

K-Means Silhouette Score: 0.470472208728775
Agglomerative Clustering Silhouette Score: 0.45791794000067226
