In [None]:
# Uncomment to install the extra dependencies on a fresh runtime:
# %pip install hdbscan
# %pip install scikit-learn-extra

In [None]:
import numpy as np
import pandas as pd
from scipy.spatial import distance
from sklearn_extra.cluster import KMedoids
from scipy.cluster.hierarchy import linkage, fcluster, dendrogram
import matplotlib.pyplot as plt

In [None]:
# Folder holding the input CSV files. Kept in one place so the notebook can be
# pointed at another location by editing a single line.
# NOTE(review): this is a Google Drive path inside Colab; nothing in this notebook
# mounts the drive, so it presumably has to be mounted beforehand — confirm.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/clustering'

# Each file is later passed to the clustering helpers as a precomputed distance matrix.
# NOTE(review): read_csv consumes the first row as a header and does not treat any
# column as an index — verify the CSV layout really yields a square matrix.
d_gaus = pd.read_csv(f'{DATA_DIR}/distancia_gaussians.csv')
d_nonzero = pd.read_csv(f'{DATA_DIR}/non_zero_covariance.csv')
d_sizes = pd.read_csv(f'{DATA_DIR}/sizes.csv')
d_spiral = pd.read_csv(f'{DATA_DIR}/spiral.csv')
d_variances = pd.read_csv(f'{DATA_DIR}/variances.csv')

In [None]:
def generate_cluster_summary(labels):
    """Build a table with the number of points assigned to each cluster.

    Parameters:
    labels: sequence of cluster labels, one entry per point.

    Returns:
    A DataFrame with one row per distinct label and two columns:
    "Cluster" (the label) and "Number of Points" (how often it occurs),
    with rows ordered by label.
    """
    # value_counts() orders by frequency, so re-sort to follow the label order.
    counts_by_label = pd.Series(labels).value_counts().sort_index()
    columns = {
        "Cluster": counts_by_label.index,
        "Number of Points": counts_by_label.values,
    }
    return pd.DataFrame(columns)
def elbow_method_with_distances(distance_matrix, k_range):
    """
    Plot an elbow curve for K-Medoids fitted on a precomputed distance matrix.

    Parameters:
    distance_matrix: ndarray of shape (n_samples, n_samples)
        Pairwise distances between the points.
    k_range: range
        Values of k (number of clusters) to try.

    Returns:
    None (draws the elbow plot)
    """
    # Fit one model per k and keep its `inertia_` value.
    inertia = [
        KMedoids(n_clusters=k, metric='precomputed', random_state=0)
        .fit(distance_matrix)
        .inertia_
        for k in k_range
    ]

    # Plot inertia against k.
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.plot(k_range, inertia, 'bo-')
    ax.set_xlabel('Número de clusters (k)')
    ax.set_ylabel('Inercia')
    ax.set_title('Método del codo')
    plt.show()
def apply_hierarchical_clustering(dist_matrix, n_clusters,criterion='inconsistent',method='single'):
    """Run agglomerative clustering on a square distance matrix and cut it into flat clusters.

    Parameters:
    dist_matrix: square pairwise distance matrix.
    n_clusters: value forwarded to `fcluster` as its threshold `t`. It is a
        maximum number of clusters only when criterion='maxclust'; with the
        default 'inconsistent' criterion it is an inconsistency threshold.
    criterion: flat-cluster criterion forwarded to `fcluster`.
    method: linkage method forwarded to `linkage`.

    Returns:
    (Z, labels): the linkage matrix and the flat cluster label of each point.
    """
    # linkage() expects the condensed (upper-triangular vector) form.
    condensed = distance.squareform(dist_matrix)
    Z = linkage(condensed, method=method)
    return Z, fcluster(Z, t=n_clusters, criterion=criterion)
def apply_kmedoids(dist_matrix, n_clusters):
    """Cluster points with K-Medoids using a precomputed distance matrix.

    Parameters:
    dist_matrix: square pairwise distance matrix.
    n_clusters: number of clusters to form.

    Returns:
    The `labels_` attribute of the fitted model (one cluster label per point).
    """
    model = KMedoids(n_clusters=n_clusters, metric='precomputed', random_state=42)
    return model.fit(dist_matrix).labels_

In [None]:
# Elbow curve for d_gaus, trying k = 1..10.
elbow_method_with_distances(distance_matrix=d_gaus, k_range=range(1, 11))

In [None]:
# K-Medoids on d_gaus, followed by the size of each resulting cluster.
# k = 3 — presumably read off the elbow plot; confirm.
n_clusters = 3
labels = apply_kmedoids(dist_matrix=d_gaus, n_clusters=n_clusters)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

In [None]:
# Hierarchical clustering of d_gaus: complete linkage, cut into at most 3 flat clusters.
n_clusters = 3
Z, labels = apply_hierarchical_clustering(
    d_gaus, n_clusters, criterion='maxclust', method='complete'
)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

# Draw the dendrogram of the linkage matrix.
fig, ax = plt.subplots(figsize=(10, 7))
dendrogram(Z, ax=ax)
ax.set_title('Dendrogram')
ax.set_xlabel('Points')
ax.set_ylabel('Distance')
plt.show()

In [None]:
# Elbow curve for d_sizes, trying k = 1..10.
elbow_method_with_distances(distance_matrix=d_sizes, k_range=range(1, 11))

In [None]:
# K-Medoids on d_sizes, followed by the size of each resulting cluster.
# k = 2 — presumably read off the elbow plot; confirm.
n_clusters = 2
labels = apply_kmedoids(dist_matrix=d_sizes, n_clusters=n_clusters)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

In [None]:
# Hierarchical clustering of d_sizes: Ward linkage, cut into at most 2 flat clusters.
# NOTE(review): SciPy documents Ward linkage as well-defined only for Euclidean
# distances when a precomputed matrix is supplied — confirm d_sizes is Euclidean.
n_clusters = 2
Z, labels = apply_hierarchical_clustering(
    d_sizes, n_clusters, criterion='maxclust', method='ward'
)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

# Draw the dendrogram of the linkage matrix.
fig, ax = plt.subplots(figsize=(10, 7))
dendrogram(Z, ax=ax)
ax.set_title('Dendrogram')
ax.set_xlabel('Points')
ax.set_ylabel('Distance')
plt.show()

In [None]:
# Elbow curve for d_variances, trying k = 1..10.
elbow_method_with_distances(distance_matrix=d_variances, k_range=range(1, 11))

In [None]:
# K-Medoids on d_variances, followed by the size of each resulting cluster.
# k = 2 — presumably read off the elbow plot; confirm.
n_clusters = 2
labels = apply_kmedoids(dist_matrix=d_variances, n_clusters=n_clusters)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

In [None]:
# Hierarchical clustering of d_variances: Ward linkage, cut into at most 2 flat clusters.
# NOTE(review): SciPy documents Ward linkage as well-defined only for Euclidean
# distances when a precomputed matrix is supplied — confirm d_variances is Euclidean.
n_clusters = 2
Z, labels = apply_hierarchical_clustering(
    d_variances, n_clusters, criterion='maxclust', method='ward'
)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

# Draw the dendrogram of the linkage matrix.
fig, ax = plt.subplots(figsize=(10, 7))
dendrogram(Z, ax=ax)
ax.set_title('Dendrogram')
ax.set_xlabel('Points')
ax.set_ylabel('Distance')
plt.show()

In [None]:
# Elbow curve for d_nonzero, trying k = 1..10.
elbow_method_with_distances(distance_matrix=d_nonzero, k_range=range(1, 11))

In [None]:
# K-Medoids on d_nonzero, followed by the size of each resulting cluster.
# k = 2 — presumably read off the elbow plot; confirm.
n_clusters = 2
labels = apply_kmedoids(dist_matrix=d_nonzero, n_clusters=n_clusters)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

In [None]:
# Hierarchical clustering of d_nonzero: single linkage, cut into at most 2 flat clusters.
n_clusters = 2
Z, labels = apply_hierarchical_clustering(
    d_nonzero, n_clusters, criterion='maxclust', method='single'
)
cluster_summary = generate_cluster_summary(labels=labels)
print(cluster_summary)

# Draw the dendrogram of the linkage matrix.
fig, ax = plt.subplots(figsize=(10, 7))
dendrogram(Z, ax=ax)
ax.set_title('Dendrogram')
ax.set_xlabel('Points')
ax.set_ylabel('Distance')
plt.show()