In [1]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
import plotly.express as px
import plotly.io as pio

from clustering import (GaussianMixtures, SpearmanGaussianMixtures, MADSpearmanGaussianMixtures,
                        MedianInitSpearmanGaussianMixtures)
from utils import plot_gaussian_mixtures

# Make 'ggplot2' the default Plotly template for figures created after this line.
pio.templates.default = 'ggplot2'

In [2]:
# Synthetic dataset: 100 points in three blobs whose centres are drawn from
# the box [-10, 10] (fixed random_state so the blobs are reproducible).
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-10, 10],
    random_state=42,
)

# Standardise every feature before clustering.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)

In [None]:
# Scatter of the standardized points (first two columns), coloured by the
# blob index that make_blobs returned for each sample.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [4]:
# Fit the project's GaussianMixtures model on the clean, well-separated blobs.
# Only n_clusters is passed; every other setting is the class default.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [5]:
# Score the fitted model against the generating labels.
# NOTE(review): the metric is defined in clustering.py; the recorded outputs
# look like a fraction of correctly assigned points — confirm.
gmm.score(true_labels)

1.0

In [None]:
# Visualise the fitted GaussianMixtures model together with the standardized data
# using the helper imported from utils.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [7]:
# Refit on the same clean data with the MADSpearmanGaussianMixtures variant.
# `gmm` is rebound, so the previous model is no longer reachable.
# max_iter=100 is passed explicitly (presumably the iteration cap — confirm in clustering.py).
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [8]:
# Score the MADSpearman variant against the generating labels (clean data).
gmm.score(true_labels)

1.0

In [None]:
# Visualise the MADSpearman fit on the clean, standardized data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [10]:
# Five outlier points drawn uniformly from [-10, 10)^2 (the same box the blob
# centres were drawn from), each given an arbitrary label in {0, 1, 2}.
#
# A seeded Generator replaces the unseeded global np.random state so that
# Restart & Run All reproduces the same outliers, and therefore the same scores.
# Recorded outputs below were produced with unseeded noise and will change once.
rng = np.random.default_rng(42)
noise = (rng.random((5, 2)) - 0.5) * 20
noise_labels = rng.integers(0, 3, 5)

In [11]:
# Add the outliers to the dataset and re-standardise everything together.
# NOTE: x_train / true_labels are rebound to the extended arrays, so running
# this cell a second time appends the noise again.
x_train = np.concatenate((x_train, noise), axis=0)
true_labels = np.concatenate((np.ravel(true_labels), np.ravel(noise_labels)))
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)

In [None]:
# Scatter of the noisy, re-standardized dataset. The colour of the appended
# noise points is their randomly drawn label, not a real cluster membership.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [16]:
# GaussianMixtures refit on the noisy dataset (blobs plus the appended outliers).
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [17]:
# Score against true_labels.
# NOTE(review): true_labels now ends with the randomly drawn noise_labels, so
# the contribution of the noise points to this score is down to chance.
gmm.score(true_labels)

0.7142857142857143

In [None]:
# Visualise the GaussianMixtures fit on the noisy dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [19]:
# SpearmanGaussianMixtures variant on the same noisy data.
# max_iter=100 is passed explicitly (presumably the iteration cap — confirm in clustering.py).
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [20]:
# Score the Spearman variant; labels of the appended noise points are random (see above cells).
gmm.score(true_labels)

0.6571428571428571

In [None]:
# Visualise the SpearmanGaussianMixtures fit on the noisy dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [22]:
# MADSpearmanGaussianMixtures variant on the noisy data, same settings as the clean run.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [23]:
# Score the MADSpearman variant on the noisy dataset (noise labels are random).
gmm.score(true_labels)

0.6476190476190476

In [25]:
# Visualise the MADSpearman fit on the noisy dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [26]:
# MedianInitSpearmanGaussianMixtures variant on the noisy data.
# NOTE(review): the name suggests a median-based initialisation — confirm in clustering.py.
gmm = MedianInitSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [27]:
# Score the MedianInit variant on the noisy dataset (noise labels are random).
gmm.score(true_labels)

0.6571428571428571

In [28]:
# Visualise the MedianInit fit on the noisy dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

# Clusters más juntos

In [31]:
# Second scenario: same generator settings, but blob centres are drawn from
# the narrower box [-5, 5], so the clusters sit closer together.
# This replaces x_train / true_labels / x_train_scaled from the first scenario.
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-5, 5],
    random_state=42,
)

# Standardise every feature before clustering.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)

In [32]:
# GaussianMixtures on the clean dataset with closer clusters (center_box [-5, 5]).
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [33]:
# Score against the generating labels (clean data, closer clusters).
gmm.score(true_labels)

0.99

In [34]:
# Visualise the GaussianMixtures fit for the closer-clusters scenario.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [35]:
# MADSpearmanGaussianMixtures on the same clean, closer-clusters data.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [36]:
# Score the MADSpearman variant against the generating labels (clean data).
gmm.score(true_labels)

0.99

In [37]:
# Visualise the MADSpearman fit for the closer-clusters scenario.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [38]:
# Contaminate the closer-clusters dataset with five outliers drawn uniformly
# from [-5, 5)^2, each given an arbitrary label in {0, 1, 2}.
#
# A seeded Generator replaces the unseeded global np.random state so that
# Restart & Run All reproduces the same outliers, and therefore the same scores.
# Recorded outputs below were produced with unseeded noise and will change once.
rng = np.random.default_rng(42)
noise = (rng.random((5, 2)) - 0.5) * 10
noise_labels = rng.integers(0, 3, 5)

# NOTE: x_train / true_labels are rebound to the extended arrays, so running
# this cell a second time appends another five points.
x_train = np.append(x_train, noise, axis=0)
true_labels = np.append(true_labels, noise_labels)

# Re-standardise with the outliers included.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Scatter of the noisy closer-clusters dataset; appended noise points are
# coloured by their randomly drawn label.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [40]:
# GaussianMixtures on the noisy closer-clusters dataset.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [41]:
# Score against true_labels, which now includes the random noise labels.
gmm.score(true_labels)

0.9714285714285714

In [42]:
# Visualise the GaussianMixtures fit on the noisy closer-clusters dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [43]:
# SpearmanGaussianMixtures on the noisy closer-clusters dataset.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [44]:
# Score the Spearman variant (noise labels in true_labels are random).
gmm.score(true_labels)

0.9619047619047619

In [45]:
# Visualise the Spearman fit on the noisy closer-clusters dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [46]:
# MADSpearmanGaussianMixtures on the noisy closer-clusters dataset.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [47]:
# Score the MADSpearman variant (noise labels in true_labels are random).
gmm.score(true_labels)

0.9619047619047619

In [48]:
# Hand-built overlay for this fit (the other cells call plot_gaussian_mixtures):
# the model's centroids are drawn as black thin crosses, then the standardized
# samples are added as a second trace coloured by the model's assigned labels.
centroid_marker = dict(size=12, color='black', symbol='x-thin', line=dict(width=2))

fig = px.scatter(x=gmm.centroids[:, 0], y=gmm.centroids[:, 1])
fig.update_traces(mode='markers', marker=centroid_marker)
fig.add_scatter(
    x=x_train_scaled[:, 0],
    y=x_train_scaled[:, 1],
    mode='markers',
    marker=dict(color=gmm.labels),
)
fig.show()

# Aún más juntos

In [107]:
# Third scenario: blob centres are drawn from the even narrower box
# [-2.5, 2.5], so the clusters sit closer together still.
# This replaces x_train / true_labels / x_train_scaled from the previous scenario.
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-2.5, 2.5],
    random_state=42,
)

# Standardise every feature before clustering.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)

In [108]:
# GaussianMixtures on the clean dataset with the tightest centres (center_box [-2.5, 2.5]).
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [109]:
# Score against the generating labels (clean data, tightest centres).
gmm.score(true_labels)

0.65

In [110]:
# Visualise the GaussianMixtures fit for the tightest-centres scenario.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [111]:
# SpearmanGaussianMixtures on the same clean, tightest-centres data.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [112]:
# Score the Spearman variant against the generating labels (clean data).
gmm.score(true_labels)

0.89

In [113]:
# Visualise the Spearman fit for the tightest-centres scenario.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [114]:
# Contaminate the tightest-centres dataset with five outliers drawn uniformly
# from [-5, 5)^2, each given an arbitrary label in {0, 1, 2}. The noise box is
# wider than the [-2.5, 2.5] box the blob centres were drawn from.
#
# A seeded Generator replaces the unseeded global np.random state so that
# Restart & Run All reproduces the same outliers, and therefore the same scores.
# Recorded outputs below were produced with unseeded noise and will change once.
rng = np.random.default_rng(42)
noise = (rng.random((5, 2)) - 0.5) * 10
noise_labels = rng.integers(0, 3, 5)

# NOTE: x_train / true_labels are rebound to the extended arrays, so running
# this cell a second time appends another five points.
x_train = np.append(x_train, noise, axis=0)
true_labels = np.append(true_labels, noise_labels)

# Re-standardise with the outliers included.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [115]:
# Scatter of the noisy tightest-centres dataset; appended noise points are
# coloured by their randomly drawn label.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [116]:
# GaussianMixtures on the noisy tightest-centres dataset.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [117]:
# Score against true_labels, which now includes the random noise labels.
gmm.score(true_labels)

0.6666666666666666

In [118]:
# Visualise the GaussianMixtures fit on the noisy tightest-centres dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [119]:
# SpearmanGaussianMixtures on the noisy tightest-centres dataset.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [120]:
# Score the Spearman variant (noise labels in true_labels are random).
gmm.score(true_labels)

0.8571428571428571

In [121]:
# Visualise the Spearman fit on the noisy tightest-centres dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [122]:
# MedianInitSpearmanGaussianMixtures on the noisy tightest-centres dataset.
# NOTE(review): the name suggests a median-based initialisation — confirm in clustering.py.
gmm = MedianInitSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [123]:
# Score the MedianInit variant (noise labels in true_labels are random).
gmm.score(true_labels)

0.8571428571428571

In [124]:
# Visualise the MedianInit fit on the noisy tightest-centres dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [125]:
# MADSpearmanGaussianMixtures on the noisy tightest-centres dataset.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [126]:
# Score the MADSpearman variant (noise labels in true_labels are random).
gmm.score(true_labels)

0.6952380952380952

In [127]:
# Visualise the MADSpearman fit on the noisy tightest-centres dataset.
plot_gaussian_mixtures(gmm, x_train_scaled)