In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
import plotly.express as px
import plotly.io as pio

from clustering import (GaussianMixtures, SpearmanGaussianMixtures, MADSpearmanGaussianMixtures,
                        MedianInitSpearmanGaussianMixtures)
from utils import plot_gaussian_mixtures

# Make 'ggplot2' the default Plotly template for every figure created below.
pio.templates.default = 'ggplot2'

In [None]:
# Synthetic 2-D benchmark: 100 points drawn from three blobs whose centres are
# sampled inside the [-10, 10] box; seeded so the sample is reproducible.
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-10, 10],
    random_state=42,
)
# Standardise each feature before clustering.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Scatter of the standardised points, coloured by their ground-truth blob label.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [None]:
# Baseline: fit the project's GaussianMixtures (from `clustering`) to the clean,
# standardised data, one component per generated blob.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [None]:
# Score the baseline fit against the ground-truth labels. The metric is whatever
# GaussianMixtures.score implements in `clustering` (not visible in this notebook).
gmm.score(true_labels)

In [None]:
# Visualise the baseline model over the standardised data with the project helper from `utils`.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# Same clean data, now fitted with the MedianInitSpearmanGaussianMixtures variant (max_iter=100).
# NOTE: this rebinds `gmm`, so the baseline model from the earlier cell is discarded.
gmm = MedianInitSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MedianInitSpearmanGaussianMixtures fit on the clean data against the ground truth.
gmm.score(true_labels)

In [None]:
# Visualise the MedianInitSpearmanGaussianMixtures fit over the clean, standardised data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# Inject two extreme outliers around (100, 1) and re-standardise the augmented data.
# The draw is seeded (like the main dataset) so the outlier positions, and every
# score/plot that follows, are reproducible across kernel restarts.
# NOTE(review): with a single centre, make_blobs labels every sample 0, so the
# outliers are recorded in `true_labels` as members of cluster 0 — confirm intended.
# NOTE: not idempotent — re-running this cell appends two more outliers to `x_train`;
# re-run the dataset cell first.
noise, noise_labels = make_blobs(n_samples=2, centers=[[100, 1]], random_state=42)
x_train = np.append(x_train, noise, axis=0)
true_labels = np.append(true_labels, noise_labels)
# A fresh scaler is fitted on the augmented data, so the outliers influence the scaling.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Scatter of the re-standardised data including the two outliers centred at (100, 1),
# coloured by `true_labels`.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [None]:
# Baseline GaussianMixtures refitted on the data that now contains the extreme outliers.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [None]:
# Score the baseline on the extreme-outlier dataset (`true_labels` now has 102 entries,
# the last two belonging to the injected outliers).
gmm.score(true_labels)

In [None]:
# Manual overlay for the baseline model on the extreme-outlier data: fitted centroids
# drawn as black crosses, data points coloured by `gmm.labels`.
fig = px.scatter(x=gmm.centroids[:, 0], y=gmm.centroids[:, 1])
fig.update_traces(
    mode='markers',
    marker=dict(size=12, color='black', symbol='x-thin', line=dict(width=2)),
)
fig.add_scatter(
    x=x_train_scaled[:, 0],
    y=x_train_scaled[:, 1],
    mode='markers',
    marker=dict(color=gmm.labels),
)
fig.show()

In [None]:
# SpearmanGaussianMixtures variant (max_iter=100) on the extreme-outlier dataset.
# NOTE: rebinds `gmm`; the baseline fit above is discarded.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the SpearmanGaussianMixtures fit on the extreme-outlier dataset.
gmm.score(true_labels)

In [None]:
# Visualise the SpearmanGaussianMixtures fit over the extreme-outlier data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# MADSpearmanGaussianMixtures variant (max_iter=100) on the extreme-outlier dataset.
# NOTE: rebinds `gmm`; the previous model is discarded.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MADSpearmanGaussianMixtures fit on the extreme-outlier dataset.
gmm.score(true_labels)

In [None]:
# Manual overlay for the MADSpearmanGaussianMixtures fit on the extreme-outlier data:
# fitted centroids drawn as black crosses, data points coloured by `gmm.labels`.
fig = px.scatter(x=gmm.centroids[:, 0], y=gmm.centroids[:, 1])
fig.update_traces(
    mode='markers',
    marker=dict(size=12, color='black', symbol='x-thin', line=dict(width=2)),
)
fig.add_scatter(
    x=x_train_scaled[:, 0],
    y=x_train_scaled[:, 1],
    mode='markers',
    marker=dict(color=gmm.labels),
)
fig.show()

# Outlier no tan abrupto

In [None]:
# Regenerate the same seeded three-blob dataset (centres sampled inside [-10, 10]),
# discarding the outliers appended to `x_train` / `true_labels` earlier.
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-10, 10],
    random_state=42,
)

In [None]:
# Inject two milder outliers around (20, 1) and standardise the augmented data.
# The draw is seeded (like the main dataset) so the outlier positions, and every
# score/plot that follows, are reproducible across kernel restarts.
# NOTE(review): with a single centre, make_blobs labels every sample 0, so the
# outliers are recorded in `true_labels` as members of cluster 0 — confirm intended.
# NOTE: not idempotent — re-running this cell appends two more outliers to `x_train`;
# re-run the dataset cell first.
noise, noise_labels = make_blobs(n_samples=2, centers=[[20, 1]], random_state=42)
x_train = np.append(x_train, noise, axis=0)
true_labels = np.append(true_labels, noise_labels)
# The scaler is fitted on the augmented data, so the outliers influence the scaling.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Scatter of the standardised data including the two outliers centred at (20, 1),
# coloured by `true_labels`.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [None]:
# Baseline GaussianMixtures on the milder-outlier dataset.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [None]:
# Score the baseline on the milder-outlier dataset against `true_labels`.
gmm.score(true_labels)

In [None]:
# Visualise the baseline fit over the milder-outlier data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# SpearmanGaussianMixtures variant (max_iter=100) on the milder-outlier dataset.
# NOTE: rebinds `gmm`; the baseline fit above is discarded.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the SpearmanGaussianMixtures fit on the milder-outlier dataset.
gmm.score(true_labels)

In [None]:
# Visualise the SpearmanGaussianMixtures fit over the milder-outlier data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# MedianInitSpearmanGaussianMixtures variant (max_iter=100) on the milder-outlier dataset.
# NOTE: rebinds `gmm`; the previous model is discarded.
gmm = MedianInitSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MedianInitSpearmanGaussianMixtures fit on the milder-outlier dataset.
gmm.score(true_labels)

In [None]:
# Visualise the MedianInitSpearmanGaussianMixtures fit over the milder-outlier data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# MADSpearmanGaussianMixtures variant (max_iter=100) on the milder-outlier dataset.
# NOTE: rebinds `gmm`; the previous model is discarded.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MADSpearmanGaussianMixtures fit on the milder-outlier dataset.
gmm.score(true_labels)

In [None]:
# Visualise the MADSpearmanGaussianMixtures fit over the milder-outlier data.
plot_gaussian_mixtures(gmm, x_train_scaled)

# Clusters más juntos

In [None]:
# Regenerate the seeded three-blob dataset with a tighter centre box ([-5, 5]),
# so the clusters sit closer together than in the earlier experiments.
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-5, 5],
    random_state=42,
)

In [None]:
# Inject two outliers around (20, 1) into the closer-cluster dataset and standardise it.
# The draw is seeded (like the main dataset) so the outlier positions, and every
# score/plot that follows, are reproducible across kernel restarts.
# NOTE(review): with a single centre, make_blobs labels every sample 0, so the
# outliers are recorded in `true_labels` as members of cluster 0 — confirm intended.
# NOTE: not idempotent — re-running this cell appends two more outliers to `x_train`;
# re-run the dataset cell first.
noise, noise_labels = make_blobs(n_samples=2, centers=[[20, 1]], random_state=42)
x_train = np.append(x_train, noise, axis=0)
true_labels = np.append(true_labels, noise_labels)
# The scaler is fitted on the augmented data, so the outliers influence the scaling.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Scatter of the standardised closer-cluster data plus the two outliers,
# coloured by `true_labels`.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [None]:
# Baseline GaussianMixtures on the closer-cluster dataset with outliers.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [None]:
# Score the baseline on the closer-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Visualise the baseline fit over the closer-cluster data with outliers.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# SpearmanGaussianMixtures variant (max_iter=100) on the closer-cluster dataset with outliers.
# NOTE: rebinds `gmm`; the baseline fit above is discarded.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the SpearmanGaussianMixtures fit on the closer-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Visualise the SpearmanGaussianMixtures fit over the closer-cluster data with outliers.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# MADSpearmanGaussianMixtures variant (max_iter=100) on the closer-cluster dataset with outliers.
# NOTE: rebinds `gmm`; the previous model is discarded.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MADSpearmanGaussianMixtures fit on the closer-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Manual overlay for the MADSpearmanGaussianMixtures fit on the closer-cluster data:
# fitted centroids drawn as black crosses, data points coloured by `gmm.labels`.
fig = px.scatter(x=gmm.centroids[:, 0], y=gmm.centroids[:, 1])
fig.update_traces(
    mode='markers',
    marker=dict(size=12, color='black', symbol='x-thin', line=dict(width=2)),
)
fig.add_scatter(
    x=x_train_scaled[:, 0],
    y=x_train_scaled[:, 1],
    mode='markers',
    marker=dict(color=gmm.labels),
)
fig.show()

# Clusters aún más juntos

In [None]:
# Regenerate the seeded three-blob dataset with the tightest centre box so far
# ([-2.5, 2.5]) and standardise it; no outliers are added at this point.
n_clusters = 3
x_train, true_labels = make_blobs(
    n_samples=100,
    centers=n_clusters,
    center_box=[-2.5, 2.5],
    random_state=42,
)
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Baseline GaussianMixtures on the tightest-cluster dataset, before any outliers are added.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [None]:
# Score the baseline on the outlier-free tightest-cluster dataset.
gmm.score(true_labels)

In [None]:
# Visualise the baseline fit over the outlier-free tightest-cluster data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# SpearmanGaussianMixtures variant (max_iter=100) on the outlier-free tightest-cluster dataset.
# NOTE: rebinds `gmm`; the baseline fit above is discarded.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the SpearmanGaussianMixtures fit on the outlier-free tightest-cluster dataset.
gmm.score(true_labels)

In [None]:
# Visualise the SpearmanGaussianMixtures fit over the outlier-free tightest-cluster data.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# Inject two outliers around (20, 1) into the tightest-cluster dataset and re-standardise it.
# The draw is seeded (like the main dataset) so the outlier positions, and every
# score/plot that follows, are reproducible across kernel restarts.
# NOTE(review): with a single centre, make_blobs labels every sample 0, so the
# outliers are recorded in `true_labels` as members of cluster 0 — confirm intended.
# NOTE: not idempotent — re-running this cell appends two more outliers to `x_train`;
# re-run the dataset cell first.
noise, noise_labels = make_blobs(n_samples=2, centers=[[20, 1]], random_state=42)
x_train = np.append(x_train, noise, axis=0)
true_labels = np.append(true_labels, noise_labels)
# A fresh scaler is fitted on the augmented data, so the outliers influence the scaling.
x_train_scaled = StandardScaler().fit_transform(x_train)

In [None]:
# Scatter of the re-standardised tightest-cluster data plus the two outliers,
# coloured by `true_labels`.
px.scatter(x=x_train_scaled[:, 0], y=x_train_scaled[:, 1], color=true_labels)

In [None]:
# Baseline GaussianMixtures on the tightest-cluster dataset with outliers.
gmm = GaussianMixtures(n_clusters=n_clusters)
gmm.fit(x_train_scaled)

In [None]:
# Score the baseline on the tightest-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Manual overlay for the baseline fit on the tightest-cluster data with outliers:
# fitted centroids drawn as black crosses, data points coloured by `gmm.labels`.
fig = px.scatter(x=gmm.centroids[:, 0], y=gmm.centroids[:, 1])
fig.update_traces(
    mode='markers',
    marker=dict(size=12, color='black', symbol='x-thin', line=dict(width=2)),
)
fig.add_scatter(
    x=x_train_scaled[:, 0],
    y=x_train_scaled[:, 1],
    mode='markers',
    marker=dict(color=gmm.labels),
)
fig.show()

In [None]:
# SpearmanGaussianMixtures variant (max_iter=100) on the tightest-cluster dataset with outliers.
# NOTE: rebinds `gmm`; the baseline fit above is discarded.
gmm = SpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the SpearmanGaussianMixtures fit on the tightest-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Visualise the SpearmanGaussianMixtures fit over the tightest-cluster data with outliers.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# MedianInitSpearmanGaussianMixtures variant (max_iter=100) on the tightest-cluster dataset with outliers.
# NOTE: rebinds `gmm`; the previous model is discarded.
gmm = MedianInitSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MedianInitSpearmanGaussianMixtures fit on the tightest-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Visualise the MedianInitSpearmanGaussianMixtures fit over the tightest-cluster data with outliers.
plot_gaussian_mixtures(gmm, x_train_scaled)

In [None]:
# MADSpearmanGaussianMixtures variant (max_iter=100) on the tightest-cluster dataset with outliers.
# NOTE: rebinds `gmm`; the previous model is discarded.
gmm = MADSpearmanGaussianMixtures(n_clusters=n_clusters, max_iter=100)
gmm.fit(x_train_scaled)

In [None]:
# Score the MADSpearmanGaussianMixtures fit on the tightest-cluster dataset with outliers.
gmm.score(true_labels)

In [None]:
# Manual overlay for the MADSpearmanGaussianMixtures fit on the tightest-cluster data:
# fitted centroids drawn as black crosses, data points coloured by `gmm.labels`.
fig = px.scatter(x=gmm.centroids[:, 0], y=gmm.centroids[:, 1])
fig.update_traces(
    mode='markers',
    marker=dict(size=12, color='black', symbol='x-thin', line=dict(width=2)),
)
fig.add_scatter(
    x=x_train_scaled[:, 0],
    y=x_train_scaled[:, 1],
    mode='markers',
    marker=dict(color=gmm.labels),
)
fig.show()