In [1]:
import numpy as np
import math as m
import scipy as sc
import pandas as pd
from sklearn.datasets import make_blobs
import plotly.express as px

import logging
import sys

# Configure the root logger once for the whole notebook: every record at
# DEBUG level or above is written to stdout, so log messages emitted while
# the cells run show up in the cell output.
_LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
_stdout_handler = logging.StreamHandler(sys.stdout)
logging.basicConfig(level=logging.DEBUG, format=_LOG_FORMAT, handlers=[_stdout_handler])

# Discrete colour palette (18 hex colours). Used as the colour sequence of the
# scatter plots and, cycled by index, for the centroid markers.
COLORS = [
    "#f7dc6f", "#82e0aa", "#f1948a", "#499cef", "#f5b041", "#a569bd",
    "#e74c3c", "#2ecc71", "#3498db", "#e67e22", "#9b59b6", "#1abc9c",
    "#34495e", "#d35400", "#c0392b", "#16a085", "#2980b9", "#8e44ad",
]

def twospirals(n_points, noise=.5, seed=None):
    """Generate a two-spirals toy dataset in 2-D.

    The first spiral is traced by an angle ``n`` (radians, at most 780
    degrees) as ``(-cos(n) * n, sin(n) * n)`` plus noise; the second spiral
    is the exact point reflection of the first through the origin.

    Parameters
    ----------
    n_points : int
        Number of points per spiral (the result has ``2 * n_points`` rows).
    noise : float, default 0.5
        Scale of the additive noise. The noise is drawn uniformly from
        ``[0, noise)`` independently for each coordinate.
    seed : int or None, default None
        When given, points are drawn from a private
        ``np.random.RandomState(seed)`` so the dataset is reproducible and
        NumPy's global random state is left untouched. When ``None`` the
        global ``np.random`` generator is used, as before.

    Returns
    -------
    points : numpy.ndarray of shape (2 * n_points, 2)
        Coordinates; the first ``n_points`` rows belong to spiral 0.
    labels : numpy.ndarray of shape (2 * n_points,)
        Integer labels: ``n_points`` zeros followed by ``n_points`` ones.
    """
    # Both the module-level generator and RandomState expose ``rand``, so the
    # unseeded path keeps drawing from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # sqrt of a uniform draw, scaled to an angle of up to 780 degrees and
    # converted to radians.
    n = np.sqrt(rng.rand(n_points, 1)) * 780 * (2 * np.pi) / 360
    d1x = -np.cos(n) * n + rng.rand(n_points, 1) * noise
    d1y = np.sin(n) * n + rng.rand(n_points, 1) * noise

    first_spiral = np.hstack((d1x, d1y))
    second_spiral = np.hstack((-d1x, -d1y))
    points = np.vstack((first_spiral, second_spiral))
    labels = np.hstack((np.zeros(n_points, dtype=int), np.ones(n_points, dtype=int)))
    return points, labels

def plot_dataframe(df, title='', x='x', y='y', label='label'):
    """Build a Plotly scatter figure of ``df`` coloured by a label column.

    Parameters
    ----------
    df : pandas.DataFrame
        Data to plot. It is not modified: the string cast of the label
        column is applied to an internal copy.
    title : str, default ''
        Figure title.
    x, y : str, default 'x' / 'y'
        Names of the columns used for the horizontal and vertical axes.
    label : str, default 'label'
        Name of the column used for both the ``color`` and ``symbol``
        groupings of the scatter.

    Returns
    -------
    The figure returned by ``px.scatter`` after its trace markers have been
    set to size 5 with the ``'circle'`` symbol.
    """
    # Cast on a copy so the caller's frame keeps its original label dtype
    # (previously the cast was written back into the caller's DataFrame).
    plot_df = df.copy()
    # String labels so each label value is handled as a discrete category
    # and picks its colour from COLORS.
    plot_df[label] = plot_df[label].astype(str)

    fig = px.scatter(
        plot_df,
        x=x,
        y=y,
        symbol=label,
        color=label,
        color_discrete_sequence=COLORS,
        title=title,
    )
    # Uniform marker look for the traces; this replaces the per-label symbols.
    fig.update_traces(marker=dict(size=5, symbol='circle'))

    return fig

def plot_centroid(fig, centroid, colors=['#a569bd'], marker_size=10, symbol='star', name='Centroid'):
    """Add a single centroid marker to ``fig`` and return the same figure.

    Parameters
    ----------
    fig : object exposing ``add_scatter(**kwargs)``
        Figure to draw on; it is modified by the ``add_scatter`` call.
    centroid : indexable
        ``centroid[0]`` is used as the x coordinate, ``centroid[1]`` as y.
    colors : list, default ['#a569bd']
        Value forwarded as the marker ``color``.
    marker_size : int, default 10
        Marker size.
    symbol : str, default 'star'
        Marker symbol.
    name : str, default 'Centroid'
        Name given to the added trace.
    """
    marker_style = {
        'size': marker_size,
        'color': colors,
        'symbol': symbol,
    }
    x_coord, y_coord = centroid[0], centroid[1]
    fig.add_scatter(x=[x_coord], y=[y_coord], mode='markers', marker=marker_style, name=name)
    return fig

def create_constraints(labels, probability=0.01, seed=0):
    """Sample a symmetric pairwise-constraint matrix from known labels.

    Every unordered pair ``(i, j)`` with ``i < j`` is visited once, in
    row-major order, and selected when a fresh uniform draw is below
    ``probability``. A selected pair is marked ``1`` when both labels are
    equal (a "same cluster" constraint) and ``-1`` when they differ (a
    "different cluster" constraint). Unselected pairs and the diagonal
    stay ``0``.

    Parameters
    ----------
    labels : sequence
        One label per point; only equality between entries is used.
    probability : float, default 0.01
        Chance that any given pair receives a constraint.
    seed : int, default 0
        Seed of the private ``np.random.RandomState`` used for sampling.

    Returns
    -------
    numpy.ndarray of shape (len(labels), len(labels)), dtype int
        Symmetric matrix with entries in ``{-1, 0, 1}``.
    """
    total = len(labels)
    rng = np.random.RandomState(seed=seed)
    matrix = np.zeros((total, total), dtype=int)

    for row in range(total):
        for col in range(row + 1, total):
            # Exactly one draw per pair, whether or not it is selected.
            if not (rng.rand() < probability):
                continue

            if labels[row] == labels[col]:
                link = 1
            elif labels[row] != labels[col]:
                link = -1
            else:
                continue

            # Mirror the entry so the matrix stays symmetric.
            matrix[row, col] = link
            matrix[col, row] = link

    return matrix

In [2]:
# Synthetic benchmark: 300 points generated around 10 blob centres with unit
# standard deviation; `y` holds the generating blob index of every point.
X, y = make_blobs(
    n_samples=300,
    centers=10,
    cluster_std=1.0,
    random_state=42,
)

# Pairwise constraints sampled from the true labels: each pair of points is
# constrained with probability 0.01.
constraints = create_constraints(labels=y, probability=0.01, seed=42)

In [3]:
from clustlib.nonparam.tvclust import TVClust

# Hyperparameters for the TVClust run on the blob data. `n_clusters` matches
# the number of blob centres used to generate X; `max_iter` is the iteration
# budget reported in the fit log ("Iteration k/100").
# NOTE(review): `tol` is presumably the convergence threshold on the logged
# "Delta" value - confirm against the clustlib implementation.
tvclust_params = dict(
    n_clusters=10,
    constraints=constraints,
    max_iter=100,
    tol=1e-4,
)
tvclust = TVClust(**tvclust_params)
tvclust.fit(X)

2025-06-11 11:26:27,772 - DEBUG - Initializing parameters for TVClust, n_clusters=10, p=2
2025-06-11 11:26:27,772 - DEBUG - Covariance inverse: (10, 2, 2)
2025-06-11 11:26:27,772 - DEBUG - Iteration 1/100
2025-06-11 11:26:27,773 - DEBUG - Updating responsibilities
2025-06-11 11:26:27,773 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:27,781 - DEBUG - Updating gamma
2025-06-11 11:26:27,781 - DEBUG - Updating beta
2025-06-11 11:26:27,781 - DEBUG - Updating mu
2025-06-11 11:26:27,782 - DEBUG - Updating W
2025-06-11 11:26:27,783 - DEBUG - Updating nu
2025-06-11 11:26:27,783 - DEBUG - Updating prior
2025-06-11 11:26:27,788 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:27,789 - DEBUG - Delta: -484.502999659641
2025-06-11 11:26:27,790 - DEBUG - Iteration 2/100
2025-06-11 11:26:27,790 - DEBUG - Updating responsibilities
2025-06-11 11:26:27,791 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:27,793 - DEBUG - Updating 

  self.centroids[i] = np.sum(weighted_sum, axis=0) / np.sum(self.__responsabilities[:, i][self._labels == i])


2025-06-11 11:26:27,998 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:28,001 - DEBUG - Delta: -1073.252062181913
2025-06-11 11:26:28,005 - DEBUG - Iteration 12/100
2025-06-11 11:26:28,006 - DEBUG - Updating responsibilities
2025-06-11 11:26:28,006 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:28,015 - DEBUG - Updating gamma
2025-06-11 11:26:28,015 - DEBUG - Updating beta
2025-06-11 11:26:28,016 - DEBUG - Updating mu
2025-06-11 11:26:28,017 - DEBUG - Updating W
2025-06-11 11:26:28,020 - DEBUG - Updating nu
2025-06-11 11:26:28,020 - DEBUG - Updating prior
2025-06-11 11:26:28,026 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:28,028 - DEBUG - Delta: 0.624238568163967
2025-06-11 11:26:28,029 - DEBUG - Iteration 13/100
2025-06-11 11:26:28,030 - DEBUG - Updating responsibilities
2025-06-11 11:26:28,030 - DEBUG - Calculating the determinant of the covariance
2025-06-11 11:26:28,036 - DEBUG - Updating gamma
2025-06

In [4]:
# Scatter the points coloured by the label TVClust assigned to each of them,
# then overlay one star marker per fitted centroid.
rdpm_centroids = tvclust.centroids
df = pd.DataFrame(X, columns=['x', 'y']).assign(label=tvclust._labels)
fig = plot_dataframe(df, title='TVCluster', x='x', y='y', label='label')
for i, centroid in enumerate(rdpm_centroids):
    # Cycle through the palette so more centroids than colours still works.
    centroid_color = COLORS[i % len(COLORS)]
    fig = plot_centroid(
        fig,
        centroid,
        colors=[centroid_color],
        marker_size=10,
        symbol='star',
        name=f'Centroid {i+1}',
    )
fig.show()