In [112]:
import logging
import math as m
import sys

import numpy as np
import pandas as pd
import plotly.express as px
import scipy as sc
from sklearn.datasets import make_blobs

from clustlib.nonparam.tvclust import TVClust

# Send every record that reaches the root logger (DEBUG and above) to the
# notebook's stdout stream, prefixed with a timestamp and the level name.
# force=True (Python 3.8+) removes handlers already attached to the root
# logger first; without it basicConfig() silently does nothing when a handler
# exists, e.g. when this cell is re-run after editing the level or format.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.StreamHandler(sys.stdout)
    ],
    force=True,
)

# Hex colour palette indexed by integer cluster label: plot_dataframe maps the
# label string str(i) to COLORS[i], so position in this list is significant.
COLORS = [
    "#f7dc6f",
    "#82e0aa",
    "#f1948a",
    "#499cef",
    "#f5b041",
    "#a569bd",
    "#e74c3c",
    "#2ecc71",
    "#3498db",
    "#e67e22",
    "#9b59b6",
    "#1abc9c",
    "#34495e",
    "#d35400",
    "#c0392b",
    "#16a085",
    "#2980b9",
    "#8e44ad",
]

def twospirals(n_points, noise=.5, random_state=None):
    """Generate two interleaved 2-D spirals with integer class labels.

    Parameters
    ----------
    n_points : int
        Number of points per spiral; the result holds ``2 * n_points`` rows.
    noise : float, default 0.5
        Scale of the uniform ``[0, noise)`` jitter added to each coordinate.
    random_state : None, int or numpy.random.RandomState, default None
        ``None`` draws from NumPy's global generator (the previous
        behaviour). An int seeds a private ``RandomState`` so the data set is
        reproducible; an existing ``RandomState`` is used as-is.

    Returns
    -------
    points : numpy.ndarray, shape (2 * n_points, 2)
        First ``n_points`` rows are spiral 0; the remaining rows are the same
        points negated (the spiral mirrored through the origin).
    labels : numpy.ndarray of int, shape (2 * n_points,)
        ``0`` for the first spiral, ``1`` for the second.
    """
    if random_state is None:
        rng = np.random
    elif isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # Angle in radians, at most 780 degrees; it is also used as the radius,
    # which is what makes the curve a spiral.
    n = np.sqrt(rng.rand(n_points, 1)) * 780 * (2*np.pi)/360
    d1x = -np.cos(n)*n + rng.rand(n_points, 1) * noise
    d1y = np.sin(n)*n + rng.rand(n_points, 1) * noise

    first_spiral = np.hstack((d1x, d1y))
    second_spiral = np.hstack((-d1x, -d1y))
    points = np.vstack((first_spiral, second_spiral))
    labels = np.hstack((np.zeros(n_points, dtype=int), np.ones(n_points, dtype=int)))
    return (points, labels)

def plot_dataframe(df, title='', x='x', y='y', label='label'):
    """Build a Plotly Express scatter plot coloured by a label column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the coordinate columns and the label column. It is not
        modified; the string conversion below happens on a copy.
    title : str
        Figure title.
    x, y : str
        Names of the columns plotted on the horizontal / vertical axis.
    label : str
        Name of the column used for colouring. Values are converted with
        ``str`` so they are treated as discrete categories, and the label
        string ``str(i)`` is mapped to ``COLORS[i]``.

    Returns
    -------
    The figure returned by ``px.scatter`` with the marker size, symbol and
    opacity of its traces updated.
    """
    # Copy first: assigning into `df` itself would silently turn the caller's
    # label column into strings.
    plot_df = df.copy()
    plot_df[label] = plot_df[label].map(str)

    fig = px.scatter(
        # Sorted on the stringified label, i.e. lexicographically.
        plot_df.sort_values(by=label),
        x=x,
        y=y,
        color=label,
        color_discrete_map={str(i): color for i, color in enumerate(COLORS)},
        title=title,
    )
    fig.update_traces(marker=dict(size=5, symbol='circle', opacity = 0.6))

    return fig

def plot_centroid(fig, centroid, colors = ['#a569bd'], marker_size=10, symbol = 'star', name = 'Centroid'):
    """Add a single marker trace for ``centroid`` to ``fig`` and return ``fig``.

    ``centroid[0]`` and ``centroid[1]`` are used as the x and y coordinates.
    ``colors``, ``marker_size`` and ``symbol`` are forwarded as the marker's
    ``color``, ``size`` and ``symbol``; ``name`` becomes the trace name.
    """
    cx, cy = centroid[0], centroid[1]
    marker_style = {
        'size': marker_size,
        'color': colors,
        'symbol': symbol,
    }
    fig.add_scatter(x=[cx], y=[cy], mode='markers', marker=marker_style, name=name)
    return fig

def create_constraints(labels, probability=0.01, seed=0):
    """Sample a symmetric pairwise-constraint matrix from known labels.

    Every unordered pair ``(i, j)`` with ``i < j`` is selected independently
    with the given probability. A selected pair gets ``1`` when both labels
    are equal and ``-1`` when they differ; unselected pairs and the diagonal
    stay ``0``.

    Parameters
    ----------
    labels : sequence or numpy.ndarray
        One label per point.
    probability : float, default 0.01
        Chance that any given pair receives a constraint.
    seed : int, default 0
        Seed of the private ``numpy.random.RandomState`` used for sampling.

    Returns
    -------
    numpy.ndarray of int, shape (n_points, n_points)
        Symmetric matrix of ``-1`` / ``0`` / ``1`` entries.
    """
    labels = np.asarray(labels)
    n_points = len(labels)
    constraints = np.zeros((n_points, n_points), dtype=int)
    state = np.random.RandomState(seed=seed)

    # Upper-triangle pairs in row-major order. Drawing all uniforms in one
    # call consumes the generator in that same order, so the result matches a
    # pair-by-pair loop calling state.rand() once per pair.
    rows, cols = np.triu_indices(n_points, k=1)
    selected = state.rand(rows.size) < probability
    rows, cols = rows[selected], cols[selected]

    values = np.where(labels[rows] == labels[cols], 1, -1)
    constraints[rows, cols] = values
    constraints[cols, rows] = values
    return constraints

def _add_pair_lines(fig, pair_mask, points, line, name):
    """Draw one segment per selected point pair, all in a single line trace.

    ``pair_mask`` is a boolean matrix; only entries above the diagonal among
    the first ``len(points)`` rows/columns are considered, so each unordered
    pair is drawn once. Nothing is added when no pair is selected.
    """
    points = np.asarray(points)
    n_points = len(points)
    if n_points < 2:
        return fig
    if pair_mask.ndim != 2 or min(pair_mask.shape) < n_points:
        raise IndexError(
            "constraints must be a 2-D matrix with a row and a column for every point"
        )

    first, second = np.nonzero(np.triu(pair_mask[:n_points, :n_points], k=1))
    if first.size == 0:
        return fig

    # A None entry after each pair breaks the line, so every segment stays
    # separate while the figure gains only one trace (and one legend entry)
    # instead of one per constraint.
    xs, ys = [], []
    starts = points[first, :2].tolist()
    ends = points[second, :2].tolist()
    for (x0, y0), (x1, y1) in zip(starts, ends):
        xs += [x0, x1, None]
        ys += [y0, y1, None]

    fig.add_scatter(x=xs, y=ys, mode='lines', line=line, name=name)
    return fig


def draw_cl_constraints(fig, constraints, points):
    """Overlay the negative entries of ``constraints`` as dashed red lines.

    Negative entries are the pairs that ``create_constraints`` marks as having
    different labels. ``points`` supplies the coordinates (columns 0 and 1)
    and is indexed by the same positions as ``constraints``. Returns ``fig``.
    """
    return _add_pair_lines(
        fig,
        np.asarray(constraints) < 0,
        points,
        line=dict(color='red', width=0.5, dash='dash'),
        name='Cannot-link',
    )


def draw_ml_constraints(fig, constraints, points):
    """Overlay the positive entries of ``constraints`` as solid green lines.

    Positive entries are the pairs that ``create_constraints`` marks as having
    the same label. ``points`` supplies the coordinates (columns 0 and 1) and
    is indexed by the same positions as ``constraints``. Returns ``fig``.
    """
    return _add_pair_lines(
        fig,
        np.asarray(constraints) > 0,
        points,
        line=dict(color='green', width=0.5),
        name='Must-link',
    )

In [103]:
# Synthetic data set: 300 points drawn around 10 blob centers.
X, y = make_blobs(
    n_samples=300,
    centers=10,
    cluster_std=1.0,
    random_state=42,
)
# Pairwise constraints sampled from the true blob labels.
constraints = create_constraints(
    y,
    probability=0.01,
    seed=42,
)

In [114]:
# Fit TVClust on the blob data with the sampled pairwise constraints.
# n_clusters matches the 10 centers requested from make_blobs.
# NOTE(review): max_iter / tol look like the iteration cap and the convergence
# threshold on the logged "Delta" -- confirm against the clustlib docs.
tvclust = TVClust(
    n_clusters=10,
    constraints=constraints,
    max_iter=10000,
    tol=1e-4,
)
tvclust.fit(X)

2025-06-11 15:24:14,033 - DEBUG - Initializing parameters for TVClust, n_clusters=10, p=2
2025-06-11 15:24:14,036 - DEBUG - Covariance inverse: (10, 2, 2)
2025-06-11 15:24:14,037 - DEBUG - Iteration 1/10000
2025-06-11 15:24:14,038 - DEBUG - Updating responsibilities
2025-06-11 15:24:14,042 - DEBUG - Calculating the determinant of the covariance
2025-06-11 15:24:14,058 - DEBUG - Updating gamma
2025-06-11 15:24:14,059 - DEBUG - Updating beta
2025-06-11 15:24:14,059 - DEBUG - Updating mu
2025-06-11 15:24:14,060 - DEBUG - Updating W
2025-06-11 15:24:14,064 - DEBUG - Updating nu
2025-06-11 15:24:14,065 - DEBUG - Updating prior
2025-06-11 15:24:14,071 - DEBUG - Calculating the determinant of the covariance
2025-06-11 15:24:14,074 - DEBUG - Delta: -499.8061834650541
2025-06-11 15:24:14,077 - DEBUG - Iteration 2/10000
2025-06-11 15:24:14,077 - DEBUG - Updating responsibilities
2025-06-11 15:24:14,078 - DEBUG - Calculating the determinant of the covariance
2025-06-11 15:24:14,085 - DEBUG - Upda


invalid value encountered in divide



2025-06-11 15:24:14,278 - DEBUG - Updating gamma
2025-06-11 15:24:14,279 - DEBUG - Updating beta
2025-06-11 15:24:14,279 - DEBUG - Updating mu
2025-06-11 15:24:14,280 - DEBUG - Updating W
2025-06-11 15:24:14,280 - DEBUG - Updating nu
2025-06-11 15:24:14,281 - DEBUG - Updating prior
2025-06-11 15:24:14,285 - DEBUG - Calculating the determinant of the covariance
2025-06-11 15:24:14,286 - DEBUG - Delta: 0.5492452535673173
2025-06-11 15:24:14,286 - DEBUG - Iteration 9/10000
2025-06-11 15:24:14,287 - DEBUG - Updating responsibilities
2025-06-11 15:24:14,287 - DEBUG - Calculating the determinant of the covariance
2025-06-11 15:24:14,292 - DEBUG - Updating gamma
2025-06-11 15:24:14,293 - DEBUG - Updating beta
2025-06-11 15:24:14,293 - DEBUG - Updating mu
2025-06-11 15:24:14,293 - DEBUG - Updating W
2025-06-11 15:24:14,294 - DEBUG - Updating nu
2025-06-11 15:24:14,294 - DEBUG - Updating prior
2025-06-11 15:24:14,300 - DEBUG - Calculating the determinant of the covariance
2025-06-11 15:24:14,30

In [115]:
# Scatter of the points coloured by the fitted labels, with one star marker
# per centroid.
# NOTE(review): `_labels` is a private attribute of the estimator -- switch to
# a public accessor if clustlib provides one.
rdpm_centroids = tvclust.centroids
df = pd.DataFrame(X, columns=['x', 'y']).assign(label=tvclust._labels)
fig = plot_dataframe(df, title='TVCluster', x='x', y='y', label='label')
for i, centroid in enumerate(rdpm_centroids):
    fig = plot_centroid(
        fig,
        centroid,
        colors=[COLORS[i % len(COLORS)]],  # wrap around when there are more centroids than colours
        marker_size=10,
        symbol='star',
        name=f'Centroid {i}',
    )
fig.show()

In [110]:
# Ground-truth labels with the negative (different-label) constraints overlaid.
df = pd.DataFrame(X, columns=['x', 'y']).assign(label=y)
fig = draw_cl_constraints(
    plot_dataframe(df, title='Original Data', x='x', y='y', label='label'),
    constraints,
    X,
)
fig.show()


In [113]:
# Ground-truth labels with the positive (same-label) constraints overlaid.
df = pd.DataFrame(X, columns=['x', 'y']).assign(label=y)
fig = draw_ml_constraints(
    plot_dataframe(df, title='Original Data', x='x', y='y', label='label'),
    constraints,
    X,
)
fig.show()