In [1]:
import numpy as np
from sklearn.utils._param_validation import (
    validate_params, StrOptions
)

In [2]:
@validate_params(
    {"X": ["array-like"],
    "labels": ["array-like"]},
    prefer_skip_nested_validation=True,
)
def interclust_sld(X, labels):
    """
    Computes the single linkage distance between every pair of clusters.

    The single linkage distance between two clusters is the smallest
    pairwise distance between a member of the first cluster and a member
    of the second one.

    Parameters:
    X (array-like): Set of distances between each pair of instances (n_instances, n_instances).
    labels (array-like): Set of labels for the vectors (n_instances, ).

    Returns:
    dist (ndarray): Matrix of shape (n_clusters, n_clusters), ordered like
        `np.unique(labels)`. `dist[i, j]` is the minimal distance between
        members of cluster i and members of cluster j; the diagonal is `np.inf`.

    Raises:
    ValueError: If the number of labels differs from the number of rows of X.
    """
    # Accept anything array-like (lists, tuples, ...), as the validator promises.
    X = np.asarray(X)
    labels = np.asarray(labels)

    if X.shape[0] != labels.shape[0]:
        raise ValueError(f"Invalid label array shape. Must be {X.shape[0]}, got {labels.shape[0]}")

    labels_unique = np.unique(labels)
    n_clusters = labels_unique.shape[0]
    # members[c] is a boolean mask selecting the instances of cluster c;
    # built once instead of re-tiling the labels for every cluster pair.
    members = labels_unique[:, None] == labels.reshape(-1)[None, :]

    # The diagonal stays at inf so a later global minimum ignores self-pairs.
    dist = np.full((n_clusters, n_clusters), np.inf)
    for i in range(n_clusters):
        for j in range(n_clusters):
            if i != j:
                # Rows restricted to cluster i, columns restricted to cluster j.
                dist[i, j] = np.min(X[np.ix_(members[i], members[j])])
    return dist

In [3]:
@validate_params(
    {"X": ["array-like"],
    "labels": ["array-like"]},
    prefer_skip_nested_validation=True,
)
def intraclust_compdd(X, labels):
    """
    Computes the complete diameter distance of every cluster.

    The complete diameter of a cluster is the largest pairwise distance
    between two of its members.

    Parameters:
    X (array-like): Set of distances between each pair of instances (n_instances, n_instances).
    labels (array-like): Set of labels for the vectors (n_instances, ).

    Returns:
    dist (ndarray): Vector of shape (n_clusters, ), ordered like
        `np.unique(labels)`, holding the diameter of each cluster.

    Raises:
    ValueError: If the number of labels differs from the number of rows of X.
    """
    # Accept anything array-like (lists, tuples, ...), as the validator promises.
    X = np.asarray(X)
    labels = np.asarray(labels)

    if X.shape[0] != labels.shape[0]:
        raise ValueError(f"Invalid label array shape. Must be {X.shape[0]}, got {labels.shape[0]}")

    labels_unique = np.unique(labels)
    n_clusters = labels_unique.shape[0]
    # members[c] is a boolean mask selecting the instances of cluster c.
    members = labels_unique[:, None] == labels.reshape(-1)[None, :]

    dist = np.zeros((n_clusters,))
    for i in range(n_clusters):
        # Sub-matrix of distances among the members of cluster i only
        # (includes the diagonal entries of X for those members).
        dist[i] = np.max(X[np.ix_(members[i], members[i])])
    return dist

In [4]:
from functools import partial


@validate_params(
    {"X": ["array-like"],
    "labels": ["array-like"],
    "intraclust_dist": [None, callable],
    "interclust_dist": [None, callable],
    "metric": [None, callable, StrOptions({"precomputed"})]},
    prefer_skip_nested_validation=True,
    )
def dunn_score(X, labels, intraclust_dist=None, interclust_dist=None, metric=None):
    """
    Compute the Dunn Index: the smallest between-cluster distance divided by
    the largest within-cluster distance.

    Parameters:
    X (array-like): Set of vectors (n_instances, n_features) or set of distances between vectors (n_instances, n_instances) if `metric` has value 'precomputed'.
    labels (array-like): Class labels for each of the vectors (n_instances,).
    intraclust_dist (callable): Called as `intraclust_dist(distances, labels)`; returns the within-cluster distances. Defaults to `intraclust_compdd`.
    interclust_dist (callable): Called as `interclust_dist(distances, labels)`; returns the between-cluster distances. Defaults to `interclust_sld`.
    metric (callable, str): The way vector norms are defined. `None` uses the
        Euclidean norm. A callable receives the array of pairwise differences
        of shape (n_instances, n_features, n_instances) and must return the
        (n_instances, n_instances) distance matrix. 'precomputed' means X
        already is that distance matrix.

    Returns:
    dunn (float): Dunn score.

    Raises:
    ValueError: If X is not 2-dimensional or the number of labels differs
        from the number of rows of X.
    """
    # Accept anything array-like (lists, tuples, ...), as the validator promises.
    X = np.asarray(X)
    labels = np.asarray(labels)

    if X.ndim != 2:
        raise ValueError(f"X must be a 2-dimensional array, got {X.ndim} dimension(s)")

    if X.shape[0] != labels.shape[0]:
        # Must be raised: returning the exception object would hand callers
        # a ValueError instance in place of a score.
        raise ValueError(f"Invalid label array shape. Must be {X.shape[0]}, got {labels.shape[0]}")

    if metric == "precomputed":
        # X itself is the pairwise distance matrix.
        distances = X
    else:
        # diffs[i, :, k] = X[i] - X[k], obtained by broadcasting
        # (n, m, 1) against (1, m, n).
        diffs = X[:, :, None] - X.T[None, :, :]
        if callable(metric):
            distances = metric(diffs)
        else:
            # Default: Euclidean length of every difference vector.
            distances = np.linalg.vector_norm(diffs, ord=2, axis=1)

    inter_dist = interclust_sld(distances, labels) if interclust_dist is None else interclust_dist(distances, labels)
    intra_dist = intraclust_compdd(distances, labels) if intraclust_dist is None else intraclust_dist(distances, labels)

    dunn = np.min(inter_dist) / np.max(intra_dist)
    return dunn

In [5]:
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans
from sklearn import metrics
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.cluster import KMeans

In [6]:
# Synthetic blob dataset with a fixed seed; the generated labels `y` are not
# used in this cell.
X, y = make_blobs(random_state=42)
kmeans = KMeans(n_clusters=3, random_state=42)

# Fit KMeans and keep the predicted cluster label of every sample.
y_pred = kmeans.fit_predict(X)

# Dunn index of the KMeans partition, using the default distance helpers
# and the default (non-precomputed) metric.
dunn_score(X, y_pred)

np.float64(1.3200068371385423)

In [8]:
# Small 3x3 integer matrix built from 1..9, displayed as the cell output.
a = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])
b = a.reshape((3, 3))
b

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [9]:
# Euclidean norm of each row of `b` (axis=1 reduces across the columns).
np.linalg.norm(b, axis=1)

array([ 3.74165739,  8.77496439, 13.92838828])