In [23]:
import time
import warnings
from itertools import cycle, islice

import matplotlib.pyplot as plt
import numpy as np

from sklearn import cluster, datasets, mixture
from sklearn.neighbors import kneighbors_graph
from sklearn.preprocessing import StandardScaler
from sklearn.utils.validation import check_symmetric

In [2]:
# Sample count and seed shared by the first group of toy datasets.
n_samples = 500
seed = 30

# Circles and moons, each generated with a small amount of noise.
noisy_circles = datasets.make_circles(
    n_samples=n_samples,
    factor=0.5,
    noise=0.05,
    random_state=seed,
)
noisy_moons = datasets.make_moons(
    n_samples=n_samples,
    noise=0.05,
    random_state=seed,
)

# Blobs generated with make_blobs' default cluster settings.
blobs = datasets.make_blobs(n_samples=n_samples, random_state=seed)

# Uniform random points paired with `None` in place of labels.
rng = np.random.RandomState(seed)
no_structure = (rng.rand(n_samples, 2), None)

# Anisotropically distributed data: blobs pushed through a fixed 2x2 linear map.
random_state = 170
X, y = datasets.make_blobs(n_samples=n_samples, random_state=random_state)
transformation = [[0.6, -0.6], [-0.4, 0.8]]
X_aniso = X.dot(transformation)
aniso = (X_aniso, y)

# Blobs whose three clusters have different standard deviations.
varied = datasets.make_blobs(
    n_samples=n_samples,
    cluster_std=[1.0, 2.5, 0.5],
    random_state=random_state,
)

In [4]:
# Open a 21 x 13 inch figure and tighten the subplot margins/spacing.
plt.figure(figsize=(9 * 2 + 3, 13))
plt.subplots_adjust(
    left=0.02,
    right=0.98,
    bottom=0.001,
    top=0.95,
    wspace=0.05,
    hspace=0.01,
)

plot_num = 1

# Baseline parameter values; each dataset below overrides a subset of them.
default_base = dict(
    quantile=0.3,
    eps=0.3,
    damping=0.9,
    preference=-200,
    n_neighbors=3,
    n_clusters=3,
    min_samples=7,
    xi=0.05,
    min_cluster_size=0.1,
    allow_single_cluster=True,
    hdbscan_min_cluster_size=15,
    hdbscan_min_samples=3,
    random_state=42,
)

# Per-dataset overrides, merged on top of `default_base` by the consumer.
noisy_circles_overrides = dict(
    damping=0.77,
    preference=-240,
    quantile=0.2,
    n_clusters=2,
    min_samples=7,
    xi=0.08,
)
noisy_moons_overrides = dict(
    damping=0.75,
    preference=-220,
    n_clusters=2,
    min_samples=7,
    xi=0.1,
)
varied_overrides = dict(
    eps=0.18,
    n_neighbors=2,
    min_samples=7,
    xi=0.01,
    min_cluster_size=0.2,
)
aniso_overrides = dict(
    eps=0.15,
    n_neighbors=2,
    min_samples=7,
    xi=0.1,
    min_cluster_size=0.2,
)
blobs_overrides = dict(min_samples=7, xi=0.1, min_cluster_size=0.2)

# Ordered (dataset, overrides) pairs.
# NOTE(review): this rebinds `datasets`, which the import cell binds to the
# `sklearn.datasets` module. Re-running the data-generation cell after this one
# fails until the imports are re-run; consider renaming this list together with
# every cell that iterates over it.
datasets = [
    (noisy_circles, noisy_circles_overrides),
    (noisy_moons, noisy_moons_overrides),
    (varied, varied_overrides),
    (aniso, aniso_overrides),
    (blobs, blobs_overrides),
    (no_structure, {}),
]

<Figure size 2100x1300 with 0 Axes>

In [22]:
# Build a k-nearest-neighbors graph for every dataset and keep it as a dense array.
dataset_names = [
    "noisy_circles",
    "noisy_moons",
    "varied",
    "aniso",
    "blobs",
    "no_structure",
]
nearest_neighbors_graphs = {}

for i_dataset, (dataset, algo_params) in enumerate(datasets):
    # Dataset-specific values take precedence over the shared defaults.
    params = {**default_base, **algo_params}

    # Standardize the features so one set of parameters works across datasets.
    X, y = dataset
    X = StandardScaler().fit_transform(X)

    # k-NN graph over the scaled points; `include_self=False` is passed so a
    # point is not listed as its own neighbor. The graph is NOT symmetrized here.
    connectivity = kneighbors_graph(
        X, n_neighbors=params["n_neighbors"], include_self=False
    )

    # Record the dense form of the graph under this dataset's name.
    graph_name = dataset_names[i_dataset]
    nearest_neighbors_graphs[graph_name] = connectivity.toarray()

    


In [34]:
# For each stored nearest-neighbors matrix, count:
#   * total_count -- entries (i, j) that are non-zero;
#   * symm_count  -- entries (i, j) that are non-zero AND whose transposed
#                    entry (j, i) is also non-zero (mutual neighbors).
# The counts are computed with vectorized NumPy operations instead of a
# Python-level loop over every cell of the matrix, and each result is keyed by
# the dictionary's own key rather than by re-indexing a parallel list of names.
nearest_neighbors_symm_count = {}
nearest_neighbors_total_count = {}
for dataset, mat in nearest_neighbors_graphs.items():
    # Boolean mask of the non-zero entries; `.T` gives the mask of the
    # transposed positions, so the matrix is expected to be square.
    is_neighbor = np.asarray(mat) != 0

    total_count = int(np.count_nonzero(is_neighbor))
    symm_count = int(np.count_nonzero(is_neighbor & is_neighbor.T))

    nearest_neighbors_symm_count[dataset] = symm_count
    nearest_neighbors_total_count[dataset] = total_count
print(nearest_neighbors_symm_count)
print(nearest_neighbors_total_count)
                

{'noisy_circles': 1074, 'noisy_moons': 1082, 'varied': 670, 'aniso': 672, 'blobs': 1064, 'no_structure': 1136}
{'noisy_circles': 1500, 'noisy_moons': 1500, 'varied': 1000, 'aniso': 1000, 'blobs': 1500, 'no_structure': 1500}
