# Dendritic Spine Clustering

In [None]:
from spine_metrics import SpineMetricDataset
from notebook_widgets import SpineMeshDataset, intersection_ratios_mean_distance, create_dir
from spine_segmentation import apply_scale
from spine_fitter import SpineGrouping
from spine_clusterization import SpineClusterizer, DBSCANSpineClusterizer
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import silhouette_score
from typing import Optional
from scipy.spatial.distance import jensenshannon
import warnings
warnings.filterwarnings(action='ignore', category=FutureWarning)


# --- configuration -----------------------------------------------------------
dataset_path = "0.025 0.025 0.1 dataset"
scale = (1, 1, 1)
show_reduction_method = "tsne"

# --- data loading ------------------------------------------------------------
# Spine meshes; apply_scale is called for its effect on the dataset
# (its return value is not used).
spine_dataset = SpineMeshDataset().load(dataset_path)
spine_dataset.apply_scale(scale)

# Merged and reduced manual classification, restricted to the loaded spines.
manual_classification = (
    SpineGrouping()
    .load(f"{dataset_path}/manual_classification/manual_classification_merged_reduced.json")
    .get_spines_subset(spine_dataset.spine_names)
)

# Metrics table, restricted to the samples of the manual classification.
spine_metrics = (
    SpineMetricDataset()
    .load(f"{dataset_path}/metrics.csv")
    .get_spines_subset(manual_classification.samples)
)

# --- metric subsets used as clustering features ------------------------------
classic_metric_names = [
    "OpenAngle",
    "CVD",
    "JunctionArea",
    "AverageDistance",
    "Length",
    "Area",
    "Volume",
    "ConvexHullVolume",
    "ConvexHullRatio",
    "LengthVolumeRatio",
    "LengthAreaRatio",
]
classic = spine_metrics.get_metrics_subset(classic_metric_names)
chord = spine_metrics.get_metrics_subset(["OldChordDistribution"])

# Default score: compares the produced grouping against the manual
# classification via intersection_ratios_mean_distance.
score_func = lambda clusterizer: intersection_ratios_mean_distance(
    manual_classification, clusterizer.grouping, False
)


# Export folder layout:
#   <dataset>/clustering/classic
#   <dataset>/clustering/chord/euclidean
#   <dataset>/clustering/chord/jensen-shannon
classic_save_path = f"{dataset_path}/clustering/classic"
chord_save_path = f"{dataset_path}/clustering/chord/euclidean"
chord_js_save_path = f"{dataset_path}/clustering/chord/jensen-shannon"

# Parent folders are listed before their children, so each create_dir call
# only ever has to add a single level.
for export_dir in (
    f"{dataset_path}/clustering",
    classic_save_path,
    f"{dataset_path}/clustering/chord",
    chord_save_path,
    chord_js_save_path,
):
    create_dir(export_dir)

In [None]:
# elbow method
def kmeans_elbow_score(clusterizer: SpineClusterizer) -> float:
    """Within-cluster sum of squared distances, the usual k-means elbow score.

    For every group in ``clusterizer.grouping.groups`` the centroid is the
    mean of the member rows returned by
    ``clusterizer.fit_metrics.row_as_array``; the score adds up the squared
    Euclidean distance of every member to the centroid of its own group.

    Returns 0.0 when there are no groups. Empty groups contribute nothing.
    """
    total = 0.0
    for group in clusterizer.grouping.groups.values():
        rows = np.array([clusterizer.fit_metrics.row_as_array(spine_name)
                         for spine_name in group], dtype=float)
        if rows.size == 0:
            continue
        # The centroid must be the MEAN of the members; using the plain sum
        # of the vectors is only correct for single-member groups.
        offsets = rows - rows.mean(axis=0)
        total += float(np.sum(offsets * offsets))
    return total


def dbscan_elbow_score(clusterizer: DBSCANSpineClusterizer) -> float:
    """Count the points whose k-th neighbour distance is not below ``eps``.

    ``k`` is ``clusterizer.min_samples`` and distances are measured with
    ``clusterizer.metric`` on ``clusterizer.fit_metrics.as_array()``. The
    per-point distance to the last of the ``k`` returned neighbours is sorted
    in descending order, and the position of the first value strictly smaller
    than ``clusterizer.eps`` is returned (the number of points if no value is
    smaller). Presumably these are the points DBSCAN would not treat as
    core points -- confirm against the clusterizer implementation.
    """
    points = clusterizer.fit_metrics.as_array()
    knn = NearestNeighbors(n_neighbors=clusterizer.min_samples,
                           metric=clusterizer.metric).fit(points)
    neighbour_distances, _ = knn.kneighbors(points)
    # last column: distance to the farthest of the requested neighbours,
    # ordered from largest to smallest
    kth_descending = -np.sort(-neighbour_distances[:, -1], axis=0)
    return next(
        (rank for rank, dist in enumerate(kth_descending) if clusterizer.eps > dist),
        len(kth_descending),
    )

def silhouette(clusterizer: SpineClusterizer, metric: Optional[callable] = None) -> float:
    """Silhouette score of the grouping stored in ``clusterizer``.

    Every group in ``clusterizer.grouping.groups`` becomes one label (its
    position in the mapping) and every member contributes the row returned
    by ``clusterizer.fit_metrics.row_as_array``.

    :param clusterizer: object providing ``grouping``, ``fit_metrics`` and ``metric``.
    :param metric: optional callable ``metric(a, b)``; when given, a full
        pairwise matrix is computed with it and scored as "precomputed",
        otherwise ``clusterizer.metric`` is handed to ``silhouette_score``.
    :return: the value returned by ``silhouette_score``.
    """
    samples = []
    cluster_ids = []
    for cluster_id, members in enumerate(clusterizer.grouping.groups.values()):
        for spine in members:
            samples.append(clusterizer.fit_metrics.row_as_array(spine))
            cluster_ids.append(cluster_id)
    cluster_ids = np.array(cluster_ids)

    if metric is not None:
        # pairwise[j][i] == metric(samples[i], samples[j])
        pairwise = np.array([[metric(first, second) for first in samples]
                             for second in samples])
        return silhouette_score(pairwise, cluster_ids, metric="precomputed")
    return silhouette_score(samples, cluster_ids, metric=clusterizer.metric)

def js_distance(x, y) -> float:
    """Jensen-Shannon distance between the distributions ``x`` and ``y``.

    ``scipy.spatial.distance.jensenshannon`` already returns the distance,
    i.e. the square root of the Jensen-Shannon divergence, so its result is
    returned unchanged. Taking another square root would give the fourth
    root of the divergence instead of the distance.
    """
    return jensenshannon(x, y)

## k-Means Classic Metrics

In [None]:
from notebook_widgets import k_means_clustering_experiment_widget

# Score shown by the widget. Alternative score functions (commented out):
# score_func = lambda clusterizer: intersection_ratios_mean_distance(manual_classification, clusterizer.grouping, False)
# score_func = silhouette
score_func = kmeans_elbow_score

dim_reduction = ""

# k-means experiment on the classic metric subset; results go to classic_save_path.
display(
    k_means_clustering_experiment_widget(
        classic, spine_metrics, spine_dataset, score_func,
        max_num_of_clusters=100,
        classification=manual_classification,
        save_folder=classic_save_path,
        dim_reduction=dim_reduction,
        show_method=show_reduction_method,
    )
)

## k-Means Chord Histograms

In [None]:
from notebook_widgets import k_means_clustering_experiment_widget

# Score shown by the widget. Alternative score function (commented out):
# score_func = lambda clusterizer: intersection_ratios_mean_distance(manual_classification, clusterizer.grouping, False)
score_func = kmeans_elbow_score

# k-means experiment on the chord histogram subset; results go to chord_save_path.
display(
    k_means_clustering_experiment_widget(
        chord, spine_metrics, spine_dataset, score_func,
        max_num_of_clusters=100,
        classification=manual_classification,
        save_folder=chord_save_path,
        dim_reduction="",
        show_method=show_reduction_method,
    )
)

## DBSCAN Classic Metrics

In [None]:
from notebook_widgets import dbscan_clustering_experiment_widget

# eps sweep handed to the widget: lower bound, upper bound, step
min_eps, max_eps, eps_step = 0.2, 6, 0.1

# Score shown by the widget. Alternative score function (commented out):
# score_func = lambda clusterizer: intersection_ratios_mean_distance(manual_classification, clusterizer.grouping, False)
score_func = dbscan_elbow_score

# DBSCAN experiment on the classic metric subset; results go to classic_save_path.
display(
    dbscan_clustering_experiment_widget(
        classic, spine_metrics, spine_dataset, score_func,
        min_eps=min_eps,
        max_eps=max_eps,
        eps_step=eps_step,
        dim_reduction="pca",
        show_method=show_reduction_method,
        classification=manual_classification,
        save_folder=classic_save_path,
    )
)

## DBSCAN Chord Histograms Euclidean Distance

In [None]:
from notebook_widgets import dbscan_clustering_experiment_widget

# eps sweep handed to the widget: lower bound, upper bound, step
min_eps, max_eps, eps_step = 0.1, 10, 0.1

# Score shown by the widget. Alternative score function (commented out):
# score_func = lambda clusterizer: intersection_ratios_mean_distance(manual_classification, clusterizer.grouping, False)
score_func = dbscan_elbow_score

# DBSCAN experiment on the chord histogram subset; results go to chord_save_path.
display(
    dbscan_clustering_experiment_widget(
        chord, spine_metrics, spine_dataset, score_func,
        min_eps=min_eps,
        max_eps=max_eps,
        eps_step=eps_step,
        dim_reduction="",
        show_method=show_reduction_method,
        classification=manual_classification,
        save_folder=chord_save_path,
    )
)

## DBSCAN Chord Histograms Jensen–Shannon Distance

In [None]:
from notebook_widgets import dbscan_clustering_experiment_widget
from scipy.spatial.distance import jensenshannon
import numpy as np

# Score shown by the widget. Alternative score function (commented out):
# score_func = lambda clusterizer: intersection_ratios_mean_distance(manual_classification, clusterizer.grouping, False)
score_func = dbscan_elbow_score

# eps sweep handed to the widget (lower bound, upper bound, step) and the
# switch for PCA reduction
min_eps, max_eps, eps_step = 0.1, 1, 0.01
use_pca = False

def js_distance(x, y) -> float:
    """Jensen-Shannon distance between the distributions ``x`` and ``y``.

    ``scipy.spatial.distance.jensenshannon`` already returns the distance,
    i.e. the square root of the Jensen-Shannon divergence, so its result is
    returned unchanged. Taking another square root would give the fourth
    root of the divergence instead of the distance.
    """
    return jensenshannon(x, y)

# DBSCAN experiment on the chord histogram subset with js_distance as metric;
# results go to chord_js_save_path.
# The reduction and visualisation options use the same keywords as the other
# dbscan_clustering_experiment_widget calls in this notebook
# (`dim_reduction`, `show_method`); `use_pca` is mapped onto the
# "pca" / "" values those calls pass.
display(
    dbscan_clustering_experiment_widget(
        chord, spine_metrics, spine_dataset, score_func,
        metric=js_distance,
        min_eps=min_eps,
        max_eps=max_eps,
        eps_step=eps_step,
        dim_reduction="pca" if use_pca else "",
        show_method=show_reduction_method,
        classification=manual_classification,
        save_folder=chord_js_save_path,
    )
)

## View clustering

In [None]:
from notebook_widgets import inspect_saved_groupings_widget

# Browse the groupings stored under the clustering export folder.
clustering_root = f"{dataset_path}/clustering"
display(
    inspect_saved_groupings_widget(
        clustering_root,
        spine_dataset,
        spine_metrics,
        chord,
        classic,
        manual_classification,
    )
)