In [None]:
import os
from pathlib import Path

import hdbscan
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import seaborn.objects as so
import umap

from graph_neuralmorpho.morphometrics.morphopy import MorphopyFeatures

In [None]:
# Locate the benchmark dataset. Set the NEUROMORPHO_DATA_DIR environment
# variable to point at a different copy of the data; when it is unset the
# original workstation path below is used, so existing runs are unaffected.
data_path = Path(
    os.environ.get(
        "NEUROMORPHO_DATA_DIR",
        "/Users/kyle/Library/CloudStorage/Box-Box/FoxLab/data-microscopy/neuromorpho-benchmark",
    )
)
morphopy_features_path = data_path / "processed/morphopy_features"

morphopy_features = MorphopyFeatures(morphopy_features_path)

# Exclude every column whose name mentions thickness, volume or surface
# (presumably the radius-dependent measures, going by the mask's name --
# TODO confirm), then drop any remaining column that contains a NaN.
radius_measure_masks = morphopy_features.data.columns.str.contains(r"thickness|volume|surface")
cols = morphopy_features.data.columns[~radius_measure_masks]

# Selecting the columns already yields a new frame, so the full-table copy the
# selection used to be preceded by is unnecessary; dropna returns a new frame too.
features = morphopy_features.data.loc[:, cols].dropna(axis=1)

neuron_name = morphopy_features.neurons
target = morphopy_features.target

## Dimensionality reduction

In [None]:
# Fit UMAP on the feature table with a fixed seed and keep the fitted reducer
# around alongside its embedding. The dropna here is a no-op safeguard:
# `features` already had its NaN-containing columns removed when it was built.
reducer = umap.UMAP(n_neighbors=10, min_dist=0.01, random_state=42)
umap_embeddings = reducer.fit(features.dropna(axis=1)).embedding_

In [None]:
# Wrap the embedding in a frame and attach the class labels for plotting.
# NOTE(review): `assign` aligns on index when given a Series; this frame has a
# fresh RangeIndex, so confirm `target` is positional (array/list) or shares it.
df_umap_embeddings = pd.DataFrame(umap_embeddings, columns=["UMAP1", "UMAP2"]).assign(target=target)

# Two side-by-side panels of the same embedding: plain on the left,
# coloured by class label on the right.
fig, axs = plt.subplots(ncols=2, figsize=(20, 10))

point_style = dict(data=df_umap_embeddings, x="UMAP1", y="UMAP2", s=12, alpha=0.25)
sns.scatterplot(ax=axs[0], **point_style)
sns.scatterplot(ax=axs[1], hue="target", palette="tab10", **point_style)

# Rebuild the class legend just outside the right-hand panel.
axs[1].legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)

# Shared cosmetics: per-panel title plus enlarged tick and axis labels.
panel_titles = ("MorphoPy Features", "MorphoPy Features w/ Class Labels")
for ax, panel_title in zip(axs, panel_titles):
    ax.set_title(panel_title, fontsize=24)
    ax.set_xlabel("UMAP1", fontsize=18)
    ax.set_ylabel("UMAP2", fontsize=18)
    ax.tick_params(axis="both", which="both", labelsize=18)

## Clustering

In [None]:
# Cluster the UMAP embedding computed above with HDBSCAN.
# Alternative left disabled by the author: cluster on a dedicated embedding from
# umap.UMAP(n_neighbors=30, min_dist=0.0, n_components=2, random_state=42)
# fitted on features.dropna(axis=1), instead of reusing the plotting embedding.
hdb_cluster = hdbscan.HDBSCAN(min_samples=100, min_cluster_size=60)
hdb_cluster.fit(umap_embeddings)

# One integer label per embedded point.
cluster_labels = hdb_cluster.labels_

In [None]:
# Show the HDBSCAN result on the UMAP embedding: points without a cluster are
# drawn in light grey first, clustered points are drawn on top coloured by id.
# HDBSCAN marks noise with the label -1, so non-negative labels are members.
clustered = cluster_labels >= 0

# Carry the labels as a named column so the colours are tied to rows by the
# frame itself instead of relying on a bare array lining up positionally.
df_cluster_plot = df_umap_embeddings.assign(cluster=cluster_labels)

fig, ax = plt.subplots(figsize=(10, 10))

sns.scatterplot(
    data=df_cluster_plot[~clustered],
    x="UMAP1",
    y="UMAP2",
    ax=ax,
    s=8,
    alpha=0.8,
    color="lightgrey",
    linewidth=0,
)

sns.scatterplot(
    data=df_cluster_plot[clustered],
    x="UMAP1",
    y="UMAP2",
    ax=ax,
    s=8,
    alpha=0.25,
    hue="cluster",
    linewidth=0,
    palette="tab20",
)

# Match the class-label figure: titled legend placed outside the axes so it
# does not cover points, a figure title, and the same enlarged label sizes.
ax.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.0)
ax.set_title("HDBSCAN Clusters on MorphoPy UMAP Embedding", fontsize=24)
ax.set_xlabel("UMAP1", fontsize=18)
ax.set_ylabel("UMAP2", fontsize=18)
ax.tick_params(axis="both", which="both", labelsize=18)