# Display morphology population features

Copyright (c) 2025 Open Brain Institute

Authors: Michael W. Reimann

last modified: 01.2025

## Summary
This notebook analyzes a list of neuron morphology files. It extracts neurite or morphology features from them and displays the distribution of their values as a histogram. 
For details, see the [README](README.md).

## Imports and setting up platform authentication

Please follow the displayed instructions to authenticate.

In [None]:
from obi_auth import get_token
from entitysdk.client import Client
from entitysdk.models import ReconstructionMorphology
import pandas as pd

import neurom
import os

from ipywidgets import widgets
from neurom.check.runner import CheckRunner
from matplotlib import pyplot as plt

# Platform environment, defined once so that the authentication call and the
# client below always use the same value.
PLATFORM_ENVIRONMENT = "staging"

# Request an authentication token (see the instructions displayed by this cell).
token = get_token(environment=PLATFORM_ENVIRONMENT, auth_mode="daf")
# Client object used by the following cells to search and download entities.
client = Client(environment=PLATFORM_ENVIRONMENT, token_manager=token)

## Define functions for accessing the data
Here, we define helper functions for downloading selected assets from the platform.

In [None]:
def dataframe_from_entity_ids(entity_ids):
    """Fetch morphology entities by ID and tabulate them, one row per entity.

    Every entity is retrieved through the module-level ``client`` as a
    ``ReconstructionMorphology`` and flattened with ``model_dump()``.
    Dict-valued fields that contain a ``"name"`` key are replaced by that name;
    all other values are kept unchanged. The class of the retrieved entity is
    stored in an additional ``"entity_type"`` column.

    Args:
        entity_ids: Iterable of entity IDs to retrieve.

    Returns:
        pandas DataFrame with one row per requested entity.
    """
    def simplify(value):
        # Nested records are represented by their name when they have one.
        if isinstance(value, dict) and 'name' in value:
            return value['name']
        return value

    def to_record(entity):
        record = {}
        for field, value in entity.model_dump().items():
            record[field] = simplify(value)
        record["entity_type"] = type(entity)
        return record

    records = []
    for entity_id in entity_ids:
        entity = client.get_entity(entity_id=entity_id,
                                   entity_type=ReconstructionMorphology)
        records.append(to_record(entity))
    return pd.DataFrame(records)

def get_matching_asset_types(df_in, match="all"):
    """Determine which asset content types are available across entities.

    Args:
        df_in: DataFrame with one row per entity and an "assets" column that
            holds, per entity, an iterable of dicts with a "content_type" key.
        match: "all" returns only the content types present for every entity
            (intersection); "any" returns the content types present for at
            least one entity (union).

    Returns:
        Sorted list of content types. "application/h5" is always excluded.

    Raises:
        AssertionError: If ``df_in`` has no rows.
        ValueError: If ``match`` is neither "all" nor "any".
    """
    assert len(df_in) >= 1, "At least one entity required!"
    # Validate before combining, so that an invalid value is also rejected
    # when there is only a single entity (and hence nothing to combine).
    if match not in ("all", "any"):
        raise ValueError(f"Unknown value for match: {match!r}")

    per_entity_all_types = df_in["assets"].apply(
        lambda _x: {__x["content_type"] for __x in _x}
    )
    # set.intersection / set.union build a new set, so none of the per-entity
    # sets is modified while combining.
    if match == "all":
        extensions = set.intersection(*per_entity_all_types)
    else:
        extensions = set.union(*per_entity_all_types)
    extensions.discard("application/h5")
    # Sorted for a reproducible order (set iteration order is arbitrary).
    return sorted(extensions)

def filter_to_asset_type(df_in, target_type):
    """Keep only the entities that provide an asset of the given content type.

    Args:
        df_in: DataFrame with one row per entity and an "assets" column that
            holds, per entity, an iterable of dicts with "content_type" and
            "id" keys. The input is not modified.
        target_type: Content type to look for, e.g. "application/asc".

    Returns:
        Copy of the rows of ``df_in`` that have a matching asset, with three
        additional columns:
          - "has_asset": always True in the returned rows.
          - "load_asset_id": "id" of the first matching asset.
          - "load_asset_extension": last component of ``target_type`` as split
            by ``os.path.split`` (e.g. "asc" for "application/asc").
        An empty input yields an empty frame that has these columns.
    """
    extension = os.path.split(target_type)[1]

    # An empty frame may lack the "assets" column altogether; there is nothing
    # to inspect in that case.
    assets_per_entity = df_in["assets"] if len(df_in) > 0 else []

    has_asset = []
    asset_ids = []
    for assets in assets_per_entity:
        matches = [
            _asset["id"] for _asset in assets
            if _asset["content_type"] == target_type
        ]
        has_asset.append(len(matches) > 0)
        # -1 marks "no asset"; such rows are dropped by the filter below.
        asset_ids.append(matches[0] if len(matches) > 0 else -1)

    # Assigning to a copy (rather than concatenating new columns) overwrites
    # existing helper columns, so the function can be re-applied to its own
    # output without producing duplicate column names.
    df_out = df_in.copy()
    df_out["has_asset"] = pd.Series(has_asset, index=df_in.index, dtype=bool)
    df_out["load_asset_id"] = asset_ids
    df_out["load_asset_extension"] = extension
    return df_out.loc[df_out["has_asset"]]

def load_morphology_assets_from_df(df_in, client):
    """Download the selected asset of every entity and parse it with neurom.

    Args:
        df_in: DataFrame with one row per entity and the columns "id",
            "entity_type", "load_asset_id" and "load_asset_extension".
            The input is not modified.
        client: Object providing ``download_content(entity_id=, entity_type=,
            asset_id=)`` that returns the asset content as bytes.

    Returns:
        Copy of ``df_in`` with an additional "_morphology" column that holds
        the object returned by ``neurom.load_morphology`` for each row.
    """
    def per_row(row):
        raw = client.download_content(
            entity_id=row["id"],
            entity_type=row["entity_type"],
            asset_id=row["load_asset_id"]
        )
        # The downloaded bytes are decoded and handed to neurom as text; the
        # asset extension tells neurom which reader to use.
        return neurom.load_morphology(
            raw.decode("UTF-8"),
            reader=row["load_asset_extension"]
        )

    # Work on a copy: the input is typically a filtered slice of another frame,
    # and writing a column into it would modify the caller's data (and trigger
    # pandas' SettingWithCopyWarning).
    df_out = df_in.copy()
    # An explicit loop (instead of DataFrame.apply) also works for an empty
    # frame, where apply(axis=1) returns a DataFrame rather than a Series.
    morphologies = [per_row(row) for _, row in df_in.iterrows()]
    df_out["_morphology"] = pd.Series(morphologies, index=df_in.index, dtype=object)
    return df_out

def to_pref_label(row):
    """Return a copy of ``row`` with its entries reduced to simple labels.

    For each entry:
      - a dict, or a non-empty list whose first element is a dict, is replaced
        by the "pref_label" of that dict ("None" if the key is missing);
      - any other entry is replaced by its string representation, unless its
        label starts with "_", in which case it is left untouched.

    The input row is not modified.
    """
    result = row.copy()
    for key in result.index:
        value = result[key]
        # A non-empty list is represented by its first element.
        if isinstance(value, list) and len(value) > 0:
            head = value[0]
        else:
            head = value
        if isinstance(head, dict):
            result[key] = head.get("pref_label", "None")
            continue
        if key.startswith("_"):
            continue
        result[key] = str(value)
    return result


## Select entities to analyze

Here, we define which morphology entities to load and analyze.

Please fill in the unique IDs of the selected entities into the empty list below!

In [None]:
entity_ids = [
    # FILL IN THE IDs OF THE SELECTED MORPHOLOGY ENTITIES HERE!
]

# Number of entities that are selected automatically when the list above is empty.
N_DEMO_ENTITIES = 10

if len(entity_ids) == 0:
    # For demonstration purposes: take the first N_DEMO_ENTITIES search results.
    for _item in client.search_entity(entity_type=ReconstructionMorphology):
        entity_ids.append(_item.id)
        # Stop right after the last required entity, without requesting more results.
        if len(entity_ids) >= N_DEMO_ENTITIES:
            break

entities = dataframe_from_entity_ids(entity_ids)

Next, we download the `.asc` file of each selected morphology and load it into the DataFrame.

In [None]:
# Keep only the entities that provide an ASC asset, then download and parse it.
entities_with_asc = filter_to_asset_type(entities, 'application/asc')
morphologies_df = load_morphology_assets_from_df(entities_with_asc, client)
# Reduce each metadata entry to its "pref_label" or its string representation.
contents = morphologies_df.apply(to_pref_label, axis=1)

display(contents.head())

Next, we define helper functions that extract feature values and bin them into histograms for plotting.

In [None]:
import pandas, numpy

def lookup(tp_feature, str_feature):
    """Create a per-row function that evaluates a neurom feature.

    Args:
        tp_feature: 1 to evaluate the feature once on the whole morphology;
            any other value evaluates it separately on every neurite.
        str_feature: Name of the feature, passed to ``neurom.features.get``.

    Returns:
        Function that takes a row holding the loaded morphology under
        "_morphology" and returns a DataFrame with one row per feature value
        in the column "value". Every row entry whose label does not start with
        "_" is copied into a column of the same name. In per-neurite mode, a
        "neurite_type" column is added; a morphology without neurites yields
        an empty DataFrame with the same columns.
    """
    def inner_func(obj, row, **kwargs):
        vals = neurom.features.get(str_feature, obj)
        if not hasattr(vals, "__iter__"):
            # Single value: wrap it so that it yields a one-row table.
            vals = [vals]
        ret = pandas.DataFrame({"value": vals})
        for idx in row.index:
            if not idx.startswith("_"):
                ret[idx] = row[idx]
        for k, v in kwargs.items():
            ret[k] = v
        return ret

    def func(row):
        m = row["_morphology"]
        if tp_feature == 1:
            return inner_func(m, row)
        out = [
            inner_func(_nrt, row, neurite_type=str(_nrt.type))
            for _nrt in m.neurites
        ]
        if len(out) == 0:
            # pandas.concat raises on an empty list; return an empty table
            # with the expected columns so that one morphology without
            # neurites does not abort the analysis of the whole population.
            public_cols = [idx for idx in row.index if not idx.startswith("_")]
            return pandas.DataFrame(columns=["value"] + public_cols + ["neurite_type"])
        return pandas.concat(out, axis=0)
    return func

def histogram_series(data, **kwargs):
    """Histogram ``data`` and return the counts indexed by bin center.

    All keyword arguments are forwarded to ``numpy.histogram``.
    """
    counts, edges = numpy.histogram(data, **kwargs)
    # Midpoint between the lower and upper edge of each bin.
    centers = 0.5 * (edges[:-1] + edges[1:])
    return pandas.Series(counts, index=centers)

Select the morphology feature to display from the dropdown menu.

In [None]:
# Each option is labelled with the feature name; its value is a
# (feature kind, feature name) pair, with kind 0 for neurite features and
# kind 1 for morphology features.
neurite_options = [
    (name, (0, name)) for name in neurom.features._NEURITE_FEATURES.keys()
]
morphology_options = [
    (name, (1, name)) for name in neurom.features._MORPHOLOGY_FEATURES.keys()
]
function = widgets.Dropdown(
    options=neurite_options + morphology_options,
    description='Feature',
)
display(function)

Select from the dropdown menu the parameter by which to group the data (or "None" for no grouping).

Use the slider to select the number of bins to show.

In [None]:
# Evaluate the selected feature for every morphology and stack the resulting
# per-morphology tables into a single one.
feature_func = lookup(*function.value)
per_morphology = contents.apply(feature_func, axis=1)
data_df = pandas.concat(per_morphology.values, axis=0)
# Constant column that serves as the "no grouping" choice below.
data_df["None"] = "None"

# Every column except the feature value itself can be used for grouping.
grouping_columns = [name for name in data_df.columns if name != "value"]
grouper = widgets.Dropdown(options=grouping_columns, description="Grouped by")
nbins = widgets.IntSlider(value=10, min=5, max=100, step=1)
display(grouper, nbins)

In [None]:
# Bin edges are shared by all groups so that the stacked bars line up.
value_min = data_df["value"].min()
value_max = data_df["value"].max()
span = value_max - value_min
if span == 0:
    # All values are identical: the bins (and hence the bars) would have zero
    # width. Use a range of unit width centered on the value instead, as
    # numpy.histogram does by default for such data.
    value_min, value_max, span = value_min - 0.5, value_max + 0.5, 1.0
# The upper edge is extended by a tiny fraction of the span
# (presumably so that the maximum lies strictly inside the last bin).
bins = numpy.linspace(value_min, value_max + 1E-9 * span, nbins.value + 1)
w = numpy.mean(numpy.diff(bins)) * 0.8  # bars cover 80% of the bin width
# One column of counts per group, indexed by bin center.
hist_df = data_df.groupby(grouper.value)["value"].apply(histogram_series, bins=bins).unstack(grouper.value)

fig, ax = plt.subplots(figsize=(6, 3))
ax.set_frame_on(False)

# Stack the counts of the groups on top of each other.
bot = numpy.zeros(len(hist_df))
for col in hist_df.columns:
    ax.bar(hist_df.index, hist_df[col].values, bottom=bot, label=col, width=w)
    bot += hist_df[col].values
ax.set_xlabel(function.value[1])
ax.set_ylabel("Count")
if len(hist_df.columns) < 10:
    # The legend is only shown when there are few groups.
    ax.legend(title=grouper.value)