In [1]:
from pathlib import Path
from typing import Sequence, Any
from fibsem_tools import read
import matplotlib.pyplot as plt

In [2]:

def get_nested_attr(attrs, key: str | Sequence[str | int]) -> Any:
    key_list: Sequence[str | int]
    if isinstance(key, str):
        key_list = key.split("/")
    else:
        key_list = key
    if len(key_list) == 1:
        return attrs[key_list[0]]
    else:
        return get_nested_attr(attrs[key_list[0]], key_list[1:])

def get_present_count(size, counts, possibilities) -> int:
    """Derive the "present" count as ``size`` minus all other counted states.

    Parameters
    ----------
    size
        Total count that the individual states are subtracted from.
    counts
        Container supporting ``in`` and ``[]`` that maps a state name to its
        count. States missing from it contribute zero.
    possibilities
        Iterable of state names. The ``"present"`` entry is skipped, and the
        iterable is left unmodified, so callers can safely reuse it.

    Returns
    -------
    int
        ``size`` minus the summed counts of every non-"present" state.
    """
    # Skip "present" instead of removing it: removal mutated the caller's
    # collection and raised when the entry was missing.
    not_present_sum = sum(
        counts[possibility]
        for possibility in possibilities
        if possibility != "present" and possibility in counts
    )
    return size - not_present_sum

In [3]:
# Root directory holding one sub-directory per dataset.
# (An alternative root used previously: /groups/cellmap/cellmap/data)
datadir = Path("/nrs/cellmap/data")

# Ground-truth container, addressed relative to each dataset directory.
gt_container = "staging/groundtruth.zarr"

# Fly datasets mapped to the path of their raw EM array inside
# "<dataset>/<dataset>.zarr".
fly_data = {
    "jrc_mb-1a": "recon-1/em/fibsem-uint8",
    "jrc_fly-vnc-1": "recon-1/em/fibsem-int16",
}

In [4]:
# Discover the ground-truth crops available for every fly dataset.
# `crops` maps dataset name -> sorted list of entry names starting with "crop".
crops = {}
for fly_ds, raw_ds in fly_data.items():
    ds_path = datadir / fly_ds
    # Fail with the offending path rather than a bare, message-less assert.
    if not ds_path.exists():
        raise FileNotFoundError(f"Dataset directory not found: {ds_path}")
    gt_path = ds_path / gt_container
    # Location of the dataset's raw EM array; not referenced by the other
    # cells visible in this notebook, kept so the name stays defined.
    raw_path = ds_path / f"{fly_ds}.zarr" / raw_ds
    # iterdir() yields entries in arbitrary order; sort so reruns are reproducible.
    crops[fly_ds] = sorted(
        entry.name for entry in gt_path.iterdir() if entry.name.startswith("crop")
    )



In [5]:
# Collect the annotated class names at three levels of one flat dict:
#   annotated_classes[<crop name>]    -> classes listed in that crop's attributes
#   annotated_classes[<dataset name>] -> union over the dataset's crops
#   annotated_classes["total"]        -> union over all datasets
annotated_classes = {"total": set()}
for fly_ds, crop_list in crops.items():
    annotated_classes[fly_ds] = set()
    for crop in crop_list:
        crop_arr = read(datadir / fly_ds / gt_container / crop)
        class_names = get_nested_attr(
            crop_arr.attrs, ["cellmap", "annotation", "class_names"]
        )
        annotated_classes[crop] = set(class_names)
        # Fold this crop's classes into the running per-dataset union.
        annotated_classes[fly_ds].update(annotated_classes[crop])
    # Fold the finished dataset union into the overall union.
    annotated_classes["total"].update(annotated_classes[fly_ds])
        


In [6]:
# Tally the "present" count of every annotated class per crop, per dataset
# and overall. All three levels share one flat dict keyed by crop name,
# dataset name, or "total".
present_counts = {"total": {k: 0 for k in annotated_classes["total"]}}
for fly_ds, crop_list in crops.items():
    present_counts[fly_ds] = {k: 0 for k in annotated_classes[fly_ds]}
    for crop in crop_list:
        present_counts[crop] = {k: 0 for k in annotated_classes[crop]}
        crop_arr = read(datadir / fly_ds / gt_container / crop)
        for lbl in annotated_classes[crop]:
            # Look up the "s0" array and its annotation attributes once per
            # label instead of re-resolving them for every value that is read.
            s0_arr = crop_arr[lbl]["s0"]
            annotation_attrs = get_nested_attr(s0_arr.attrs, ["cellmap", "annotation"])
            counts = annotation_attrs["complement_counts"]
            # Fresh set per label, so the helper receives its own collection.
            possibilities = set(annotation_attrs["annotation_type"]["encoding"].keys())
            present = get_present_count(s0_arr.size, counts, possibilities)
            present_counts[crop][lbl] += present
            present_counts[fly_ds][lbl] += present
            present_counts["total"][lbl] += present

In [7]:
# Same layout as `present_counts`, keeping only labels with a positive count
# and excluding the "cyto" label.
present_counts_nozeros_and_cyto = {
    unit: {
        lbl: count
        for lbl, count in unit_counts.items()
        if count > 0 and lbl != "cyto"
    }
    for unit, unit_counts in present_counts.items()
}

In [82]:
def plot_hist(units):
    """Draw a bar chart of per-label "present" counts for one or more units.

    Parameters
    ----------
    units : str or sequence of str
        Key(s) of ``present_counts_nozeros_and_cyto`` to plot. A single
        string is treated as a one-element list.

    Notes
    -----
    With exactly one unit, every bar is annotated with its count. With
    several units the bars share the same axes, so they are drawn
    semi-transparent and a legend identifies each unit.
    """
    if isinstance(units, str):
        # Iterating a bare string would look up each character as a unit.
        units = [units]
    overlay = len(units) > 1

    plt.figure(figsize=(20, 6))
    for unit in units:
        unit_counts = present_counts_nozeros_and_cyto[unit]
        lbls = list(unit_counts.keys())
        lblcounts = [unit_counts[lbl] for lbl in lbls]
        bars = plt.bar(
            lbls,
            lblcounts,
            width=0.9,
            align='center',
            label=unit,
            # Opaque bars would hide the series drawn before them.
            alpha=0.6 if overlay else None,
        )
        if len(units) == 1:
            # Place each count slightly above its bar (1% of the tallest bar).
            for bar, value in zip(bars, lblcounts):
                plt.text(bar.get_x() + bar.get_width() / 2, bar.get_height()+0.01 * max(lblcounts), f'{value:,}', ha='center', color='black', fontsize=8, fontweight='bold')
    if overlay:
        plt.legend()
    plt.ylabel("present count")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()


In [84]:
plot_hist(["total"])