In [1]:
import os

import numpy as np
import yaml
import zarr
from pydantic import TypeAdapter

from organelle_mapping.config import DataConfig
from organelle_mapping import utils


In [2]:
# Input/output locations used by the cells below. The relative paths are
# resolved against the kernel's current working directory.
LABELS = "../label.yaml"  # YAML file parsed into the label table
DATA = "../data_8nm_org+mem.yaml"  # YAML file validated as a DataConfig
TGT_FILE = "organelle_content.txt"  # text report, opened for writing (overwritten)

In [3]:
# Parse the label table. The context manager closes the file handle as soon
# as parsing is done instead of leaving it to the garbage collector.
with open(LABELS) as labels_fh:
    labels = yaml.safe_load(labels_fh)

# Parse the dataset description and validate it against the DataConfig schema.
with open(DATA) as data_fh:
    data = TypeAdapter(DataConfig).validate_python(yaml.safe_load(data_fh))

# The report handle intentionally stays open here: the final cell of the
# notebook writes to it and is responsible for closing it.
tgt_file = open(TGT_FILE, "w")

In [4]:
def _split_by_member_count(label_table):
    """Partition a label table by the length of each entry.

    Returns ``(single, multiple)``: entries whose value has exactly one
    element, and entries whose value has more than one. Entries with an
    empty value end up in neither dictionary.
    """
    single, multiple = {}, {}
    for name, members in label_table.items():
        n_members = len(members)
        if n_members == 1:
            single[name] = members
        elif n_members > 1:
            multiple[name] = members
    return single, multiple


atomic_labels, compound_labels = _split_by_member_count(labels)

In [5]:
# Resolution requested from utils.find_target_scale for the crop-size lookup.
# Presumably nanometres, matching the "8nm" in the DATA file name - TODO confirm.
TARGET_RESOLUTION = {"x": 8, "y": 8, "z": 8}


def _present_fractions(zarr_crop, label_names):
    """Return ``(label, fraction)`` pairs for the labels present in a crop.

    For every name in ``label_names`` the ``complement_counts`` annotation of
    the label's ``s0`` array is read. Labels whose ``present`` count is missing
    or not positive are skipped; for the others the fraction is the ``present``
    count divided by the number of elements of the ``s0`` array.

    The result follows the iteration order of ``label_names``.
    """
    fractions = []
    for label in label_names:
        label_zarr = zarr_crop[label]["s0"]
        counts = label_zarr.attrs["cellmap"]["annotation"]["complement_counts"]
        if "present" in counts and counts["present"] > 0:
            fractions.append(
                (label, float(counts["present"] / np.prod(label_zarr.shape))))
    return fractions


# Write one section per dataset: a header, then for every crop its size at the
# target scale followed by the fraction of each annotated label.
# try/finally guarantees the report is flushed and closed even when a crop
# cannot be read.
try:
    for dataset, datainfo in data.datasets.items():
        tgt_file.write(dataset + "\n")
        tgt_file.write("-" * 20 + "\n")
        for crop in datainfo.labels.crops:
            # A crop entry may hold several comma-separated crop names.
            for c in crop.split(","):
                zarr_crop = zarr.open(os.path.join(datainfo.labels.data,
                                                   datainfo.labels.group, c),
                                      mode="r")
                annotated_labels = zarr_crop.attrs["cellmap"]["annotation"][
                    "class_names"]

                # The first annotated label serves as the reference for the
                # crop size at the target scale.
                reference = zarr_crop[annotated_labels[0]]
                zg, _, _, _ = utils.find_target_scale(reference,
                                                      TARGET_RESOLUTION)
                size = int(np.prod(reference[zg].shape))
                tgt_file.write(f"{c}: {size:.2e}\n")

                # Sort the label names before iterating so that ties (and the
                # compound labels below) come out in a reproducible order
                # instead of the arbitrary iteration order of a set.
                composition = _present_fractions(
                    zarr_crop,
                    sorted(set(atomic_labels).intersection(annotated_labels)))
                composition.sort(key=lambda item: item[1], reverse=True)

                total = sum(fraction for _, fraction in composition)
                if not np.isclose(total, 1.0):
                    # Surface the mismatch in the notebook output; the report
                    # file itself keeps its format.
                    print(f"{dataset}/{c}: Warning: sum of fractions is not "
                          f"1.0 ({total:.2f})")
                    # The atomic labels do not account for the whole crop, so
                    # list the annotated compound labels after them.
                    composition.extend(
                        _present_fractions(
                            zarr_crop,
                            sorted(
                                set(compound_labels).intersection(
                                    annotated_labels))))

                for label, fraction in composition:
                    tgt_file.write(
                        f"\t{label}: {fraction*100:.2f}%\n".expandtabs(4))
        tgt_file.write("-" * 60 + "\n")
finally:
    tgt_file.close()
