In [41]:
# ruff: noqa
import argparse
import logging
import os
from os.path import join
import sys
import warnings

from spatialdata import read_zarr

sys.path.insert(1, "/dss/dsshome1/0C/ra98gaq/Git/cellseg-benchmark")

from cellseg_benchmark import sdata_utils as su

In [42]:
#warnings.filterwarnings("ignore")

In [43]:
# Notebook-local logger for this workflow.
# logging.getLogger returns the same object for the same name, so re-running
# this cell in a live kernel would otherwise stack another StreamHandler on it
# and every record would be printed once per attached handler.
logger = logging.getLogger("shape_mapping")
logger.setLevel(logging.INFO)
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s]: %(message)s"))
    logger.addHandler(handler)

In [44]:
def get_args(test_args=None):
    """Parse the command-line arguments used to build a master sdata.

    Args:
        test_args: Optional list of argument strings to parse instead of the
            process command line (useful inside Jupyter). When ``None``,
            argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace holding ``sample``, ``data_path``, ``zmode``,
        ``data_dir``, ``n_ficture``, ``run_date``, ``organism``, ``slide``,
        ``region``, ``cohort`` and ``obs`` (list of raw ``KEY=VAL`` strings).
    """
    parser = argparse.ArgumentParser(
        description="Creates a master sdata for a given sample, containing multiple segmentation results."
    )

    # Required positionals, in command-line order.
    parser.add_argument("sample", help="Sample name.")
    parser.add_argument(
        "data_path",
        help="Path to folder with merscope output data (e.g. /cohort1/slide2/region0).",
    )
    parser.add_argument(
        "zmode",
        choices=["z3"],
        help="Mode of master sdata. Either 'z3' or '3d' (currently only z3 is implemented).",
    )
    parser.add_argument("data_dir", help="Output data folder.")

    parser.add_argument(
        "--n_ficture",
        type=int,
        default=21,
        help="Consider Ficture model with n_ficture factors.",
    )

    # Optional string-valued metadata flags; all default to None.
    optional_metadata = (
        ("--run_date", "run date (YYYYMMDD)."),
        ("--organism", "organism."),
        ("--slide", "slide."),
        ("--region", "region."),
        ("--cohort", "cohort."),
    )
    for flag, help_text in optional_metadata:
        parser.add_argument(flag, type=str, help=help_text, default=None)

    parser.add_argument(
        "--obs",
        action="append",
        default=[],
        metavar="KEY=VAL",
        help="Extra covariates to add to adata.obs (repeatable), e.g. --obs tissue=brain.",
    )

    # parse_args(None) is the same as parse_args(): it falls back to sys.argv[1:].
    return parser.parse_args(test_args)

In [13]:
# Simulate CLI arguments inside Jupyter (sample SynergyLung_s1_r0, condition "stroke").
# NOTE: a later cell assigns `args` again for a different sample; whichever of
# the two cells ran last decides what the rest of the notebook operates on.
args = get_args(
    [
        # positionals: sample, data_path, zmode, data_dir
        "SynergyLung_s1_r0",
        "/dss/dssfs03/pn52re/pn52re-dss-0000/Synergy-projects-temp/MouseLung-Sijia/output-20250704_AGLiesz-Sijia-MouseLung-Slide1-Stroke_VMSC03901/region_R1-Stroke",
        "z3",
        "/dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark",
        # optional flags, each grouped with its value
        *("--cohort", "SynergyLung"),
        *("--slide", "1"),
        *("--region", "0"),
        *("--organism", "mouse"),
        *("--run_date", "20250704"),
        *("--obs", "condition=stroke"),
    ]
)

In [45]:
# Simulate CLI arguments inside Jupyter (sample SynergyLung_s2_r0, condition "control").
# NOTE: this assignment replaces any `args` produced by an earlier cell.
args = get_args(
    [
        # positionals: sample, data_path, zmode, data_dir
        "SynergyLung_s2_r0",
        "/dss/dssfs03/pn52re/pn52re-dss-0000/Synergy-projects-temp/MouseLung-Sijia/output-20250718_AGLiesz-Sijia-MouseLung-Slide2-Control_VMSC03901/region_R1-Control",
        "z3",
        "/dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark",
        # optional flags, each grouped with its value
        *("--cohort", "SynergyLung"),
        *("--slide", "2"),
        *("--region", "0"),
        *("--organism", "mouse"),
        *("--run_date", "20250718"),
        *("--obs", "condition=control"),
    ]
)

In [46]:
args

Namespace(sample='SynergyLung_s2_r0', data_path='/dss/dssfs03/pn52re/pn52re-dss-0000/Synergy-projects-temp/MouseLung-Sijia/output-20250718_AGLiesz-Sijia-MouseLung-Slide2-Control_VMSC03901/region_R1-Control', zmode='z3', data_dir='/dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark', n_ficture=21, run_date='20250718', organism='mouse', slide='2', region='0', cohort='SynergyLung', obs=['condition=control'])

In [47]:
# Spellings that are treated as "no value" when they arrive from the command line.
NONES = {None, "", "None", "none", "null", "NULL"}

# Collapse any such placeholder in the optional metadata fields to a real None.
for field in ("organism", "slide", "region", "cohort"):
    if getattr(args, field) not in NONES:
        continue
    setattr(args, field, None)

In [48]:
# Turn the repeatable "--obs KEY=VAL" entries into a dict of extra covariates.
# Only the first "=" separates key from value, so values may contain "=".
# Values spelled like a null placeholder (see NONES) become None.
extra_obs = {}
for kv in args.obs:
    key, sep, value = kv.partition("=")
    if not sep or not key:
        # Fail with a message that names the offending entry instead of an
        # opaque tuple-unpacking error (or a silently accepted empty key).
        raise ValueError(f"--obs expects KEY=VAL, got {kv!r}")
    extra_obs[key] = None if value in NONES else value

In [49]:
extra_obs

{'condition': 'control'}

In [51]:
# Log the step, then run the project's MERSCOPE processing helper for this
# sample using the parsed CLI values.
logger.info("Importing images and points...")
su.process_merscope(
    args.sample,
    args.data_dir,
    args.data_path,
    zmode=args.zmode,
)

2025-10-20 20:18:23,523 [INFO]: Importing images and points...
2025-10-20 20:18:23,523 [INFO]: Importing images and points...


[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark/samples/SynergyLung_s2_r0/[0m[95msdata_z3.zarr[0m             


version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)


In [52]:
# Per-sample working directory: <data_dir>/samples/<sample>.
sdata_path = os.path.join(args.data_dir, "samples", args.sample)
# Open the sdata_z3.zarr store located in that directory.
sdata_main = read_zarr(os.path.join(sdata_path, "sdata_z3.zarr"))

version mismatch: detected: RasterFormatV02, requested: FormatV04
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)


In [53]:
logger.info("Integrating segmentation data from available methods into main sdata...")
# Only work on methods with actual data available, i.e. entries of
# <sdata_path>/results that contain an "sdata.zarr" directory.
# os.listdir returns names in arbitrary order, so sort them to keep the
# processing order reproducible between runs and filesystems.
seg_methods = sorted(
    method
    for method in os.listdir(join(sdata_path, "results"))
    if os.path.isdir(join(sdata_path, "results", method, "sdata.zarr"))
)

2025-10-20 20:34:00,367 [INFO]: Integrating segmentation data from available methods into main sdata...
2025-10-20 20:34:00,367 [INFO]: Integrating segmentation data from available methods into main sdata...


In [54]:
seg_methods

['Cellpose_1_Merlin']

In [55]:
# Integrate the results of every method listed in `seg_methods` into `sdata_main`.
# The metadata arguments are the parsed (and None-normalised) CLI values; the
# KEY=VAL pairs collected in `extra_obs` are forwarded as additional keyword
# arguments.
# NOTE(review): presumably write_to_disk=True persists the additions to the
# Zarr store behind `sdata_main` — confirm in cellseg_benchmark.sdata_utils.
su.integrate_segmentation_data(
    sdata_path,
    seg_methods,
    sdata_main,
    run_date=args.run_date,
    organism=args.organism,
    slide=args.slide,
    region=args.region,
    cohort=args.cohort,
    write_to_disk=True,
    data_path=args.data_path,
    logger=logger,
    **extra_obs,
)

  0%|          | 0/1 [00:00<?, ?it/s]2025-10-20 20:34:00,395 [INFO]: Adding Cellpose_1_Merlin...
2025-10-20 20:34:00,395 [INFO]: Adding Cellpose_1_Merlin...
2025-10-20 20:34:08,300 [INFO]: Adding shapes of Cellpose_1_Merlin...
2025-10-20 20:34:08,300 [INFO]: Adding shapes of Cellpose_1_Merlin...
2025-10-20 20:34:09,350 [INFO]: Adding adata for Cellpose_1_Merlin...
2025-10-20 20:34:09,350 [INFO]: Adding adata for Cellpose_1_Merlin...
  self._check_key(key, self.keys(), self._shared_keys)
2025-10-20 20:34:14,615 [INFO]: calculate volume metrics Cellpose_1_Merlin
2025-10-20 20:34:14,615 [INFO]: calculate volume metrics Cellpose_1_Merlin
  return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
  return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
  return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
  return module_get_attr_redirect(attr_name, deprecated_mapping=_DEPRECATED)
  return module_get_attr_redirect(attr_name, depre

SpatialData object, with associated Zarr store: /dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark/samples/SynergyLung_s2_r0/sdata_z3.zarr
├── Images
│     └── 'SynergyLung_s2_r0_z3': DataTree[cyx] (8, 78385, 119545), (8, 39192, 59772), (8, 19596, 29886), (8, 9798, 14943), (8, 4899, 7471)
├── Points
│     └── 'SynergyLung_s2_r0_transcripts': DataFrame with shape: (<Delayed>, 10) (2D points)
├── Shapes
│     └── 'boundaries_Cellpose_1_Merlin': GeoDataFrame shape: (172011, 9) (2D shapes)
└── Tables
      └── 'adata_Cellpose_1_Merlin': AnnData (172011, 451)
with coordinate systems:
    ▸ 'global', with elements:
        SynergyLung_s2_r0_z3 (Images), SynergyLung_s2_r0_transcripts (Points), boundaries_Cellpose_1_Merlin (Shapes)
    ▸ 'micron', with elements:
        SynergyLung_s2_r0_z3 (Images), SynergyLung_s2_r0_transcripts (Points), boundaries_Cellpose_1_Merlin (Shapes)
    ▸ 'pixel', with elements:
        SynergyLung_s2_r0_z3 (Images), SynergyLung_s2_r0_transcripts (Points), bounda

In [None]:
#################

In [None]:
# test sdata

In [56]:
sdata_main.tables["adata_Cellpose_1_Merlin"].obs.columns

Index(['fov', 'volume', 'center_x', 'center_y', 'min_x', 'min_y', 'max_x',
       'max_y', 'anisotropy', 'transcript_count', 'perimeter_area_ratio',
       'Txnip_raw', 'Txnip_high_pass', 'Fth1_raw', 'Fth1_high_pass',
       'DAPI_raw', 'DAPI_high_pass', 'Scgb1a1_raw', 'Scgb1a1_high_pass',
       'Col1_raw', 'Col1_high_pass', 'Sftpc_raw', 'Sftpc_high_pass',
       'Ifitm3_raw', 'Ifitm3_high_pass', 'PolyT_raw', 'PolyT_high_pass',
       'region', 'slide', 'dataset_id', 'cells_region', 'cell_id',
       'spt_region', 'area', 'volume_sum', 'volume_final', 'num_z_planes',
       'size_normalized', 'surface_to_volume_ratio', 'sphericity', 'solidity',
       'elongation', 'condition', 'run_date', 'organism', 'cohort', 'sample'],
      dtype='object')

In [57]:
sdata_main.tables["adata_Cellpose_1_Merlin"].obs

Unnamed: 0_level_0,fov,volume,center_x,center_y,min_x,min_y,max_x,max_y,anisotropy,transcript_count,...,size_normalized,surface_to_volume_ratio,sphericity,solidity,elongation,condition,run_date,organism,cohort,sample
EntityID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3032071400013100126,840,2059.148908,7216.183057,479.060704,7207.846355,470.933895,7224.305598,486.959026,1.031700,3,...,14.003907,0.256930,0.970690,0.994144,0.032101,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400014100180,918,2139.125086,7504.486833,498.349751,7496.258106,489.906420,7512.488711,506.379802,1.009113,3,...,14.273269,0.250883,0.979986,1.000000,0.070396,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400016100184,1155,1993.494412,8556.587687,298.372080,8549.290734,290.198104,8565.302442,306.362748,1.135827,569,...,13.778846,0.273867,0.882477,0.990295,0.042211,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400016100195,1155,1507.780768,8632.307395,307.804110,8625.148066,300.586582,8638.940023,314.807612,1.084933,1,...,11.983245,0.309654,0.912657,0.983984,0.109678,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400016100200,1155,1870.495178,8620.070741,312.002451,8611.488222,304.131019,8628.272423,319.113140,1.213755,124,...,13.347000,0.277245,0.917727,0.980912,0.163323,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3032071400416100177,1116,2344.589468,8437.787779,8130.221066,8428.794190,8121.712069,8446.369621,8138.413806,1.076666,3,...,14.943033,0.243381,0.950079,0.987811,0.033608,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400416100183,1116,1980.095639,8309.034979,8145.903951,8301.228004,8137.594659,8316.478445,8154.072963,1.057263,5,...,13.732463,0.262205,0.969239,0.995752,0.046148,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400416100184,1116,1502.556981,8424.599640,8144.706371,8418.229115,8137.784821,8431.230806,8151.444653,1.068816,3,...,11.962469,0.303448,0.953675,0.992073,0.044607,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
3032071400416100188,1116,2648.907869,8360.541735,8154.771498,8351.756597,8143.784879,8369.405725,8164.687340,1.146514,4,...,15.883228,0.236730,0.888841,0.974470,0.026813,control,20250718,mouse,SynergyLung,SynergyLung_s2_r0
