1. Load the MERSCOPE (MERFISH) output and export it as a SpatialData object.  
Two variants are written: an sdata containing the transcripts plus
    1. images from z=3 plane
    2. images from all z planes

2. Take the boundaries and adata files from each segmentation approach and add them to the main sdata.

In [1]:
import os
import json
import pandas as pd
import spatialdata as sd
import spatialdata_io



In [2]:
data_dir = os.path.abspath("../home/data")
!ls -l $data_dir

lrwxrwxrwx 1 ra98gaq pn52ra 35 Feb  4 15:06 /home/data -> /dss/dssfs03/pn52re/pn52re-dss-0001


In [3]:
archive_dir = os.path.abspath("../home/archive")
!ls -l $archive_dir

lrwxrwxrwx 1 ra98gaq pn52ra 35 Feb  4 17:22 /home/archive -> /dss/dssfs03/pn52re/pn52re-dss-0000


In [4]:
# Read the JSON file that maps each sample name to its MERSCOPE output path.
sample_paths_file = os.path.join(data_dir, "cellseg-benchmark", "sample_paths.json")
with open(sample_paths_file) as handle:
    sample_paths = json.load(handle)

# Create sdatas from Merscope output

In [5]:
import os
import spatialdata_io

def process_merscope(sample_name, data_dir, sample_paths, zmode):
    """Export one MERSCOPE sample to ``sdata_<zmode>.zarr`` unless the store exists.

    Args:
        sample_name: Sample identifier. Its first two "_"-separated parts are
            passed on as the slide name and the third part as the region name.
        data_dir: Base directory; the store is written to
            ``<data_dir>/cellseg-benchmark/samples/<sample_name>/``.
        sample_paths: Mapping from sample name to the MERSCOPE output path.
        zmode: ``"z3"`` reads only z layer 3, ``"3d"`` reads z layers 0-6.

    Raises:
        ValueError: If ``zmode`` is neither ``"z3"`` nor ``"3d"``.
    """
    valid_zmodes = {"z3", "3d"}
    if zmode not in valid_zmodes:
        raise ValueError(f"Invalid zmode: {zmode}")

    store_name = f"sdata_{zmode}.zarr"
    sdata_file = os.path.join(
        data_dir, "cellseg-benchmark", "samples", sample_name, store_name
    )

    # An existing store is never touched, so re-running the notebook is cheap.
    if os.path.exists(sdata_file):
        print(f"Skipping {sample_name}: {zmode} file already exists")
        return

    source_path = sample_paths[sample_name]
    if zmode == "z3":
        z_layers = 3
    else:
        z_layers = range(7)
    slide_name = "_".join(sample_name.split("_")[:2])
    region_name = sample_name.split("_")[2]

    # Only images and transcripts are loaded here; boundaries and tables are
    # switched off.
    sdata = spatialdata_io.merscope(
        source_path,
        z_layers=z_layers,
        backend=None,
        cells_boundaries=False,
        cells_table=False,
        mosaic_images=True,
        transcripts=True,
        slide_name=slide_name,
        region_name=region_name,
    )

    target_dir = os.path.dirname(sdata_file)
    os.makedirs(target_dir, exist_ok=True)
    sdata.write(sdata_file, overwrite=False)

In [6]:
# Export the single sample foxf2_s2_r1 in both z configurations
# ("z3": z layer 3 only, "3d": all z layers).
sample_name = "foxf2_s2_r1"
for zmode in ("z3", "3d"):
    process_merscope(sample_name, data_dir, sample_paths, zmode)

[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s2_r1/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s2_r1/[0m[95msdata_3d.zarr[0m                                            


In [7]:
# Export every sample listed in sample_paths, z3 configuration only.
# Samples whose store already exists are skipped inside process_merscope.
zmodes = ["z3"]
for sample_name in sample_paths:
    for zmode in zmodes:
        process_merscope(sample_name, data_dir, sample_paths, zmode)

[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s1_r0/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s1_r1/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s2_r0/[0m[95msdata_z3.zarr[0m                                            
Skipping foxf2_s2_r1: z3 file already exists
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s2_r2/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s3_r0/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s3_r1/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s4_r0/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s4_r1/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s5_r0/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s5_r1/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s6_r0/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s6_r1/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s6_r2/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s7_r0/[0m[95msdata_z3.zarr[0m                                            
[34mINFO    [0m The column [32m"global_x"[0m has now been renamed to [32m"x"[0m; the column [32m"x"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      
[34mINFO    [0m The column [32m"global_y"[0m has now been renamed to [32m"y"[0m; the column [32m"y"[0m was already present in the dataframe,   
         and will be dropped.                                                                                      


  return method.__get__(obj, cls)(*args, **kwargs)


[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         [35m/home/data/cellseg-benchmark/samples/foxf2_s7_r1/[0m[95msdata_z3.zarr[0m                                            


# Add data from segmentation outputs: shapes and adatas

In [5]:
sample_name = "foxf2_s2_r1"

In [6]:
# Load the main (z3) sdata of the selected sample.
# The store is only read when `sdata_main` is not defined yet, so re-running
# this cell does not replace an object that is already in memory.
samples_root = os.path.join(data_dir, "cellseg-benchmark", "samples")
sdata_path = os.path.join(samples_root, sample_name, "sdata_z3.zarr")
if "sdata_main" not in locals():
    sdata_main = sd.read_zarr(sdata_path)

  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)
  compressor, fill_value = _kwargs_compat(compressor, fill_value, kwargs)


In [7]:
sdata_main

SpatialData object, with associated Zarr store: /dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark/samples/foxf2_s2_r1/sdata_z3.zarr
├── Images
│     └── 'foxf2_s2_r1_z3': DataTree[cyx] (7, 54456, 65675), (7, 27228, 32837), (7, 13614, 16418), (7, 6807, 8209), (7, 3403, 4104)
└── Points
      └── 'foxf2_s2_r1_transcripts': DataFrame with shape: (<Delayed>, 9) (2D points)
with coordinate systems:
    ▸ 'global', with elements:
        foxf2_s2_r1_z3 (Images), foxf2_s2_r1_transcripts (Points)

In [8]:
# default MERLIN output from Merscope (cyto2 on DAPI/PolyT)

In [None]:
import spatialdata as sd
import spatialdata_io

def process_merlin_segmentation(sample_name, sample_paths, sdata_main, write_to_disk=True):
    """
    Add the default Merlin segmentation of a MERSCOPE run to the main spatial data object.

    Cell boundaries and the cell table are read from the raw MERSCOPE output and
    stored in ``sdata_main`` as ``boundaries_Cellpose_1_Merlin`` and
    ``adata_Cellpose_1_Merlin``. An element that is already present in
    ``sdata_main`` is left untouched and a message is printed instead.

    Args:
        sample_name: Name of the sample
        sample_paths: Dictionary mapping sample names to their file paths
        sdata_main: Main spatial data object to update (modified in place)
        write_to_disk: Whether to write elements to disk immediately

    Returns:
        Updated sdata_main object
    """
    seg_method = "Cellpose_1_Merlin"
    boundaries_key = f"boundaries_{seg_method}"
    table_key = f"adata_{seg_method}"

    need_boundaries = boundaries_key not in sdata_main
    need_table = table_key not in sdata_main

    # Read the MERSCOPE output only if at least one element is still missing.
    if need_boundaries or need_table:
        sdata = spatialdata_io.merscope(
            sample_paths[sample_name],
            z_layers=3,
            backend=None,
            cells_boundaries=True,
            cells_table=True,
            mosaic_images=False,
            transcripts=False,
            slide_name="_".join(sample_name.split("_")[:2]),
            region_name=sample_name.split("_")[2],
        )

    # Handle boundaries
    if need_boundaries:
        # Deep copy so the element stored in sdata_main is independent of the
        # temporary `sdata` object.
        polygons = sd.deepcopy(sdata[f"{sample_name}_polygons"])
        sdata_main[boundaries_key] = polygons
        if write_to_disk:
            sdata_main.write_element(boundaries_key)
    else:
        print(f"Skipping {seg_method} as boundaries_{seg_method} exist already.")

    # Handle table
    if need_table:
        adata = sd.deepcopy(sdata["table"])
        sdata_main[table_key] = adata
        if write_to_disk:
            sdata_main.write_element(table_key)
    else:
        print(f"Skipping {seg_method} as adata_{seg_method} exist already.")

    return sdata_main

In [None]:
process_merlin_segmentation(sample_name, sample_paths, sdata_main, write_to_disk=True)

In [14]:
sdata_main

SpatialData object, with associated Zarr store: /dss/dssfs03/pn52re/pn52re-dss-0001/cellseg-benchmark/samples/foxf2_s2_r1/sdata_z3.zarr
├── Images
│     └── 'foxf2_s2_r1_z3': DataTree[cyx] (7, 54456, 65675), (7, 27228, 32837), (7, 13614, 16418), (7, 6807, 8209), (7, 3403, 4104)
├── Points
│     └── 'foxf2_s2_r1_transcripts': DataFrame with shape: (<Delayed>, 9) (2D points)
├── Shapes
│     └── 'boundaries_Cellpose_1_Merlin': GeoDataFrame shape: (107013, 9) (2D shapes)
└── Tables
      └── 'adata_Cellpose_1_Merlin': AnnData (107013, 500)
with coordinate systems:
    ▸ 'global', with elements:
        foxf2_s2_r1_z3 (Images), foxf2_s2_r1_transcripts (Points), boundaries_Cellpose_1_Merlin (Shapes)

In [15]:
# other segmentation approaches

In [29]:
# Every entry of the sample's "results" directory is treated as one segmentation method.
results_dir = os.path.join(data_dir, "cellseg-benchmark", "samples", sample_name, "results")
seg_methods = os.listdir(results_dir)
seg_methods

['ComSeg',
 'Cellpose_1_nuclei_model',
 'Negative_Control_Rastered_50',
 'Cellpose_2_DAPI_PolyT',
 'Negative_Control_Rastered_20',
 'Ficture',
 'Baysor_3D',
 'Cellpose_1_DAPI_PolyT',
 'Negative_Control_Rastered_10',
 'Negative_Control_Voronoi',
 'Cellpose_2_DAPI_Transcripts',
 'Cellpose_1_DAPI_Transcripts',
 'Baysor_2D']

In [1]:
import glob
import os
import spatialdata as sd

def integrate_segmentation_data(data_dir, sample_name, seg_methods, sdata_main, write_to_disk=True):
    """
    Integrate segmentation data from multiple methods into the main spatial data object.

    For every method, ``<data_dir>/cellseg-benchmark/samples/<sample_name>/results/
    <method>/sdata.zarr`` is opened (if it exists) and its boundaries and table are
    stored in ``sdata_main`` as ``boundaries_<method>`` and ``adata_<method>``.
    Elements already present in ``sdata_main`` are not replaced. Every element
    that is skipped is reported on stdout.

    Args:
        data_dir: Base directory for data
        sample_name: Name of the sample
        seg_methods: List of segmentation methods to process
        sdata_main: Main spatial data object to update (modified in place)
        write_to_disk: Whether to write elements to disk immediately

    Returns:
        Updated sdata_main object
    """
    results_dir = os.path.join(data_dir, "cellseg-benchmark", "samples", sample_name, "results")

    for seg_method in seg_methods:
        seg_path = os.path.join(results_dir, seg_method, "sdata.zarr")
        # Methods without a result store are ignored without a message.
        if not os.path.exists(seg_path):
            continue

        boundaries_key = f"boundaries_{seg_method}"
        table_key = f"adata_{seg_method}"
        need_boundaries = boundaries_key not in sdata_main
        need_table = table_key not in sdata_main

        # Open the method's store only if at least one element is still missing.
        if need_boundaries or need_table:
            sdata = sd.read_zarr(seg_path)

        # Handle boundaries
        if need_boundaries:
            # Escape the directory part so that special glob characters in the
            # path itself are matched literally; only the file name is a pattern.
            shapes_dir = glob.escape(os.path.join(seg_path, "shapes"))
            boundary_names = [
                os.path.basename(match)
                for match in glob.glob(os.path.join(shapes_dir, "*_boundaries"))
            ]

            if len(boundary_names) == 1:
                sdata_main[boundaries_key] = sdata[boundary_names[0]]
                if write_to_disk:
                    sdata_main.write_element(boundaries_key)
            elif boundary_names:
                print(f"Multiple *boundaries files found for {seg_method}. Skipping.")
            else:
                # Reported like a missing table, so the gap is visible in the log.
                print(f"Boundaries file missing for {seg_method}. Skipping.")
        else:
            print(f"Skipping {seg_method} as boundaries_{seg_method} exist already.")

        # Handle tables
        if need_table:
            # The table element is looked up under the fixed name "table".
            # NOTE(review): the copied table presumably still annotates the
            # shapes element under its original name - confirm whether the
            # region annotation needs updating to boundaries_<method>.
            if os.path.lexists(os.path.join(seg_path, "tables", "table")):
                sdata_main[table_key] = sdata["table"]
                if write_to_disk:
                    sdata_main.write_element(table_key)
            else:
                print(f"Table file missing for {seg_method}. Skipping.")
        else:
            print(f"Skipping {seg_method} as adata_{seg_method} exist already.")

    return sdata_main



In [None]:
integrate_segmentation_data(data_dir, sample_name, seg_methods, sdata_main, write_to_disk=True)

In [37]:
sdata_main.elements_paths_on_disk()

['images/foxf2_s2_r1_z3',
 'points/foxf2_s2_r1_transcripts',
 'shapes/boundaries_Baysor_2D',
 'shapes/boundaries_Baysor_3D',
 'shapes/boundaries_Cellpose_1_DAPI_PolyT',
 'shapes/boundaries_Cellpose_1_DAPI_Transcripts',
 'shapes/boundaries_Cellpose_1_Merlin',
 'shapes/boundaries_Cellpose_1_nuclei_model',
 'shapes/boundaries_Cellpose_2_DAPI_PolyT',
 'shapes/boundaries_Cellpose_2_DAPI_Transcripts',
 'shapes/boundaries_ComSeg',
 'shapes/boundaries_Negative_Control_Voronoi',
 'tables/adata_Baysor_2D',
 'tables/adata_Baysor_3D',
 'tables/adata_Cellpose_1_DAPI_PolyT',
 'tables/adata_Cellpose_1_DAPI_Transcripts',
 'tables/adata_Cellpose_1_Merlin',
 'tables/adata_Cellpose_1_nuclei_model',
 'tables/adata_Cellpose_2_DAPI_PolyT',
 'tables/adata_Cellpose_2_DAPI_Transcripts',
 'tables/adata_ComSeg',
 'tables/adata_Negative_Control_Rastered_10',
 'tables/adata_Negative_Control_Rastered_20',
 'tables/adata_Negative_Control_Rastered_50',
 'tables/adata_Negative_Control_Voronoi']