diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index e540e324..c572fced 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -174,7 +174,7 @@ class MerscopeKeys(ModeEnum): VPT_NAME_BOUNDARIES = "cell_boundaries" # metadata - INSTANCE_KEY = "EntityID" + METADATA_CELL_KEY = "EntityID" COUNTS_CELL_KEY = "cell" CELL_X = "center_x" CELL_Y = "center_y" @@ -182,3 +182,4 @@ class MerscopeKeys(ModeEnum): GLOBAL_Y = "global_y" GLOBAL_Z = "global_z" Z_INDEX = "ZIndex" + REGION_KEY = "cells_region" diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py index da347f9e..686dcf7c 100644 --- a/src/spatialdata_io/readers/merscope.py +++ b/src/spatialdata_io/readers/merscope.py @@ -1,7 +1,9 @@ from __future__ import annotations import re +from collections.abc import Mapping from pathlib import Path +from types import MappingProxyType from typing import Any import anndata @@ -79,6 +81,8 @@ def merscope( z_layers: int | list[int] | None = 3, region_name: str | None = None, slide_name: str | None = None, + imread_kwargs: Mapping[str, Any] = MappingProxyType({}), + image_models_kwargs: Mapping[str, Any] = MappingProxyType({}), ) -> SpatialData: """ Read *MERSCOPE* data from Vizgen. @@ -115,21 +119,38 @@ def merscope( Name of the region of interest, e.g., `'region_0'`. If `None` then the name of the `path` directory is used. slide_name Name of the slide/run. If `None` then the name of the parent directory of `path` is used (whose name starts with a date). + imread_kwargs + Keyword arguments to pass to the image reader. + image_models_kwargs + Keyword arguments to pass to the image models. Returns ------- :class:`spatialdata.SpatialData` """ + if "chunks" not in image_models_kwargs: + if isinstance(image_models_kwargs, MappingProxyType): + image_models_kwargs = {} + assert isinstance(image_models_kwargs, dict) + image_models_kwargs["chunks"] = (1, 4096, 4096) + if "scale_factors" not in image_models_kwargs: + if isinstance(image_models_kwargs, MappingProxyType): + image_models_kwargs = {} + assert isinstance(image_models_kwargs, dict) + image_models_kwargs["scale_factors"] = [2, 2, 2, 2] + path = Path(path) count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs) images_dir = path / MerscopeKeys.IMAGES_DIR - microns_to_pixels = np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE) - microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y")) + microns_to_pixels = Affine( + np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE), input_axes=("x", "y"), output_axes=("x", "y") + ) - region_name = path.name if region_name is None else region_name + vizgen_region = path.name if region_name is None else region_name slide_name = path.parent.name if slide_name is None else slide_name - dataset_id = f"{slide_name}_{region_name}" + dataset_id = f"{slide_name}_{vizgen_region}" + region = f"{dataset_id}_polygons" # Images images = {} @@ -138,12 +159,16 @@ def merscope( stainings = _get_channel_names(images_dir) for z_layer in z_layers: - im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif").squeeze() for stain in stainings], axis=0) + im = da.stack( + [imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif", **imread_kwargs).squeeze() for stain in stainings], + axis=0, + ) parsed_im = Image2DModel.parse( im, dims=("c", "y", "x"), - transformations={"pixels": Identity()}, + transformations={"microns": microns_to_pixels.inverse()}, c_coords=stainings, + **image_models_kwargs, ) images[f"{dataset_id}_z{z_layer}"] = parsed_im @@ -152,7 +177,7 @@ def merscope( transcripts = PointsModel.parse( transcript_df, coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y}, - transformations={"pixels": microns_to_pixels}, + transformations={"microns": Identity()}, ) categories = transcripts["gene"].compute().astype("category") gene_categorical = dd.from_pandas(categories, npartitions=transcripts.npartitions).reset_index(drop=True) @@ -164,31 +189,33 @@ def merscope( geo_df = geopandas.read_parquet(boundaries_path) geo_df = geo_df.rename_geometry("geometry") geo_df = geo_df[geo_df[MerscopeKeys.Z_INDEX] == 0] # Avoid duplicate boundaries on all z-levels - geo_df.index = geo_df[MerscopeKeys.INSTANCE_KEY].astype(str) + geo_df.geometry = geo_df.geometry.map(lambda x: x.geoms[0]) # The MultiPolygons contain only one polygon + geo_df.index = geo_df[MerscopeKeys.METADATA_CELL_KEY].astype(str) - polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels}) + polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity()}) shapes = {f"{dataset_id}_polygons": polygons} # Table data = pd.read_csv(count_path, index_col=0, dtype={MerscopeKeys.COUNTS_CELL_KEY: str}) - obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.INSTANCE_KEY: str}) + obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.METADATA_CELL_KEY: str}) is_gene = ~data.columns.str.lower().str.contains("blank") adata = anndata.AnnData(data.loc[:, is_gene], dtype=data.values.dtype, obs=obs) adata.obsm["blank"] = data.loc[:, ~is_gene] # blank fields are excluded from adata.X adata.obsm["spatial"] = adata.obs[[MerscopeKeys.CELL_X, MerscopeKeys.CELL_Y]].values - adata.obs["region"] = pd.Series(region_name, index=adata.obs_names, dtype="category") + adata.obs["region"] = pd.Series(vizgen_region, index=adata.obs_names, dtype="category") adata.obs["slide"] = pd.Series(slide_name, index=adata.obs_names, dtype="category") adata.obs["dataset_id"] = pd.Series(dataset_id, index=adata.obs_names, dtype="category") - adata.obs[MerscopeKeys.INSTANCE_KEY] = adata.obs.index + adata.obs[MerscopeKeys.REGION_KEY] = pd.Series(region, index=adata.obs_names, dtype="category") + adata.obs[MerscopeKeys.METADATA_CELL_KEY] = adata.obs.index table = TableModel.parse( adata, - region_key="region", - region=adata.obs["region"].cat.categories.tolist(), - instance_key=MerscopeKeys.INSTANCE_KEY.value, + region_key=MerscopeKeys.REGION_KEY.value, + region=region, + instance_key=MerscopeKeys.METADATA_CELL_KEY.value, ) return SpatialData(shapes=shapes, points=points, images=images, table=table)