Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
8df3521
merfish reader v0 (handle vpt outputs)
quentinblampey Apr 19, 2023
1ae190c
Merge branch 'main' into merfish
quentinblampey Apr 19, 2023
713b37c
merfish reader: minor fixes (assert and KEY.value)
quentinblampey Apr 19, 2023
511412d
add staining channel names to merfish reader
quentinblampey Apr 20, 2023
71a958e
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 20, 2023
a0dc6ec
points to categorical, splitting points by z value, debugging the tra…
LucaMarconato Apr 21, 2023
ca89463
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Apr 21, 2023
663a406
merfish reader support dict paths inputs
quentinblampey May 11, 2023
6370193
Merge branch 'main' into merfish
giovp Jun 9, 2023
8453fa7
fix some (but not all) pre-commit checks
giovp Jun 9, 2023
262e9f5
Merge branch 'merfish' of github.com:quentinblampey/spatialdata-io in…
quentinblampey Jun 12, 2023
7bc0efd
merscope: renaming + solving mypy type errors
quentinblampey Jun 13, 2023
9257b55
fix flake8 errors
quentinblampey Jun 13, 2023
ca7ae96
z-layers argument + consider only 2D stacks
quentinblampey Jun 14, 2023
2452a79
back to py3.9 Union typing syntax
quentinblampey Jun 14, 2023
0226e53
infer the dataset id using the directory path
quentinblampey Jun 19, 2023
71f075a
fix flake8 unindent warning
quentinblampey Jun 19, 2023
c488ccb
adding the merscope reader to the docs API
quentinblampey Jun 20, 2023
e74face
Merge branch 'main' of https://github.com/scverse/spatialdata-io into…
quentinblampey Jul 25, 2023
0c91580
merscope: handle big image + fix TableModel region
quentinblampey Jul 31, 2023
137fe8a
microns_to_pixel: in one line for mypy
quentinblampey Jul 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/spatialdata_io/_constants/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,12 @@ class MerscopeKeys(ModeEnum):
VPT_NAME_BOUNDARIES = "cell_boundaries"

# metadata
INSTANCE_KEY = "EntityID"
METADATA_CELL_KEY = "EntityID"
COUNTS_CELL_KEY = "cell"
CELL_X = "center_x"
CELL_Y = "center_y"
GLOBAL_X = "global_x"
GLOBAL_Y = "global_y"
GLOBAL_Z = "global_z"
Z_INDEX = "ZIndex"
REGION_KEY = "cells_region"
57 changes: 42 additions & 15 deletions src/spatialdata_io/readers/merscope.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from __future__ import annotations

import re
from collections.abc import Mapping
from pathlib import Path
from types import MappingProxyType
from typing import Any

import anndata
Expand Down Expand Up @@ -79,6 +81,8 @@ def merscope(
z_layers: int | list[int] | None = 3,
region_name: str | None = None,
slide_name: str | None = None,
imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
) -> SpatialData:
"""
Read *MERSCOPE* data from Vizgen.
Expand Down Expand Up @@ -115,21 +119,38 @@ def merscope(
Name of the region of interest, e.g., `'region_0'`. If `None` then the name of the `path` directory is used.
slide_name
Name of the slide/run. If `None` then the name of the parent directory of `path` is used (whose name starts with a date).
imread_kwargs
Keyword arguments to pass to the image reader.
image_models_kwargs
Keyword arguments to pass to the image models.

Returns
-------
:class:`spatialdata.SpatialData`
"""
if "chunks" not in image_models_kwargs:
if isinstance(image_models_kwargs, MappingProxyType):
image_models_kwargs = {}
assert isinstance(image_models_kwargs, dict)
image_models_kwargs["chunks"] = (1, 4096, 4096)
if "scale_factors" not in image_models_kwargs:
if isinstance(image_models_kwargs, MappingProxyType):
image_models_kwargs = {}
assert isinstance(image_models_kwargs, dict)
image_models_kwargs["scale_factors"] = [2, 2, 2, 2]

path = Path(path)
count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs)
images_dir = path / MerscopeKeys.IMAGES_DIR

microns_to_pixels = np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE)
microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
microns_to_pixels = Affine(
np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE), input_axes=("x", "y"), output_axes=("x", "y")
)

region_name = path.name if region_name is None else region_name
vizgen_region = path.name if region_name is None else region_name
slide_name = path.parent.name if slide_name is None else slide_name
dataset_id = f"{slide_name}_{region_name}"
dataset_id = f"{slide_name}_{vizgen_region}"
region = f"{dataset_id}_polygons"

# Images
images = {}
Expand All @@ -138,12 +159,16 @@ def merscope(

stainings = _get_channel_names(images_dir)
for z_layer in z_layers:
im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif").squeeze() for stain in stainings], axis=0)
im = da.stack(
[imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif", **imread_kwargs).squeeze() for stain in stainings],
axis=0,
)
parsed_im = Image2DModel.parse(
im,
dims=("c", "y", "x"),
transformations={"pixels": Identity()},
transformations={"microns": microns_to_pixels.inverse()},
c_coords=stainings,
**image_models_kwargs,
)
images[f"{dataset_id}_z{z_layer}"] = parsed_im

Expand All @@ -152,7 +177,7 @@ def merscope(
transcripts = PointsModel.parse(
transcript_df,
coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y},
transformations={"pixels": microns_to_pixels},
transformations={"microns": Identity()},
)
categories = transcripts["gene"].compute().astype("category")
gene_categorical = dd.from_pandas(categories, npartitions=transcripts.npartitions).reset_index(drop=True)
Expand All @@ -164,31 +189,33 @@ def merscope(
geo_df = geopandas.read_parquet(boundaries_path)
geo_df = geo_df.rename_geometry("geometry")
geo_df = geo_df[geo_df[MerscopeKeys.Z_INDEX] == 0] # Avoid duplicate boundaries on all z-levels
geo_df.index = geo_df[MerscopeKeys.INSTANCE_KEY].astype(str)
geo_df.geometry = geo_df.geometry.map(lambda x: x.geoms[0]) # The MultiPolygons contain only one polygon
geo_df.index = geo_df[MerscopeKeys.METADATA_CELL_KEY].astype(str)

polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity()})

shapes = {f"{dataset_id}_polygons": polygons}

# Table
data = pd.read_csv(count_path, index_col=0, dtype={MerscopeKeys.COUNTS_CELL_KEY: str})
obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.INSTANCE_KEY: str})
obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.METADATA_CELL_KEY: str})

is_gene = ~data.columns.str.lower().str.contains("blank")
adata = anndata.AnnData(data.loc[:, is_gene], dtype=data.values.dtype, obs=obs)

adata.obsm["blank"] = data.loc[:, ~is_gene] # blank fields are excluded from adata.X
adata.obsm["spatial"] = adata.obs[[MerscopeKeys.CELL_X, MerscopeKeys.CELL_Y]].values
adata.obs["region"] = pd.Series(region_name, index=adata.obs_names, dtype="category")
adata.obs["region"] = pd.Series(vizgen_region, index=adata.obs_names, dtype="category")
adata.obs["slide"] = pd.Series(slide_name, index=adata.obs_names, dtype="category")
adata.obs["dataset_id"] = pd.Series(dataset_id, index=adata.obs_names, dtype="category")
adata.obs[MerscopeKeys.INSTANCE_KEY] = adata.obs.index
adata.obs[MerscopeKeys.REGION_KEY] = pd.Series(region, index=adata.obs_names, dtype="category")
adata.obs[MerscopeKeys.METADATA_CELL_KEY] = adata.obs.index

table = TableModel.parse(
adata,
region_key="region",
region=adata.obs["region"].cat.categories.tolist(),
instance_key=MerscopeKeys.INSTANCE_KEY.value,
region_key=MerscopeKeys.REGION_KEY.value,
region=region,
instance_key=MerscopeKeys.METADATA_CELL_KEY.value,
)

return SpatialData(shapes=shapes, points=points, images=images, table=table)