From 8df352159ee494387f8479d752ed1bf0bd314c12 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@student.ecp.fr>
Date: Wed, 19 Apr 2023 14:48:07 +0200
Subject: [PATCH 01/17] merfish reader v0 (handle vpt outputs)

---
 src/spatialdata_io/__init__.py              |   2 +
 src/spatialdata_io/_constants/_constants.py |  26 ++++
 src/spatialdata_io/readers/merfish.py       | 126 ++++++++++++++++++++
 3 files changed, 154 insertions(+)
 create mode 100644 src/spatialdata_io/readers/merfish.py
diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py
index 297c4435..739ca41c 100644
--- a/src/spatialdata_io/__init__.py
+++ b/src/spatialdata_io/__init__.py
@@ -2,6 +2,7 @@
 
 from spatialdata_io.readers.cosmx import cosmx
 from spatialdata_io.readers.mcmicro import mcmicro
+from spatialdata_io.readers.merfish import merfish
 from spatialdata_io.readers.steinbock import steinbock
 from spatialdata_io.readers.visium import visium
 from spatialdata_io.readers.xenium import xenium
@@ -12,6 +13,7 @@
     "cosmx",
     "mcmicro",
     "steinbock",
+    "merfish",
 ]
 
 __version__ = version("spatialdata-io")
diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index aee085d5..ba3d6cd4 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -120,3 +120,29 @@ class McmicroKeys(ModeEnum):
     COORDS_X = "X_centroid"
     COORDS_Y = "Y_centroid"
     INSTANCE_KEY = "CellID"
+
+
+@unique
+class MerfishKeys(ModeEnum):
+    """Keys for *MERFISH* data (*MERSCOPE* plateform from Vizgen)"""
+
+    # files and directories
+    IMAGES_DIR = "images"
+    TRANSFORMATION_FILE = "micron_to_mosaic_pixel_transform.csv"
+    TRANSCRIPTS_FILE = "detected_transcripts.csv"
+    BOUNDARIES_FILE = "cell_boundaries.parquet"
+    COUNTS_FILE = "cell_by_gene.csv"
+    CELL_METADATA_FILE = "cell_metadata.csv"
+
+    # VPT default outputs
+    CELLPOSE_BOUNDARIES = "cellpose_micron_space.parquet"
+    WATERSHED_BOUNDARIES = "watershed_micron_space.parquet"
+
+    # metadata
+    INSTANCE_KEY = "EntityID"
+    COUNTS_CELL_KEY = "cell"
+    CELL_X = "center_x"
+    CELL_Y = "center_y"
+    GLOBAL_X = "global_x"
+    GLOBAL_Y = "global_y"
+    GLOBAL_Z = "global_z"
diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
new file mode 100644
index 00000000..84251137
--- /dev/null
+++ b/src/spatialdata_io/readers/merfish.py
@@ -0,0 +1,126 @@
+import re
+from pathlib import Path
+from typing import Optional, Tuple, Union
+
+import anndata
+import geopandas
+import numpy as np
+import pandas as pd
+from dask import array as da
+from dask.dataframe import read_csv
+from dask_image.imread import imread
+from spatialdata import SpatialData
+from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
+from spatialdata.transformations import Affine, Identity
+
+from spatialdata_io._constants._constants import MerfishKeys
+from spatialdata_io._docs import inject_docs
+
+
+def _scan_images(images_dir: Path) -> Tuple[set]:
+    exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
+    matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
+
+    stains = set(match.group("stain") for match in matches if match)
+    z_levels = set(match.group("z") for match in matches if match)
+
+    return stains, z_levels
+
+
+def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict]]):
+    """Gets the file paths to (i) the file of transcript per cell, (ii) the cell metadata file, and (iii) the cell boundary file"""
+    if vpt_outputs is None:
+        return path / MerfishKeys.COUNTS_FILE, path / MerfishKeys.CELL_METADATA_FILE, path / MerfishKeys.BOUNDARIES_FILE
+
+    if isinstance(vpt_outputs, str) or isinstance(vpt_outputs, Path):
+        vpt_outputs = Path(vpt_outputs)
+
+        plausible_boundaries = [
+            vpt_outputs / MerfishKeys.CELLPOSE_BOUNDARIES,
+            vpt_outputs / MerfishKeys.WATERSHED_BOUNDARIES,
+        ]
+        valid_boundaries = [path for path in plausible_boundaries if path.exists()]
+
+        assert (
+            not valid_boundaries
+        ), f"Boundary file not found - expected to find one of these files: {', '.join(plausible_boundaries)}"
+
+        return vpt_outputs / MerfishKeys.COUNTS_FILE, vpt_outputs / MerfishKeys.CELL_METADATA_FILE, valid_boundaries[0]
+
+    if isinstance(vpt_outputs, dict):
+        raise NotImplementedError()
+
+    raise ValueError(f"'vpt_outputs' has to be either None, a str, a Path, or a dict. Found type {type(vpt_outputs)}.")
+
+
+@inject_docs(mf=MerfishKeys)
+def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]]) -> SpatialData:
+    """
+    Read *MERFISH* data from Vizgen.
+
+    Parameters
+    ----------
+    path
+        Path to the root directory containing the *Merfish* files (e.g., `detected_transcripts.csv`).
+    vpt_outputs
+        Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used. If a folder path is provided, it looks inside the folder for the following files: ``{mf.COUNTS_FILE!r}``, ``{mf.CELL_METADATA_FILE!r}``, and a boundary parquet file. If a dictionnary, then the following keys can be provided: `cell_by_gene`, `cell_metadata`, `boundaries` with the desired path as the value.
+
+    Returns
+    -------
+    :class:`spatialdata.SpatialData`
+    """
+    path = Path(path)
+    count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs)
+    images_dir = path / MerfishKeys.IMAGES_DIR
+
+    microns_to_pixels = np.genfromtxt(images_dir / MerfishKeys.TRANSFORMATION_FILE)
+    microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
+
+    ### Images
+    images = {}
+
+    stains, z_levels = _scan_images(images_dir)
+    for z in z_levels:
+        im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z}.tif").squeeze() for stain in stains], axis=0)
+        parsed_im = Image2DModel.parse(
+            im, dims=("c", "y", "x"), transformations={"pixels": Identity(), "microns": microns_to_pixels.inverse()}
+        )
+        images[f"z{z}"] = parsed_im
+
+    ### Transcripts
+    transcript_df = read_csv(path / MerfishKeys.TRANSCRIPTS_FILE)
+    transcripts = PointsModel.parse(
+        transcript_df,
+        coordinates={"x": MerfishKeys.GLOBAL_X, "y": MerfishKeys.GLOBAL_Y, "z": MerfishKeys.GLOBAL_Z},
+        transformations={"microns": Identity(), "pixels": microns_to_pixels},
+    )
+    points = {"transcripts": transcripts}
+
+    ### Polygons
+    geo_df = geopandas.read_parquet(boundaries_path)
+    geo_df = geo_df.rename_geometry("geometry")
+    geo_df.index = geo_df[MerfishKeys.INSTANCE_KEY].astype(str)
+
+    polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity(), "pixels": microns_to_pixels})
+    shapes = {"polygons": polygons}
+
+    ### Table
+    data = pd.read_csv(count_path, index_col=0, dtype={MerfishKeys.COUNTS_CELL_KEY: str})
+    obs = pd.read_csv(obs_path, index_col=0, dtype={MerfishKeys.INSTANCE_KEY: str})
+
+    is_gene = ~data.columns.str.lower().str.contains("blank")
+    adata = anndata.AnnData(data.loc[:, is_gene], dtype=data.values.dtype, obs=obs)
+
+    adata.obsm["blank"] = data.loc[:, ~is_gene]  # blank fields are excluded from adata.X
+    adata.obsm["spatial"] = adata.obs[[MerfishKeys.CELL_X, MerfishKeys.CELL_Y]].values
+    adata.obs["region"] = pd.Series(path.stem, index=adata.obs_names, dtype="category")
+    adata.obs[MerfishKeys.INSTANCE_KEY] = adata.obs.index
+
+    table = TableModel.parse(
+        adata,
+        region_key="region",
+        region=adata.obs["region"].cat.categories.tolist(),
+        instance_key=MerfishKeys.INSTANCE_KEY,
+    )
+
+    return SpatialData(table=table, shapes=shapes, points=points, images=images)

From 713b37c7b2d4f2e03ce7e9b1131ef1354090021f Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@student.ecp.fr>
Date: Wed, 19 Apr 2023 16:49:43 +0200
Subject: [PATCH 02/17] merfish reader: minor fixes (assert and KEY.value)

---
 src/spatialdata_io/readers/merfish.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 84251137..465236b6 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -42,7 +42,7 @@ def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict]]):
         valid_boundaries = [path for path in plausible_boundaries if path.exists()]
 
         assert (
-            not valid_boundaries
+            valid_boundaries
         ), f"Boundary file not found - expected to find one of these files: {', '.join(plausible_boundaries)}"
 
         return vpt_outputs / MerfishKeys.COUNTS_FILE, vpt_outputs / MerfishKeys.CELL_METADATA_FILE, valid_boundaries[0]
@@ -120,7 +120,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
         adata,
         region_key="region",
         region=adata.obs["region"].cat.categories.tolist(),
-        instance_key=MerfishKeys.INSTANCE_KEY,
+        instance_key=MerfishKeys.INSTANCE_KEY.value,
     )
 
     return SpatialData(table=table, shapes=shapes, points=points, images=images)

From 511412d41e167a998d0f06b0f08121b16b18f35f Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@student.ecp.fr>
Date: Thu, 20 Apr 2023 11:04:39 +0200
Subject: [PATCH 03/17] add staining channel names to merfish reader

---
 src/spatialdata_io/readers/merfish.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 465236b6..76a6fb1d 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -1,6 +1,6 @@
 import re
 from pathlib import Path
-from typing import Optional, Tuple, Union
+from typing import List, Optional, Tuple, Union
 
 import anndata
 import geopandas
@@ -17,14 +17,15 @@
 from spatialdata_io._docs import inject_docs
 
 
-def _scan_images(images_dir: Path) -> Tuple[set]:
+def _scan_images(images_dir: Path) -> Tuple[List]:
+    """Searches inside the image directory and get all the different channels (stainings) and all the z-levels (usually 0...6)"""
     exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
     matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
 
-    stains = set(match.group("stain") for match in matches if match)
+    stainings = set(match.group("stain") for match in matches if match)
     z_levels = set(match.group("z") for match in matches if match)
 
-    return stains, z_levels
+    return list(stainings), list(z_levels)
 
 
 def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict]]):
@@ -43,7 +44,7 @@ def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict]]):
 
         assert (
             valid_boundaries
-        ), f"Boundary file not found - expected to find one of these files: {', '.join(plausible_boundaries)}"
+        ), f"Boundary file not found - expected to find one of these files: {', '.join(map(str, plausible_boundaries))}"
 
         return vpt_outputs / MerfishKeys.COUNTS_FILE, vpt_outputs / MerfishKeys.CELL_METADATA_FILE, valid_boundaries[0]
 
@@ -79,11 +80,14 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
     ### Images
     images = {}
 
-    stains, z_levels = _scan_images(images_dir)
+    stainings, z_levels = _scan_images(images_dir)
     for z in z_levels:
-        im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z}.tif").squeeze() for stain in stains], axis=0)
+        im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z}.tif").squeeze() for stain in stainings], axis=0)
         parsed_im = Image2DModel.parse(
-            im, dims=("c", "y", "x"), transformations={"pixels": Identity(), "microns": microns_to_pixels.inverse()}
+            im,
+            dims=("c", "y", "x"),
+            transformations={"pixels": Identity(), "microns": microns_to_pixels.inverse()},
+            c_coords=stainings,
         )
         images[f"z{z}"] = parsed_im
 
@@ -123,4 +127,4 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
         instance_key=MerfishKeys.INSTANCE_KEY.value,
     )
 
-    return SpatialData(table=table, shapes=shapes, points=points, images=images)
+    return SpatialData(shapes=shapes, points=points, images=images, table=table)

From 71a958e43a3fb50e003906e53f934fc4ed053212 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 20 Apr 2023 09:21:41 +0000
Subject: [PATCH 04/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata_io/readers/merfish.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 76a6fb1d..79948e02 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -17,13 +17,13 @@
 from spatialdata_io._docs import inject_docs
 
 
-def _scan_images(images_dir: Path) -> Tuple[List]:
+def _scan_images(images_dir: Path) -> tuple[list]:
     """Searches inside the image directory and get all the different channels (stainings) and all the z-levels (usually 0...6)"""
     exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
     matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
 
-    stainings = set(match.group("stain") for match in matches if match)
-    z_levels = set(match.group("z") for match in matches if match)
+    stainings = {match.group("stain") for match in matches if match}
+    z_levels = {match.group("z") for match in matches if match}
 
     return list(stainings), list(z_levels)
 

From a0dc6ec3525c79347d4acbb5143a71e469df8a4b Mon Sep 17 00:00:00 2001
From: Luca Marconato <2664412+LucaMarconato@users.noreply.github.com>
Date: Fri, 21 Apr 2023 20:49:58 +0200
Subject: [PATCH 05/17] points to categorical, splitting points by z value,
 debugging the transformations

---
 src/spatialdata_io/readers/merfish.py | 32 ++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 8 deletions(-)

diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 79948e02..5d8868d9 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -1,13 +1,13 @@
 import re
 from pathlib import Path
-from typing import List, Optional, Tuple, Union
+from typing import Optional, Union
 
 import anndata
 import geopandas
 import numpy as np
 import pandas as pd
 from dask import array as da
-from dask.dataframe import read_csv
+import dask.dataframe as dd
 from dask_image.imread import imread
 from spatialdata import SpatialData
 from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
@@ -17,7 +17,7 @@
 from spatialdata_io._docs import inject_docs
 
 
-def _scan_images(images_dir: Path) -> tuple[list]:
+def _scan_images(images_dir: Path) -> tuple[list, list]:
     """Searches inside the image directory and get all the different channels (stainings) and all the z-levels (usually 0...6)"""
     exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
     matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
@@ -86,26 +86,42 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
         parsed_im = Image2DModel.parse(
             im,
             dims=("c", "y", "x"),
-            transformations={"pixels": Identity(), "microns": microns_to_pixels.inverse()},
+            transformations={"pixels": Identity()},
+            # transformations={"pixels": Identity(), "microns": microns_to_pixels.inverse()},
             c_coords=stainings,
         )
         images[f"z{z}"] = parsed_im
 
     ### Transcripts
-    transcript_df = read_csv(path / MerfishKeys.TRANSCRIPTS_FILE)
+    transcript_df = dd.read_csv(path / MerfishKeys.TRANSCRIPTS_FILE)
     transcripts = PointsModel.parse(
         transcript_df,
         coordinates={"x": MerfishKeys.GLOBAL_X, "y": MerfishKeys.GLOBAL_Y, "z": MerfishKeys.GLOBAL_Z},
-        transformations={"microns": Identity(), "pixels": microns_to_pixels},
+        transformations={"pixels": Identity()},
+        # transformations={"microns": Identity(), "pixels": microns_to_pixels},
     )
-    points = {"transcripts": transcripts}
+    # points = {"transcripts": transcripts}
+    points = {}
+    gene_categorical = dd.from_pandas(transcripts['gene'].compute().astype('category'), npartitions=transcripts.npartitions).reset_index(drop=True)
+    transcripts['gene'] = gene_categorical
+
+    # split the transcripts into the different z-levels
+    z = transcripts['z'].compute()
+    z_levels = z.value_counts().index
+    z_levels = sorted(z_levels, key=lambda x: int(x))
+    for z_level in z_levels:
+        transcripts_subset = transcripts[z == z_level]
+        # temporary solution until the 3D support is better developed
+        transcripts_subset = transcripts_subset.drop('z', axis=1)
+        points[f"transcripts_z{int(z_level)}"] = transcripts_subset
 
     ### Polygons
     geo_df = geopandas.read_parquet(boundaries_path)
     geo_df = geo_df.rename_geometry("geometry")
     geo_df.index = geo_df[MerfishKeys.INSTANCE_KEY].astype(str)
 
-    polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity(), "pixels": microns_to_pixels})
+    polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
+    # polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity(), "pixels": microns_to_pixels})
     shapes = {"polygons": polygons}
 
     ### Table

From ca89463e4691429872ba32b2cc3190b3f9f09b89 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Fri, 21 Apr 2023 18:50:16 +0000
Subject: [PATCH 06/17] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 src/spatialdata_io/readers/merfish.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 5d8868d9..21c69485 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -3,11 +3,11 @@
 from typing import Optional, Union
 
 import anndata
+import dask.dataframe as dd
 import geopandas
 import numpy as np
 import pandas as pd
 from dask import array as da
-import dask.dataframe as dd
 from dask_image.imread import imread
 from spatialdata import SpatialData
 from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
@@ -102,17 +102,19 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
     )
     # points = {"transcripts": transcripts}
     points = {}
-    gene_categorical = dd.from_pandas(transcripts['gene'].compute().astype('category'), npartitions=transcripts.npartitions).reset_index(drop=True)
-    transcripts['gene'] = gene_categorical
+    gene_categorical = dd.from_pandas(
+        transcripts["gene"].compute().astype("category"), npartitions=transcripts.npartitions
+    ).reset_index(drop=True)
+    transcripts["gene"] = gene_categorical
 
     # split the transcripts into the different z-levels
-    z = transcripts['z'].compute()
+    z = transcripts["z"].compute()
     z_levels = z.value_counts().index
     z_levels = sorted(z_levels, key=lambda x: int(x))
     for z_level in z_levels:
         transcripts_subset = transcripts[z == z_level]
         # temporary solution until the 3D support is better developed
-        transcripts_subset = transcripts_subset.drop('z', axis=1)
+        transcripts_subset = transcripts_subset.drop("z", axis=1)
         points[f"transcripts_z{int(z_level)}"] = transcripts_subset
 
     ### Polygons

From 663a4066753213134b4ac31d6e16f2ea331fdd58 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Thu, 11 May 2023 14:48:26 +0200
Subject: [PATCH 07/17] merfish reader support dict paths inputs

---
 src/spatialdata_io/_constants/_constants.py | 4 ++++
 src/spatialdata_io/readers/merfish.py       | 9 +++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index 8ec28f30..c9bb3e9b 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -138,6 +138,9 @@ class MerfishKeys(ModeEnum):
     # VPT default outputs
     CELLPOSE_BOUNDARIES = "cellpose_micron_space.parquet"
     WATERSHED_BOUNDARIES = "watershed_micron_space.parquet"
+    VPT_NAME_COUNTS = "cell_by_gene"
+    VPT_NAME_OBS = "cell_metadata"
+    VPT_NAME_BOUNDARIES = "cell_boundaries"
 
     # metadata
     INSTANCE_KEY = "EntityID"
@@ -147,3 +150,4 @@ class MerfishKeys(ModeEnum):
     GLOBAL_X = "global_x"
     GLOBAL_Y = "global_y"
     GLOBAL_Z = "global_z"
+    Z_INDEX = "ZIndex"
diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 76a6fb1d..f9517898 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -49,7 +49,11 @@ def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict]]):
         return vpt_outputs / MerfishKeys.COUNTS_FILE, vpt_outputs / MerfishKeys.CELL_METADATA_FILE, valid_boundaries[0]
 
     if isinstance(vpt_outputs, dict):
-        raise NotImplementedError()
+        return (
+            vpt_outputs[MerfishKeys.VPT_NAME_COUNTS],
+            vpt_outputs[MerfishKeys.VPT_NAME_OBS],
+            vpt_outputs[MerfishKeys.VPT_NAME_BOUNDARIES],
+        )
 
     raise ValueError(f"'vpt_outputs' has to be either None, a str, a Path, or a dict. Found type {type(vpt_outputs)}.")
 
@@ -64,7 +68,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
     path
         Path to the root directory containing the *Merfish* files (e.g., `detected_transcripts.csv`).
     vpt_outputs
-        Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used. If a folder path is provided, it looks inside the folder for the following files: ``{mf.COUNTS_FILE!r}``, ``{mf.CELL_METADATA_FILE!r}``, and a boundary parquet file. If a dictionnary, then the following keys can be provided: `cell_by_gene`, `cell_metadata`, `boundaries` with the desired path as the value.
+        Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used. If a folder path is provided, it looks inside the folder for the following files: ``{mf.COUNTS_FILE!r}``, ``{mf.CELL_METADATA_FILE!r}``, and a boundary parquet file. If a dictionnary, then the following keys can be provided: ``{mf.VPT_NAME_COUNTS!r}``, ``{mf.VPT_NAME_OBS!r}``, ``{mf.VPT_NAME_BOUNDARIES!r}`` with the desired path as the value.
 
     Returns
     -------
@@ -103,6 +107,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
     ### Polygons
     geo_df = geopandas.read_parquet(boundaries_path)
     geo_df = geo_df.rename_geometry("geometry")
+    geo_df = geo_df[geo_df[MerfishKeys.Z_INDEX] == 0]  # Avoid duplicate boundaries on all z-levels
     geo_df.index = geo_df[MerfishKeys.INSTANCE_KEY].astype(str)
 
     polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity(), "pixels": microns_to_pixels})

From 8453fa7065520061b6ba74f7ad223b8b43699760 Mon Sep 17 00:00:00 2001
From: giovp <giov.pll@gmail.com>
Date: Fri, 9 Jun 2023 20:49:27 +0000
Subject: [PATCH 08/17] fix some (but not all) pre-commit checks

---
 src/spatialdata_io/readers/merfish.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
index 21c69485..fad275eb 100644
--- a/src/spatialdata_io/readers/merfish.py
+++ b/src/spatialdata_io/readers/merfish.py
@@ -77,7 +77,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
     microns_to_pixels = np.genfromtxt(images_dir / MerfishKeys.TRANSFORMATION_FILE)
     microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
 
-    ### Images
+    # Images
     images = {}
 
     stainings, z_levels = _scan_images(images_dir)
@@ -92,7 +92,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
         )
         images[f"z{z}"] = parsed_im
 
-    ### Transcripts
+    # Transcripts
     transcript_df = dd.read_csv(path / MerfishKeys.TRANSCRIPTS_FILE)
     transcripts = PointsModel.parse(
         transcript_df,
@@ -117,7 +117,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
         transcripts_subset = transcripts_subset.drop("z", axis=1)
         points[f"transcripts_z{int(z_level)}"] = transcripts_subset
 
-    ### Polygons
+    # Polygons
     geo_df = geopandas.read_parquet(boundaries_path)
     geo_df = geo_df.rename_geometry("geometry")
     geo_df.index = geo_df[MerfishKeys.INSTANCE_KEY].astype(str)
@@ -126,7 +126,7 @@ def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]
     # polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity(), "pixels": microns_to_pixels})
     shapes = {"polygons": polygons}
 
-    ### Table
+    # Table
     data = pd.read_csv(count_path, index_col=0, dtype={MerfishKeys.COUNTS_CELL_KEY: str})
     obs = pd.read_csv(obs_path, index_col=0, dtype={MerfishKeys.INSTANCE_KEY: str})
 

From 7bc0efdd258cd88e8ac4ee8ffd3ea13e8b36ed3e Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Tue, 13 Jun 2023 14:40:59 +0200
Subject: [PATCH 09/17] merscope: renaming + solving mypy type errors

---
 src/spatialdata_io/__init__.py              |   4 +-
 src/spatialdata_io/_constants/_constants.py |   4 +-
 src/spatialdata_io/readers/merfish.py       | 153 -----------------
 src/spatialdata_io/readers/merscope.py      | 178 ++++++++++++++++++++
 4 files changed, 182 insertions(+), 157 deletions(-)
 delete mode 100644 src/spatialdata_io/readers/merfish.py
 create mode 100644 src/spatialdata_io/readers/merscope.py

diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py
index 5f7d57e2..90a29147 100644
--- a/src/spatialdata_io/__init__.py
+++ b/src/spatialdata_io/__init__.py
@@ -3,7 +3,7 @@
 from spatialdata_io.readers.cosmx import cosmx
 from spatialdata_io.readers.curio import curio
 from spatialdata_io.readers.mcmicro import mcmicro
-from spatialdata_io.readers.merfish import merfish
+from spatialdata_io.readers.merscope import merscope
 from spatialdata_io.readers.steinbock import steinbock
 from spatialdata_io.readers.visium import visium
 from spatialdata_io.readers.xenium import xenium
@@ -15,7 +15,7 @@
     "cosmx",
     "mcmicro",
     "steinbock",
-    "merfish",
+    "merscope",
 ]
 
 __version__ = version("spatialdata-io")
diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index f0106a55..13372d9f 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -141,8 +141,8 @@ class McmicroKeys(ModeEnum):
 
 
 @unique
-class MerfishKeys(ModeEnum):
-    """Keys for *MERFISH* data (*MERSCOPE* plateform from Vizgen)"""
+class MerscopeKeys(ModeEnum):
+    """Keys for *MERSCOPE* data (Vizgen plateform)"""
 
     # files and directories
     IMAGES_DIR = "images"
diff --git a/src/spatialdata_io/readers/merfish.py b/src/spatialdata_io/readers/merfish.py
deleted file mode 100644
index 142a47c1..00000000
--- a/src/spatialdata_io/readers/merfish.py
+++ /dev/null
@@ -1,153 +0,0 @@
-import re
-from pathlib import Path
-from typing import Optional, Union
-
-import anndata
-import dask.dataframe as dd
-import geopandas
-import numpy as np
-import pandas as pd
-from dask import array as da
-from dask_image.imread import imread
-from spatialdata import SpatialData
-from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
-from spatialdata.transformations import Affine, Identity
-
-from spatialdata_io._constants._constants import MerfishKeys
-from spatialdata_io._docs import inject_docs
-
-
-def _scan_images(images_dir: Path) -> tuple[list, list]:
-    """Searches inside the image directory and get all the different channels (stainings) and all the z-levels (usually 0...6)"""
-    exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
-    matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
-
-    stainings = {match.group("stain") for match in matches if match}
-    z_levels = {match.group("z") for match in matches if match}
-
-    return list(stainings), list(z_levels)
-
-
-def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict]]):
-    """Gets the file paths to (i) the file of transcript per cell, (ii) the cell metadata file, and (iii) the cell boundary file"""
-    if vpt_outputs is None:
-        return path / MerfishKeys.COUNTS_FILE, path / MerfishKeys.CELL_METADATA_FILE, path / MerfishKeys.BOUNDARIES_FILE
-
-    if isinstance(vpt_outputs, str) or isinstance(vpt_outputs, Path):
-        vpt_outputs = Path(vpt_outputs)
-
-        plausible_boundaries = [
-            vpt_outputs / MerfishKeys.CELLPOSE_BOUNDARIES,
-            vpt_outputs / MerfishKeys.WATERSHED_BOUNDARIES,
-        ]
-        valid_boundaries = [path for path in plausible_boundaries if path.exists()]
-
-        assert (
-            valid_boundaries
-        ), f"Boundary file not found - expected to find one of these files: {', '.join(map(str, plausible_boundaries))}"
-
-        return vpt_outputs / MerfishKeys.COUNTS_FILE, vpt_outputs / MerfishKeys.CELL_METADATA_FILE, valid_boundaries[0]
-
-    if isinstance(vpt_outputs, dict):
-        return (
-            vpt_outputs[MerfishKeys.VPT_NAME_COUNTS],
-            vpt_outputs[MerfishKeys.VPT_NAME_OBS],
-            vpt_outputs[MerfishKeys.VPT_NAME_BOUNDARIES],
-        )
-
-    raise ValueError(f"'vpt_outputs' has to be either None, a str, a Path, or a dict. Found type {type(vpt_outputs)}.")
-
-
-@inject_docs(mf=MerfishKeys)
-def merfish(path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict]]) -> SpatialData:
-    """
-    Read *MERFISH* data from Vizgen.
-
-    Parameters
-    ----------
-    path
-        Path to the root directory containing the *Merfish* files (e.g., `detected_transcripts.csv`).
-    vpt_outputs
-        Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used. If a folder path is provided, it looks inside the folder for the following files: ``{mf.COUNTS_FILE!r}``, ``{mf.CELL_METADATA_FILE!r}``, and a boundary parquet file. If a dictionnary, then the following keys can be provided: ``{mf.VPT_NAME_COUNTS!r}``, ``{mf.VPT_NAME_OBS!r}``, ``{mf.VPT_NAME_BOUNDARIES!r}`` with the desired path as the value.
-
-    Returns
-    -------
-    :class:`spatialdata.SpatialData`
-    """
-    path = Path(path)
-    count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs)
-    images_dir = path / MerfishKeys.IMAGES_DIR
-
-    microns_to_pixels = np.genfromtxt(images_dir / MerfishKeys.TRANSFORMATION_FILE)
-    microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
-
-    # Images
-    images = {}
-
-    stainings, z_levels = _scan_images(images_dir)
-    for z in z_levels:
-        im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z}.tif").squeeze() for stain in stainings], axis=0)
-        parsed_im = Image2DModel.parse(
-            im,
-            dims=("c", "y", "x"),
-            transformations={"pixels": Identity()},
-            # transformations={"pixels": Identity(), "microns": microns_to_pixels.inverse()},
-            c_coords=stainings,
-        )
-        images[f"z{z}"] = parsed_im
-
-    # Transcripts
-    transcript_df = dd.read_csv(path / MerfishKeys.TRANSCRIPTS_FILE)
-    transcripts = PointsModel.parse(
-        transcript_df,
-        coordinates={"x": MerfishKeys.GLOBAL_X, "y": MerfishKeys.GLOBAL_Y, "z": MerfishKeys.GLOBAL_Z},
-        transformations={"pixels": Identity()},
-        # transformations={"microns": Identity(), "pixels": microns_to_pixels},
-    )
-    # points = {"transcripts": transcripts}
-    points = {}
-    gene_categorical = dd.from_pandas(
-        transcripts["gene"].compute().astype("category"), npartitions=transcripts.npartitions
-    ).reset_index(drop=True)
-    transcripts["gene"] = gene_categorical
-
-    # split the transcripts into the different z-levels
-    z = transcripts["z"].compute()
-    z_levels = z.value_counts().index
-    z_levels = sorted(z_levels, key=lambda x: int(x))
-    for z_level in z_levels:
-        transcripts_subset = transcripts[z == z_level]
-        # temporary solution until the 3D support is better developed
-        transcripts_subset = transcripts_subset.drop("z", axis=1)
-        points[f"transcripts_z{int(z_level)}"] = transcripts_subset
-
-    # Polygons
-    geo_df = geopandas.read_parquet(boundaries_path)
-    geo_df = geo_df.rename_geometry("geometry")
-    geo_df = geo_df[geo_df[MerfishKeys.Z_INDEX] == 0]  # Avoid duplicate boundaries on all z-levels
-    geo_df.index = geo_df[MerfishKeys.INSTANCE_KEY].astype(str)
-
-    polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
-    # polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity(), "pixels": microns_to_pixels})
-    shapes = {"polygons": polygons}
-
-    # Table
-    data = pd.read_csv(count_path, index_col=0, dtype={MerfishKeys.COUNTS_CELL_KEY: str})
-    obs = pd.read_csv(obs_path, index_col=0, dtype={MerfishKeys.INSTANCE_KEY: str})
-
-    is_gene = ~data.columns.str.lower().str.contains("blank")
-    adata = anndata.AnnData(data.loc[:, is_gene], dtype=data.values.dtype, obs=obs)
-
-    adata.obsm["blank"] = data.loc[:, ~is_gene]  # blank fields are excluded from adata.X
-    adata.obsm["spatial"] = adata.obs[[MerfishKeys.CELL_X, MerfishKeys.CELL_Y]].values
-    adata.obs["region"] = pd.Series(path.stem, index=adata.obs_names, dtype="category")
-    adata.obs[MerfishKeys.INSTANCE_KEY] = adata.obs.index
-
-    table = TableModel.parse(
-        adata,
-        region_key="region",
-        region=adata.obs["region"].cat.categories.tolist(),
-        instance_key=MerfishKeys.INSTANCE_KEY.value,
-    )
-
-    return SpatialData(shapes=shapes, points=points, images=images, table=table)
diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
new file mode 100644
index 00000000..a412d407
--- /dev/null
+++ b/src/spatialdata_io/readers/merscope.py
@@ -0,0 +1,178 @@
+import re
+from pathlib import Path
+from typing import Any, Optional, Union
+
+import anndata
+import dask.dataframe as dd
+import geopandas
+import numpy as np
+import pandas as pd
+from dask import array as da
+from dask_image.imread import imread
+from spatialdata import SpatialData
+from spatialdata.models import Image3DModel, PointsModel, ShapesModel, TableModel
+from spatialdata.transformations import Affine, Identity
+
+from spatialdata_io._constants._constants import MerscopeKeys
+from spatialdata_io._docs import inject_docs
+
+
+def _scan_images(images_dir: Path) -> tuple[list[str], list[str]]:
+    """Searches inside the image directory and get all the different channels (stainings)
+    and all the z-levels (usually 0...6)"""
+    exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
+    matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
+
+    stainings = {match.group("stain") for match in matches if match}
+    z_levels = {match.group("z") for match in matches if match}
+
+    return list(stainings), list(z_levels)
+
+
+def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str, Any]]]) -> tuple[Path, Path, Path]:
+    """Gets the file paths to (i) the file of transcript per cell,
+    (ii) the cell metadata file, and (iii) the cell boundary file"""
+    if vpt_outputs is None:
+        return (
+            path / MerscopeKeys.COUNTS_FILE,
+            path / MerscopeKeys.CELL_METADATA_FILE,
+            path / MerscopeKeys.BOUNDARIES_FILE,
+        )
+
+    if isinstance(vpt_outputs, str) or isinstance(vpt_outputs, Path):
+        vpt_outputs = Path(vpt_outputs)
+
+        plausible_boundaries = [
+            vpt_outputs / MerscopeKeys.CELLPOSE_BOUNDARIES,
+            vpt_outputs / MerscopeKeys.WATERSHED_BOUNDARIES,
+        ]
+        valid_boundaries = [path for path in plausible_boundaries if path.exists()]
+
+        assert (
+            valid_boundaries
+        ), f"Boundary file not found - expected to find one of these files: {', '.join(map(str, plausible_boundaries))}"
+
+        return (
+            vpt_outputs / MerscopeKeys.COUNTS_FILE,
+            vpt_outputs / MerscopeKeys.CELL_METADATA_FILE,
+            valid_boundaries[0],
+        )
+
+    if isinstance(vpt_outputs, dict):
+        return (
+            vpt_outputs[MerscopeKeys.VPT_NAME_COUNTS],
+            vpt_outputs[MerscopeKeys.VPT_NAME_OBS],
+            vpt_outputs[MerscopeKeys.VPT_NAME_BOUNDARIES],
+        )
+
+    raise ValueError(
+        f"`vpt_outputs` has to be either `None`, `str`, `Path`, or `dict`. Found type {type(vpt_outputs)}."
+    )
+
+
+@inject_docs(ms=MerscopeKeys)
+def merscope(
+    path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict[str, Any]]] = None, read_tif: bool = True
+) -> SpatialData:
+    """
+    Read *MERSCOPE* data from Vizgen.
+
+    This function reads the following files:
+
+        - ``{ms.COUNTS_FILE!r}``: Counts file.
+        - ``{ms.TRANSCRIPTS_FILE!r}``: Transcript file.
+        - ``{ms.CELL_METADATA_FILE!r}``: Per-cell metadata file.
+        - ``{ms.BOUNDARIES_FILE!r}``: Cell polygon boundaries.
+        - `mosaic_**_z*.tif` images inside the ``{ms.IMAGES_DIR!r}`` directory.
+
+    Parameters
+    ----------
+    path
+        Path to the root directory containing the *Merscope* files (e.g., `detected_transcripts.csv`).
+    vpt_outputs
+        Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used.
+        If a folder path is provided, it looks inside the folder for the following files:
+        ``{ms.COUNTS_FILE!r}``, ``{ms.CELL_METADATA_FILE!r}``, and a boundary parquet file.
+        If a dictionnary, then the following keys can be provided:
+        ``{ms.VPT_NAME_COUNTS!r}``, ``{ms.VPT_NAME_OBS!r}``, ``{ms.VPT_NAME_BOUNDARIES!r}`` with the desired path as the value.
+    read_tif
+        Whether to read the tif images or not
+
+    Returns
+    -------
+    :class:`spatialdata.SpatialData`
+    """
+    path = Path(path)
+    count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs)
+    images_dir = path / MerscopeKeys.IMAGES_DIR
+
+    microns_to_pixels = np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE)
+    microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
+
+    # Images
+    images = {}
+
+    if read_tif:
+        stainings, z_levels = _scan_images(images_dir)
+        for z in z_levels:
+            im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z}.tif").squeeze() for stain in stainings], axis=0)
+            parsed_im = Image3DModel.parse(
+                im,
+                dims=("c", "z", "y", "x"),
+                transformations={"pixels": Identity()},
+                c_coords=stainings,
+            )
+            images[f"z{z}"] = parsed_im
+
+    # Transcripts
+    transcript_df = dd.read_csv(path / MerscopeKeys.TRANSCRIPTS_FILE)
+    transcripts = PointsModel.parse(
+        transcript_df,
+        coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y, "z": MerscopeKeys.GLOBAL_Z},
+        transformations={"pixels": Identity()},
+    )
+    points = {}
+    gene_categorical = dd.from_pandas(
+        transcripts["gene"].compute().astype("category"), npartitions=transcripts.npartitions
+    ).reset_index(drop=True)
+    transcripts["gene"] = gene_categorical
+
+    # split the transcripts into the different z-levels
+    z = transcripts["z"].compute()
+    z_levels = z.value_counts().index
+    z_levels = sorted(z_levels, key=lambda x: int(x))
+    for z_level in z_levels:
+        transcripts_subset = transcripts[z == z_level]
+        # temporary solution until the 3D support is better developed
+        transcripts_subset = transcripts_subset.drop("z", axis=1)
+        points[f"transcripts_z{int(z_level)}"] = transcripts_subset
+
+    # Polygons
+    geo_df = geopandas.read_parquet(boundaries_path)
+    geo_df = geo_df.rename_geometry("geometry")
+    geo_df = geo_df[geo_df[MerscopeKeys.Z_INDEX] == 0]  # Avoid duplicate boundaries on all z-levels
+    geo_df.index = geo_df[MerscopeKeys.INSTANCE_KEY].astype(str)
+
+    polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
+    shapes = {"polygons": polygons}
+
+    # Table
+    data = pd.read_csv(count_path, index_col=0, dtype={MerscopeKeys.COUNTS_CELL_KEY: str})
+    obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.INSTANCE_KEY: str})
+
+    is_gene = ~data.columns.str.lower().str.contains("blank")
+    adata = anndata.AnnData(data.loc[:, is_gene], dtype=data.values.dtype, obs=obs)
+
+    adata.obsm["blank"] = data.loc[:, ~is_gene]  # blank fields are excluded from adata.X
+    adata.obsm["spatial"] = adata.obs[[MerscopeKeys.CELL_X, MerscopeKeys.CELL_Y]].values
+    adata.obs["region"] = pd.Series(path.stem, index=adata.obs_names, dtype="category")
+    adata.obs[MerscopeKeys.INSTANCE_KEY] = adata.obs.index
+
+    table = TableModel.parse(
+        adata,
+        region_key="region",
+        region=adata.obs["region"].cat.categories.tolist(),
+        instance_key=MerscopeKeys.INSTANCE_KEY.value,
+    )
+
+    return SpatialData(shapes=shapes, points=points, images=images, table=table)

From 9257b5501db5656be63ec57301560383e0cc601e Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Tue, 13 Jun 2023 15:09:32 +0200
Subject: [PATCH 10/17] fix flake8 errors

---
 src/spatialdata_io/readers/merscope.py | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index a412d407..8561e8ed 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -18,8 +18,11 @@
 
 
 def _scan_images(images_dir: Path) -> tuple[list[str], list[str]]:
-    """Searches inside the image directory and get all the different channels (stainings)
-    and all the z-levels (usually 0...6)"""
+    """
+    Gets images names inside a directory
+
+    It returns all the different channels (stainings) and all the z-levels (usually 0...6)
+    """
     exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
     matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
 
@@ -30,8 +33,11 @@ def _scan_images(images_dir: Path) -> tuple[list[str], list[str]]:
 
 
 def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str, Any]]]) -> tuple[Path, Path, Path]:
-    """Gets the file paths to (i) the file of transcript per cell,
-    (ii) the cell metadata file, and (iii) the cell boundary file"""
+    """
+    Gets the MERSCOPE file paths when vpt_outputs is provided
+
+    That is, (i) the file of transcript per cell, (ii) the cell metadata file, and (iii) the cell boundary file
+    """
     if vpt_outputs is None:
         return (
             path / MerscopeKeys.COUNTS_FILE,
@@ -114,15 +120,17 @@ def merscope(
 
     if read_tif:
         stainings, z_levels = _scan_images(images_dir)
-        for z in z_levels:
-            im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z}.tif").squeeze() for stain in stainings], axis=0)
+        for z_level in z_levels:
+            im = da.stack(
+                [imread(images_dir / f"mosaic_{stain}_z{z_level}.tif").squeeze() for stain in stainings], axis=0
+            )
             parsed_im = Image3DModel.parse(
                 im,
                 dims=("c", "z", "y", "x"),
                 transformations={"pixels": Identity()},
                 c_coords=stainings,
             )
-            images[f"z{z}"] = parsed_im
+            images[f"z{z_level}"] = parsed_im
 
     # Transcripts
     transcript_df = dd.read_csv(path / MerscopeKeys.TRANSCRIPTS_FILE)

From ca7ae968ecef823421e48932a9a90f9debdb5af1 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Wed, 14 Jun 2023 10:39:26 +0200
Subject: [PATCH 11/17] z-layers argument + consider only 2D stacks

---
 src/spatialdata_io/readers/merscope.py | 60 +++++++++++---------------
 1 file changed, 24 insertions(+), 36 deletions(-)

diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index 8561e8ed..7d4dfa5c 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -10,26 +10,20 @@
 from dask import array as da
 from dask_image.imread import imread
 from spatialdata import SpatialData
-from spatialdata.models import Image3DModel, PointsModel, ShapesModel, TableModel
+from spatialdata.models import Image2DModel, PointsModel, ShapesModel, TableModel
 from spatialdata.transformations import Affine, Identity
 
 from spatialdata_io._constants._constants import MerscopeKeys
 from spatialdata_io._docs import inject_docs
 
 
-def _scan_images(images_dir: Path) -> tuple[list[str], list[str]]:
-    """
-    Gets images names inside a directory
-
-    It returns all the different channels (stainings) and all the z-levels (usually 0...6)
-    """
+def _get_channel_names(images_dir: Path) -> list[str]:
     exp = r"mosaic_(?P<stain>[\w|-]+[0-9]?)_z(?P<z>[0-9]+).tif"
     matches = [re.search(exp, file.name) for file in images_dir.iterdir()]
 
     stainings = {match.group("stain") for match in matches if match}
-    z_levels = {match.group("z") for match in matches if match}
 
-    return list(stainings), list(z_levels)
+    return list(stainings)
 
 
 def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str, Any]]]) -> tuple[Path, Path, Path]:
@@ -78,7 +72,9 @@ def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str,
 
 @inject_docs(ms=MerscopeKeys)
 def merscope(
-    path: Union[str, Path], vpt_outputs: Optional[Union[Path, str, dict[str, Any]]] = None, read_tif: bool = True
+    path: str | Path,
+    vpt_outputs: Optional[Path | str | dict[str, Any]] = None,
+    z_layers: int | list[int] | None = 3,
 ) -> SpatialData:
     """
     Read *MERSCOPE* data from Vizgen.
@@ -101,8 +97,9 @@ def merscope(
         ``{ms.COUNTS_FILE!r}``, ``{ms.CELL_METADATA_FILE!r}``, and a boundary parquet file.
         If a dictionnary, then the following keys can be provided:
         ``{ms.VPT_NAME_COUNTS!r}``, ``{ms.VPT_NAME_OBS!r}``, ``{ms.VPT_NAME_BOUNDARIES!r}`` with the desired path as the value.
-    read_tif
-        Whether to read the tif images or not
+    z_layers
+        Indices of the z-layers to consider. Either one `int` index, or a list of `int` indices. If `None`, then no image is loaded.
+        By default, only the middle layer is considered (that is, layer 3).
 
     Returns
     -------
@@ -118,42 +115,32 @@ def merscope(
     # Images
     images = {}
 
-    if read_tif:
-        stainings, z_levels = _scan_images(images_dir)
-        for z_level in z_levels:
-            im = da.stack(
-                [imread(images_dir / f"mosaic_{stain}_z{z_level}.tif").squeeze() for stain in stainings], axis=0
-            )
-            parsed_im = Image3DModel.parse(
-                im,
-                dims=("c", "z", "y", "x"),
-                transformations={"pixels": Identity()},
-                c_coords=stainings,
-            )
-            images[f"z{z_level}"] = parsed_im
+    z_layers = [z_layers] if isinstance(z_layers, int) else z_layers or []
+
+    stainings = _get_channel_names(images_dir)
+    for z_layer in z_layers:
+        im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif").squeeze() for stain in stainings], axis=0)
+        parsed_im = Image2DModel.parse(
+            im,
+            dims=("c", "y", "x"),
+            transformations={"pixels": Identity()},
+            c_coords=stainings,
+        )
+        images[f"z{z_layer}"] = parsed_im
 
     # Transcripts
     transcript_df = dd.read_csv(path / MerscopeKeys.TRANSCRIPTS_FILE)
     transcripts = PointsModel.parse(
         transcript_df,
-        coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y, "z": MerscopeKeys.GLOBAL_Z},
+        coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y},
         transformations={"pixels": Identity()},
     )
-    points = {}
     gene_categorical = dd.from_pandas(
         transcripts["gene"].compute().astype("category"), npartitions=transcripts.npartitions
     ).reset_index(drop=True)
     transcripts["gene"] = gene_categorical
 
-    # split the transcripts into the different z-levels
-    z = transcripts["z"].compute()
-    z_levels = z.value_counts().index
-    z_levels = sorted(z_levels, key=lambda x: int(x))
-    for z_level in z_levels:
-        transcripts_subset = transcripts[z == z_level]
-        # temporary solution until the 3D support is better developed
-        transcripts_subset = transcripts_subset.drop("z", axis=1)
-        points[f"transcripts_z{int(z_level)}"] = transcripts_subset
+    points = {"transcripts": transcripts}
 
     # Polygons
     geo_df = geopandas.read_parquet(boundaries_path)
@@ -162,6 +149,7 @@ def merscope(
     geo_df.index = geo_df[MerscopeKeys.INSTANCE_KEY].astype(str)
 
     polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
+
     shapes = {"polygons": polygons}
 
     # Table

From 2452a792005117ef0d0dc5ba7a9a619775576695 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Wed, 14 Jun 2023 10:49:26 +0200
Subject: [PATCH 12/17] back to py3.9 Union typing syntax

---
 src/spatialdata_io/readers/merscope.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index 7d4dfa5c..fc82c8df 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -72,9 +72,9 @@ def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str,
 
 @inject_docs(ms=MerscopeKeys)
 def merscope(
-    path: str | Path,
-    vpt_outputs: Optional[Path | str | dict[str, Any]] = None,
-    z_layers: int | list[int] | None = 3,
+    path: Union[str, Path],
+    vpt_outputs: Optional[Union[Path, str, dict[str, Any]]] = None,
+    z_layers: Union[int, list[int], None] = 3,
 ) -> SpatialData:
     """
     Read *MERSCOPE* data from Vizgen.

From 0226e53c16679a23ce267d01ac3e7e0ed0a9501e Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Mon, 19 Jun 2023 17:29:28 +0200
Subject: [PATCH 13/17] infer the dataset id using the directory path

---
 src/spatialdata_io/readers/merscope.py | 44 ++++++++++++++++++--------
 1 file changed, 31 insertions(+), 13 deletions(-)

diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index fc82c8df..2f3731f1 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -1,6 +1,8 @@
+from __future__ import annotations
+
 import re
 from pathlib import Path
-from typing import Any, Optional, Union
+from typing import Any
 
 import anndata
 import dask.dataframe as dd
@@ -26,7 +28,7 @@ def _get_channel_names(images_dir: Path) -> list[str]:
     return list(stainings)
 
 
-def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str, Any]]]) -> tuple[Path, Path, Path]:
+def _get_file_paths(path: Path, vpt_outputs: Path | str | dict[str, Any] | None) -> tuple[Path, Path, Path]:
     """
     Gets the MERSCOPE file paths when vpt_outputs is provided
 
@@ -72,9 +74,11 @@ def _get_file_paths(path: Path, vpt_outputs: Optional[Union[Path, str, dict[str,
 
 @inject_docs(ms=MerscopeKeys)
 def merscope(
-    path: Union[str, Path],
-    vpt_outputs: Optional[Union[Path, str, dict[str, Any]]] = None,
-    z_layers: Union[int, list[int], None] = 3,
+    path: str | Path,
+    vpt_outputs: Path | str | dict[str, Any] | None = None,
+    z_layers: int | list[int] | None = 3,
+    region_name: str | None = None,
+    slide_name: str | None = None,
 ) -> SpatialData:
     """
     Read *MERSCOPE* data from Vizgen.
@@ -90,16 +94,24 @@ def merscope(
     Parameters
     ----------
     path
-        Path to the root directory containing the *Merscope* files (e.g., `detected_transcripts.csv`).
+        Path to the region/root directory containing the *Merscope* files (e.g., `detected_transcripts.csv`).
     vpt_outputs
         Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used.
         If a folder path is provided, it looks inside the folder for the following files:
-        ``{ms.COUNTS_FILE!r}``, ``{ms.CELL_METADATA_FILE!r}``, and a boundary parquet file.
-        If a dictionnary, then the following keys can be provided:
-        ``{ms.VPT_NAME_COUNTS!r}``, ``{ms.VPT_NAME_OBS!r}``, ``{ms.VPT_NAME_BOUNDARIES!r}`` with the desired path as the value.
+                - ``{ms.COUNTS_FILE!r}``
+                - ``{ms.CELL_METADATA_FILE!r}``
+                - ``{ms.BOUNDARIES_FILE!r}``
+        If a dictionnary, then the following keys should be provided with the desired path:
+                - ``{ms.VPT_NAME_COUNTS!r}``
+                - ``{ms.VPT_NAME_OBS!r}``
+                - ``{ms.VPT_NAME_BOUNDARIES!r}``
     z_layers
         Indices of the z-layers to consider. Either one `int` index, or a list of `int` indices. If `None`, then no image is loaded.
         By default, only the middle layer is considered (that is, layer 3).
+    region_name
+        Name of the region of interest, e.g., `'region_0'`. If `None` then the name of the `path` directory is used.
+    slide_name
+        Name of the slide/run. If `None` then the name of the parent directory of `path` is used (whose name starts with a date).
 
     Returns
     -------
@@ -112,6 +124,10 @@ def merscope(
     microns_to_pixels = np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE)
     microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
 
+    region_name = path.name if region_name is None else region_name
+    slide_name = path.parent.name if slide_name is None else slide_name
+    dataset_id = f"{slide_name}_{region_name}"
+
     # Images
     images = {}
 
@@ -126,7 +142,7 @@ def merscope(
             transformations={"pixels": Identity()},
             c_coords=stainings,
         )
-        images[f"z{z_layer}"] = parsed_im
+        images[f"{dataset_id}_z{z_layer}"] = parsed_im
 
     # Transcripts
     transcript_df = dd.read_csv(path / MerscopeKeys.TRANSCRIPTS_FILE)
@@ -140,7 +156,7 @@ def merscope(
     ).reset_index(drop=True)
     transcripts["gene"] = gene_categorical
 
-    points = {"transcripts": transcripts}
+    points = {f"{dataset_id}_transcripts": transcripts}
 
     # Polygons
     geo_df = geopandas.read_parquet(boundaries_path)
@@ -150,7 +166,7 @@ def merscope(
 
     polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
 
-    shapes = {"polygons": polygons}
+    shapes = {f"{dataset_id}_polygons": polygons}
 
     # Table
     data = pd.read_csv(count_path, index_col=0, dtype={MerscopeKeys.COUNTS_CELL_KEY: str})
@@ -161,7 +177,9 @@ def merscope(
 
     adata.obsm["blank"] = data.loc[:, ~is_gene]  # blank fields are excluded from adata.X
     adata.obsm["spatial"] = adata.obs[[MerscopeKeys.CELL_X, MerscopeKeys.CELL_Y]].values
-    adata.obs["region"] = pd.Series(path.stem, index=adata.obs_names, dtype="category")
+    adata.obs["region"] = pd.Series(region_name, index=adata.obs_names, dtype="category")
+    adata.obs["slide"] = pd.Series(slide_name, index=adata.obs_names, dtype="category")
+    adata.obs["dataset_id"] = pd.Series(dataset_id, index=adata.obs_names, dtype="category")
     adata.obs[MerscopeKeys.INSTANCE_KEY] = adata.obs.index
 
     table = TableModel.parse(

From 71f075a1e26cf1406525fa6c365d049e6175a811 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Mon, 19 Jun 2023 17:34:16 +0200
Subject: [PATCH 14/17] fix flake8 unindent warning

---
 src/spatialdata_io/readers/merscope.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index 2f3731f1..1ff2c736 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -98,13 +98,16 @@ def merscope(
     vpt_outputs
         Optional arguments to indicate the output of the vizgen-postprocessing-tool (VPT), when used.
         If a folder path is provided, it looks inside the folder for the following files:
-                - ``{ms.COUNTS_FILE!r}``
-                - ``{ms.CELL_METADATA_FILE!r}``
-                - ``{ms.BOUNDARIES_FILE!r}``
+
+            - ``{ms.COUNTS_FILE!r}``
+            - ``{ms.CELL_METADATA_FILE!r}``
+            - ``{ms.BOUNDARIES_FILE!r}``
+
         If a dictionnary, then the following keys should be provided with the desired path:
-                - ``{ms.VPT_NAME_COUNTS!r}``
-                - ``{ms.VPT_NAME_OBS!r}``
-                - ``{ms.VPT_NAME_BOUNDARIES!r}``
+
+            - ``{ms.VPT_NAME_COUNTS!r}``
+            - ``{ms.VPT_NAME_OBS!r}``
+            - ``{ms.VPT_NAME_BOUNDARIES!r}``
     z_layers
         Indices of the z-layers to consider. Either one `int` index, or a list of `int` indices. If `None`, then no image is loaded.
         By default, only the middle layer is considered (that is, layer 3).

From c488ccb5b04eaf3a51b0262a28d827e482f978d0 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Tue, 20 Jun 2023 10:15:36 +0200
Subject: [PATCH 15/17] adding the merscope reader to the docs API

---
 README.md   | 2 +-
 docs/api.md | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 441f05b4..d2cbc080 100644
--- a/README.md
+++ b/README.md
@@ -17,10 +17,10 @@ This package contains reader functions to load common spatial omics formats into
 -   10x Genomics Visium
 -   10x Genomics Xenium
 -   Curio Seeker
+-   Vizgen MERSCOPE (MERFISH)
 
 Coming soon:
 
--   Vizgen MERSCOPE (MERFISH)
 -   Spatial Genomics seqFISH
 -   Akoya PhenoCycler (formerly CODEX)
 
diff --git a/docs/api.md b/docs/api.md
index 63c1e0cf..d39ebd07 100644
--- a/docs/api.md
+++ b/docs/api.md
@@ -17,5 +17,6 @@ I/O for the `spatialdata` project.
     visium
     xenium
     steinbock
+    merscope
     mcmicro
 ```

From 0c9158098d04cc142a7f549b78089747928d4862 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Mon, 31 Jul 2023 10:02:54 +0200
Subject: [PATCH 16/17] merscope: handle big image + fix TableModel region

---
 src/spatialdata_io/_constants/_constants.py |  3 +-
 src/spatialdata_io/readers/merscope.py      | 52 +++++++++++++++------
 2 files changed, 41 insertions(+), 14 deletions(-)

diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py
index e540e324..c572fced 100644
--- a/src/spatialdata_io/_constants/_constants.py
+++ b/src/spatialdata_io/_constants/_constants.py
@@ -174,7 +174,7 @@ class MerscopeKeys(ModeEnum):
     VPT_NAME_BOUNDARIES = "cell_boundaries"
 
     # metadata
-    INSTANCE_KEY = "EntityID"
+    METADATA_CELL_KEY = "EntityID"
     COUNTS_CELL_KEY = "cell"
     CELL_X = "center_x"
     CELL_Y = "center_y"
@@ -182,3 +182,4 @@ class MerscopeKeys(ModeEnum):
     GLOBAL_Y = "global_y"
     GLOBAL_Z = "global_z"
     Z_INDEX = "ZIndex"
+    REGION_KEY = "cells_region"
diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index da347f9e..0487d5d9 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -1,7 +1,9 @@
 from __future__ import annotations
 
 import re
+from collections.abc import Mapping
 from pathlib import Path
+from types import MappingProxyType
 from typing import Any
 
 import anndata
@@ -79,6 +81,8 @@ def merscope(
     z_layers: int | list[int] | None = 3,
     region_name: str | None = None,
     slide_name: str | None = None,
+    imread_kwargs: Mapping[str, Any] = MappingProxyType({}),
+    image_models_kwargs: Mapping[str, Any] = MappingProxyType({}),
 ) -> SpatialData:
     """
     Read *MERSCOPE* data from Vizgen.
@@ -115,11 +119,26 @@ def merscope(
         Name of the region of interest, e.g., `'region_0'`. If `None` then the name of the `path` directory is used.
     slide_name
         Name of the slide/run. If `None` then the name of the parent directory of `path` is used (whose name starts with a date).
+    imread_kwargs
+        Keyword arguments to pass to the image reader.
+    image_models_kwargs
+        Keyword arguments to pass to the image models.
 
     Returns
     -------
     :class:`spatialdata.SpatialData`
     """
+    if "chunks" not in image_models_kwargs:
+        if isinstance(image_models_kwargs, MappingProxyType):
+            image_models_kwargs = {}
+        assert isinstance(image_models_kwargs, dict)
+        image_models_kwargs["chunks"] = (1, 4096, 4096)
+    if "scale_factors" not in image_models_kwargs:
+        if isinstance(image_models_kwargs, MappingProxyType):
+            image_models_kwargs = {}
+        assert isinstance(image_models_kwargs, dict)
+        image_models_kwargs["scale_factors"] = [2, 2, 2, 2]
+
     path = Path(path)
     count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs)
     images_dir = path / MerscopeKeys.IMAGES_DIR
@@ -127,9 +146,10 @@ def merscope(
     microns_to_pixels = np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE)
     microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
 
-    region_name = path.name if region_name is None else region_name
+    vizgen_region = path.name if region_name is None else region_name
     slide_name = path.parent.name if slide_name is None else slide_name
-    dataset_id = f"{slide_name}_{region_name}"
+    dataset_id = f"{slide_name}_{vizgen_region}"
+    region = f"{dataset_id}_polygons"
 
     # Images
     images = {}
@@ -138,12 +158,16 @@ def merscope(
 
     stainings = _get_channel_names(images_dir)
     for z_layer in z_layers:
-        im = da.stack([imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif").squeeze() for stain in stainings], axis=0)
+        im = da.stack(
+            [imread(images_dir / f"mosaic_{stain}_z{z_layer}.tif", **imread_kwargs).squeeze() for stain in stainings],
+            axis=0,
+        )
         parsed_im = Image2DModel.parse(
             im,
             dims=("c", "y", "x"),
-            transformations={"pixels": Identity()},
+            transformations={"microns": microns_to_pixels.inverse()},
             c_coords=stainings,
+            **image_models_kwargs,
         )
         images[f"{dataset_id}_z{z_layer}"] = parsed_im
 
@@ -152,7 +176,7 @@ def merscope(
     transcripts = PointsModel.parse(
         transcript_df,
         coordinates={"x": MerscopeKeys.GLOBAL_X, "y": MerscopeKeys.GLOBAL_Y},
-        transformations={"pixels": microns_to_pixels},
+        transformations={"microns": Identity()},
     )
     categories = transcripts["gene"].compute().astype("category")
     gene_categorical = dd.from_pandas(categories, npartitions=transcripts.npartitions).reset_index(drop=True)
@@ -164,31 +188,33 @@ def merscope(
     geo_df = geopandas.read_parquet(boundaries_path)
     geo_df = geo_df.rename_geometry("geometry")
     geo_df = geo_df[geo_df[MerscopeKeys.Z_INDEX] == 0]  # Avoid duplicate boundaries on all z-levels
-    geo_df.index = geo_df[MerscopeKeys.INSTANCE_KEY].astype(str)
+    geo_df.geometry = geo_df.geometry.map(lambda x: x.geoms[0])  # The MultiPolygons contain only one polygon
+    geo_df.index = geo_df[MerscopeKeys.METADATA_CELL_KEY].astype(str)
 
-    polygons = ShapesModel.parse(geo_df, transformations={"pixels": microns_to_pixels})
+    polygons = ShapesModel.parse(geo_df, transformations={"microns": Identity()})
 
     shapes = {f"{dataset_id}_polygons": polygons}
 
     # Table
     data = pd.read_csv(count_path, index_col=0, dtype={MerscopeKeys.COUNTS_CELL_KEY: str})
-    obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.INSTANCE_KEY: str})
+    obs = pd.read_csv(obs_path, index_col=0, dtype={MerscopeKeys.METADATA_CELL_KEY: str})
 
     is_gene = ~data.columns.str.lower().str.contains("blank")
     adata = anndata.AnnData(data.loc[:, is_gene], dtype=data.values.dtype, obs=obs)
 
     adata.obsm["blank"] = data.loc[:, ~is_gene]  # blank fields are excluded from adata.X
     adata.obsm["spatial"] = adata.obs[[MerscopeKeys.CELL_X, MerscopeKeys.CELL_Y]].values
-    adata.obs["region"] = pd.Series(region_name, index=adata.obs_names, dtype="category")
+    adata.obs["region"] = pd.Series(vizgen_region, index=adata.obs_names, dtype="category")
     adata.obs["slide"] = pd.Series(slide_name, index=adata.obs_names, dtype="category")
     adata.obs["dataset_id"] = pd.Series(dataset_id, index=adata.obs_names, dtype="category")
-    adata.obs[MerscopeKeys.INSTANCE_KEY] = adata.obs.index
+    adata.obs[MerscopeKeys.REGION_KEY] = pd.Series(region, index=adata.obs_names, dtype="category")
+    adata.obs[MerscopeKeys.METADATA_CELL_KEY] = adata.obs.index
 
     table = TableModel.parse(
         adata,
-        region_key="region",
-        region=adata.obs["region"].cat.categories.tolist(),
-        instance_key=MerscopeKeys.INSTANCE_KEY.value,
+        region_key=MerscopeKeys.REGION_KEY.value,
+        region=region,
+        instance_key=MerscopeKeys.METADATA_CELL_KEY.value,
     )
 
     return SpatialData(shapes=shapes, points=points, images=images, table=table)

From 137fe8a55868da8c200cd158c0dc2382712482f3 Mon Sep 17 00:00:00 2001
From: Blampey Quentin <quentin.blampey@centralesupelec.fr>
Date: Mon, 31 Jul 2023 10:08:49 +0200
Subject: [PATCH 17/17] microns_to_pixel: in one line for mypy

---
 src/spatialdata_io/readers/merscope.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/spatialdata_io/readers/merscope.py b/src/spatialdata_io/readers/merscope.py
index 0487d5d9..686dcf7c 100644
--- a/src/spatialdata_io/readers/merscope.py
+++ b/src/spatialdata_io/readers/merscope.py
@@ -143,8 +143,9 @@ def merscope(
     count_path, obs_path, boundaries_path = _get_file_paths(path, vpt_outputs)
     images_dir = path / MerscopeKeys.IMAGES_DIR
 
-    microns_to_pixels = np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE)
-    microns_to_pixels = Affine(microns_to_pixels, input_axes=("x", "y"), output_axes=("x", "y"))
+    microns_to_pixels = Affine(
+        np.genfromtxt(images_dir / MerscopeKeys.TRANSFORMATION_FILE), input_axes=("x", "y"), output_axes=("x", "y")
+    )
 
     vizgen_region = path.name if region_name is None else region_name
     slide_name = path.parent.name if slide_name is None else slide_name