From d6a0b8124753be4f9d0d4d7ef6955dc9d2a527d5 Mon Sep 17 00:00:00 2001 From: Laurens Lehner Date: Tue, 23 May 2023 16:23:20 +0200 Subject: [PATCH 1/4] Add initial reader --- src/spatialdata_io/__init__.py | 2 + src/spatialdata_io/_constants/_constants.py | 15 ++++ src/spatialdata_io/readers/curio.py | 86 +++++++++++++++++++++ 3 files changed, 103 insertions(+) create mode 100644 src/spatialdata_io/readers/curio.py diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index 297c4435..aca22eb0 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,5 +1,6 @@ from importlib.metadata import version +from spatialdata_io.readers.curio import curio from spatialdata_io.readers.cosmx import cosmx from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.steinbock import steinbock @@ -7,6 +8,7 @@ from spatialdata_io.readers.xenium import xenium __all__ = [ + "curio", "visium", "xenium", "cosmx", diff --git a/src/spatialdata_io/_constants/_constants.py b/src/spatialdata_io/_constants/_constants.py index 9f3237c5..0a275fb3 100644 --- a/src/spatialdata_io/_constants/_constants.py +++ b/src/spatialdata_io/_constants/_constants.py @@ -3,6 +3,21 @@ from spatialdata_io._constants._enum import ModeEnum +@unique +class CurioKeys(ModeEnum): + """Keys for *Curio* formatted dataset.""" + + # files and directories + ANNDATA_FILE = ".h5ad" + CLUSTER_ASSIGNMENT = "cluster_assignment.txt" + METRICS_FILE = "Metrics.csv" + VAR_FEATURES_CLUSTERS = "variable_features_clusters.txt" + VAR_FEATURES_MORANSI = "variable_features_moransi.txt" + # metadata + CATEGORY = "Category" + TOP_CLUSTER_DEFINING_FEATURES = "Top_cluster_defining_features" + + @unique class CosmxKeys(ModeEnum): """Keys for *Nanostring Cosmx* formatted dataset.""" diff --git a/src/spatialdata_io/readers/curio.py b/src/spatialdata_io/readers/curio.py new file mode 100644 index 00000000..fa6f2a85 --- /dev/null +++ b/src/spatialdata_io/readers/curio.py @@ -0,0 +1,86 @@ +from __future__ import annotations + +from collections.abc import Mapping +from pathlib import Path +from types import MappingProxyType +from typing import Optional + +import anndata as ad +import pandas as pd +from spatialdata import SpatialData +from spatialdata._logging import logger +from spatialdata.models import TableModel + +from spatialdata_io._constants._constants import CurioKeys +from spatialdata_io._docs import inject_docs + +__all__ = ["curio"] + + +@inject_docs(vx=CurioKeys) +def curio( + path: str | Path, + dataset_id: Optional[str] = None, +) -> SpatialData: + """ + Read *Curio* formatted dataset. + + This function reads the following files: + + - ``_`{vx.ANNDATA_FILE!r}```: Counts and metadata file. + - ``_`{vx.CLUSTER_ASSIGNMENT!r}```: Cluster assignment file. + - ``_`{vx.METRICS_FILE!r}```: Metrics file. + - ``_`{vx.VAR_FEATURES_CLUSTERS!r}```: Variable features clusters file. + - ``_`{vx.VAR_FEATURES_MORANSI!r}```: Variable features Moran's I file. + + .. seealso:: + + - `CODEX output `_. + + Parameters + ---------- + path + Path to the directory containing the data. + dataset_id + Dataset identifier. + imread_kwargs + Keyword arguments passed to :func:`dask_image.imread.imread`. + image_models_kwargs + Keyword arguments passed to :class:`spatialdata.models.Image2DModel`. + + Returns + ------- + :class:`spatialdata.SpatialData` + """ + + path = Path(path) + path_files = [CurioKeys.ANNDATA_FILE, + CurioKeys.CLUSTER_ASSIGNMENT, + CurioKeys.METRICS_FILE, + CurioKeys.VAR_FEATURES_CLUSTERS, + CurioKeys.VAR_FEATURES_MORANSI] + + if dataset_id is not None: + file_names = [f"{dataset_id}_{file_name}" for file_name in path_files] + else: + file_names = [] + for file_name in path_files: + file_names.extend(path.glob(file_name)) + + adata = ad.read_h5ad(path / file_names[0]) + cluster_assign = pd.read_csv(path / file_names[1], sep="\t", header=None) + metrics = pd.read_csv(path / file_names[2], sep="\,", header=0) + var_features_clusters = pd.read_csv(path / file_names[3], sep="\t", header=0) + var_features_moransi = pd.read_csv(path / file_names[4], sep="\t", header=0) + + adata.obs = adata.obs.assign(cluster=cluster_assign[1].values) + categories = metrics[CurioKeys.CATEGORY].unique() + for cat in categories: + df = metrics.loc[metrics[CurioKeys.CATEGORY] == cat] + adata.uns[cat] = dict(zip(df.iloc[:,0], df.iloc[:,1])) + adata.uns[CurioKeys.TOP_CLUSTER_DEFINING_FEATURES] = var_features_clusters + adata.var.join(var_features_moransi, how='outer') + + table = TableModel.parse(adata) + + return SpatialData(table=table) From 9427c774a3e5e2bc9304727b6299f691aac06257 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 May 2023 14:25:19 +0000 Subject: [PATCH 2/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/spatialdata_io/__init__.py | 2 +- src/spatialdata_io/readers/curio.py | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/spatialdata_io/__init__.py b/src/spatialdata_io/__init__.py index aca22eb0..98c5aea2 100644 --- a/src/spatialdata_io/__init__.py +++ b/src/spatialdata_io/__init__.py @@ -1,7 +1,7 @@ from importlib.metadata import version -from spatialdata_io.readers.curio import curio from spatialdata_io.readers.cosmx import cosmx +from spatialdata_io.readers.curio import curio from spatialdata_io.readers.mcmicro import mcmicro from spatialdata_io.readers.steinbock import steinbock from spatialdata_io.readers.visium import visium diff --git a/src/spatialdata_io/readers/curio.py b/src/spatialdata_io/readers/curio.py index fa6f2a85..c53ae7c8 100644 --- a/src/spatialdata_io/readers/curio.py +++ b/src/spatialdata_io/readers/curio.py @@ -1,14 +1,11 @@ from __future__ import annotations -from collections.abc import Mapping from pathlib import Path -from types import MappingProxyType from typing import Optional import anndata as ad import pandas as pd from spatialdata import SpatialData -from spatialdata._logging import logger from spatialdata.models import TableModel from spatialdata_io._constants._constants import CurioKeys @@ -54,22 +51,24 @@ def curio( """ path = Path(path) - path_files = [CurioKeys.ANNDATA_FILE, - CurioKeys.CLUSTER_ASSIGNMENT, - CurioKeys.METRICS_FILE, - CurioKeys.VAR_FEATURES_CLUSTERS, - CurioKeys.VAR_FEATURES_MORANSI] - + path_files = [ + CurioKeys.ANNDATA_FILE, + CurioKeys.CLUSTER_ASSIGNMENT, + CurioKeys.METRICS_FILE, + CurioKeys.VAR_FEATURES_CLUSTERS, + CurioKeys.VAR_FEATURES_MORANSI, + ] + if dataset_id is not None: file_names = [f"{dataset_id}_{file_name}" for file_name in path_files] else: file_names = [] for file_name in path_files: file_names.extend(path.glob(file_name)) - + adata = ad.read_h5ad(path / file_names[0]) cluster_assign = pd.read_csv(path / file_names[1], sep="\t", header=None) - metrics = pd.read_csv(path / file_names[2], sep="\,", header=0) + metrics = pd.read_csv(path / file_names[2], sep=r"\,", header=0) var_features_clusters = pd.read_csv(path / file_names[3], sep="\t", header=0) var_features_moransi = pd.read_csv(path / file_names[4], sep="\t", header=0) @@ -77,9 +76,9 @@ def curio( categories = metrics[CurioKeys.CATEGORY].unique() for cat in categories: df = metrics.loc[metrics[CurioKeys.CATEGORY] == cat] - adata.uns[cat] = dict(zip(df.iloc[:,0], df.iloc[:,1])) + adata.uns[cat] = dict(zip(df.iloc[:, 0], df.iloc[:, 1])) adata.uns[CurioKeys.TOP_CLUSTER_DEFINING_FEATURES] = var_features_clusters - adata.var.join(var_features_moransi, how='outer') + adata.var.join(var_features_moransi, how="outer") table = TableModel.parse(adata) From 01d8c189d0e910c79f07b5d89b4e9672c7b18e93 Mon Sep 17 00:00:00 2001 From: Laurens Lehner Date: Tue, 23 May 2023 16:29:10 +0200 Subject: [PATCH 3/4] Update api.md --- docs/api.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/api.md b/docs/api.md index da86bcf3..63c1e0cf 100644 --- a/docs/api.md +++ b/docs/api.md @@ -12,6 +12,7 @@ I/O for the `spatialdata` project. .. autosummary:: :toctree: generated + curio cosmx visium xenium From 72da3107b871dd412e25be9cad70e48a6b4da428 Mon Sep 17 00:00:00 2001 From: Laurens Lehner Date: Tue, 23 May 2023 16:38:38 +0200 Subject: [PATCH 4/4] Fix pre-commit --- src/spatialdata_io/readers/curio.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/spatialdata_io/readers/curio.py b/src/spatialdata_io/readers/curio.py index c53ae7c8..ac8b8e6a 100644 --- a/src/spatialdata_io/readers/curio.py +++ b/src/spatialdata_io/readers/curio.py @@ -49,7 +49,6 @@ def curio( ------- :class:`spatialdata.SpatialData` """ - path = Path(path) path_files = [ CurioKeys.ANNDATA_FILE, @@ -64,7 +63,7 @@ def curio( else: file_names = [] for file_name in path_files: - file_names.extend(path.glob(file_name)) + file_names.extend(str(path.glob(file_name))) adata = ad.read_h5ad(path / file_names[0]) cluster_assign = pd.read_csv(path / file_names[1], sep="\t", header=None)