Skip to content

Cannot read our new Xenium dataset #79

@fbnrst

Description

@fbnrst

My first issue here, so I just want to say: great project, much appreciated!

I tried to read our new Xenium dataset with the current main version of spatialdata-io ('0.0.8.dev26+g03102c6'), but I get an error:

spatialdata_io.xenium('/path/to/xenium/data')

Output

INFO     reading                                                                                                   
         /group/dcgc/sequencing/10x/XETG00051/20230727__145230__LAB4872_RebekkaW/output-XETG00051__0003507__S146470
         __20230727__145352/cell_feature_matrix.h5                                                                 

/opt/conda/lib/python3.10/site-packages/anndata/_core/anndata.py:1113: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if not is_categorical_dtype(df_full[k]):
/opt/conda/lib/python3.10/site-packages/spatialdata/models/models.py:703: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if not is_categorical_dtype(adata.obs[region_key]):
/opt/conda/lib/python3.10/site-packages/spatialdata_io/readers/xenium.py:195: UserWarning: Converting `region_key: region` to categorical dtype.
  table = TableModel.parse(adata, region=specs["region"], region_key="region", instance_key=str(XeniumKeys.CELL_ID))
/opt/conda/lib/python3.10/site-packages/spatialdata/models/models.py:598: FutureWarning: is_categorical_dtype is deprecated and will be removed in a future version. Use isinstance(dtype, CategoricalDtype) instead
  if is_categorical_dtype(data[c]) and not data[c].cat.known:

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[8], line 1
----> 1 spatialdata_io.xenium('/group/dcgc/sequencing/10x/XETG00051/20230727__145230__LAB4872_RebekkaW/output-XETG00051__0003507__S146470__20230727__145352/')

File /opt/conda/lib/python3.10/site-packages/spatialdata_io/readers/xenium.py:125, in xenium(path, n_jobs, cells_as_shapes, nucleus_boundaries, transcripts, morphology_mip, morphology_focus, imread_kwargs, image_models_kwargs)
    123 points = {}
    124 if transcripts:
--> 125     points["transcripts"] = _get_points(path, specs)
    127 images = {}
    128 if morphology_mip:

File /opt/conda/lib/python3.10/site-packages/spatialdata_io/readers/xenium.py:174, in _get_points(path, specs)
    171 table["feature_name"] = table["feature_name"].apply(lambda x: x.decode("utf-8"), meta=("feature_name", "object"))
    173 transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y"))
--> 174 points = PointsModel.parse(
    175     table,
    176     coordinates={"x": XeniumKeys.TRANSCRIPTS_X, "y": XeniumKeys.TRANSCRIPTS_Y, "z": XeniumKeys.TRANSCRIPTS_Z},
    177     feature_key=XeniumKeys.FEATURE_NAME,
    178     instance_key=XeniumKeys.CELL_ID,
    179     transformations={"global": transform},
    180 )
    181 return points

File /opt/conda/lib/python3.10/functools.py:926, in singledispatchmethod.__get__.<locals>._method(*args, **kwargs)
    924 def _method(*args, **kwargs):
    925     method = self.dispatcher.dispatch(args[0].__class__)
--> 926     return method.__get__(obj, cls)(*args, **kwargs)

File /opt/conda/lib/python3.10/site-packages/spatialdata/models/models.py:572, in PointsModel._(cls, data, coordinates, feature_key, instance_key, transformations, **kwargs)
    570 for c in set(data.columns) - {feature_key, instance_key, *coordinates.values()}:
    571     table[c] = data[c]
--> 572 return cls._add_metadata_and_validate(
    573     table, feature_key=feature_key, instance_key=instance_key, transformations=transformations
    574 )

File /opt/conda/lib/python3.10/site-packages/spatialdata/models/models.py:600, in PointsModel._add_metadata_and_validate(cls, data, feature_key, instance_key, transformations)
    598 if is_categorical_dtype(data[c]) and not data[c].cat.known:
    599     try:
--> 600         data[c] = data[c].cat.set_categories(data[c].head(1).cat.categories)
    601     except ValueError:
    602         logger.info(f"Column `{c}` contains unknown categories. Consider casting it.")

File /opt/conda/lib/python3.10/site-packages/dask/threaded.py:89, in get(dsk, keys, cache, num_workers, pool, **kwargs)
     86     elif isinstance(pool, multiprocessing.pool.Pool):
     87         pool = MultiprocessingPoolExecutor(pool)
---> 89 results = get_async(
     90     pool.submit,
     91     pool._max_workers,
     92     dsk,
     93     keys,
     94     cache=cache,
     95     get_id=_thread_get_id,
     96     pack_exception=pack_exception,
     97     **kwargs,
     98 )
    100 # Cleanup pools associated to dead threads
    101 with pools_lock:

File /opt/conda/lib/python3.10/site-packages/dask/local.py:511, in get_async(submit, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, chunksize, **kwargs)
    509         _execute_task(task, data)  # Re-execute locally
    510     else:
--> 511         raise_exception(exc, tb)
    512 res, worker_id = loads(res_info)
    513 state["cache"][key] = res

File /opt/conda/lib/python3.10/site-packages/dask/local.py:319, in reraise(exc, tb)
    317 if exc.__traceback__ is not tb:
    318     raise exc.with_traceback(tb)
--> 319 raise exc

File /opt/conda/lib/python3.10/site-packages/dask/local.py:224, in execute_task(key, task_info, dumps, loads, get_id, pack_exception)
    222 try:
    223     task, data = loads(task_info)
--> 224     result = _execute_task(task, data)
    225     id = get_id()
    226     result = dumps((result, id))

File lib.pyx:2917, in pandas._libs.lib.map_infer()

File /opt/conda/lib/python3.10/site-packages/spatialdata_io/readers/xenium.py:171, in _get_points.<locals>.<lambda>(x)
    169 def _get_points(path: Path, specs: dict[str, Any]) -> Table:
    170     table = read_parquet(path / XeniumKeys.TRANSCRIPTS_FILE)
--> 171     table["feature_name"] = table["feature_name"].apply(lambda x: x.decode("utf-8"), meta=("feature_name", "object"))
    173     transform = Scale([1.0 / specs["pixel_size"], 1.0 / specs["pixel_size"]], axes=("x", "y"))
    174     points = PointsModel.parse(
    175         table,
    176         coordinates={"x": XeniumKeys.TRANSCRIPTS_X, "y": XeniumKeys.TRANSCRIPTS_Y, "z": XeniumKeys.TRANSCRIPTS_Z},
   (...)
    179         transformations={"global": transform},
    180     )

AttributeError: 'str' object has no attribute 'decode'

It is human data, so I am afraid I cannot share it.

From the experiment.xenium file, I can see the following software versions:

    "instrument_sw_version": "1.5.1.2",
    "analysis_sw_version": "xenium-1.5.0.3",

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions