Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cannot read h5ad file #102

Closed
chriscainx opened this issue Mar 12, 2018 · 5 comments
Closed

Cannot read h5ad file #102

chriscainx opened this issue Mar 12, 2018 · 5 comments

Comments

@chriscainx
Copy link
Contributor

chriscainx commented Mar 12, 2018

>>> adata.write("./result.h5ad")
>>> bdata = sc.read("./result.h5ad")

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-15-d90a365327a0> in <module>()
      1 adata.write("./results.h5ad")
----> 2 bdata = sc.read("./results.h5ad")

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/anndata/readwrite/read.py in read_h5ad(filename, backed)
    347         # load everything into memory
    348         d = _read_h5ad(filename=filename)
--> 349         return AnnData(d)
    350 
    351 

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/anndata/base.py in __init__(self, X, obs, var, uns, obsm, varm, raw, dtype, single_col, filename, filemode, asview, oidx, vidx)
    632                 obsm=obsm, varm=varm, raw=raw,
    633                 dtype=dtype, single_col=single_col,
--> 634                 filename=filename, filemode=filemode)
    635 
    636     def _init_as_view(self, adata_ref, oidx, vidx):

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/anndata/base.py in _init_as_actual(self, X, obs, var, uns, obsm, varm, raw, dtype, single_col, filename, filemode)
    741                 raise ValueError(
    742                     'If `X` is a dict no further arguments must be provided.')
--> 743             X, obs, var, uns, obsm, varm, raw = self._from_dict(X)
    744 
    745         # init from AnnData

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/anndata/base.py in _from_dict(ddata)
   1591                     d_true_keys['obs'][k_stripped] = pd.Categorical.from_codes(
   1592                         codes=d_true_keys['obs'][k_stripped].values,
-> 1593                         categories=v)
   1594                 if k_stripped in d_true_keys['var']:
   1595                     d_true_keys['var'][k_stripped] = pd.Categorical.from_codes(

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/pandas/core/categorical.py in from_codes(cls, codes, categories, ordered)
    616                 "codes need to be convertible to an arrays of integers")
    617 
--> 618         categories = CategoricalDtype._validate_categories(categories)
    619 
    620         if len(codes) and (codes.max() >= len(categories) or codes.min() < -1):

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py in _validate_categories(categories, fastpath)
    324                 raise ValueError('Categorial categories cannot be null')
    325 
--> 326             if not categories.is_unique:
    327                 raise ValueError('Categorical categories must be unique')
    328 

pandas/_libs/properties.pyx in pandas._libs.properties.cache_readonly.__get__()

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in is_unique(self)
   1316     def is_unique(self):
   1317         """ return if the index has unique values """
-> 1318         return self._engine.is_unique
   1319 
   1320     @property

pandas/_libs/properties.pyx in pandas._libs.properties.cache_readonly.__get__()

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in _engine(self)
   1595     def _engine(self):
   1596         # property, for now, slow to look up
-> 1597         return self._engine_type(lambda: self._values, len(self))
   1598 
   1599     def _validate_index_level(self, level):

/fastdata/chris/bin/anaconda3/lib/python3.6/site-packages/pandas/core/indexes/base.py in __len__(self)
    559         return the length of the Index
    560         """
--> 561         return len(self._data)
    562 
    563     def __array__(self, dtype=None):

TypeError: len() of unsized object
@falexwolf
Copy link
Member

Sorry, that's probably due to the fact that your annotations contain a column with categorical data and a single category. We'll solve this bug in the next version of anndata, in just a couple of days.

As a workaround, remove the annotation with a single category.

@chriscainx
Copy link
Contributor Author

thank you. please close this issue

@gtca
Copy link

gtca commented Feb 20, 2019

It seems that the issue can still be reproduced in scanpy v1.4:

import pandas as pd
import scanpy as sc
pd.__version__  # => '0.23.4'
sc.__version__  # => '1.4'

adata = sc.datasets.pbmc68k_reduced()
adata.obs["single_cat"] = 1
adata.obs['single_cat'] = pd.Categorical(adata.obs['single_cat'])
adata.write('/tmp/adata.h5ad')

sc.read('/tmp/adata.h5ad')  # => ValueError: codes need to be convertible to an arrays of integers

@flying-sheep
Copy link
Member

do you also have the newest anndata version?

@sophiamaedler
Copy link

sophiamaedler commented Dec 16, 2019

I am also experiencing this issue.

Running the following code:

import pandas as pd
import scanpy as sc
import anndata

print(pd.__version__)
print(sc.__version__)
print(anndata.__version__)
adata = sc.datasets.pbmc68k_reduced()
adata.obs["single_cat"] = 1
adata.obs['single_cat'] = pd.Categorical(adata.obs['single_cat'])
adata.write('/tmp/adata.h5ad')
sc.read('/tmp/adata.h5ad')

Returns this error message:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-5-adde38d13544> in <module>
----> 1 sc.read('/tmp/adata.h5ad')

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/scanpy/readwrite.py in read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, **kwargs)
     95             filename, backed=backed, sheet=sheet, ext=ext,
     96             delimiter=delimiter, first_column_names=first_column_names,
---> 97             backup_url=backup_url, cache=cache, **kwargs,
     98         )
     99     # generate filename and read to dict

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/scanpy/readwrite.py in _read(filename, backed, sheet, ext, delimiter, first_column_names, backup_url, cache, suppress_cache_warning, **kwargs)
    497     if ext in {'h5', 'h5ad'}:
    498         if sheet is None:
--> 499             return read_h5ad(filename, backed=backed)
    500         else:
    501             logg.debug(f'reading sheet {sheet} from file {filename}')

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/anndata/readwrite/read.py in read_h5ad(filename, backed, chunk_size)
    445     else:
    446         # load everything into memory
--> 447         constructor_args = _read_args_from_h5ad(filename=filename, chunk_size=chunk_size)
    448         X = constructor_args[0]
    449         dtype = None

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/anndata/readwrite/read.py in _read_args_from_h5ad(adata, filename, mode, chunk_size)
    500     if not backed:
    501         f.close()
--> 502     return AnnData._args_from_dict(d)
    503 
    504 

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/anndata/core/anndata.py in _args_from_dict(ddata)
   2182                         d_true_keys[ann][k_stripped] = pd.Categorical.from_codes(
   2183                             codes=d_true_keys[ann][k_stripped].values,
-> 2184                             categories=v,
   2185                         )
   2186                 k_to_delete.append(k)

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/pandas/core/arrays/categorical.py in from_codes(cls, codes, categories, ordered, dtype)
    638         dtype = CategoricalDtype._from_values_or_dtype(categories=categories,
    639                                                        ordered=ordered,
--> 640                                                        dtype=dtype)
    641         if dtype.categories is None:
    642             msg = ("The categories must be provided in 'categories' or "

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py in _from_values_or_dtype(cls, values, categories, ordered, dtype)
    322             # Note: This could potentially have categories=None and
    323             # ordered=None.
--> 324             dtype = CategoricalDtype(categories, ordered)
    325 
    326         return dtype

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py in __init__(self, categories, ordered)
    224 
    225     def __init__(self, categories=None, ordered=None):
--> 226         self._finalize(categories, ordered, fastpath=False)
    227 
    228     @classmethod

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py in _finalize(self, categories, ordered, fastpath)
    333         if categories is not None:
    334             categories = self.validate_categories(categories,
--> 335                                                   fastpath=fastpath)
    336 
    337         self._categories = categories

/usr/local/anaconda3/envs/diffxpy/lib/python3.6/site-packages/pandas/core/dtypes/dtypes.py in validate_categories(categories, fastpath)
    502         if not fastpath and not is_list_like(categories):
    503             msg = "Parameter 'categories' must be list-like, was {!r}"
--> 504             raise TypeError(msg.format(categories))
    505         elif not isinstance(categories, ABCIndexClass):
    506             categories = Index(categories, tupleize_cols=False)

TypeError: Parameter 'categories' must be list-like, was 1

I am running the following versions:

0.24.2 #pandas
1.4.4.post1 #scanpy
0.6.22.post1 #anndata

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

5 participants