In [1]:
import cellxgene_census

# Print the built-in help text for the package itself and for its
# get_anndata helper; add further objects to the tuple to inspect them too.
for documented_object in (cellxgene_census, cellxgene_census.get_anndata):
    help(documented_object)

Help on package cellxgene_census:

NAME
    cellxgene_census - An API to facilitate use of the CZI Science CELLxGENE Census. The Census is a versioned container of single-cell data hosted at `CELLxGENE Discover`_.

DESCRIPTION
    The API is built on the `tiledbsoma` SOMA API, and provides a number of helper functions including:

        * Open a named version of the Census, for use with the SOMA API
        * Get a list of available Census versions, and for each version, a description
        * Get a slice of the Census as an AnnData, for use with ScanPy
        * Get the URI for, or directly download, underlying data in H5AD format

    For more information on the API, visit the `cellxgene_census repo`_. For more information on SOMA, see the `tiledbsoma repo`_.

    .. _CELLxGENE Discover:
        https://cellxgene.cziscience.com/

    .. _cellxgene_census repo:
        https://github.com/chanzuckerberg/cellxgene-census/

    .. _tiledbsoma repo:
        https://github.com/single-cel

In [2]:
# Querying a slice of cell metadata
with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    # One chained expression: read a filtered slice of the human obs
    # SOMADataFrame, concatenate the results into a pyarrow.Table, then
    # convert that table to a pandas.DataFrame.
    cell_metadata = (
        census["census_data"]["homo_sapiens"]
        .obs.read(
            value_filter="sex == 'female' and cell_type in ['microglial cell', 'neuron']",
            column_names=[
                "assay",
                "cell_type",
                "tissue",
                "tissue_general",
                "suspension_type",
                "disease",
            ],
        )
        .concat()
        .to_pandas()
    )

    print(cell_metadata)

            assay        cell_type                          tissue  \
0       10x 5' v1           neuron                            lung   
1       10x 5' v1           neuron                            lung   
2       10x 5' v1           neuron                            lung   
3       10x 5' v1           neuron                            lung   
4       10x 5' v1           neuron                            lung   
...           ...              ...                             ...   
796421  10x 3' v3  microglial cell  dorsolateral prefrontal cortex   
796422  10x 3' v3  microglial cell  dorsolateral prefrontal cortex   
796423  10x 3' v3  microglial cell  dorsolateral prefrontal cortex   
796424  10x 3' v3  microglial cell  dorsolateral prefrontal cortex   
796425  10x 3' v3  microglial cell  dorsolateral prefrontal cortex   

       tissue_general suspension_type   disease     sex  
0                lung            cell    normal  female  
1                lung            cell    no

In [None]:
# NOTE: as originally written, this cell hung and never completed while memory
# kept climbing. The original call passed the {"obs": [...]} mapping (the shape
# of the deprecated `column_names` argument) to `obs_column_names`, which takes
# a plain sequence of obs column names; that is corrected below.
# TODO(review): confirm whether this alone resolves the hang, or whether the
# slice selected by the filters is simply too large to materialise in memory.

# Obtaining a slice as AnnData
with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    adata = cellxgene_census.get_anndata(
        census=census,
        organism="Homo sapiens",
        # Restrict the variables (genes) to two feature IDs.
        var_value_filter="feature_id in ['ENSG00000161798', 'ENSG00000188229']",
        # Restrict the observations (cells) by sex and cell type.
        obs_value_filter="sex == 'female' and cell_type in ['microglial cell', 'neuron']",
        # Flat list of obs columns to fetch (not a {"obs": [...]} mapping).
        obs_column_names=[
            "assay",
            "cell_type",
            "tissue",
            "tissue_general",
            "suspension_type",
            "disease",
        ],
    )

    print(adata)

In [8]:
# Memory-efficient queries

import cellxgene_census
import tiledbsoma

with cellxgene_census.open_soma(census_version="2025-01-30") as census:
    human = census["census_data"]["homo_sapiens"]

    # Use the query as a context manager so it is closed even when the body
    # raises or the cell is interrupted (a trailing query.close() would be
    # skipped in that case).
    with human.axis_query(
        measurement_name="RNA",
        obs_query=tiledbsoma.AxisQuery(
            value_filter="tissue == 'brain' and sex == 'male'"
        ),
    ) as query:
        # Now we can iterate over the matrix count, as well as the gene
        # metadata. For example, to iterate over the matrix count, we can get
        # an iterator and perform operations for each iteration.
        iterator = query.X("raw").tables()

        # Get an iterative slice as pyarrow.Table
        raw_slice = next(iterator)

        # You can now perform operations on each iteration slice. As with any
        # Python iterator, this logic can be wrapped in a for loop.

KeyboardInterrupt: 