### Check observations with embeddings in CellxGene

In [7]:
import cellxgene_census
import numpy as np
import pandas as pd
import scanpy as sc
from cellxgene_census.experimental import (
    get_all_available_embeddings,
    get_embedding,
    get_embedding_metadata,
    get_embedding_metadata_by_name,
)

# 1. Pin the Census release, organism and measurement used by the cells below
CENSUS_VERSION = "2023-12-15"  # Census build identifier
ORGANISM = "homo_sapiens"      # organism / experiment name inside the Census
MEASUREMENT = "RNA"            # measurement name passed to get_anndata

In [8]:
# Display the directory of published Census releases: aliases such as
# "stable" and "latest", each with its release build date and S3 locations.
cellxgene_census.get_census_version_directory()

OrderedDict([('stable',
              {'release_date': None,
               'release_build': '2025-01-30',
               'soma': {'uri': 's3://cellxgene-census-public-us-west-2/cell-census/2025-01-30/soma/',
                'relative_uri': '/cell-census/2025-01-30/soma/',
                's3_region': 'us-west-2'},
               'h5ads': {'uri': 's3://cellxgene-census-public-us-west-2/cell-census/2025-01-30/h5ads/',
                'relative_uri': '/cell-census/2025-01-30/h5ads/',
                's3_region': 'us-west-2'},
               'flags': {'lts': True}}),
             ('latest',
              {'release_date': None,
               'release_build': '2025-10-13',
               'soma': {'uri': 's3://cellxgene-census-public-us-west-2/cell-census/2025-10-13/soma/',
                'relative_uri': '/cell-census/2025-10-13/soma/',
                's3_region': 'us-west-2'},
               'h5ads': {'uri': 's3://cellxgene-census-public-us-west-2/cell-census/2025-10-13/h5ads/',
        

In [14]:
# 2. List the obs-level embeddings published for this Census version and organism
embs = get_all_available_embeddings(CENSUS_VERSION)
obs_embedding_names = (
    entry["embedding_name"]
    for entry in embs
    if entry["experiment_name"] == ORGANISM and entry["data_type"] == "obs_embedding"
)
for embedding_name in obs_embedding_names:
    print(embedding_name)

scvi
geneformer
scgpt
uce
nmf


In [None]:
EMBED_NAME = "scgpt"

# Row filter for the obs axis. Passing None here would request every cell of
# ORGANISM in the chosen Census release, together with its full expression
# matrix and embedding, which is not practical to hold in memory. Start with a
# small slice and widen the filter deliberately when more cells are needed.
OBS_VALUE_FILTER = "is_primary_data == True and tissue_general == 'tongue'"

# 3. Open the Census and query an AnnData with obs and embedding
# The context manager closes the SOMA handle once the AnnData is materialised.
with cellxgene_census.open_soma(census_version=CENSUS_VERSION) as census:
    adata = cellxgene_census.get_anndata(
        census,
        organism=ORGANISM,
        measurement_name=MEASUREMENT,
        obs_value_filter=OBS_VALUE_FILTER,
        obs_column_names=["cell_type", "tissue", "dataset_id", "is_primary_data"],
        # Requested embeddings are returned in adata.obsm under their own name.
        obs_embeddings=[EMBED_NAME],
    )

In [None]:
# 4. Inspect the obs metadata
print(adata.obs.head())
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would emit a stray "None".
adata.obs.info()

# 5. Inspect the embedding in adata.obsm
print("Embedding key in adata.obsm:", list(adata.obsm.keys()))
emb_matrix = adata.obsm[EMBED_NAME]   # shape: (n_cells × embedding_dim)
print("Embedding matrix shape:", emb_matrix.shape)

# 6. Optionally: embedding metadata (if you want features names etc.)
# get_embedding_metadata() expects an embedding URI, not a Census version, so
# the lookup is done by name instead. The result is a plain dict (it has no
# .head()), hence the key/value printout.
emb_meta = get_embedding_metadata_by_name(
    embedding_name=EMBED_NAME,
    organism=ORGANISM,
    census_version=CENSUS_VERSION,
)
for meta_key, meta_value in emb_meta.items():
    print(f"{meta_key}: {meta_value}")