In [1]:
import anndata
import os
import requests

# Download the example dataset once and cache it locally; later runs reuse the cached file.
save_path = "data/example_sce.h5ad"
if not os.path.exists(save_path):
    # Create the target directory first: open() raises if "data/" does not exist yet.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    response = requests.get("https://go.wisc.edu/69435h", timeout=60)
    # Fail loudly on HTTP errors so an error page is never cached as the .h5ad file
    # (the existence check above would otherwise skip the download on every later run).
    response.raise_for_status()
    with open(save_path, "wb") as f:
        f.write(response.content)

example_sce = anndata.read_h5ad(save_path)
example_sce

AnnData object with n_obs × n_vars = 2087 × 100
    obs: 'clusters_coarse', 'clusters', 'S_score', 'G2M_score', 'cell_type', 'sizeFactor', 'pseudotime'
    var: 'highly_variable_genes'
    uns: 'X_name', 'clusters_coarse_colors', 'clusters_colors', 'day_colors', 'neighbors', 'pca'
    obsm: 'PCA', 'UMAP', 'X_pca', 'X_umap'
    layers: 'counts', 'cpm', 'logcounts', 'spliced', 'unspliced'
    obsp: 'connectivities', 'distances'

The memento model works directly off the matrix of transcript counts. Therefore, we don't need to keep track of the cell-level metadata that are used as predictors in other models. This implementation loads all data at once, but because it operates on sparse matrices, it is still quite memory efficient.


In [2]:
from scdesigner.experimental.estimators import MementoEstimator
from scdesigner.experimental.data import SparseMatrixLoader

# q is the sampling fraction assumed by the memento model.
memento = MementoEstimator(q=0.01)
# Wrap the AnnData object in a loader; batch_size=1000 presumably sets the number of cells per batch — TODO confirm.
sml = SparseMatrixLoader(example_sce, batch_size=1000)
# The result is indexed by "covariance" and "mean" in the cells that follow.
fit = memento.estimate(sml.loader)

In [3]:
# Inspect the covariance estimate returned by the memento fit.
fit["covariance"]

tensor([[1.9258, 0.0935, 0.0788,  ..., 0.0000, 0.0339, 0.0000],
        [0.0935, 1.2218, 0.1555,  ..., 0.0000, 0.0601, 0.0000],
        [0.0788, 0.1555, 1.7056,  ..., 0.0000, 0.0570, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 2.6681, 0.0000, 0.0000],
        [0.0339, 0.0601, 0.0570,  ..., 0.0000, 0.2188, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 1.9393]])

**TODO:** The estimated means shown below seem too small, even with a very small sampling fraction $q$. Did we implement the formulas incorrectly, or is the paper incorrect?

In [4]:
# Inspect the mean estimate returned by the memento fit.
fit["mean"]

tensor([0.9964, 0.6870, 0.9047, 0.9294, 0.7371, 0.8868, 1.0727, 0.4265, 0.6228,
        0.3072, 1.1863, 1.5618, 0.9423, 1.4388, 1.5394, 0.8234, 1.0267, 0.7484,
        1.1656, 1.0978, 0.7334, 0.6444, 0.9384, 0.9762, 1.4089, 0.6524, 0.8391,
        1.2052, 1.2606, 0.6162, 1.1437, 0.7583, 0.8251, 1.3618, 0.7300, 1.1556,
        1.2105, 1.4168, 0.8674, 1.6186, 0.6873, 1.2246, 1.0389, 0.9409, 1.3263,
        0.7447, 1.3467, 1.3459, 0.3118, 0.6714, 0.5819, 1.3055, 1.5540, 1.0572,
        0.6961, 0.8785, 1.0091, 1.3071, 0.6647, 0.9972, 1.1098, 1.6534, 0.6749,
        0.7481, 0.8377, 1.4986, 1.3135, 1.1631, 1.3434, 1.1374, 0.9192, 0.5181,
        0.8057, 0.5359, 1.0155, 0.4667, 0.1773, 0.7219, 1.0304, 1.2584, 1.1573,
        0.9930, 0.8115, 0.9604, 1.4384, 0.7189, 1.6503, 0.6454, 0.9660, 1.5492,
        1.4248, 1.1118, 1.4927, 1.1321, 1.2761, 1.1052, 1.3150, 1.1487, 0.0911,
        0.9299])