# Interoperability - Preparing the AnnData and MuData datasets

In [9]:
import lamindb as ln

In [10]:
# Stop early unless the connected LaminDB instance is the one this notebook
# expects; the message reports which instance was actually found.
assert ln.setup.settings.instance.slug == "theislab/sc-best-practices", (
    "Connected to LaminDB instance "
    f"{ln.setup.settings.instance.slug!r}; expected 'theislab/sc-best-practices'."
)

In [11]:
# Start LaminDB tracking for this notebook; the log output below shows it
# loading the notebook's Transform record and (re-)starting a Run.
ln.track()

[92m→[0m loaded Transform('sHhbAE1UThuC0000'), re-started Run('bn9MrSp1...') at 2025-03-26 12:49:31 UTC
[92m→[0m notebook imports: anndata==0.11.3 lamindb==1.3.0 mudata==0.3.1 numpy==2.1.3


## Generate AnnData

In [12]:
import anndata
import numpy
import scanpy
from scipy.sparse import csr_matrix

In [None]:
# Create a randomly generated AnnData object to use as an example.
# The generator is seeded so that re-running this cell draws the same counts
# instead of a different matrix on every run.
rng = numpy.random.default_rng(seed=0)
counts = csr_matrix(rng.poisson(1, size=(100, 2000)), dtype=numpy.float32)
adata = anndata.AnnData(counts)
adata.obs_names = [f"Cell_{i:d}" for i in range(adata.n_obs)]
adata.var_names = [f"Gene_{i:d}" for i in range(adata.n_vars)]

# Do some standard processing to populate the object
scanpy.pp.calculate_qc_metrics(adata, inplace=True)
# Keep a copy of the matrix as drawn before X is normalised and log-transformed
adata.layers["counts"] = adata.X.copy()
scanpy.pp.normalize_total(adata, inplace=True)
scanpy.pp.log1p(adata)
scanpy.pp.highly_variable_genes(adata, inplace=True)
scanpy.tl.pca(adata)
scanpy.pp.neighbors(adata)
scanpy.tl.umap(adata)

# Write the processed object to disk in h5ad format
adata.write("interoperability_adata.h5ad")

In [15]:
# Describe the AnnData file on disk as a LaminDB artifact ...
af = ln.Artifact(
    "interoperability_adata.h5ad",
    key="introduction/interoperability_adata.h5ad",
    description="anndata for interoperability",
)
# ... then save it and display the resulting record.
af = af.save()
af

... uploading interoperability_adata.h5ad: 100.0%


Artifact(uid='Y0xl4XzORVJMgFDl0000', is_latest=True, key='introduction/interoperability_adata.h5ad', description='anndata for interoperability', suffix='.h5ad', otype='AnnData', size=3180536, hash='kJuZZxiZdPF0IXZqZLOfGQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-26 12:56:10 UTC)

## Generate MuData

In [17]:
import mudata as md
import numpy as np

md.set_options(pull_on_update=False)


def _build_modality(matrix, obs_names, var_names):
    """Wrap ``matrix`` in an AnnData object labelled with the given names.

    A constant "placeholder" column is added to ``obs`` and/or ``var`` when
    that table has no columns.
    NOTE(review): presumably this keeps the annotation tables non-empty in the
    written file; confirm why it is needed.
    """
    modality = anndata.AnnData(matrix)
    modality.obs_names = obs_names
    modality.var_names = var_names
    if modality.obs.shape[1] == 0:
        modality.obs["dummy_obs"] = "placeholder"
    if modality.var.shape[1] == 0:
        modality.var["dummy_var"] = "placeholder"
    return modality


# Seeded generator: the random draws below are identical on every run
rng = np.random.default_rng(seed=1)

# Sizes: n observations, k latent columns, d / d2 variables per modality
n, d, k = 1000, 100, 10
d2 = 50

# Random draws, in order: shared matrix z, then one weight matrix per modality
z = rng.normal(loc=np.arange(k), scale=np.arange(k) * 2, size=(n, k))
w = rng.normal(size=(d, k))
w2 = rng.normal(size=(d2, k))

# Both data matrices are products of the same z with their own weights
y = z.dot(w.T)
y2 = z.dot(w2.T)

# Two AnnData objects with the same observation names but different variables
adata = _build_modality(
    y,
    obs_names=[f"obs_{i + 1}" for i in range(n)],
    var_names=[f"var_{j + 1}" for j in range(d)],
)
adata2 = _build_modality(
    y2,
    obs_names=[f"obs_{i + 1}" for i in range(n)],
    var_names=[f"var2_{j + 1}" for j in range(d2)],
)

# Combine both objects into one MuData container and write it to disk
mdata = md.MuData({"A": adata, "B": adata2})
mdata.write("interoperability_mdata.h5mu")

In [18]:
# Step 1: describe the MuData file on disk as a LaminDB artifact.
af = ln.Artifact(
    "interoperability_mdata.h5mu",
    key="introduction/interoperability_mdata.h5mu",
    description="MuData object for interoperability chapter",
)
# Step 2: save the artifact and keep the record that save() returns.
af = af.save()
# Step 3: show the record as the cell output.
af

[92m→[0m returning existing artifact with same hash: Artifact(uid='d0BIszdg2GPp3Nh80001', is_latest=True, key='introduction/interoperability_mdata.h5mu', description='MuData object for interoperability chapter', suffix='.h5mu', otype='MuData', size=1405984, hash='NX7ugtFo7KkZVUmwaASFiQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-20 17:29:41 UTC); to track this artifact as an input, use: ln.Artifact.get()


Artifact(uid='d0BIszdg2GPp3Nh80001', is_latest=True, key='introduction/interoperability_mdata.h5mu', description='MuData object for interoperability chapter', suffix='.h5mu', otype='MuData', size=1405984, hash='NX7ugtFo7KkZVUmwaASFiQ', space_id=1, storage_id=1, run_id=4, created_by_id=5, created_at=2025-03-20 17:29:41 UTC)