# Stream data

When working with large serialized objects, it is often inefficient to load entire files into memory.

Here, we show how to stream a subset of an `AnnData` object stored in the cloud instead of loading the entire file.

In [None]:
import lamindb as ln

In [None]:
# NOTE(review): presumably registers this notebook run with lamindb so the
# files used below are linked to it; confirm against the lamindb docs.
ln.track()

In [None]:
# Describe the h5ad object by its S3 path, then hand it to `ln.add`;
# the value returned by `ln.add` is what the following cells use as `file`.
h5ad_record = ln.File("s3://lamindb-ci/lndb-storage/pbmc68k.h5ad")
file = ln.add(h5ad_record)
# if exists: file = ln.select(ln.File, key="lndb-storage/pbmc68k.h5ad").one()

In [None]:
# `obs` exposes the per-observation columns used for filtering below.
# NOTE(review): presumably this avoids loading the full object; confirm.
obs = file.subsetter()

# Condition 1: the cell type is one of the two populations of interest.
is_selected_cell_type = obs.cell_type.isin(["Dendritic cells", "CD14+ Monocytes"])
# Condition 2: the `percent_mito` value is at most 0.05.
has_low_percent_mito = obs.percent_mito <= 0.05

# Keep only observations that satisfy both conditions.
subset_obs = is_selected_cell_type & has_low_percent_mito

In [None]:
# Stream the file, passing the observation selection built above as `subset_obs`.
# NOTE(review): presumably only the selected observations are read from
# storage and returned as an in-memory AnnData; confirm against lamindb docs.
adata_subset = file.stream(subset_obs=subset_obs)

In [None]:
# Bare expression: the notebook renders the streamed subset as the cell output.
adata_subset

In [None]:
# Count the observations per `cell_type` in the streamed subset; the result
# is shown as the cell output. Useful to check which cell types were kept.
adata_subset.obs["cell_type"].value_counts()

You can do the same with a zarr object:

In [None]:
# Same workflow as for the h5ad file, this time pointing at the zarr store.
zarr_record = ln.File("s3://lamindb-ci/lndb-storage/pbmc68k.zarr")
file = ln.add(zarr_record)
# if exists: file = ln.select(ln.File, key="lndb-storage/pbmc68k.zarr").one()

# Reuse the `subset_obs` selection defined in an earlier cell.
adata_subset = file.stream(subset_obs=subset_obs)

# Trailing expression: per-`cell_type` counts are shown as the cell output.
adata_subset.obs["cell_type"].value_counts()