# marshall-2022

In [None]:
# Run the lamin CLI's `load` command for scverse/spatial before lamindb is
# imported below (presumably this selects the instance the notebook writes to —
# confirm against the installed lamin CLI version).
!lamin load scverse/spatial

In [None]:
import lamindb as ln

# Fix the transform stem uid and version for this notebook, then call
# ln.track() (presumably so this run is recorded against that transform —
# confirm against the installed lamindb version).
ln.settings.transform.stem_uid = "VwUYGOmA3koC"
ln.settings.transform.version = "1"
ln.track()

## Download and process data

The h5ad file is downloaded from: https://lamin.ai/laminlabs/cellxgene/artifacts/zuSDjhCBRxYCOm8pY6SL

In [None]:
# Look up the source h5ad artifact by uid in the laminlabs/cellxgene instance,
# then stage it; `h5ad` is what gets handed to the conversion step further down.
h5ad_artifact = (
    ln.Artifact.using("laminlabs/cellxgene")
    .filter(uid="zuSDjhCBRxYCOm8pY6SL")
    .one()
)
h5ad = h5ad_artifact.stage()

Convert h5ad to zarr.

The script below is from: https://github.com/vitessce/vitessce-python/blob/main/demos/marshall-2022/src/convert_to_zarr.py

In [None]:
from anndata import read_h5ad
import numpy as np
import scanpy as sc
from vitessce.data_utils import (
    to_diamond,
    to_uint8,
    optimize_adata,
)


def convert_h5ad_to_zarr(input_path, output_path, radius=10):
    """Preprocess an h5ad file and write it out as a zarr store for Vitessce.

    Steps, in order: filter cells and genes, flag genes whose ``feature_name``
    starts with "MT-" and compute QC metrics, normalize and log-transform,
    select highly variable genes, regress out / scale that subset, build one
    4-vertex polygon per observation around its spatial coordinate, trim the
    AnnData with ``optimize_adata`` and write it with ``write_zarr``.

    The input is expected to provide ``var["feature_name"]``,
    ``obs["cell_type"]``, ``obsm["X_spatial"]`` and ``obsm["X_umap"]``, since
    all four are referenced below.

    Args:
        input_path: Path of the ``.h5ad`` file to read.
        output_path: Destination passed to ``AnnData.write_zarr``.
        radius: Radius handed to ``to_diamond`` for every observation.
            Defaults to 10, the value that used to be hard-coded.
    """
    adata = read_h5ad(input_path)

    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)

    # Annotate the group of mitochondrial genes as 'mt'.
    adata.var["mt"] = adata.var["feature_name"].str.startswith("MT-")
    sc.pp.calculate_qc_metrics(
        adata, qc_vars=["mt"], percent_top=None, log1p=False, inplace=True
    )

    sc.pp.normalize_total(adata, target_sum=1e4, inplace=True)
    sc.pp.log1p(adata)

    sc.pp.highly_variable_genes(adata, min_mean=0.0125, max_mean=3, min_disp=0.5)

    # Regression and scaling run on a copy restricted to the highly variable
    # genes, so `adata.X` itself keeps the normalized, log-transformed values.
    adata_hvg = adata[:, adata.var["highly_variable"]].copy()
    sc.pp.regress_out(adata_hvg, ["total_counts", "pct_counts_mt"])
    sc.pp.scale(adata_hvg, max_value=3)

    adata.obsm["X_hvg"] = adata_hvg.X
    adata.obsm["X_hvg_uint8"] = to_uint8(adata_hvg.X, norm_along="var")

    # Look the coordinates up once and fill a local array, instead of going
    # through `adata.obsm[...]` three times per observation inside the loop.
    spatial = adata.obsm["X_spatial"]
    num_cells = adata.obs.shape[0]
    segmentations = np.zeros((num_cells, 4, 2))
    for i in range(num_cells):
        segmentations[i, :, :] = to_diamond(spatial[i, 0], spatial[i, 1], radius)
    adata.obsm["X_segmentations"] = segmentations

    # Keep only the columns / obsm entries that the Vitessce config consumes.
    adata = optimize_adata(
        adata,
        obs_cols=["cell_type"],
        var_cols=["feature_name"],
        obsm_keys=["X_hvg", "X_hvg_uint8", "X_umap", "X_spatial", "X_segmentations"],
        layer_keys=[],
    )

    # One chunk spans all observations and 10 variables.
    adata.write_zarr(output_path, chunks=[adata.shape[0], 10])

In [None]:
# Location of the converted zarr store; the Vitessce dataset below points at it.
zarr_filepath = "./marshall_2022_iscience.h5ad.zarr"
convert_h5ad_to_zarr(input_path=h5ad, output_path=zarr_filepath)

## Create the Vitessce configuration

In [None]:
from vitessce import (
    VitessceConfig,
    Component as cm,
    AnnDataWrapper,
)

# Top-level Vitessce configuration that datasets and views are added to below.
vc = VitessceConfig(
    name="Marshall et al., 2022 iScience",
    description=(
        "Spatial transcriptomics (Slide-seqV2) "
        "in the healthy human kidney (Puck_200903_13)"
    ),
    schema_version="1.0.15",
)

In [None]:
# Register the dataset first, then attach the zarr store to it. The paths refer
# to what convert_h5ad_to_zarr keeps: obs/cell_type, var/feature_name and the
# X_umap / X_spatial / X_segmentations entries of obsm.
dataset = vc.add_dataset(name="marshall_2022")
dataset = dataset.add_object(
    AnnDataWrapper(
        adata_path=zarr_filepath,
        obs_feature_matrix_path="X",
        feature_labels_path="var/feature_name",
        obs_set_paths=["obs/cell_type"],
        obs_set_names=["Bead Type"],
        obs_embedding_paths=["obsm/X_umap"],
        obs_embedding_names=["UMAP"],
        obs_locations_path="obsm/X_spatial",
        obs_segmentations_path="obsm/X_segmentations",
    )
)

In [None]:
# Create one view per component; all of them read from the same dataset.
obs_sets = vc.add_view(cm.OBS_SETS, dataset=dataset)
obs_set_sizes = vc.add_view(cm.OBS_SET_SIZES, dataset=dataset)
scatterplot = vc.add_view(cm.SCATTERPLOT, dataset=dataset, mapping="UMAP")
spatial = vc.add_view(cm.SPATIAL, dataset=dataset)
spatial_layers = vc.add_view(cm.LAYER_CONTROLLER, dataset=dataset)
genes = vc.add_view(cm.FEATURE_LIST, dataset=dataset)
description = vc.add_view(cm.DESCRIPTION, dataset=dataset)

# Compose the layout from two named groups: `|` joins views side by side and
# `/` stacks the groups (per the vitessce layout operators — confirm in docs).
top_row = scatterplot | obs_sets | obs_set_sizes
bottom_row = spatial | spatial_layers | (genes | description)
vc.layout(top_row / bottom_row);

## Ingest into LaminDB

In [None]:
from lamindb.integrations import save_vitessce_config

In [None]:
# Save the Vitessce configuration through lamindb's integration helper; the
# returned `artifact` is what the annotation cells below operate on.
artifact = save_vitessce_config(vc, description="Marshall et al., 2022 iScience")

In [None]:
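# NOTE(review): finishing the run is currently disabled; if it is re-enabled,
# confirm whether it should run after the annotation cells below.
# ln.finish()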

Annotate with metadata:

In [None]:
import bionty as bt

# Set bionty's organism setting to human (presumably consulted when the
# features/labels are added in the next cell — confirm).
bt.settings.organism = "human"

In [None]:
# Add features and labels from the source h5ad artifact to the saved Vitessce
# artifact.
# NOTE(review): `_add_from` is underscore-prefixed (private API) — confirm it
# exists in the installed lamindb version, or switch to a public equivalent.
artifact.features._add_from(h5ad_artifact)
artifact.labels.add_from(h5ad_artifact)