In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import warnings

warnings.filterwarnings("ignore")

import lamindb as ln
import pandas as pd
import spatialdata as sd

from spatialdata_db import SpatialDataDBCurator

[92m→[0m connected lamindb: scverse/spatialdata-db


In [4]:
def replace_var_index(sdata):
    """Re-index every table's ``var`` by its ``gene_ids`` column.

    For each table in ``sdata.tables`` whose ``var`` is a pandas DataFrame,
    the current ``var`` index is copied into a ``gene_names`` column and the
    ``gene_ids`` column becomes the new index. The table is modified in place
    and re-assigned to ``sdata.tables`` under the same name.

    The function is safe to call more than once (e.g. when the notebook cell
    is re-run): a table whose ``var`` is already indexed by ``gene_ids`` is
    left untouched, so ``gene_names`` is never overwritten with the ids.

    Parameters
    ----------
    sdata
        Object exposing a ``tables`` mapping of name -> table, where each
        table has a ``var`` attribute.

    Raises
    ------
    KeyError
        If a table's ``var`` has no ``gene_ids`` column and is not already
        indexed by ``gene_ids``. The table is not modified in that case.
    """
    # Snapshot the items so that re-assigning entries below cannot interfere
    # with the iteration over the mapping.
    for table_name, table in list(sdata.tables.items()):
        if not isinstance(table.var, pd.DataFrame):
            continue

        if "gene_ids" not in table.var.columns:
            if table.var.index.name == "gene_ids":
                # Already re-indexed by a previous call; nothing to do.
                continue
            raise KeyError(
                f"Table '{table_name}' has no 'gene_ids' column in .var; "
                "cannot replace the var index."
            )

        table.var["gene_names"] = table.var.index
        table.var.set_index("gene_ids", inplace=True)
        sdata.tables[table_name] = table

# Upload Visium Spatial, Human large intestine

In [3]:
# Start lamindb tracking for this notebook, identified by its transform uid.
ln.track("NDcshmgyz9vh0000")

[92m→[0m loaded Transform('NDcshmgy'), re-started Run('XNbeIVQt') at 2025-01-19 21:38:42 UTC
[92m→[0m notebook imports: lamindb==0.77.2 pandas==2.2.3 spatialdata-db==0.0.1 spatialdata==0.2.7.dev14+gc07c363


In [6]:
# Root directory containing one sub-folder per dataset. Defaults to the
# original hard-coded location; set the SPATIALDATA_DB_DATA_PATH environment
# variable to run the notebook against a different directory.
DATA_PATH = os.environ.get(
    "SPATIALDATA_DB_DATA_PATH",
    "/lustre/groups/ml01/projects/2024_spatialdata_db/data",
)
# Identifier of the dataset to upload; the dataset folder name starts with it.
uid = "1000j"

In [7]:
# Locate the dataset folder whose name starts with the UID.
# Candidates are sorted so the choice is deterministic if several folders
# match (os.listdir returns entries in arbitrary order).
matching_folders = sorted(
    folder for folder in os.listdir(DATA_PATH) if folder.startswith(str(uid))
)

if not matching_folders:
    # Fail here instead of only printing: with folder_path left as None, a
    # later os.listdir(folder_path) would silently list the current working
    # directory rather than erroring out.
    raise FileNotFoundError(f"Folder for UID {uid} not found.")

folder_path = os.path.join(DATA_PATH, matching_folders[0])

In [8]:
# Locate the .zarr store inside the dataset folder.
# A filtered, sorted list is used instead of a bare `for file in ...` loop:
# the loop variable would otherwise end up holding the last directory entry
# when nothing matches, so a "not found" check on it could never trigger for
# a non-empty folder.
zarr_files = sorted(
    entry for entry in os.listdir(folder_path) if entry.endswith(".zarr")
)

if not zarr_files:
    raise FileNotFoundError(f"No zarr file found in folder for UID {uid}.")

file = zarr_files[0]
file_path = os.path.join(folder_path, file)

In [10]:
# Read the dataset from the .zarr path located above, then switch each
# table's var index to its gene_ids column via replace_var_index.
sdata = sd.read_zarr(file_path)
replace_var_index(sdata)

In [11]:
# Build a curator for the dataset and run a first validation pass. The call
# is the last expression of the cell, so its boolean result is displayed.
sdc = SpatialDataDBCurator(sdata, organism="human")
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Visium'[0m
    [1;93m1 synonym[0m found: [1;93m"Visium" → "Visium Spatial Gene Expression"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[94m•[0m mapping "chemistry_version" on [3mULabel.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'v1'[0m
    → fix typos, remove non-existent values, or save terms via [1;96m.add_new_from("chemistry_version")[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
    [1;93m1 synonym[0m found: [1;93m"Human" → "human"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[94m•[0m mapping "tissue" on [3mTissue.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'large_intestine'[0m
    → fix typos, remove non-existent values, or save 

False

In [12]:
# Curate the 'sample' metadata columns: standardize the ones with a known
# synonym and register the remaining terms as new records. The steps run in
# the order listed.
curation_steps = [
    (sdc.standardize, "assay"),
    (sdc.add_new_from, "chemistry_version"),
    (sdc.standardize, "organism"),
    (sdc.add_new_from, "tissue"),
    (sdc.add_new_from, "disease"),
]
for curate, column in curation_steps:
    curate(column, accessor="sample")

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Visium" → "Visium Spatial Gene Expression"[0m
[92m✓[0m added 1 record with [3mULabel.name[0m for "chemistry_version": 'v1'
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Human" → "human"[0m
[92m✓[0m added 1 record with [3mTissue.name[0m for "tissue": 'large_intestine'
[92m✓[0m added 1 record with [3mDisease.name[0m for "disease": 'large intestine colorectal cancer'


In [13]:
# Re-run validation after curation; kept as the bare last expression so the
# boolean result is shown as the cell output.
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m



True

In [14]:
# Assemble the artifact description as a comma-separated string: the literal
# "10X" followed by assay, organism, disease, tissue and the first four
# characters of the sample date.
sample_meta = sdata.attrs["sample"]
description = ", ".join(
    [
        "10X",
        sample_meta["assay"],
        sample_meta["organism"],
        sample_meta["disease"],
        sample_meta["tissue"],
        sample_meta["date"][:4],
    ]
)
# Last expression of the cell: the returned artifact is displayed as output.
sdc.save_artifact(description=description)

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m65485167.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand the implications of working with SpatialData objects that are not     
         self-contained.                                                                                           
[34mINFO    [0m The Zarr backing store has been changed from                                                              
         [35m/lustre/groups/ml01/projects/2024_spatialdata_db/data/1000j__10X__Visium__Human__large_intestine__20220328[0m
         [35m__v1.3.0/[0m[95m1000j__10X__Visium__Human__large_intestine__20220328__v1.3.0_attrs.zarr[0m the new file path:       
         [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m65485167.zarr[0m  

Artifact(uid='8XBBsxnNHuWCOeOb0000', is_latest=True, description='10X, Visium, Human, large intestine colorectal cancer, large_intestine, 2022', suffix='.zarr', size=52401277, hash='TUrS6Myy91ExOsSws_zQ2A', n_objects=323, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=63, run_id=237, created_by_id=8, created_at=2025-01-19 16:42:31 UTC)

In [4]:
# Close the tracked lamindb run.
# NOTE(review): ignore_non_consecutive=True presumably bypasses lamindb's
# check that cells were executed in order — confirm. The execution counts in
# this notebook are out of order, so a Restart & Run All before saving would
# make the flag unnecessary.
ln.finish(ignore_non_consecutive=True)

[92m→[0m finished Run('XNbeIVQt') after 0d 0h 0m 12s at 2025-01-19 21:38:55 UTC
[92m→[0m go to: https://lamin.ai/scverse/spatialdata-db/transform/NDcshmgyz9vh0000
[92m→[0m if you want to update your notebook without re-running it, use `lamin save /home/icb/lea.zimmermann/projects/spatialdata-db/spatialdata-db/scripts/upload_1000j_visium.ipynb`
