In [1]:
%load_ext autoreload
%autoreload 2

# Upload 10X data

In [2]:
import os

import warnings
warnings.filterwarnings("ignore")

import spatialdata as sd
import pandas as pd
from lamin_spatial import SpatialDataCurator

import lamindb as ln

import bionty as bt

from lnschema_core import ULabel

from spatialdata_db import SpatialDataDBCurator

[92m→[0m connected lamindb: scverse/spatialdata-db


In [3]:
# Directory whose sub-folders are matched by UID prefix in `get_curator`.
# The cluster location stays the default; set SPATIALDATA_DB_DATA_PATH to run
# the notebook against a copy of the data stored elsewhere.
DATA_PATH = os.environ.get(
    'SPATIALDATA_DB_DATA_PATH',
    '/lustre/groups/ml01/projects/2024_spatialdata_db/data',
)

In [4]:
def get_curator(uid):
    """Load the SpatialData store of one dataset and wrap it in a curator.

    The dataset folder is the first entry of ``DATA_PATH`` (in sorted order)
    whose name starts with ``str(uid)``. Inside that folder, the first entry
    (in sorted order) whose name contains ``"attrs"`` is read with
    ``sd.read_zarr``.

    For every table, the current ``var`` index is copied into a new
    ``gene_symbols`` column and the index is replaced by the values of the
    ``gene_ids`` column, so the index can be validated against
    ``bt.Gene.ensembl_gene_id``.

    Args:
        uid: Prefix of the dataset folder name; converted with ``str``.

    Returns:
        A ``(sdc, sdata)`` tuple: the ``SpatialDataDBCurator`` built for the
        dataset and the ``SpatialData`` object it wraps.

    Raises:
        FileNotFoundError: If no folder in ``DATA_PATH`` starts with ``uid``,
            or the folder holds no entry whose name contains ``"attrs"``.
            Raising (instead of returning a bare ``None``) keeps the
            ``sdc, sdata = get_curator(...)`` call sites from failing with an
            unrelated unpacking error.
    """
    prefix = str(uid)

    # os.listdir() returns entries in arbitrary order; sorting makes the
    # choice reproducible when several names match.
    folder_path = next(
        (
            os.path.join(DATA_PATH, folder)
            for folder in sorted(os.listdir(DATA_PATH))
            if folder.startswith(prefix)
        ),
        None,
    )
    if folder_path is None:
        raise FileNotFoundError(f"Folder for UID {uid} not found.")

    file_path = next(
        (
            os.path.join(folder_path, filename)
            for filename in sorted(os.listdir(folder_path))
            if "attrs" in filename
        ),
        None,
    )
    if file_path is None:
        # Previously this case fell through to an unbound `file_path`.
        raise FileNotFoundError(
            f"No entry containing 'attrs' found in {folder_path}."
        )

    sdata = sd.read_zarr(file_path)

    # Keep the original index as `gene_symbols`, then index by `gene_ids`.
    for table in sdata.tables.keys():
        sdata.tables[table].var['gene_symbols'] = sdata.tables[table].var.index
        sdata.tables[table].var.index = sdata.tables[table].var['gene_ids'].values

    # Every table's var index is validated against the Ensembl gene id field.
    tables = {table_key: bt.Gene.ensembl_gene_id for table_key in sdata.tables.keys()}

    sdc = SpatialDataDBCurator(
        sdata,
        var_index=tables,
        # The curator receives the organism from the sample metadata, lowercased.
        organism=sdata.attrs['sample']['organism'].lower(),
    )

    return sdc, sdata

In [5]:
# Start LaminDB tracking for this notebook; the recorded output shows that this
# uid loads an existing Transform and re-starts its Run.
ln.track("UK1W3xXjulcs0000")

[92m→[0m loaded Transform('UK1W3xXj'), re-started Run('JfFRdZLa') at 2024-12-16 17:55:18 UTC
[92m→[0m notebook imports: bionty==0.53.2 lamin_spatial==0.1.0 lamindb==0.77.2 lnschema_core==0.77.1 pandas==2.2.3 spatialdata-db==0.0.1 spatialdata==0.2.7.dev14+gc07c363


In [None]:
# Load the VisiumHD mouse brain dataset (folder prefix '108x2', the store named
# in the save log further down) and build its curator. get_curator derives the
# table mapping and the organism from the data itself, so this cell no longer
# needs `sdata` / `tables` to exist in the kernel beforehand and no longer
# hard-codes 'human' for a mouse dataset.
sdc, sdata = get_curator('108x2')

In [11]:
# First validation pass; the boolean result is displayed as the cell output
# (False in the recorded run: "Mouse" needs to be standardized to "mouse").
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Mouse'[0m
    [1;93m1 synonym[0m found: [1;93m"Mouse" → "mouse"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'square_008um'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[94m•[0m validating categoricals in table 'square_002um'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[94m•[0m validating 

False

In [12]:
# Apply the synonym suggested by the validation report to the 'sample' metadata.
sdc.standardize("organism", accessor='sample')

[92m✓[0m standardized 1 synonym in "organism": [1;92m"Mouse" → "mouse"[0m


In [13]:
# Re-validate after standardization; the recorded run returns True.
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'square_008um'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[94m•[0m validating categoricals in table 'square_002um'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[94m•[0m validating categoricals in table 'square_016um'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m



True

In [14]:
# Save the curated dataset as an artifact. The description is written by hand
# here; later cells assemble the same kind of string from sdata.attrs['sample'].
artifact = sdc.save_artifact(description="10X, VisiumHD, Mouse, Healthy, Brain, 2024")

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m62167371.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand the implications of working with SpatialData objects that are not     
         self-contained.                                                                                           
[34mINFO    [0m The Zarr backing store has been changed from                                                              
         [35m/lustre/groups/ml01/projects/2024_spatialdata_db/data/108x2__10X__VisiumHD__Mouse__brain__20240329__v3.0.0[0m
         [35m/[0m[95m108x2__10X__VisiumHD__Mouse__brain__20240329__v3.0.0_attrs.zarr[0m the new file path:                       
         [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m62167371.zarr[0m  

In [42]:
# Load the dataset whose folder name starts with '10nsr'; get_curator returns
# the curator together with the SpatialData object it wraps.
sdc, sdata = get_curator('10nsr')

In [None]:
# Rebuild the curator for the `sdata` returned by get_curator. The table mapping
# is local to get_curator, so it is recreated here instead of relying on a
# `tables` variable that does not exist at notebook scope. The organism is
# taken from the sample metadata, lowercased, exactly as get_curator does.
tables = {table_key: bt.Gene.ensembl_gene_id for table_key in sdata.tables.keys()}
sdc = SpatialDataDBCurator(
    sdata,
    var_index=tables,
    organism=sdata.attrs['sample']['organism'].lower(),
)

In [43]:
# First validation pass; in the recorded run it returns False and reports
# synonyms for "assay", "organism" and "disease".
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Xenium'[0m
    [1;93m1 synonym[0m found: [1;93m"Xenium" → "10x Xenium"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
    [1;93m1 synonym[0m found: [1;93m"Human" → "human"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[94m•[0m mapping "disease" on [3mDisease.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'acute lymphoid leukemia'[0m
    [1;93m1 synonym[0m found: [1;93m"acute lymphoid leukemia" → "acute lymphoblastic leukemia"[0m
    → curate synonyms via [1;96m.standardize("disease")[0m
[9

False

In [44]:
# Apply the synonym mappings suggested by the validation report to the
# 'sample' metadata, one field per call.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')
sdc.standardize("disease", accessor='sample')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Xenium" → "10x Xenium"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Human" → "human"[0m
[92m✓[0m standardized 1 synonym in "disease": [1;92m"acute lymphoid leukemia" → "acute lymphoblastic leukemia"[0m


In [45]:
# Re-validate after standardization; the recorded run returns True.
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m



True

In [48]:
# Artifact description: "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>",
# all read from the sample metadata stored on the SpatialData object.
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])

In [50]:
# Save the curated dataset; the returned Artifact is displayed as the cell output.
sdc.save_artifact(description=description)

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m59939931.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand the implications of working with SpatialData objects that are not     
         self-contained.                                                                                           
[34mINFO    [0m The Zarr backing store has been changed from                                                              
         [35m/lustre/groups/ml01/projects/2024_spatialdata_db/data/10nsr__10X__Xenium__Human__bone_bone_marrow__2024040[0m
         [35m3__v1.9.0__AcuteLymphoidLeukemiaBoneMarrow/[0m[95m10nsr__10X__Xenium__Human__bone_bone_marrow__20240403__v1.9.0__[0m
         [95mAcuteLymphoidLeukemiaBoneMarrow_metadata_attrs.zarr[0m the new file

Artifact(uid='5UbbJ6cR0WzaBDHB0000', is_latest=True, description='10X, Xenium, Human, acute lymphoid leukemia, bonemarrow, 2024', suffix='.zarr', size=2490544230, hash='JBdHTZJUvNltQNjFB8UdcQ', n_objects=598, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=56, run_id=197, created_by_id=8, created_at=2024-12-16 10:27:24 UTC)

In [62]:
# Load the dataset whose folder name starts with '10u63' and run a first
# validation pass (False in the recorded run: "assay" and "organism" are flagged).
sdc, sdata = get_curator('10u63')
sdc.validate()

         [4;94mhttps://github.com/scverse/spatialdata/discussions/657[0m for a solution. Otherwise, please correct the radii
         of the circles before calling the parser function.                                                        
         [4;94mhttps://github.com/scverse/spatialdata/discussions/657[0m for a solution. Otherwise, please correct the radii
         of the circles before calling the parser function.                                                        
[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Xenium'[0m
    [1;93m1 synonym[0m found: [1;93m"Xenium" → "10x Xenium"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
   

False

In [63]:
# Apply the suggested synonyms to the flagged 'sample' metadata fields.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Xenium" → "10x Xenium"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Human" → "human"[0m


In [64]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save; the returned Artifact is the cell output.
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m29495382.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand

Artifact(uid='fVvF7TCAlJfKsWgo0000', is_latest=True, description='10X, Xenium, Human, glioblastoma, brain, 2024', suffix='.zarr', size=25439820101, hash='Y7XqV167fo0hN6xNsRQSWw', n_objects=2426, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=56, run_id=197, created_by_id=8, created_at=2024-12-16 10:48:23 UTC)

In [65]:
# Load the dataset whose folder name starts with '10wmu' and run a first
# validation pass (False in the recorded run: "assay" and "organism" are flagged).
sdc, sdata = get_curator('10wmu')
sdc.validate()

[94m•[0m saving validated records of 'tissue'
[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Xenium'[0m
    [1;93m1 synonym[0m found: [1;93m"Xenium" → "10x Xenium"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
    [1;93m1 synonym[0m found: [1;93m"Human" → "human"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓

False

In [66]:
# Apply the suggested synonyms to the flagged 'sample' metadata fields.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Xenium" → "10x Xenium"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Human" → "human"[0m


In [67]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save; the returned Artifact is the cell output.
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m84319053.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand

Artifact(uid='AcJ31iZl6KgZR7BO0000', is_latest=True, description='10X, Xenium, Human, healthy, heart, 2024', suffix='.zarr', size=1483994224, hash='f7BgRVX9kNxvnSpm4BrOMA', n_objects=368, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=56, run_id=197, created_by_id=8, created_at=2024-12-16 10:56:05 UTC)

In [68]:
# Load the dataset whose folder name starts with '10tsy' and run a first
# validation pass (False in the recorded run: "assay", "organism" and "tissue"
# are flagged; "tissue" has no synonym suggestion).
sdc, sdata = get_curator('10tsy')
sdc.validate()

[94m•[0m saving validated records of 'var_index'
[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Xenium'[0m
    [1;93m1 synonym[0m found: [1;93m"Xenium" → "10x Xenium"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
    [1;93m1 synonym[0m found: [1;93m"Human" → "human"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[94m•[0m mapping "tissue" on [3mTissue.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'intestine_colon'[0m
    → fix typos, remove non-existent values, or save terms via [1;96m.add_new_from("tissue")[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is v

False

In [69]:
# Apply the suggested synonyms for "assay" and "organism".
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')
# No synonym was offered for the tissue value, so it is registered as a new
# record (the recorded output shows 'intestine_colon' added with Tissue.name).
# NOTE(review): confirm that creating a new registry entry is intended here
# rather than mapping the value onto an existing tissue term.
sdc.add_new_from("tissue", accessor='sample')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Xenium" → "10x Xenium"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Human" → "human"[0m
[92m✓[0m added 1 record with [3mTissue.name[0m for "tissue": 'intestine_colon'


In [70]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save; the returned Artifact is the cell output.
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m46536717.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand

Artifact(uid='J6HsBBMXCJ8amco80000', is_latest=True, description='10X, Xenium, Human, cancer, intestine_colon, 2023', suffix='.zarr', size=5129800776, hash='AyFqQ-0ipr-sgDzUuEErmg', n_objects=786, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=56, run_id=197, created_by_id=8, created_at=2024-12-16 10:59:12 UTC)

In [6]:
# Load the dataset whose folder name starts with '10k5p' and run a first
# validation pass (False in the recorded run: "assay" and "organism" are flagged).
sdc, sdata = get_curator('10k5p')
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Visium'[0m
    [1;93m1 synonym[0m found: [1;93m"Visium" → "Visium Spatial Gene Expression"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
    [1;93m1 synonym[0m found: [1;93m"Human" → "human"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validate

False

In [10]:
# Apply the suggested synonyms to the flagged 'sample' metadata fields.
# The recorded output says "values are already standardized" for both calls,
# presumably because the cell had already been executed once on this curator.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')

[93m![0m values are already standardized
[93m![0m values are already standardized


In [11]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save; the returned Artifact is the cell output.
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m26615312.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand

Artifact(uid='Co2Bwl9TymNlnv9c0000', is_latest=True, description='10X, Visium, Human, breast cancer, breast, 2022', suffix='.zarr', size=53442529, hash='Z05oc7m9alFGiMo1l33DOQ', n_objects=311, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=56, run_id=197, created_by_id=8, created_at=2024-12-16 15:04:08 UTC)

In [74]:
# Load the dataset whose folder name starts with '10f2z' and run a first
# validation pass (False in the recorded run: "assay", "organism" and "tissue"
# each have a synonym suggestion).
sdc, sdata = get_curator('10f2z')
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Visium'[0m
    [1;93m1 synonym[0m found: [1;93m"Visium" → "Visium Spatial Gene Expression"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Human'[0m
    [1;93m1 synonym[0m found: [1;93m"Human" → "human"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[94m•[0m mapping "tissue" on [3mTissue.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'prostate'[0m
    [1;93m1 synonym[0m found: [1;93m"prostate" → "prostate gland"[0m
    → curate synonyms via [1;96m.standardize("tissue")[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is vali

False

In [75]:
# Apply the suggested synonyms to the three flagged 'sample' metadata fields.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')
sdc.standardize("tissue", accessor='sample')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Visium" → "Visium Spatial Gene Expression"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Human" → "human"[0m
[92m✓[0m standardized 1 synonym in "tissue": [1;92m"prostate" → "prostate gland"[0m


In [76]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save; the returned Artifact is the cell output.
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[92m✓[0m "var_index" is validated against [3mGene.ensembl_gene_id[0m

[34mINFO    [0m The SpatialData object is not self-contained [1m([0mi.e. it contains some elements that are Dask-backed from    
         locations outside [35m/home/icb/lea.zimmermann/.cache/lamindb/[0m[95m83643060.zarr[0m[1m)[0m. Please see the documentation of 
         `[1;35mis_self_contained[0m[1m([0m[1m)[0m` to understand

Artifact(uid='8mjX2vs8KhTS3Y4c0000', is_latest=True, description='10X, Visium, Human, prostate cancer, prostate, 2021', suffix='.zarr', size=60838635, hash='TNg5OjUQowqBR_B0komdBQ', n_objects=326, _hash_type='md5-d', _accessor='spatialdata', visibility=1, _key_is_virtual=True, storage_id=2, transform_id=56, run_id=197, created_by_id=8, created_at=2024-12-16 11:05:27 UTC)

In [7]:
# Load the dataset whose folder name starts with '10zvc' and run a first
# validation pass (False in the recorded run: "assay" and "organism" are
# flagged and the table's var index is being mapped).
sdc, sdata = get_curator('10zvc')
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Visium'[0m
    [1;93m1 synonym[0m found: [1;93m"Visium" → "Visium Spatial Gene Expression"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Mouse'[0m
    [1;93m1 synonym[0m found: [1;93m"Mouse" → "mouse"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[94m•[0m mapping "var_index" on 

False

In [14]:
# Apply the suggested synonyms to the flagged 'sample' metadata fields.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')
# NOTE(review): registering the unvalidated gene ids is disabled. With this
# line commented out, the following validation still reports 181 unvalidated
# var_index terms and the save attempt raises ValidationError. The same call
# raised an IntegrityError for dataset '10fn9' later in this notebook, so the
# right fix needs to be confirmed before re-enabling it.
# sdc.add_new_from_var_index(table='table')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Visium" → "Visium Spatial Gene Expression"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Mouse" → "mouse"[0m


In [15]:
# Re-validate after standardization; the recorded run still returns False
# because 181 var_index terms are not validated against Gene.ensembl_gene_id.
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[94m•[0m mapping "var_index" on [3mGene.ensembl_gene_id[0m
[93m![0m   [1;91m181 terms[0m are not validated: [1;91m'ENSMUSG00000022591', 'ENSMUSG00000094127', 'ENSMUSG00000066936', 'ENSMUSG00000116275', 'ENSMUSG00000102049', 'ENSMUSG00000104367', 'ENSMUSG00000091312', 'ENSMUSG00000087062', 'ENSMUSG00000079353', 'ENSMUSG00000096240', 'ENSMUSG00000079286', 'ENSMUSG00000085431', 'ENSMUSG00000075015', 'ENSMUSG000

False

In [16]:
# Build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata and try to save. In the recorded run the save re-runs validation
# and raises ValidationError because the var index is not fully validated.
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

[94m•[0m validating categoricals of 'sample' metadata...
[92m✓[0m "assay" is validated against [3mExperimentalFactor.name[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[92m✓[0m "organism" is validated against [3mOrganism.name[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[92m✓[0m "disease" is validated against [3mDisease.name[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m
[92m✓[0m "preproc_version" is validated against [3mULabel.name[0m

[94m•[0m validating categoricals in table 'table'...
[94m•[0m mapping "var_index" on [3mGene.ensembl_gene_id[0m
[93m![0m   [1;91m181 terms[0m are not validated: [1;91m'ENSMUSG00000022591', 'ENSMUSG00000094127', 'ENSMUSG00000066936', 'ENSMUSG00000116275', 'ENSMUSG00000102049', 'ENSMUSG00000104367', 'ENSMUSG00000091312', 'ENSMUSG00000087062', 'ENSMUSG00000079353', 'ENSMUSG00000096240', 'ENSMUSG00000079286', 'ENSMUSG00000085431', 'ENSMUSG00000075015', 'ENSMUSG000

ValidationError: Dataset does not validate. Please curate.

In [None]:
# Unexecuted retry of the validate / describe / save sequence for the curator
# currently held in `sdc`.
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

In [None]:
# Load the dataset whose folder name starts with '10fn9' and run a first
# validation pass (False in the recorded run: "assay" and "organism" have
# synonym suggestions; the disease value 'unknown' is not validated).
sdc, sdata = get_curator('10fn9')
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Visium'[0m
    [1;93m1 synonym[0m found: [1;93m"Visium" → "Visium Spatial Gene Expression"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Mouse'[0m
    [1;93m1 synonym[0m found: [1;93m"Mouse" → "mouse"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[94m•[0m mapping "disease" on [3mDisease.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'unknown'[0m
    → fix typos, remove non-existent values, or save terms via [1;96m.add_new_from("disease")[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m


False

In [None]:
# Apply the suggested synonyms to "assay" and "organism".
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')
# Register var_index entries of table 'table' that are not yet in the registry.
# NOTE(review): the recorded run fails here with an IntegrityError (duplicate
# gene uid). The unvalidated disease value 'unknown' reported by the preceding
# validation is also not handled in this cell - confirm how it should be curated.
sdc.add_new_from_var_index(table='table')

[92m✓[0m standardized 1 synonym in "assay": [1;92m"Visium" → "Visium Spatial Gene Expression"[0m
[92m✓[0m standardized 1 synonym in "organism": [1;92m"Mouse" → "mouse"[0m


IntegrityError: duplicate key value violates unique constraint "lnschema_bionty_gene_uid_key"
DETAIL:  Key (uid)=(6KCoBil6asUg) already exists.


In [None]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save (cell not executed in the recorded notebook).
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)

In [None]:
# Load the dataset whose folder name starts with '10jff' and run a first
# validation pass (False in the recorded run: "assay" and "organism" have
# synonym suggestions; the disease value 'unknown' is not validated).
sdc, sdata = get_curator('10jff')
sdc.validate()

[94m•[0m validating categoricals of 'sample' metadata...
[94m•[0m mapping "assay" on [3mExperimentalFactor.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Visium'[0m
    [1;93m1 synonym[0m found: [1;93m"Visium" → "Visium Spatial Gene Expression"[0m
    → curate synonyms via [1;96m.standardize("assay")[0m
[92m✓[0m "chemistry_version" is validated against [3mULabel.name[0m
[94m•[0m mapping "organism" on [3mOrganism.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'Mouse'[0m
    [1;93m1 synonym[0m found: [1;93m"Mouse" → "mouse"[0m
    → curate synonyms via [1;96m.standardize("organism")[0m
[92m✓[0m "tissue" is validated against [3mTissue.name[0m
[94m•[0m mapping "disease" on [3mDisease.name[0m
[93m![0m   [1;91m1 term[0m is not validated: [1;91m'unknown'[0m
    → fix typos, remove non-existent values, or save terms via [1;96m.add_new_from("disease")[0m
[92m✓[0m "license" is validated against [3mULabel.name[0m


False

In [None]:
# Apply the suggested synonyms to "assay" and "organism".
# NOTE(review): the unvalidated disease value 'unknown' reported by the
# preceding validation is not handled here - confirm how it should be curated
# before saving.
sdc.standardize("assay", accessor='sample')
sdc.standardize("organism", accessor='sample')

In [None]:
# Re-validate, build the description
# "10X, <assay>, <organism>, <disease>, <tissue>, <date[:4]>" from the sample
# metadata, then save (cell not executed in the recorded notebook).
sdc.validate()
description = '10X, ' + ', '.join([
    sdata.attrs['sample']['assay'],
    sdata.attrs['sample']['organism'],
    sdata.attrs['sample']['disease'],
    sdata.attrs['sample']['tissue'],
    sdata.attrs['sample']['date'][:4],  # first four characters of the date
])
sdc.save_artifact(description=description)