# Validator demo

Currently this requires Tim's main fork.

In [1]:
import warnings
# Install a global "ignore" filter so no Python warnings are shown from here on
# (presumably to keep the demo output readable).
warnings.filterwarnings("ignore")

In [2]:
import lamindb as ln
import bionty as bt
import pandas as pd
import spatialdata as sd

[92m✓[0m wrote new records from public sources.yaml to /home/zeth/.lamin/bionty/versions/sources_local.yaml!

if you see this message repeatedly, run: import bionty; bionty.base.reset_sources()
[92m→[0m connected lamindb: scverse/spatialdata-db


In [3]:
# Load the SpatialData object from a Zarr store. The path is relative, so it is
# resolved against the notebook's working directory.
sdata = sd.read_zarr("10ktp__10X__Visium__Mouse__brain__20200623__v1.1.0.zarr")

In [4]:
# Attach a one-row metadata table (provider / assay / species) to the object.
# NOTE(review): the values say "xenium" / "human", but the Zarr store name says
# Visium / Mouse and the validator below is built with organism="mouse" --
# confirm whether these are deliberate dummy values.
meta = pd.DataFrame({"provider": ["10x"], "assay": ["xenium"], "species": ["human"]})
sdata.metadata = sd.models.MetadataModel.parse(meta)

In [5]:
from spatialdata_db import SpatialDataValidator

In [6]:
# Display the obs annotations of the table stored under the key "table".
sdata.tables["table"].obs

Unnamed: 0,in_tissue,array_row,array_col,spot_id,region
AAACAAGTATCTCCCA-1,1,50,102,0,Visium_Adult_Mouse_Brain
AAACAATCTACTAGCA-1,1,3,43,1,Visium_Adult_Mouse_Brain
AAACACCAATAACTGC-1,1,59,19,2,Visium_Adult_Mouse_Brain
AAACAGAGCGACTCCT-1,1,14,94,3,Visium_Adult_Mouse_Brain
AAACCGGGTAGGTACC-1,1,42,28,4,Visium_Adult_Mouse_Brain
...,...,...,...,...,...
TTGTTGTGTGTCAAGA-1,1,31,77,2697,Visium_Adult_Mouse_Brain
TTGTTTCACATCCAGG-1,1,58,42,2698,Visium_Adult_Mouse_Brain
TTGTTTCATTAGTCTA-1,1,60,30,2699,Visium_Adult_Mouse_Brain
TTGTTTCCATACAACT-1,1,45,27,2700,Visium_Adult_Mouse_Brain


In [7]:
# Lookup object for CellType records; used below to pick a cell type name via
# attribute access.
ct_lo = bt.CellType.lookup()

In [8]:
# Fill a "celltype" column with one placeholder label for every observation
table_obs = sdata.tables["table"].obs
table_obs["celltype"] = ct_lo.perivascular_cell.name

In [9]:
# Instantiate the validator for this SpatialData object with organism set to
# "mouse". In the recorded run, construction already reports feature names that
# are not yet saved (see output below).
sdv = SpatialDataValidator(sdata, organism="mouse")

[94m•[0m [1;93m1 non-validated values are not saved in [3mFeature.name[0m: ['provider']![0m
      → to lookup values, use lookup().columns
      → to save, run [1;93madd_new_from_columns[0m
[94m•[0m [1;93m5 non-validated values are not saved in [3mFeature.name[0m: ['spot_id', 'in_tissue', 'array_row', 'region', 'array_col']![0m
      → to lookup values, use lookup().columns
      → to save, run [1;93madd_new_from_columns[0m


In [10]:
# Validate the metadata and the "table" AnnData; the call returns a bool
# (False in the recorded run).
sdv.validate()

[94m•[0m Validating [1;92mmetadata[0m.
[92m✓[0m assay is validated against [3mExperimentalFactor.name[0m
[94m•[0m Validating Anndata object with key [1;92mtable[0m
[94m•[0m mapping [3mvar_index[0m on [3mGene.ensembl_gene_id[0m
[93m![0m    found [1;93m1094[0m validated terms: [1;93m['ENSG00000166974', 'ENSG00000230409', 'ENSG00000255974', 'ENSG00000080572', 'ENSG00000005206', 'ENSG00000135423', 'ENSG00000087365', 'ENSG00000243478', 'ENSG00000155744', 'ENSG00000129351', 'ENSG00000168564', 'ENSG00000184613', 'ENSG00000183671', 'ENSG00000042980', 'ENSG00000111142', 'ENSG00000162877', 'ENSG00000144583', 'ENSG00000284622', 'ENSG00000109099', 'ENSG00000163286', 'ENSG00000114125', 'ENSG00000068394', 'ENSG00000132321', 'ENSG00000184945', 'ENSG00000125354', 'ENSG00000175229', 'ENSG00000005810', 'ENSG00000253953', 'ENSG00000217707', 'ENSG00000071991', 'ENSG00000197157', 'ENSG00000100300', 'ENSG00000125631', 'ENSG00000115866', 'ENSG00000123838', 'ENSG00000196220', 'ENSG00000

False

In [11]:
adata = sdata.tables["table"]
# Re-key var on the Ensembl IDs from "gene_ids"; the previous index (gene
# symbols) is kept as a regular "symbol" column.
adata.var = (
    adata.var
    .reset_index()
    .rename(columns={"index": "symbol", "gene_ids": "ensembl_id"})
    .set_index("ensembl_id")
)
adata.var

Unnamed: 0_level_0,symbol,feature_types,genome
ensembl_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
ENSMUSG00000051951,Xkr4,Gene Expression,mm10
ENSMUSG00000089699,Gm1992,Gene Expression,mm10
ENSMUSG00000102331,Gm19938,Gene Expression,mm10
ENSMUSG00000102343,Gm37381,Gene Expression,mm10
ENSMUSG00000025900,Rp1,Gene Expression,mm10
...,...,...,...
ENSMUSG00000095523,AC124606.1,Gene Expression,mm10
ENSMUSG00000095475,AC133095.2,Gene Expression,mm10
ENSMUSG00000094855,AC133095.1,Gene Expression,mm10
ENSMUSG00000095019,AC234645.1,Gene Expression,mm10


In [12]:
# Rebuild the validator now that var is indexed by Ensembl IDs, then validate
# again (still False in the recorded run).
sdv = SpatialDataValidator(sdata, organism="mouse")
sdv.validate()

[94m•[0m [1;93m1 non-validated values are not saved in [3mFeature.name[0m: ['provider']![0m
      → to lookup values, use lookup().columns
      → to save, run [1;93madd_new_from_columns[0m
[94m•[0m [1;93m5 non-validated values are not saved in [3mFeature.name[0m: ['spot_id', 'in_tissue', 'array_row', 'region', 'array_col']![0m
      → to lookup values, use lookup().columns
      → to save, run [1;93madd_new_from_columns[0m
[94m•[0m Validating [1;92mmetadata[0m.
[92m✓[0m assay is validated against [3mExperimentalFactor.name[0m
[94m•[0m Validating Anndata object with key [1;92mtable[0m
[94m•[0m mapping [3mvar_index[0m on [3mGene.ensembl_gene_id[0m
[93m![0m    found [1;93m32091[0m validated terms: [1;93m['ENSMUSG00000051951', 'ENSMUSG00000089699', 'ENSMUSG00000102331', 'ENSMUSG00000102343', 'ENSMUSG00000025900', 'ENSMUSG00000025902', 'ENSMUSG00000104238', 'ENSMUSG00000104328', 'ENSMUSG00000033845', 'ENSMUSG00000025903', 'ENSMUSG00000033813', 'ENSMUS

False

In [13]:
# Drop the variables whose var_index entries the validator reported as
# non-validated, and store the result back under the same table key.
non_validated_ids = sdv.table_validators["table"].non_validated["var_index"]
keep_var = ~adata.var_names.isin(non_validated_ids)
# Slicing an AnnData yields a view onto `adata`; .copy() materialises an
# independent object so the stored table does not depend on its parent.
sdata.tables["table"] = adata[:, keep_var].copy()

In [14]:
# Re-create the validator on the filtered table and validate once more
# (still False in the recorded run).
sdv = SpatialDataValidator(sdata, organism="mouse")
sdv.validate()

[94m•[0m [1;93m1 non-validated values are not saved in [3mFeature.name[0m: ['provider']![0m
      → to lookup values, use lookup().columns
      → to save, run [1;93madd_new_from_columns[0m
[94m•[0m [1;93m5 non-validated values are not saved in [3mFeature.name[0m: ['spot_id', 'in_tissue', 'array_row', 'region', 'array_col']![0m
      → to lookup values, use lookup().columns
      → to save, run [1;93madd_new_from_columns[0m
[94m•[0m Validating [1;92mmetadata[0m.
[92m✓[0m assay is validated against [3mExperimentalFactor.name[0m
[94m•[0m Validating Anndata object with key [1;92mtable[0m
[94m•[0m mapping [3mvar_index[0m on [3mGene.ensembl_gene_id[0m
[93m![0m    found [1;93m32091[0m validated terms: [1;93m['ENSMUSG00000051951', 'ENSMUSG00000089699', 'ENSMUSG00000102331', 'ENSMUSG00000102343', 'ENSMUSG00000025900', 'ENSMUSG00000025902', 'ENSMUSG00000104238', 'ENSMUSG00000104328', 'ENSMUSG00000033845', 'ENSMUSG00000025903', 'ENSMUSG00000033813', 'ENSMUS

False

I'm not going to pollute the instance, but at this point we would save the validated terms and features. Validating again with `sdv.validate()` (on the same object) would then pass and return `True`.