# Add SpatialData

- Convert the cohort of FOVs and labels into `spatialdata` objects, one object per FOV.
- Add metadata to the `spatialdata` objects and save them to LaminDB as artifacts.

## Setup


In [None]:
import bionty as bt
import lamindb as ln
from distributed import Client, LocalCluster
from upath import UPath

import nbl

# Start a local cluster with 10 workers and connect a client to it.
cluster = LocalCluster(n_workers=10)
client = Client(cluster)

In [None]:
# Display the client so its summary is rendered as the cell output.
client

In [3]:
# Point lamindb's git-sync setting at the project repository.
# NOTE(review): presumably this lets tracked code be linked to the repo — confirm in the lamindb settings docs.
ln.settings.sync_git_repo = "https://github.com/karadavis-lab/nbl.git"

In [None]:
# Start lamindb tracking for this notebook under the "Neuroblastoma" project.
ln.track(project="Neuroblastoma")

In [5]:
# Set bionty's organism setting to human.
bt.settings.organism = "human"

In [6]:
# Root of the raw cohort data, given relative to this notebook's location.
raw_data_dir = UPath("../../data/raw/nbl_cohort")

# Sub-directories holding the FOV images and the segmentation labels.
fov_dir = raw_data_dir.joinpath("images")
label_dir = raw_data_dir.joinpath("segmentation", "labels")

In [None]:
# Convert the FOVs selected by the "Hu-*" filter, together with their labels.
# NOTE(review): "Hu-*" reads like a glob; if `filter_fovs` is interpreted as a regex it
# would match "Hu" followed by any number of "-" — confirm the pattern semantics.
hu_sdatas = nbl.io.convert_cohort(
    fov_dir=fov_dir,
    label_dir=label_dir,
    filter_fovs=r"Hu-*",
    return_collection=False,
)

In [None]:
# Convert the FOVs selected by the "NBL-*" filter, together with their labels.
# The result is indexed by FOV name in later cells (e.g. nbl_sdatas["NBL-1-R5C8"]).
# NOTE(review): "NBL-*" reads like a glob; if `filter_fovs` is interpreted as a regex it
# would match "NBL" followed by any number of "-" — confirm the pattern semantics.
nbl_sdatas = nbl.io.convert_cohort(
    fov_dir=fov_dir,
    label_dir=label_dir,
    filter_fovs=r"NBL-*",
    return_collection=False,
)

In [11]:
# Pick a single FOV to use as a worked example in the following cells.
nbl_sdata = nbl_sdatas["NBL-1-R5C8"]

# NOTE(review): disabled call kept from exploration; it refers to `hu_sdata`, which no
# visible cell defines (only `hu_sdatas` exists).
# ln.Artifact.from_spatialdata(hu_sdata, key="control/Hu-Adrenal-Medulla-R5C12.zarr")

In [9]:
# Fetch the artifact whose key contains "clinical_data", then load its contents.
clinical_data_artifact = ln.Artifact.filter(ln.Q(key__contains="clinical_data")).one()
clinical_data = clinical_data_artifact.load()

In [None]:
# Build an artifact from the example FOV's SpatialData object.
# NOTE(review): `nbl_a` is not saved in this cell and no visible later cell uses it —
# confirm whether this cell is still needed.
nbl_a = ln.Artifact.from_spatialdata(nbl_sdata, key="NBL-1-R5C8.zarr")

In [None]:
# Show the clinical-data row(s) belonging to the example FOV.
is_example_fov = clinical_data["FOV"] == "NBL-1-R5C8"
clinical_data.loc[is_example_fov]

In [None]:
# NOTE(review): bare reference that only displays the class; it is not used by any
# visible later cell and looks like leftover exploration.
ln.curators.SpatialDataCurator

In [None]:
# Save one SpatialData artifact per clinical-data row (one row per FOV) and attach
# the row's values as labels, one label set per registered feature.

# The feature registry does not depend on the row being processed, so it is
# looked up once here instead of once per FOV.
features = ln.Feature.lookup()
# Iterating the lookup can yield entries that are not Feature records; keep only
# the real Feature records.
feature_records = [f for f in features if isinstance(f, ln.Feature)]

for _, fov_row in clinical_data.iterrows():
    fov_name = fov_row["FOV"]
    fov_sdata = nbl_sdatas[fov_name]

    # Create the artifact for this FOV and save it. Per the run log, an existing
    # artifact with the same hash is returned rather than duplicated.
    fov_sdata_artifact = ln.Artifact.from_spatialdata(fov_sdata, key=f"nbl_sdatas/{fov_name}.zarr")
    fov_sdata_artifact.save()

    for feature in feature_records:
        feature_name = feature.name
        # The registry may contain features that are not columns of the clinical
        # data; skip those instead of failing with a KeyError on the row lookup.
        if feature_name not in fov_row.index:
            continue
        feature_value = fov_row[feature_name]
        # Attach every ULabel named after this row's value, under the feature.
        # The Feature record from the lookup is used directly, so no per-row
        # query by name is needed.
        fov_sdata_artifact.labels.add(records=ln.ULabel.filter(ln.Q(name=feature_value)), feature=feature)

[34mINFO    [0m The Zarr backing store has been changed from                                                              
         [35m/Users/srivarra/Library/Caches/lamindb/[0m[95mgnKmLGXiaUgQswQR0000.zarr[0m the new file path:                       
         [35m/Users/srivarra/Library/Caches/lamindb/[0m[95mr1eG0oZIhYpPCCs00000.zarr[0m                                          
[92m→[0m returning existing artifact with same hash: Artifact(uid='gnKmLGXiaUgQswQR0000', is_latest=True, key='nbl_sdatas/NBL-1-R5C8.zarr', suffix='.zarr', kind='dataset', otype='SpatialData', size=71691210, hash='lKpzDK0t_3EiRojqioa4BA', n_files=64, space_id=1, storage_id=1, run_id=3, created_by_id=1, created_at=2025-05-03 19:48:05 UTC); to track this artifact as an input, use: ln.Artifact.get()
[34mINFO    [0m The Zarr backing store has been changed from                                                              
         [35m/Users/srivarra/Library/Caches/lamindb/[0m[95mZiOctB0cidVaHwQI00

In [63]:
# Look up the latest version of the NBL-7-R13C12 artifact and print its description.
nbl7_artifact = ln.Artifact.filter(ln.Q(key__contains="nbl_sdatas/NBL-7-R13C12.zarr")).latest_version().one()
nbl7_artifact.describe()

In [27]:
# Show the ULabel records whose name equals `feature_value`.
# NOTE(review): `feature_value` is whatever the labelling loop above assigned last, so
# this cell depends on leftover kernel state.
ln.ULabel.filter(ln.Q(name=feature_value))

[1m<[0m[1;95mQuerySet[0m[39m [0m[1;39m[[0m[1;35mULabel[0m[1;39m([0m[33muid[0m[39m=[0m[32m'obzUy9nz'[0m[39m, [0m[33mname[0m[39m=[0m[32m'NBL-1-R5C8'[0m[39m, [0m[33mis_type[0m[39m=[0m[3;91mFalse[0m[39m, [0m[33mspace_id[0m[39m=[0m[1;36m1[0m[39m, [0m[33mcreated_by_id[0m[39m=[0m[1;36m1[0m[39m, [0m[33mrun_id[0m[39m=[0m[1;36m2[0m[39m, [0m[33mcreated_at[0m[39m=[0m[1;36m2025[0m[39m-[0m[1;36m04[0m[39m-[0m[1;36m29[0m[39m [0m[1;92m21:31:43[0m[39m UTC[0m[1;39m)[0m[1;39m][0m[1m>[0m

In [47]:
# List every artifact whose key contains "nbl_sdatas".
ln.Artifact.filter(ln.Q(key__contains="nbl_sdatas"))

[1m<[0m[1;95mQuerySet[0m[39m [0m[1;39m[[0m[1;35mArtifact[0m[1;39m([0m[33muid[0m[39m=[0m[32m'gnKmLGXiaUgQswQR0000'[0m[39m, [0m[33mis_latest[0m[39m=[0m[3;92mTrue[0m[39m, [0m[33mkey[0m[39m=[0m[32m'nbl_sdatas/NBL-1-R5C8.zarr'[0m[39m, [0m[33msuffix[0m[39m=[0m[32m'.zarr'[0m[39m, [0m[33mkind[0m[39m=[0m[32m'dataset'[0m[39m, [0m[33motype[0m[39m=[0m[32m'SpatialData'[0m[39m, [0m[33msize[0m[39m=[0m[1;36m71691210[0m[39m, [0m[33mhash[0m[39m=[0m[32m'lKpzDK0t_3EiRojqioa4BA'[0m[39m, [0m[33mn_files[0m[39m=[0m[1;36m64[0m[39m, [0m[33mspace_id[0m[39m=[0m[1;36m1[0m[39m, [0m[33mstorage_id[0m[39m=[0m[1;36m1[0m[39m, [0m[33mrun_id[0m[39m=[0m[1;36m3[0m[39m, [0m[33mcreated_by_id[0m[39m=[0m[1;36m1[0m[39m, [0m[33mcreated_at[0m[39m=[0m[1;36m2025[0m[39m-[0m[1;36m05[0m[39m-[0m[1;36m03[0m[39m [0m[1;92m19:48:05[0m[39m UTC[0m[1;39m)[0m[39m, [0m[1;35mArtifact[0m[1;39m([0m[33muid

In [55]:
# NOTE(review): as recorded in this cell's output, the call fails with
# `FieldValidationError: ['key'] are required.`; the next cell repeats it with
# `artifacts=` and `key=` supplied.
ln.Collection(ln.Artifact.filter(ln.Q(key__contains="nbl_sdatas")))

FieldValidationError: ['key'] are required.

In [60]:
# Build a collection keyed "nbl fovs" from the latest version of every artifact whose
# key contains "nbl_sdatas".
# NOTE(review): the repr in this cell's output shows `created_at` as an unevaluated
# DatabaseDefault, which suggests the collection was not saved — TODO confirm whether
# `.save()` is intended here.
ln.Collection(artifacts=ln.Artifact.filter(ln.Q(key__contains="nbl_sdatas")).latest_version(), key="nbl fovs")

[1;35mCollection[0m[1m([0m[33muid[0m=[32m'wsvEs3A9IwuleT2J0000'[0m, [33mis_latest[0m=[3;92mTrue[0m, [33mkey[0m=[32m'nbl fovs'[0m, [33mhash[0m=[32m'5g9CmyIyoXquM5TCKik6Kw'[0m, [33mspace_id[0m=[1;36m1[0m, [33mcreated_by_id[0m=[1;36m1[0m, [33mrun_id[0m=[1;36m3[0m, [33mcreated_at[0m=[1m<[0m[1;95mdjango.db.models.expressions.DatabaseDefault[0m[39m object at [0m[1;36m0x3695ef290[0m[1m>[0m[1m)[0m