In [1]:
#imports
import spatialdata
import dask_image.imread
import dask.array as da
import geopandas as gpd
import anndata as ad
import napari_spatialdata
import time

import sys, os
sys.path.append(os.path.abspath("../functions/"))
import anndata_utils



In [2]:
# Record the versions of the core packages so this run can be reproduced.
for package in (spatialdata, gpd, ad, napari_spatialdata):
    print(f"{package.__name__}: {package.__version__}")

spatialdata: 0.2.5.post0
geopandas: 1.0.1
anndata: 0.11.1
napari_spatialdata: 0.5.3


In [3]:
#paths
# Sample identifier shared by every per-sample input file below; change it here
# to point the whole notebook at another sample.
sample_id = "991"
# Root folder of the raw imaging data. Defaults to the external drive location;
# set the P26_RAW_DATA_ROOT environment variable when the drive is mounted elsewhere.
raw_data_root = os.environ.get(
    "P26_RAW_DATA_ROOT",
    "/Volumes/Extreme SSD/P26_BCCN/P26E07_E08_E09_SOPA_Gates",
)
# f-strings (not os.path.join) keep the resulting paths identical to the
# previously hard-coded literals.
path_to_mIF = f"{raw_data_root}/processed_images/{sample_id}.ome.tif"
path_to_mask = f"{raw_data_root}/expanded-masks/{sample_id}.tif"
path_to_shapes = f"../data/geojsons/Primary/processed/20241129_1442_Primary{sample_id}_harmonized.geojson"
path_to_imaging = f"../data/imaging/{sample_id}.csv"
path_to_proteomics = "../data/proteomic/20241201_170620_filtered_imputed_outerjoin_mergefirst.h5ad"

# initiate spatialdata object
# Starts empty; the following cells add each element to it by key
# (e.g. sdata['image'], sdata['mask']).
sdata = spatialdata.SpatialData()

In [4]:
# Parse the multiplexed image and register it under the 'image' key.
# Chunks are one channel by 5000 x 5000 pixels; the parser reports the axes
# as (c, y, x) because the file carries no explicit axis information.
image = dask_image.imread.imread(path_to_mIF).rechunk(chunks=(1, 5000, 5000))
parsed_image = spatialdata.models.Image2DModel.parse(image)
sdata['image'] = parsed_image

[34mINFO    [0m no axes information specified in the object, setting `dims` to: [1m([0m[32m'c'[0m, [32m'y'[0m, [32m'x'[0m[1m)[0m                           


In [5]:
# Parse the segmentation mask and register it under the 'mask' key.
# Singleton axes are squeezed away because the labels model needs a plain 2D (y, x) array.
mask = da.squeeze(dask_image.imread.imread(path_to_mask))
assert mask.ndim == 2
mask = mask.rechunk(chunks=(5000, 5000))
sdata['mask'] = spatialdata.models.Labels2DModel.parse(mask)

[34mINFO    [0m no axes information specified in the object, setting `dims` to: [1m([0m[32m'y'[0m, [32m'x'[0m[1m)[0m                                


In [6]:
#create polygons from mask
# Vectorize the label image stored under 'mask' and register the result as 'poly_mask'.
sdata['poly_mask'] = spatialdata.to_polygons(sdata['mask'])
# change datatype of index to int64 to match CellID from imaging table
# (the imaging table is linked to these polygons through its CellID column)
sdata['poly_mask'].index = sdata['poly_mask'].index.astype('int64')

In [7]:
# Parse the annotated contours and register them as "Primary_contours".
# "harmonized_class" becomes the index because it is the column that matches
# the proteomic table.
gdf = gpd.read_file(path_to_shapes).set_index("harmonized_class")
sdata["Primary_contours"] = spatialdata.models.ShapesModel.parse(gdf)

In [8]:
#load proteomic table
adata = ad.read_h5ad(path_to_proteomics)
# Every observation is annotated by the "Primary_contours" shapes element.
adata.obs["region"] = "Primary_contours"
adata.obs["region"] = adata.obs["region"].astype("category")
table = spatialdata.models.TableModel.parse(
    adata, region="Primary_contours", region_key="region", instance_key="shape_key"
)
# Use the "Genes" column as the variable index.
table = anndata_utils.switch_adat_var_index(table, "Genes")
# Sort the variables by name (much easier to find a gene like this).
# Sorting `.var` in place reorders only the annotation frame and leaves the
# columns of X untouched, which attaches gene names to the wrong measurements.
# Instead, compute the sorted order as integer positions and reorder the whole
# AnnData, so X, layers and var stay aligned (positions also tolerate
# duplicated gene names).
var_order = (
    table.var.assign(_position=range(table.n_vars))
    .sort_index()["_position"]
    .to_numpy()
)
table = table[:, var_order].copy()
# Register the finished table once instead of assigning and then overwriting the key.
sdata["proteomics"] = table

  self._check_key(key, self.keys(), self._shared_keys)


In [9]:
# Load the imaging (per-cell quantification) table.
adata = anndata_utils.read_quant(path_to_imaging)
# Label 0 is reserved for background, so both the CellID column and the obs
# index are shifted up by one (presumably the source ids are 0-based — confirm).
adata.obs['CellID'] = adata.obs['CellID'].add(1)
adata.obs.index = adata.obs.index.astype('int64') + 1
# Every observation is annotated by the "poly_mask" shapes element.
adata.obs["region"] = "poly_mask"
adata.obs["region"] = adata.obs["region"].astype("category")
table = spatialdata.models.TableModel.parse(
    adata,
    region="poly_mask",
    region_key="region",
    instance_key="CellID",
)
sdata["imaging"] = table

[32m2024-12-09 15:25:55.616[0m | [1mINFO    [0m | [36manndata_utils[0m:[36mread_quant[0m:[36m13[0m - [1m ---- read_quant : version number 1.1.0 ----[0m
[32m2024-12-09 15:25:58.844[0m | [1mINFO    [0m | [36manndata_utils[0m:[36mread_quant[0m:[36m33[0m - [1m 685805 cells and 75 variables[0m
[32m2024-12-09 15:25:58.845[0m | [1mINFO    [0m | [36manndata_utils[0m:[36mread_quant[0m:[36m34[0m - [1m ---- read_quant is done, took 3s  ----[0m


In [10]:
# Timestamp (YYYYmmdd_HHMM) that versions the files written by this run.
# NOTE: the name shadows the stdlib `datetime` module; it is kept because the
# QuPath export cell further down reads `datetime`.
datetime = time.strftime("%Y%m%d_%H%M")
sdata_zarr_path = f"../data/sdata/{datetime}_sdata_full.zarr"
sdata.write(sdata_zarr_path)

[34mINFO    [0m The Zarr backing store has been changed from [3;35mNone[0m the new file path:                                      
         ..[35m/data/sdata/[0m[95m20241209_1525_sdata_full.zarr[0m                                                               


In [11]:
import filtering

# Threshold the imaging table on the "mean_DAPI_bg" marker (above 200) and store
# the returned table back under the same key.
# NOTE(review): presumably this adds the "mean_DAPI_bg_above_200" column that the
# QuPath export classifies by — confirm in filtering.py.
sdata['imaging'] = filtering.filter_by_abs_value(
    adata=sdata['imaging'], marker="mean_DAPI_bg", value=200, direction="above"
)

[32m2024-12-09 15:40:18.094[0m | [1mINFO    [0m | [36mfiltering[0m:[36mfilter_by_abs_value[0m:[36m36[0m - [1m ---- filter_by_abs_value : version number 1.1.0 ----[0m
[32m2024-12-09 15:40:18.429[0m | [1mINFO    [0m | [36mfiltering[0m:[36mfilter_by_abs_value[0m:[36m70[0m - [1mNumber of cells with mean_DAPI_bg above 200: 638060[0m
[32m2024-12-09 15:40:18.430[0m | [1mINFO    [0m | [36mfiltering[0m:[36mfilter_by_abs_value[0m:[36m85[0m - [1m ---- filter_by_abs_value is done, took 0s  ----[0m
  self._check_key(key, self.keys(), self._shared_keys)


In [12]:
import importlib
import qupath_utils

# Reload so edits made to qupath_utils are picked up without restarting the kernel.
importlib.reload(qupath_utils)

# Export the 'poly_mask' polygons as QuPath detections, classified by the
# "mean_DAPI_bg_above_200" column of the 'imaging' table.
qupath_utils.sdata_to_qupath_detections(
    sdata=sdata,
    key_to_shapes="poly_mask",
    table_key="imaging",
    classify_by="mean_DAPI_bg_above_200",
    export_path=f"../data/geojsons/{datetime}_qupath_detections_justint64.geojson",
)

[32m2024-12-09 15:40:19.589[0m | [1mINFO    [0m | [36mqupath_utils[0m:[36msdata_to_qupath_detections[0m:[36m35[0m - [1mConverting poly_mask geodataframe to detections[0m
[32m2024-12-09 15:40:19.613[0m | [1mINFO    [0m | [36mqupath_utils[0m:[36msdata_to_qupath_detections[0m:[36m58[0m - [1mCheck of inputs completed, starting conversion to detections[0m
[32m2024-12-09 15:40:19.615[0m | [1mINFO    [0m | [36mqupath_utils[0m:[36msdata_to_qupath_detections[0m:[36m69[0m - [1mNaming detections as cellID[0m
[32m2024-12-09 15:40:19.757[0m | [1mINFO    [0m | [36mqupath_utils[0m:[36msdata_to_qupath_detections[0m:[36m73[0m - [1mLabeling geometries as detections, for smooth viewing in QuPath[0m
[32m2024-12-09 15:40:19.762[0m | [1mINFO    [0m | [36mqupath_utils[0m:[36msdata_to_qupath_detections[0m:[36m77[0m - [1mClassifying detections by mean_DAPI_bg_above_200[0m
[32m2024-12-09 15:40:19.769[0m | [1mINFO    [0m | [36mqupath_utils[0m:[36

# Indexing works now :) 

Summary:
    If the indexing columns share the same dtype and the same order, the matching works fine. I changed both the index and `CellID`, because I sometimes use the index.