In [1]:
#imports
import spatialdata
import dask_image.imread
import dask.array as da
import geopandas as gpd
import anndata as ad
import napari_spatialdata
import time

import sys, os
sys.path.append(os.path.abspath("../functions/"))
import anndata_utils



In [2]:
# record the installed version of each core dependency next to the results
for module in (spatialdata, gpd, ad, napari_spatialdata):
    print(f"{module.__name__}: {module.__version__}")

spatialdata: 0.2.5.post0
geopandas: 1.0.1
anndata: 0.11.1
napari_spatialdata: 0.5.3


# Create an `sdata` (SpatialData) object for testing

In [4]:
#paths
# The cells below read the following variables, so they have to be assigned here:
#   path_to_mIF         - image opened with dask_image.imread in the "parse image" cell
#   path_to_mask        - image opened with dask_image.imread in the "parse mask" cell
#   path_to_shapes      - file opened with geopandas.read_file in the "parse shapes" cell
#   path_to_proteomics  - file opened with anndata.read_h5ad in the proteomic-table cell
#   path_to_imaging     - argument passed to anndata_utils.read_quant in the imaging-table cell
# TODO: none of these is assigned anywhere in the visible notebook; a fresh
# "Restart & Run All" will fail with NameError until the assignments are added here.


In [4]:
# initiate spatialdata object
# start from an empty container; the following cells add the image, mask,
# shapes and tables to it one key at a time
sdata = spatialdata.SpatialData()

In [5]:
#parse image
# open the image as a dask array
image = dask_image.imread.imread(path_to_mIF)
# the chunking and the axis names below both assume a 3-D (channel, y, x) array
assert image.ndim == 3, f"expected a (c, y, x) image, got shape {image.shape}"
# one channel per chunk, tiled 5000 x 5000 pixels in y/x
image = image.rechunk(chunks=(1, 5000, 5000))
# name the axes explicitly instead of letting the parser fall back to its default
# (it otherwise logs "no axes information specified" and picks ('c', 'y', 'x') itself)
parsed_image = spatialdata.models.Image2DModel.parse(image, dims=("c", "y", "x"))
sdata['image'] = parsed_image

INFO     no axes information specified in the object, setting `dims` to: ('c', 'y', 'x')


In [6]:
#parse mask
# open the segmentation mask as a dask array
mask = dask_image.imread.imread(path_to_mask)
# drop singleton axes so only the two spatial axes remain
mask = da.squeeze(mask)
assert mask.ndim == 2, f"expected a 2-D (y, x) mask after squeezing, got shape {mask.shape}"
# tile 5000 x 5000 pixels in y/x
mask = mask.rechunk(chunks=(5000, 5000))
# name the axes explicitly instead of letting the parser fall back to its default
# (it otherwise logs "no axes information specified" and picks ('y', 'x') itself)
sdata['mask'] = spatialdata.models.Labels2DModel.parse(mask, dims=("y", "x"))

INFO     no axes information specified in the object, setting `dims` to: ('y', 'x')


In [None]:
# parse shapes
# read the contours and, in the same step, index them by the column that
# matches the proteomic table
gdf = gpd.read_file(path_to_shapes).set_index("harmonized_class")
primary_contours = spatialdata.models.ShapesModel.parse(gdf)
sdata["Primary_contours"] = primary_contours

In [8]:
def switch_adat_var_index(adata, new_index):
    """
    Created by Jose Nimo on 2023-07-01
    Last modified by Jose Nimo on 2024-11-16

    Description:
    Return a copy of `adata` whose `.var` is indexed by the column `new_index`.
    Useful for switching between gene names and protein names.
    The previous index is kept as a regular column of `.var`, named after the
    old index, or "index" when the old index has no name.

    Args:
        adata: anndata object; it is copied, not modified
        new_index: str, name of the column of adata.var to use as the new index
    Returns:
        adata: copy of the anndata object, with the new index
    Raises:
        KeyError: if `new_index` is not a column of adata.var
    """
    adata_copy = adata.copy()
    var = adata_copy.var

    # keep the old index as a column; an unnamed index would otherwise be
    # stored under a column literally called None
    previous_name = var.index.name if var.index.name is not None else "index"
    var[previous_name] = var.index

    # checked after the old index became a column, so switching back to the
    # current index name keeps working
    if new_index not in var.columns:
        raise KeyError(
            f"'{new_index}' is not a column of adata.var; "
            f"available columns: {list(var.columns)}"
        )

    var.set_index(new_index, inplace=True)
    var.index.name = new_index

    return adata_copy

In [9]:
#load proteomic table
adata = ad.read_h5ad(path_to_proteomics)

# index the variables by gene name ...
adata = switch_adat_var_index(adata, "Genes")
# ... and order them alphabetically (much easier to find a gene like this).
# The reordering is done by slicing the AnnData object, not by sorting `.var`
# in place: an in-place sort only moves the annotation rows and leaves the
# columns of X where they were, so gene names would no longer match their data.
# Positions are used rather than labels so that duplicated or missing gene
# names cannot make the selection ambiguous.
gene_order = (
    adata.var.index.to_series()
    .reset_index(drop=True)
    .sort_values(kind="stable")
    .index.to_numpy()
)
adata = adata[:, gene_order].copy()

# link every observation to the "Primary_contours" shapes element
adata.obs["region"] = "Primary_contours"
adata.obs["region"] = adata.obs["region"].astype("category")
table = spatialdata.models.TableModel.parse(adata, region = "Primary_contours", region_key = "region", instance_key = "shape_key")
# add the finished table once, instead of adding it and then overwriting the same key
sdata["proteomics"] = table

  self._check_key(key, self.keys(), self._shared_keys)


In [None]:
#load imaging table
adata = anndata_utils.read_quant(path_to_imaging)

region_name = "mask"      # key under which the label image was added to sdata
region_column = "shapes"  # obs column that stores that key for every cell

# link every cell to the label image
# NOTE(review): the obs preview in this notebook shows CellID starting at 0;
# confirm these IDs line up with the label values in the mask (0 is commonly
# reserved for background in label images).
adata.obs[region_column] = region_name
adata.obs[region_column] = adata.obs[region_column].astype("category")
table = spatialdata.models.TableModel.parse(
    adata,
    region=region_name,
    region_key=region_column,
    instance_key="CellID",
)
sdata["imaging"] = table

2024-12-08 20:26:18.183 | INFO     | anndata_utils:read_quant:13 - ---- read_quant : version number 1.1.0 ----
2024-12-08 20:26:21.716 | INFO     | anndata_utils:read_quant:33 - 685805 cells and 75 variables
2024-12-08 20:26:21.717 | INFO     | anndata_utils:read_quant:34 - ---- read_quant is done, took 3s  ----


In [6]:
# display the per-observation metadata of the table currently bound to `adata`
# (the name is reused for the proteomic and the imaging table, so the result
# depends on which loading cell ran last)
adata.obs

Unnamed: 0,CellID,Y_centroid,X_centroid,Area,MajorAxisLength,MinorAxisLength,Eccentricity,Orientation,Extent,Solidity,shapes
0,0,29410.806452,35874.857801,1519.0,98.696620,30.646839,0.950568,-1.524977,228.651804,0.676615,mask
1,1,29445.080408,36162.243140,1567.0,50.422862,39.687556,0.616832,0.160002,148.710678,0.975109,mask
2,2,29505.171707,38455.235122,1025.0,40.280587,34.195207,0.528513,-0.346644,131.438600,0.927602,mask
3,3,29539.721673,35060.844867,1315.0,61.049935,28.208869,0.886848,-1.447088,151.254834,0.947406,mask
4,4,29982.642779,36765.660558,3656.0,84.178897,57.384893,0.731630,0.251203,243.379726,0.926743,mask
...,...,...,...,...,...,...,...,...,...,...,...
685800,685800,40389.067593,46087.962963,1080.0,56.372404,25.352581,0.893163,1.002754,141.610173,0.961710,mask
685801,685801,40382.629182,47716.017658,1076.0,41.856049,34.076776,0.580666,1.330143,129.396970,0.951370,mask
685802,685802,40382.072917,48436.798177,768.0,33.256837,29.969709,0.433488,0.983058,105.254834,0.957606,mask
685803,685803,40383.786948,45011.021113,1042.0,45.051210,31.924524,0.705583,1.218499,133.189863,0.964815,mask


In [11]:
import filtering

# threshold on the 'mean_DAPI_bg' marker
dapi_threshold = 200
# judging by the obs preview further down, the returned table carries an extra
# boolean column (mean_DAPI_bg_above_200)
imaging_flagged = filtering.filter_by_abs_value(
    adata=sdata['imaging'],
    marker='mean_DAPI_bg',
    value=dapi_threshold,
    direction='above',
    plot=False,
)
sdata['imaging'] = imaging_flagged

2024-12-03 19:48:15.530 | INFO     | filtering:filter_by_abs_value:36 - ---- filter_by_abs_value : version number 1.1.0 ----
2024-12-03 19:48:15.641 | INFO     | filtering:filter_by_abs_value:70 - Number of cells with mean_DAPI_bg above 200: 638060
2024-12-03 19:48:15.642 | INFO     | filtering:filter_by_abs_value:85 - ---- filter_by_abs_value is done, took 0s  ----
  self._check_key(key, self.keys(), self._shared_keys)


# open the spatialdata object in the napari viewer
# (napari_spatialdata is already imported in the first cell)
interactive = napari_spatialdata.Interactive(sdata)
interactive.run()

#save the spatialdata object
# (time is already imported in the first cell)
# stamp the store name with date and minute so saves made at different times
# go to different folders; named `timestamp` to avoid shadowing the stdlib
# module name `datetime`
timestamp = time.strftime("%Y%m%d_%H%M")
output_path = f"../data/sdata/{timestamp}_sdata_fullres.zarr"

# perf_counter is monotonic, so the measured duration cannot be distorted by
# system clock adjustments during a long write
time_start = time.perf_counter()
sdata.write(output_path)
time_end = time.perf_counter()
print(f"Time to save: {time_end - time_start}")

In [13]:
# display the per-cell metadata of the imaging table stored in sdata
sdata['imaging'].obs

Unnamed: 0,CellID,Y_centroid,X_centroid,Area,MajorAxisLength,MinorAxisLength,Eccentricity,Orientation,Extent,Solidity,shapes,mean_DAPI_bg_above_200
0,0,29410.806452,35874.857801,1519.0,98.696620,30.646839,0.950568,-1.524977,228.651804,0.676615,mask,True
1,1,29445.080408,36162.243140,1567.0,50.422862,39.687556,0.616832,0.160002,148.710678,0.975109,mask,True
2,2,29505.171707,38455.235122,1025.0,40.280587,34.195207,0.528513,-0.346644,131.438600,0.927602,mask,True
3,3,29539.721673,35060.844867,1315.0,61.049935,28.208869,0.886848,-1.447088,151.254834,0.947406,mask,True
4,4,29982.642779,36765.660558,3656.0,84.178897,57.384893,0.731630,0.251203,243.379726,0.926743,mask,True
...,...,...,...,...,...,...,...,...,...,...,...,...
685800,685800,40389.067593,46087.962963,1080.0,56.372404,25.352581,0.893163,1.002754,141.610173,0.961710,mask,True
685801,685801,40382.629182,47716.017658,1076.0,41.856049,34.076776,0.580666,1.330143,129.396970,0.951370,mask,True
685802,685802,40382.072917,48436.798177,768.0,33.256837,29.969709,0.433488,0.983058,105.254834,0.957606,mask,True
685803,685803,40383.786948,45011.021113,1042.0,45.051210,31.924524,0.705583,1.218499,133.189863,0.964815,mask,True


In [12]:
# open the spatialdata object in the napari viewer
# (napari_spatialdata is already imported in the first cell)
interactive = napari_spatialdata.Interactive(sdata)
interactive.run()

  utils.warn_names_duplicates("var")
2024-12-03 19:48:33.111 | INFO     | napari_spatialdata._view:_on_layer_update:355 - Updating layer.
2024-12-03 19:48:33.112 | INFO     | napari_spatialdata._view:_on_layer_update:355 - Updating layer.
2024-12-03 19:50:45.282 | INFO     | napari_spatialdata._view:_on_layer_update:355 - Updating layer.
2024-12-03 19:50:45.285 | INFO     | napari_spatialdata._view:_on_layer_update:355 - Updating layer.
2024-12-03 19:51:16.668 | INFO     | napari_spatialdata._view:_on_layer_update:355 - Updating layer.
