# Run FlowSOM for pixel and cell clustering

In [4]:
%load_ext autoreload
%autoreload 2

import sparrow as sp
from sparrow.datasets import pixie_example
from sparrow.table.cell_clustering._utils import _export_to_ark_format as _export_to_ark_format_cells
from sparrow.table.pixel_clustering._cluster_intensity import _export_to_ark_format as _export_to_ark_format_pixels
from sparrow.utils._keys import ClusteringKey

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load example dataset

In [5]:
# Load the pixie example dataset for two fields of view ("fov0" and "fov1").
# Per the repr in the cell output, the returned SpatialData object holds, for each
# FOV, one raw image plus a nuclear and a whole-cell labels layer, and one table.
sdata_ark_analysis = pixie_example(["fov0", "fov1"])
# Bare last expression: display the SpatialData repr.
sdata_ark_analysis

2024-06-14 10:18:53,348 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov0'
2024-06-14 10:18:53,357 - sparrow.image._manager - INFO - Writing results to layer 'label_nuclear_fov0'
2024-06-14 10:18:53,365 - sparrow.image._manager - INFO - Writing results to layer 'label_whole_fov0'


/Users/benjaminr/.cache/huggingface/datasets/downloads/extracted/ccde124668e88cec60561ae6aa0b5dd0317fef8e1096373c8ac24b309eefd749/post_clustering


2024-06-14 10:18:53,518 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov1'
2024-06-14 10:18:53,550 - sparrow.image._manager - INFO - Writing results to layer 'label_nuclear_fov1'
2024-06-14 10:18:53,580 - sparrow.image._manager - INFO - Writing results to layer 'label_whole_fov1'
  adata.uns[cls.ATTRS_KEY] = attr


SpatialData object
├── Images
│     ├── 'raw_image_fov0': SpatialImage[cyx] (22, 512, 512)
│     └── 'raw_image_fov1': SpatialImage[cyx] (22, 1024, 1024)
├── Labels
│     ├── 'label_nuclear_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_nuclear_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'label_whole_fov0': SpatialImage[yx] (512, 512)
│     └── 'label_whole_fov1': SpatialImage[yx] (1024, 1024)
└── Tables
      └── 'table': AnnData (1414, 22)
with coordinate systems:
    ▸ 'global', with elements:
        raw_image_fov0 (Images), raw_image_fov1 (Images), label_nuclear_fov0 (Labels), label_nuclear_fov1 (Labels), label_whole_fov0 (Labels), label_whole_fov1 (Labels)

In [6]:
# Channel names handed to the pixel-clustering preprocessing step (`channels=`).
# This is a selection of 16 channels; the raw images carry 22 (see their shapes
# in the SpatialData repr).
channels = [
    "CD3",
    "CD4",
    "CD8",
    "CD14",
    "CD20",
    "CD31",
    "CD45",
    "CD68",
    "CD163",
    "CK17",
    "Collagen1",
    "Fibronectin",
    "ECAD",
    "HLADR",
    "SMA",
    "Vim",
]

In [7]:
# Preprocess the raw images of both FOVs for pixel clustering, restricted to the
# selected `channels`. One processed image layer is written per input layer; the
# repr in the cell output shows them with 16 channels instead of 22.
# NOTE(review): `sigma` presumably controls a Gaussian smoothing step and `chunks`
# the chunk size used for the computation — confirm against the sparrow docs.
sdata_ark_analysis = sp.im.pixel_clustering_preprocess(
    sdata_ark_analysis,
    img_layer=["raw_image_fov0", "raw_image_fov1"],
    output_layer=["raw_image_fov0_processed", "raw_image_fov1_processed"],
    channels=channels,
    chunks=2048,
    overwrite=True,  # replace the output layers if they already exist (allows re-running the cell)
    sigma=2.0,
)
sdata_ark_analysis

2024-06-14 10:18:54,549 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov0_processed'
2024-06-14 10:18:54,890 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov1_processed'


SpatialData object
├── Images
│     ├── 'raw_image_fov0': SpatialImage[cyx] (22, 512, 512)
│     ├── 'raw_image_fov0_processed': SpatialImage[cyx] (16, 512, 512)
│     ├── 'raw_image_fov1': SpatialImage[cyx] (22, 1024, 1024)
│     └── 'raw_image_fov1_processed': SpatialImage[cyx] (16, 1024, 1024)
├── Labels
│     ├── 'label_nuclear_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_nuclear_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'label_whole_fov0': SpatialImage[yx] (512, 512)
│     └── 'label_whole_fov1': SpatialImage[yx] (1024, 1024)
└── Tables
      └── 'table': AnnData (1414, 22)
with coordinate systems:
    ▸ 'global', with elements:
        raw_image_fov0 (Images), raw_image_fov0_processed (Images), raw_image_fov1 (Images), raw_image_fov1_processed (Images), label_nuclear_fov0 (Labels), label_nuclear_fov1 (Labels), label_whole_fov0 (Labels), label_whole_fov1 (Labels)

In [8]:
# Pixel clustering: run FlowSOM on the preprocessed images of both FOVs.
# Two outputs are written per FOV, both as labels layers (see the repr in the
# cell output): a cluster layer and a metacluster layer.
# Besides the updated SpatialData object, the call returns `fsom` and `mapping`;
# `mapping` is passed to `sp.tb.cluster_intensity` in the next step.
# NOTE(review): `n_clusters` presumably sets the number of metaclusters, and
# `random_state` is presumably fixed for reproducibility — confirm.
sdata_ark_analysis, fsom, mapping = sp.im.flowsom(
    sdata_ark_analysis,
    img_layer=["raw_image_fov0_processed", "raw_image_fov1_processed"],
    output_layer_clusters=[
        "raw_image_fov0_flowsom_clusters",
        "raw_image_fov1_flowsom_clusters",
    ],  # one cluster labels layer per input image
    output_layer_metaclusters=["raw_image_fov0_flowsom_metaclusters", "raw_image_fov1_flowsom_metaclusters"],
    n_clusters=20,
    random_state=111,
    chunks=512,
    overwrite=True,
)
sdata_ark_analysis

[32m2024-06-14 10:18:55.164[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m84[0m - [34m[1mReading input.[0m
[32m2024-06-14 10:18:55.165[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m86[0m - [34m[1mFitting model: clustering and metaclustering.[0m
[32m2024-06-14 10:18:58.077[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m88[0m - [34m[1mUpdating derived values.[0m
2024-06-14 10:18:59,231 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov0_flowsom_clusters'
2024-06-14 10:18:59,625 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov0_flowsom_metaclusters'
2024-06-14 10:19:00,514 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov1_flowsom_clusters'
2024-06-14 10:19:01,370 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov1_flowsom_metaclusters'


SpatialData object
├── Images
│     ├── 'raw_image_fov0': SpatialImage[cyx] (22, 512, 512)
│     ├── 'raw_image_fov0_processed': SpatialImage[cyx] (16, 512, 512)
│     ├── 'raw_image_fov1': SpatialImage[cyx] (22, 1024, 1024)
│     └── 'raw_image_fov1_processed': SpatialImage[cyx] (16, 1024, 1024)
├── Labels
│     ├── 'label_nuclear_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_nuclear_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'label_whole_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_whole_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'raw_image_fov0_flowsom_clusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov0_flowsom_metaclusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov1_flowsom_clusters': SpatialImage[yx] (1024, 1024)
│     └── 'raw_image_fov1_flowsom_metaclusters': SpatialImage[yx] (1024, 1024)
└── Tables
      └── 'table': AnnData (1414, 22)
with coordinate systems:
    ▸ 'global', with elements:
        raw_image_fov0 (Images), raw_image

In [9]:
# Compute channel intensities per pixel cluster over both FOVs and store them in
# a new table layer "counts_clusters" (100 rows x 16 channels in the repr shown
# in the cell output). `mapping` is the value returned by `sp.im.flowsom` above.
sdata_ark_analysis = sp.tb.cluster_intensity(
    sdata_ark_analysis,
    mapping=mapping,
    img_layer=["raw_image_fov0_processed", "raw_image_fov1_processed"],
    labels_layer=["raw_image_fov0_flowsom_clusters", "raw_image_fov1_flowsom_clusters"],
    output_layer="counts_clusters",
    overwrite=True,
)
# Note: to visualize the result in napari-spatialdata, the "counts_clusters" table
# layer currently has to be removed first.
# See https://github.com/scverse/napari-spatialdata/issues/244
sdata_ark_analysis

  adata.obsm["spatial"] = coordinates
  adata.obsm["spatial"] = coordinates
  self._check_key(key, self.keys(), self._shared_keys)
2024-06-14 10:19:02,922 - sparrow.table._preprocess - INFO - Calculating cell size from provided labels_layer 'raw_image_fov0_flowsom_clusters'
2024-06-14 10:19:02,929 - sparrow.table._preprocess - INFO - Calculating cell size from provided labels_layer 'raw_image_fov1_flowsom_clusters'
  adata = spatialdata.models.TableModel.parse(
  self._check_key(key, self.keys(), self._shared_keys)
  self._check_key(key, self.keys(), self._shared_keys)


SpatialData object
├── Images
│     ├── 'raw_image_fov0': SpatialImage[cyx] (22, 512, 512)
│     ├── 'raw_image_fov0_processed': SpatialImage[cyx] (16, 512, 512)
│     ├── 'raw_image_fov1': SpatialImage[cyx] (22, 1024, 1024)
│     └── 'raw_image_fov1_processed': SpatialImage[cyx] (16, 1024, 1024)
├── Labels
│     ├── 'label_nuclear_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_nuclear_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'label_whole_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_whole_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'raw_image_fov0_flowsom_clusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov0_flowsom_metaclusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov1_flowsom_clusters': SpatialImage[yx] (1024, 1024)
│     └── 'raw_image_fov1_flowsom_metaclusters': SpatialImage[yx] (1024, 1024)
└── Tables
      ├── 'counts_clusters': AnnData (100, 16)
      └── 'table': AnnData (1414, 22)
with coordinate systems:
    ▸ 'global', with eleme

In [10]:
# Cell clustering: run FlowSOM on the cells of both FOVs, combining the whole-cell
# segmentation masks with the pixel metacluster layers produced by the pixel
# clustering step. The result is written to the table layer
# "table_cell_clustering_flowsom".
# Note: this rebinds `fsom`; the object returned by the pixel-clustering call
# (`sp.im.flowsom`) is no longer reachable under that name afterwards.
# NOTE(review): the original comment suggested "ark_pixel_som_cluster" as an
# alternative for `labels_layer_clusters`, but no layer with that name appears in
# the SpatialData repr — presumably the "*_flowsom_clusters" layers are meant; confirm.
sdata_ark_analysis, fsom = sp.tb.flowsom(
    sdata_ark_analysis,
    labels_layer_cells=["label_whole_fov0", "label_whole_fov1"],
    labels_layer_clusters=[
        "raw_image_fov0_flowsom_metaclusters",
        "raw_image_fov1_flowsom_metaclusters",
    ],  # pixel metacluster layers, one per FOV
    output_layer="table_cell_clustering_flowsom",
    chunks=512,
    overwrite=True,
    random_state=100,
)
sdata_ark_analysis

2024-06-14 10:19:03,234 - sparrow.table._preprocess - INFO - Calculating cell size from provided labels_layer 'label_whole_fov0'
2024-06-14 10:19:03,241 - sparrow.table._preprocess - INFO - Calculating cell size from provided labels_layer 'label_whole_fov1'
  adata = spatialdata.models.TableModel.parse(
  self._check_key(key, self.keys(), self._shared_keys)
[32m2024-06-14 10:19:03.270[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m84[0m - [34m[1mReading input.[0m
[32m2024-06-14 10:19:03.271[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m86[0m - [34m[1mFitting model: clustering and metaclustering.[0m
[32m2024-06-14 10:19:03.302[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m88[0m - [34m[1mUpdating derived values.[0m
2024-06-14 10:19:03,514 - sparrow.table.cell_clustering._clustering - INFO - Adding mean cluster intensity to '.uns['clustering']'
2024-06-14 10:19:03,535 - sparrow.table.cell_

SpatialData object
├── Images
│     ├── 'raw_image_fov0': SpatialImage[cyx] (22, 512, 512)
│     ├── 'raw_image_fov0_processed': SpatialImage[cyx] (16, 512, 512)
│     ├── 'raw_image_fov1': SpatialImage[cyx] (22, 1024, 1024)
│     └── 'raw_image_fov1_processed': SpatialImage[cyx] (16, 1024, 1024)
├── Labels
│     ├── 'label_nuclear_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_nuclear_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'label_whole_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_whole_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'raw_image_fov0_flowsom_clusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov0_flowsom_metaclusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov1_flowsom_clusters': SpatialImage[yx] (1024, 1024)
│     └── 'raw_image_fov1_flowsom_metaclusters': SpatialImage[yx] (1024, 1024)
└── Tables
      ├── 'counts_clusters': AnnData (100, 16)
      ├── 'table': AnnData (1414, 22)
      └── 'table_cell_clustering_flowsom': AnnData 

In [11]:
# Weighted channel average for visualization, calculated on the FlowSOM cell
# clustering table using the pixel cluster intensities in "counts_clusters".
# Input and output table layer are the same ("table_cell_clustering_flowsom"), so
# with `overwrite=True` that table layer is replaced by the augmented version.
# Per the log output, this adds the per-cell weighted marker expression to `.obs`
# and its mean per cell cluster / metacluster to `.uns`.
sdata_ark_analysis = sp.tb.weighted_channel_expression(
    sdata_ark_analysis,
    table_layer_cell_clustering="table_cell_clustering_flowsom",
    table_layer_pixel_cluster_intensity="counts_clusters",
    output_layer="table_cell_clustering_flowsom",
    clustering_key=ClusteringKey._METACLUSTERING_KEY,
    overwrite=True,
)
sdata_ark_analysis

2024-06-14 10:19:03,666 - sparrow.table.cell_clustering._weighted_channel_expression - INFO - Adding mean over obtained cell clusters '(clustering)' of the average marker expression for each cell weighted by pixel cluster count to '.uns[ 'clustering_channels' ]' of table layer 'table_cell_clustering_flowsom'
2024-06-14 10:19:03,670 - sparrow.table.cell_clustering._weighted_channel_expression - INFO - Adding mean over obtained cell clusters '(metaclustering)' of the average marker expression for each cell weighted by pixel cluster count to '.uns[ 'metaclustering_channels' ]' of table layer 'table_cell_clustering_flowsom'
2024-06-14 10:19:03,670 - sparrow.table.cell_clustering._weighted_channel_expression - INFO - Adding average marker expression for each cell weighted by pixel cluster count to '.obs' of table layer 'table_cell_clustering_flowsom'
  self._check_key(key, self.keys(), self._shared_keys)


SpatialData object
├── Images
│     ├── 'raw_image_fov0': SpatialImage[cyx] (22, 512, 512)
│     ├── 'raw_image_fov0_processed': SpatialImage[cyx] (16, 512, 512)
│     ├── 'raw_image_fov1': SpatialImage[cyx] (22, 1024, 1024)
│     └── 'raw_image_fov1_processed': SpatialImage[cyx] (16, 1024, 1024)
├── Labels
│     ├── 'label_nuclear_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_nuclear_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'label_whole_fov0': SpatialImage[yx] (512, 512)
│     ├── 'label_whole_fov1': SpatialImage[yx] (1024, 1024)
│     ├── 'raw_image_fov0_flowsom_clusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov0_flowsom_metaclusters': SpatialImage[yx] (512, 512)
│     ├── 'raw_image_fov1_flowsom_clusters': SpatialImage[yx] (1024, 1024)
│     └── 'raw_image_fov1_flowsom_metaclusters': SpatialImage[yx] (1024, 1024)
└── Tables
      ├── 'counts_clusters': AnnData (100, 16)
      ├── 'table': AnnData (1414, 22)
      └── 'table_cell_clustering_flowsom': AnnData 

In [12]:
# Convert the clustering results to the ark-analysis table layout.
# Both helpers are underscore-prefixed (private) functions imported from sparrow
# internals at the top of the notebook.
# NOTE(review): `output=None` presumably means "return the tables without writing
# them to disk" — confirm against the helper signatures.

# Pixel cluster table: channel values plus pixel_meta_cluster, pixel_som_cluster
# and count columns (see the displayed output).
df = _export_to_ark_format_pixels(adata=sdata_ark_analysis["counts_clusters"], output=None)
# Cell clustering tables; the helper returns three objects, unpacked here.
(
    df_cell_som_cluster_count_avg,
    df_cell_som_cluster_channel_avg,
    df_cell_meta_cluster_channel_avg,
) = _export_to_ark_format_cells(sdata_ark_analysis, table_layer="table_cell_clustering_flowsom", output=None)
# Display the pixel cluster table.
df



channels,CD3,CD4,CD8,CD14,CD20,CD31,CD45,CD68,CD163,CK17,Collagen1,Fibronectin,ECAD,HLADR,SMA,Vim,pixel_meta_cluster,pixel_som_cluster,count
cells,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1_counts_clusters_c87db112,2.957331,4.494543,3.305711,4.343360,1.849581,5.298170,3.745769,2.057324,2.595093,1.275055,7.540906,7.779709,1.745743,1.576688,4.947849,89.713973,5,1,9050
2_counts_clusters_c87db112,9.223635,13.199765,5.344522,6.280923,10.007229,2.288556,24.313907,4.365400,6.206642,2.654662,8.120834,9.743484,4.143529,6.264831,3.418407,50.938679,5,2,9495
3_counts_clusters_c87db112,6.237812,9.367096,5.586749,12.584922,3.089530,3.213206,8.372398,6.089461,9.301675,4.037615,22.116019,14.121541,9.079312,5.893481,8.240330,34.542586,5,3,13887
4_counts_clusters_c87db112,18.534875,4.432272,49.607468,7.216803,1.551463,0.690981,17.769115,2.776675,3.575007,2.577310,29.798437,17.675432,2.835710,3.067414,6.869558,9.590902,9,4,10893
5_counts_clusters_c87db112,30.877554,3.149300,95.926183,6.370782,1.371339,0.650704,27.585160,2.047785,2.841191,2.113751,9.091455,5.126966,1.822619,3.226701,2.975826,8.591069,9,5,10411
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96_counts_clusters_c87db112,2.338454,9.962485,2.193743,62.699224,0.810325,0.677078,9.805510,3.943257,9.179419,1.048139,40.200144,11.909424,1.028891,8.213371,3.419144,4.546988,2,96,13135
97_counts_clusters_c87db112,2.924828,14.908827,2.606520,75.980420,1.187327,1.034916,13.934763,6.531429,14.983489,1.084566,12.627397,9.501719,1.691567,14.328754,5.775819,8.412790,18,97,18881
98_counts_clusters_c87db112,2.663261,14.675678,2.450740,49.446807,0.936380,0.965804,15.280870,6.356314,12.865684,1.003338,11.538158,7.572785,1.616540,45.161700,4.578395,9.472542,2,98,9606
99_counts_clusters_c87db112,2.786221,16.886792,2.544968,57.760554,1.510566,0.621739,9.873467,9.815158,48.120955,1.054896,9.604530,7.028433,1.367574,9.942413,3.265405,11.612421,8,99,10220
