In [12]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up live, without restarting the kernel.
# NOTE: re-running this cell in an already-running kernel only prints an
# "already loaded" notice that suggests `%reload_ext autoreload` instead.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import sparrow as sp
from sparrow.datasets import pixie_example
from sparrow.table.cell_clustering._utils import _export_to_ark_format as _export_to_ark_format_cells
from sparrow.table.pixel_clustering._cluster_intensity import _export_to_ark_format as _export_to_ark_format_pixels
from sparrow.utils._keys import ClusteringKey

# Load the pixie example dataset for two fields of view (fov0 and fov1).
# NOTE(review): per this cell's log output, layers named raw_image_<fov>,
# label_nuclear_<fov> and label_whole_<fov> are written for each fov at this point.
sdata_ark_analysis = pixie_example(["fov0", "fov1"])

# Channel names handed to the pixel-clustering preprocessing step below.
channels = [
    "CD3",
    "CD4",
    "CD8",
    "CD14",
    "CD20",
    "CD31",
    "CD45",
    "CD68",
    "CD163",
    "CK17",
    "Collagen1",
    "Fibronectin",
    "ECAD",
    "HLADR",
    "SMA",
    "Vim",
]

# Preprocess the raw image layer of each fov for pixel clustering, passing the
# selected `channels`. One output layer ("<img_layer>_processed") is named per
# input layer; the returned object is rebound to `sdata_ark_analysis`.
# NOTE(review): `sigma` presumably is the width of a Gaussian blur and `chunks`
# the chunk size used for processing — confirm against the sparrow documentation.
# NOTE(review): overwrite=True presumably replaces existing output layers, which
# is what makes this cell safe to re-run — confirm.
sdata_ark_analysis = sp.im.pixel_clustering_preprocess(
    sdata_ark_analysis,
    img_layer=["raw_image_fov0", "raw_image_fov1"],
    output_layer=["raw_image_fov0_processed", "raw_image_fov1_processed"],
    channels=channels,
    chunks=2048,
    overwrite=True,
    sigma=2.0,
)


# Pixel-level FlowSOM clustering on the preprocessed images of both fovs.
# The call returns the updated sdata plus two extra objects, `fsom` and `mapping`;
# `mapping` is passed on to sp.tb.cluster_intensity further down.
# NOTE(review): `fsom` is rebound by the later sp.tb.flowsom call — keep a separate
# reference if the pixel-level object is still needed afterwards.
# NOTE(review): `n_clusters` presumably sets the number of metaclusters and
# `random_state` the seed that makes the clustering reproducible — confirm.
sdata_ark_analysis, fsom, mapping = sp.im.flowsom(
    sdata_ark_analysis,
    img_layer=["raw_image_fov0_processed", "raw_image_fov1_processed"],
    output_layer_clusters=[
        "raw_image_fov0_flowsom_clusters",
        "raw_image_fov1_flowsom_clusters",
    ],  # both `output_layer_clusters` and `output_layer_metaclusters` are needed; both will be labels layers
    output_layer_metaclusters=["raw_image_fov0_flowsom_metaclusters", "raw_image_fov1_flowsom_metaclusters"],
    n_clusters=20,
    random_state=111,
    chunks=512,
    overwrite=True,
)

# Build the pixel-cluster intensity table from the processed images and the
# pixel-cluster labels layers, using `mapping` returned by the pixel-level
# FlowSOM step. The result is stored in the table layer "counts_clusters".
sdata_ark_analysis = sp.tb.cluster_intensity(
    sdata_ark_analysis,
    mapping=mapping,
    img_layer=["raw_image_fov0_processed", "raw_image_fov1_processed"],
    labels_layer=["raw_image_fov0_flowsom_clusters", "raw_image_fov1_flowsom_clusters"],
    # Single-fov alternative: labels_layer="raw_image_fov0_flowsom_clusters",
    output_layer="counts_clusters",
    overwrite=True,
)
# TODO: to visualize this sdata in napari-spatialdata, the "counts_clusters" table
# layer must first be removed from it, because napari-spatialdata does not support
# viewing an sdata that contains tables not linked to an element.

# Cell-level FlowSOM clustering: takes the cell labels layers (label_whole_<fov>)
# together with the pixel metacluster labels layers and stores the result in the
# table layer "table_cell_clustering_flowsom".
# NOTE: this rebinds `fsom`, replacing the object returned by sp.im.flowsom above.
sdata_ark_analysis, fsom = sp.tb.flowsom(
    sdata_ark_analysis,
    labels_layer_cells=["label_whole_fov0", "label_whole_fov1"],
    labels_layer_clusters=[
        "raw_image_fov0_flowsom_metaclusters",
        "raw_image_fov1_flowsom_metaclusters",
    ],  # here you could also choose "ark_pixel_som_cluster"
    output_layer="table_cell_clustering_flowsom",
    chunks=512,
    overwrite=True,
    random_state=100,
)


# Weighted channel average for visualization, calculated on the FlowSOM-clustered
# cell table in combination with the pixel-cluster intensity table ("counts_clusters").
# The output table layer has the same name as the input cell-clustering table, so
# the result is written back to "table_cell_clustering_flowsom" (overwrite=True).
# The metaclustering key is used as the clustering key.
sdata_ark_analysis = sp.tb.weighted_channel_expression(
    sdata_ark_analysis,
    table_layer_cell_clustering="table_cell_clustering_flowsom",
    table_layer_pixel_cluster_intensity="counts_clusters",
    output_layer="table_cell_clustering_flowsom",
    clustering_key=ClusteringKey._METACLUSTERING_KEY,
    overwrite=True,
)


# Convert the pixel-cluster intensity table to ark format.
# NOTE: both export helpers are private sparrow functions (leading underscore in
# the imports), so they are not part of the public API and may change.
# NOTE(review): output=None presumably means nothing is written to disk and the
# result is only returned — confirm against the helper's signature.
df = _export_to_ark_format_pixels(adata=sdata_ark_analysis["counts_clusters"], output=None)

# Convert the cell-clustering table to ark format; the helper returns three objects,
# unpacked here into the per-SOM-cluster count average, the per-SOM-cluster channel
# average and the per-metacluster channel average (going by the target names).
(
    df_cell_som_cluster_count_avg,
    df_cell_som_cluster_channel_avg,
    df_cell_meta_cluster_channel_avg,
) = _export_to_ark_format_cells(sdata_ark_analysis, table_layer="table_cell_clustering_flowsom", output=None)

2024-06-06 16:33:59,174 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov0'
2024-06-06 16:33:59,182 - sparrow.image._manager - INFO - Writing results to layer 'label_nuclear_fov0'
2024-06-06 16:33:59,193 - sparrow.image._manager - INFO - Writing results to layer 'label_whole_fov0'


/Users/benjaminr/.cache/huggingface/datasets/downloads/extracted/ccde124668e88cec60561ae6aa0b5dd0317fef8e1096373c8ac24b309eefd749/post_clustering


2024-06-06 16:33:59,338 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov1'
2024-06-06 16:33:59,356 - sparrow.image._manager - INFO - Writing results to layer 'label_nuclear_fov1'
2024-06-06 16:33:59,366 - sparrow.image._manager - INFO - Writing results to layer 'label_whole_fov1'
  adata.uns[cls.ATTRS_KEY] = attr
2024-06-06 16:33:59,942 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov0_processed'
2024-06-06 16:34:00,217 - sparrow.image._manager - INFO - Writing results to layer 'raw_image_fov1_processed'
2024-06-06 16:34:00.355 | DEBUG    | flowsom.main:__init__:84 - Reading input.
2024-06-06 16:34:00.356 | DEBUG    | flowsom.main:__init__:86 - Fitting model: clustering and metaclustering.
[32m2024-06-06 16:34:01.889[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m88[0m - [34m[1mUpdat