# Run FlowSOM for pixel and cell clustering

In [None]:
%load_ext autoreload
%autoreload 2

import harpy as hp
from harpy.datasets import pixie_example
from harpy.utils._keys import ClusteringKey

## Load example dataset

In [None]:
# Load the pixie example dataset, requesting the two FOVs used in this notebook.
fovs = ["fov0", "fov1"]
sdata_ark_analysis = pixie_example(fovs)
sdata_ark_analysis

In [16]:
# Names of the image channels handed to the preprocessing step below.
# The order matches the original listing: "CD"-prefixed names first, then the rest.
channels = (
    ["CD3", "CD4", "CD8", "CD14", "CD20", "CD31", "CD45", "CD68", "CD163"]
    + ["CK17", "Collagen1", "Fibronectin", "ECAD", "HLADR", "SMA", "Vim"]
)

## Preprocess

In [None]:
# Run the pixel-clustering preprocessing on both raw FOV images and store the
# results under the corresponding "*_processed" layer names.
raw_layers = ["raw_image_fov0", "raw_image_fov1"]
processed_layers = ["raw_image_fov0_processed", "raw_image_fov1_processed"]

sdata_ark_analysis = hp.im.pixel_clustering_preprocess(
    sdata_ark_analysis,
    img_layer=raw_layers,
    output_layer=processed_layers,
    channels=channels,
    chunks=2048,
    # set to False if you have multiple images, and if they are large.
    persist_intermediate=True,
    overwrite=True,
    sigma=2.0,
)
sdata_ark_analysis

## Pixel clustering

In [None]:
import flowsom as fs
from dask.distributed import Client, LocalCluster

# Toggle: set to True to pass a local Dask client to the clustering call below.
work_with_client = False

# Default to no client; only build a cluster/client pair when requested.
client = None
if work_with_client:
    # client example
    cluster = LocalCluster(n_workers=1, threads_per_worker=10)
    client = Client(cluster)

batch_model = fs.models.BatchFlowSOMEstimator

processed_layers = ["raw_image_fov0_processed", "raw_image_fov1_processed"]
# Both output_layer_clusters and output_layer_metaclusters are needed
# --> these will both be labels layers.
cluster_layers = [
    "raw_image_fov0_flowsom_clusters",
    "raw_image_fov1_flowsom_clusters",
]
metacluster_layers = [
    "raw_image_fov0_flowsom_metaclusters",
    "raw_image_fov1_flowsom_metaclusters",
]

sdata_ark_analysis, fsom, mapping = hp.im.flowsom(
    sdata_ark_analysis,
    img_layer=processed_layers,
    output_layer_clusters=cluster_layers,
    output_layer_metaclusters=metacluster_layers,
    n_clusters=20,
    random_state=111,
    chunks=512,
    client=client,
    model=batch_model,
    num_batches=10,
    xdim=10,
    ydim=10,
    z_score=True,
    z_cap=3,
    persist_intermediate=True,
    overwrite=True,
)
sdata_ark_analysis

In [None]:
# Compute per-cluster intensities from the processed images and the FlowSOM
# cluster layers, using the `mapping` returned by the pixel clustering step;
# the result is written to the "counts_clusters" layer.
processed_layers = ["raw_image_fov0_processed", "raw_image_fov1_processed"]
cluster_layers = ["raw_image_fov0_flowsom_clusters", "raw_image_fov1_flowsom_clusters"]
coordinate_systems = ["fov0", "fov1"]

sdata_ark_analysis = hp.tb.cluster_intensity(
    sdata_ark_analysis,
    mapping=mapping,
    img_layer=processed_layers,
    labels_layer=cluster_layers,
    to_coordinate_system=coordinate_systems,
    output_layer="counts_clusters",
    overwrite=True,
)
sdata_ark_analysis

## Visualization of pixel clusters and metaclusters.

In [None]:
# Plot the metacluster labels of fov0 with fully opaque labels (alpha=1).
render_kwargs = {"alpha": 1}
hp.pl.pixel_clusters(
    sdata_ark_analysis,
    labels_layer="raw_image_fov0_flowsom_metaclusters",
    figsize=(5, 5),
    to_coordinate_system="fov0",
    render_labels_kwargs=render_kwargs,
)

## Heatmap of channel intensity per cluster and metacluster

In [None]:
# Draw the heatmap twice: first with metaclusters=True, then with metaclusters=False.
for use_metaclusters in (True, False):
    hp.pl.pixel_clusters_heatmap(
        sdata_ark_analysis,
        table_layer="counts_clusters",
        figsize=(8, 8),
        fig_kwargs={"dpi": 100},
        linewidths=0.001,
        metaclusters=use_metaclusters,
        z_score=True,
    )

## Cell clustering

In [None]:
# Cell-level FlowSOM: cluster cells from the whole-cell label layers and the
# pixel metacluster layers; the result table is "table_cell_clustering_flowsom".
batch_model = fs.models.BatchFlowSOMEstimator

cell_label_layers = ["label_whole_fov0", "label_whole_fov1"]
# here you could also choose "ark_pixel_som_cluster"
# NOTE(review): no layer with that name is created in this notebook — confirm it exists in the dataset.
pixel_cluster_layers = [
    "raw_image_fov0_flowsom_metaclusters",
    "raw_image_fov1_flowsom_metaclusters",
]

sdata_ark_analysis, fsom = hp.tb.flowsom(
    sdata_ark_analysis,
    labels_layer_cells=cell_label_layers,
    labels_layer_clusters=pixel_cluster_layers,
    output_layer="table_cell_clustering_flowsom",
    chunks=512,
    model=batch_model,
    num_batches=10,
    random_state=100,
    overwrite=True,
)
sdata_ark_analysis

## Optional: export to the CSV format used by the ark GUI for visualization

In [None]:
# weighted channel average for visualization -> calculate this on the flowsom clustered matrix
sdata_ark_analysis = hp.tb.weighted_channel_expression(
    sdata_ark_analysis,
    table_layer_cell_clustering="table_cell_clustering_flowsom",
    table_layer_pixel_cluster_intensity="counts_clusters",
    output_layer="table_cell_clustering_flowsom",
    clustering_key=ClusteringKey._METACLUSTERING_KEY,
    overwrite=True,
)
sdata_ark_analysis

In [None]:
from harpy.table.cell_clustering._utils import _export_to_ark_format as _export_to_ark_format_cells
from harpy.table.pixel_clustering._cluster_intensity import _export_to_ark_format as _export_to_ark_format_pixels

# Pixel-level export: convert the "counts_clusters" table (output=None is passed,
# so the result is only kept in `df`).
pixel_table = sdata_ark_analysis["counts_clusters"]
df = _export_to_ark_format_pixels(adata=pixel_table, output=None)

# Cell-level export: the helper returns three objects, unpacked in order below.
cell_exports = _export_to_ark_format_cells(
    sdata_ark_analysis,
    table_layer="table_cell_clustering_flowsom",
    output=None,
)
df_cell_som_cluster_count_avg, df_cell_som_cluster_channel_avg, df_cell_meta_cluster_channel_avg = cell_exports
df

In [None]:
# Display the third object returned by the cell-level ark export above.
df_cell_meta_cluster_channel_avg

In [None]:
# "table_cell_clustering_flowsom" is annotated by segmentation masks, so they can be
# visualised using napari-spatialdata; show the table's "spatialdata_attrs" metadata.
cell_table = sdata_ark_analysis["table_cell_clustering_flowsom"]
cell_table.uns["spatialdata_attrs"]

# Optional interactive viewer (left disabled, as in the original):
# from napari_spatialdata import Interactive
#
# Interactive(sdata_ark_analysis)