# Case study: Samusik bone marrow CyTOF data

This notebook generates results for a case study that uses a [mass cytometry (CyTOF) mouse bone marrow dataset](https://pubmed.ncbi.nlm.nih.gov/27183440/).

The goal is to illustrate integration with FlowSOM and to show how the topology of the FlowSOM minimum spanning tree (MST) can be verified using ViVAE embeddings.

<hr>

## **0.** Load required modules

In [1]:
import warnings
warnings.filterwarnings('ignore')
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.collections as mc
from matplotlib.patches import Circle, Wedge
import ViVAE
import torch
import flowsom as fs

## **1.** Load input data

See the ViScore [benchmarking](https://github.com/saeyslab/ViScore/tree/main/benchmarking) page for instructions on how to obtain and pre-process the Samusik data.

In [2]:
dataset  = 'Samusik'
data_dir = os.path.join('..', 'cytometry_data')

# Read the FCS file and keep only the markers of interest. The explicit copy
# turns the column subset into a standalone object, so the transform below is a
# plain assignment instead of a write through temporary per-column views.
ff       = fs.io.read_FCS(os.path.join(data_dir, f'{dataset}.fcs'))
col_idcs = np.arange(8, 47) # column indices of markers of interest
ff       = ff[:,col_idcs].copy()

# arcsinh-transform all selected channels in one vectorised step
cofactor = 5.
ff.X     = np.arcsinh(ff.X/cofactor)
exprs    = np.array(ff.X) # extracted expression data (independent copy of the transformed matrix)

# Per-cell population annotation; allow_pickle is needed to load object arrays,
# so only use annotation files from a trusted source.
labels   = np.load(os.path.join(data_dir, f'{dataset}_annot.npy'), allow_pickle=True)

# k-nearest-neighbour graph (the accompanying output shows it being loaded from
# `fname`), then used to smooth the expression data before training.
knn     = ViVAE.make_knn(x=exprs, fname=os.path.join(data_dir, f'{dataset}_knn.npy')) # already exists, load
exprs_d = ViVAE.smooth(exprs, knn, k=100, coef=1., n_iter=1)

Loading k-NNG


## **2.** Generate embedding

In [7]:
# Seed NumPy and PyTorch (and the model itself) with the same value
seed = 1
np.random.seed(seed)
torch.manual_seed(seed)

# Weights of the loss terms passed to `fit`
loss_weights = dict(lam_recon=1., lam_kldiv=1., lam_mds=10.)

# Train a ViVAE model with a 2-dimensional latent space on the smoothed
# expression data, then embed that same data.
model_vivae = ViVAE.ViVAE(
    input_dim=exprs.shape[1],
    latent_dim=2,
    random_state=seed,
)
model_vivae.fit(exprs_d, n_epochs=50, batch_size=1024, **loss_weights)

emb_vivae = model_vivae.transform(exprs_d)

Epoch 1/50	recon: 74.0045	kldiv: 0.5907	geom: 0.0000	egeom: 0.0000	mds: 104.0324
Epoch 2/50	recon: 15.0145	kldiv: 0.4939	geom: 0.0000	egeom: 0.0000	mds: 26.2824
Epoch 3/50	recon: 7.2328	kldiv: 0.4441	geom: 0.0000	egeom: 0.0000	mds: 12.9351
Epoch 4/50	recon: 3.5494	kldiv: 0.3569	geom: 0.0000	egeom: 0.0000	mds: 6.3052
Epoch 5/50	recon: 2.2324	kldiv: 0.2494	geom: 0.0000	egeom: 0.0000	mds: 4.8055
Epoch 6/50	recon: 1.6301	kldiv: 0.1986	geom: 0.0000	egeom: 0.0000	mds: 3.9126
Epoch 7/50	recon: 1.2371	kldiv: 0.1634	geom: 0.0000	egeom: 0.0000	mds: 3.3438
Epoch 8/50	recon: 0.9840	kldiv: 0.1385	geom: 0.0000	egeom: 0.0000	mds: 2.9039
Epoch 9/50	recon: 0.8216	kldiv: 0.1208	geom: 0.0000	egeom: 0.0000	mds: 2.5983
Epoch 10/50	recon: 0.6976	kldiv: 0.1066	geom: 0.0000	egeom: 0.0000	mds: 2.3153
Epoch 11/50	recon: 0.6042	kldiv: 0.0945	geom: 0.0000	egeom: 0.0000	mds: 2.0699
Epoch 12/50	recon: 0.5330	kldiv: 0.0856	geom: 0.0000	egeom: 0.0000	mds: 1.9136
Epoch 13/50	recon: 0.4718	kldiv: 0.0783	geom: 0.0000	eg

## **3.** Train FlowSOM model

In [8]:
# Fit FlowSOM on the transformed expression data using a 10x10 SOM grid.
# NOTE(review): n_clusters presumably sets the number of metaclusters - confirm
# against the FlowSOM documentation.
som_grid = dict(xdim=10, ydim=10)
fsom = fs.FlowSOM(
    ff,
    n_clusters=40,
    seed=42,
    **som_grid,
)

[32m2024-09-03 16:43:53.721[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m84[0m - [34m[1mReading input.[0m
[32m2024-09-03 16:43:53.722[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m86[0m - [34m[1mFitting model: clustering and metaclustering.[0m
[32m2024-09-03 16:43:57.030[0m | [34m[1mDEBUG   [0m | [36mflowsom.main[0m:[36m__init__[0m:[36m88[0m - [34m[1mUpdating derived values.[0m


## **4.** Plot embedding without and with FlowSOM tree overlay

In [20]:
# Colour palette; entry i is used for the i-th value of `pops` (the sorted unique labels).
palette = ['#726ca6','#8ff56b','#79d0f9','#fba56a','#eefc85','#aeaead','#6e85ff','#b97671','#dbbafd','#6bb277','#b7fbce','#6af1b0','#b26ae7','#fb6c98','#fdc4b8','#c1c86c','#699dc0','#d889c1','#a89ef4','#95d598','#757469','#78fefe','#f1f7c3','#b2ddfb','#cad9a3','#9b9f69','#aa7caa','#74c7c0','#face7e','#fe9cdb','#ce9f81','#bafc85','#fdd5f1','#e97f6c','#8d89d7','#839095','#d68ef9','#a5d1ca','#d1fcfe','#6eaaee','#f799a1','#d7b2c8','#70d87a','#99faa9','#6b70d4','#dbd8d2','#fb77c9','#88f4d2','#d17a98','#90b2d3','#aafef7','#debc9d','#d2e96a','#96c96a','#8c6ff5','#927286','#7cff8e','#80b19e','#adbcfa','#d86fdd','#aee276','#eee1a5','#feb6fe','#996dc9','#b699cc','#ad908d','#76946b','#d2fea8','#a7b883','#b881fe','#69e7da','#92e8f5','#b5b6d4','#dadcfa','#bf6cbe','#9199b6','#70d79f','#6afd6e','#dcb26c','#d69fae','#b5eab1','#fce96a','#6987aa','#8dadfc','#938afd','#c7ebe4','#de6a7f','#938669','#c4cce8','#e36daf','#e8f1e8','#86e1b6','#ff6b69','#ed9ffc','#87d7d6','#feb58d','#b96a93','#dcd189','#adc9a7']
mpl.rcParams['axes.linewidth'] = 0.1

pops = np.unique(labels)
s = .01 # scatter marker size
idcs_unass = np.where(labels=='unassigned')[0]

fsom_node_scale = 0.004 # node radius scale, relative to the smaller embedding range
fsom_edge_scale = 0.5   # base line width for tree edges and node outlines


def _scatter_populations(axis, alpha):
    """Scatter the ViVAE embedding on `axis`.

    Unassigned cells are drawn first in grey, followed by one scatter layer per
    labelled population, coloured by the population's position in `pops`.
    """
    axis.scatter(emb_vivae[idcs_unass,0], emb_vivae[idcs_unass,1], s=s, label='unassigned', c='grey', alpha=alpha, linewidth=.5)
    for i, pop in enumerate(pops):
        if pop == 'unassigned':
            continue
        idcs = np.where(labels==pop)[0]
        axis.scatter(emb_vivae[idcs,0], emb_vivae[idcs,1], s=s, label=pop, c=palette[i], alpha=alpha, linewidth=.5)


fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(6, 3), dpi=150, sharey=True)
fig.subplots_adjust(wspace=.1)

## Panel A: embedding only
_scatter_populations(ax[0], alpha=1.)
ax[0].axis('equal')
# The legend is created while only panel A holds labelled artists, so each
# population is listed exactly once.
fig.legend(bbox_to_anchor=(1.13, .92), fontsize=5, markerscale=50.)

## Panel B: embedding with FlowSOM tree overlay
# The scatter layers keep the default z-order, which is below the tree edges
# and nodes, so they can be added before the tree without changing draw order.
_scatter_populations(ax[1], alpha=.5)
# Tree nodes and pie wedges are drawn in data coordinates, so panel B needs
# equal axis scaling as well (panel A already has it); otherwise the nodes
# render as ellipses and the two panels can show different x-extents.
ax[1].axis('equal')

# Fetch the FlowSOM cluster- and cell-level data once instead of on every use
cluster_data = fsom.get_cluster_data()
cell_data = fsom.get_cell_data()

## Determine FlowSOM tree node sizes
cluster_sizes = cluster_data.obs['percentages']
cluster_empty = cluster_sizes==0.
ranges = np.ptp(emb_vivae, axis=0)
maxsize = np.min(ranges)*fsom_node_scale
node_sizes = np.sqrt(cluster_sizes/cluster_sizes.max()*maxsize)
# Empty clusters would get radius 0; give them a small fixed radius instead
node_sizes[cluster_empty] = min([0.05, node_sizes.max()])

## Get embedding of cluster centroids
centroids = cluster_data.obsm['codes']
layout = model_vivae.transform(centroids)

## Add FlowSOM tree edges
edge_list = cluster_data.uns['graph'].get_edgelist()
segments = np.asarray(
    [
        [(layout[src, 0], layout[src, 1]), (layout[dst, 0], layout[dst, 1])]
        for src, dst in edge_list
    ],
    dtype=np.float32,
)
mst = mc.LineCollection(segments)
mst.set_edgecolor('black')
mst.set_linewidth(fsom_edge_scale)
ax[1].add_collection(mst)

## Add FlowSOM tree nodes (grey if the cluster is empty, white otherwise)
nodes = [Circle((row[0], row[1]), node_sizes.iloc[i]) for i, row in enumerate(layout)]
node_patches = mc.PatchCollection(nodes)
node_patches.set_facecolor(['#C7C7C7' if tf else '#FFFFFF' for tf in cluster_empty])
node_patches.set_edgecolor('black')
node_patches.set_linewidth(fsom_edge_scale/1.5)
node_patches.set_zorder(3)
ax[1].add_collection(node_patches)

## Set up colour palette for nodes (same population-to-colour mapping as the scatter)
color_dict = dict(zip(pops, palette))
color_dict['unassigned'] = '#bfbfbf'

## Plot pie per node for labelled cell populations
cell_clusters = cell_data.obs['clustering'].to_numpy()
for cl in range(cell_data.uns['n_nodes']):
    node_cell_types = labels[cell_clusters == cl]
    node_cell_types = node_cell_types[node_cell_types != 'unassigned']
    if len(node_cell_types) == 0:
        continue
    # Convert per-population counts within the node into cumulative wedge angles (degrees)
    table = pd.crosstab(node_cell_types, columns='count')
    table['part'] = table['count']/table['count'].sum()*360
    angles = np.asarray(np.cumsum(table['part']))
    if 0 not in angles:
        angles = np.insert(angles, 0, 0)
    row = layout[cl, :]
    patches = fs.pl._plot_helper_functions.add_wedges(
        tuple(row), heights=np.repeat(node_sizes.iloc[cl], len(angles)), angles=angles
    )
    pie = mc.PatchCollection(patches)
    pie.set_facecolor([color_dict.get(key) for key in table.index.values])
    pie.set_edgecolor('black')
    pie.set_linewidth(fsom_edge_scale/4.)
    pie.set_zorder(3)
    ax[1].add_collection(pie)

## Final styling shared by both panels: no ticks or tick labels, square boxes
for axis in ax:
    axis.tick_params(axis='both', labelbottom=False, labelleft=False, bottom=False, left=False)
    axis.set_box_aspect(1)

ax[0].set_title('A', size=10, ha='left', x=-.065, y=.96, fontweight='bold')
ax[1].set_title('B', size=10, ha='left', x=-.065, y=.96, fontweight='bold')

fig.savefig('samusik_flowsom.png', bbox_inches='tight', dpi=300, transparent=True)
fig.savefig('samusik_flowsom.svg', bbox_inches='tight', transparent=True)