## Ovrlpy can help identify spurious spatial cell types

In [None]:
from pathlib import Path

import numpy as np
import pandas as pd
import polars as pl
import anndata as ad
import matplotlib.pyplot as plt

import ovrlpy

In [None]:
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.patches import Rectangle
from typing import Any
import matplotlib.lines as mlines

# Default scale-bar settings (presumably keyword arguments for ``ScaleBar``;
# not referenced elsewhere in the visible part of the notebook).
SCALEBAR_PARAMS: dict[str, Any] = dict(
    dx=1,
    units="um",
    length_fraction=0.1,
    location="lower right",
    pad=0.1,
    frameon=False,
)

def _plot_scalebar(ax, dx=1, units="um", fontsize=5, color="black", box_alpha=0, **kwargs):
    """Create a ``ScaleBar`` and attach it to ``ax``.

    ``dx`` and ``units`` are handed to the ``ScaleBar`` constructor together
    with any extra ``kwargs``. ``fontsize``, ``color`` and ``box_alpha`` are
    applied to the created artist afterwards. Returns ``None``.
    """
    bar = ScaleBar(dx, units=units, scale_loc="top", sep=1, **kwargs)

    # Cosmetic settings applied after construction.
    bar.set_font_properties({"size": fontsize})
    bar.linewidth = 0.3
    bar.color = color
    bar.box_alpha = box_alpha

    ax.add_artist(bar)

In [None]:
# Inches per centimetre: figure sizes below are written as ``n*CM``.
CM = 1/2.54

# Directory that receives every saved figure. It is created up front so the
# ``savefig`` calls in later cells do not fail when the folder is missing.
fig_dir = Path("./fig/")
fig_dir.mkdir(parents=True, exist_ok=True)

In [None]:
from matplotlib.colors import LinearSegmentedColormap

# Colour stops of the "BIH" colormap, listed from the low end (white) to the
# high end (dark purple). Repeated entries widen the corresponding band.
_BIH_COLORS_LOW_TO_HIGH = [
    "white",
    "white",
    "powderblue",
    "powderblue",
    "violet",
    "mediumvioletred",
    "#430541",
]

BIH_CMAP = LinearSegmentedColormap.from_list("BIH", _BIH_COLORS_LOW_TO_HIGH)

### data

#### Transcripts in the Tissue Section

In [None]:
# Folder holding the MERFISH inputs read below (barcodes, per-cell table, cell boundaries).
MERFISH_data_folder_path = Path("../data/mouse_hypothalamus/MERFISH/")

In [None]:
# Raw barcode column -> short column name used throughout the notebook.
_barcode_column_names = {
    "Centroid_X": "x",
    "Centroid_Y": "y",
    "Centroid_Z": "z",
    "Gene_name": "gene",
}
columns = list(_barcode_column_names)

signal_coordinate_df = pd.read_csv(
    MERFISH_data_folder_path / "merfish_barcodes_example.csv",
    usecols=columns,
).rename(columns=_barcode_column_names)

# Discard control barcodes (gene names containing "Blank" or "NegControl").
_is_control = signal_coordinate_df["gene"].str.contains("Blank|NegControl")
signal_coordinate_df = signal_coordinate_df.loc[~_is_control].copy()

signal_coordinate_df["gene"] = signal_coordinate_df["gene"].astype("category")

# Move the origin to the minimum x/y of the transcripts. The two offsets are
# reused below to shift the other tables into the same coordinate frame.
coordinate_x_m = signal_coordinate_df["x"].min()
coordinate_y_m = signal_coordinate_df["y"].min()
signal_coordinate_df["x"] -= coordinate_x_m
signal_coordinate_df["y"] -= coordinate_y_m

signal_coordinate_df = signal_coordinate_df.copy()

#### Results of BANKSY

In [None]:
# Folder holding the BANKSY clustering output read below.
banksy_folder_path = Path("../data/banksy_results/")

In [None]:
columns = ["Centroid_X", "Centroid_Y", "Bregma", "lam0.2"]

# Per-cell BANKSY cluster assignment; the "lam0.2" column holds the cluster id.
banksy_result = pd.read_csv(
    banksy_folder_path/"banksy_cluster.txt",
    usecols=columns,
    sep="\t"
).rename(
    columns={
        "Centroid_X": "x",
        "Centroid_Y": "y",
        "lam0.2": "banksy_cluster",
    }
)

# Keep only the section at Bregma -0.24. The explicit copy makes the result
# an independent frame, so the column assignments below neither trigger
# SettingWithCopyWarning nor depend on pandas' view/copy heuristics.
banksy_result = banksy_result[banksy_result["Bregma"] == -0.24].copy()

# Shift into the same coordinate frame as the transcripts.
banksy_result["x"] = banksy_result["x"] - coordinate_x_m
banksy_result["y"] = banksy_result["y"] - coordinate_y_m

In [None]:
# BANKSY cluster 8 is treated as MOD1 and cluster 7 as MOD2 throughout the notebook.
_banksy_cluster = banksy_result['banksy_cluster']
MOD1_df = banksy_result.loc[_banksy_cluster == 8]
MOD2_df = banksy_result.loc[_banksy_cluster == 7]

Differentially expressed genes identified by BANKSY:

In [None]:
# DE_genes_MOD2: 7
DE_genes_MOD2 = [
    'Mlc1', 'Dgkk', 'Cbln2', 'Syt4', 'Gad1',
    'Plin3', 'Gnrh1', 'Sln', 'Gjc3',
]
# DE_genes_MOD1: 8
DE_genes_MOD1 = ['Mbp', 'Lpar1', 'Trh', 'Ucn3', 'Cck']
# all differentially expressed genes (MOD2 markers first, then MOD1 markers)
DE_genes = [*DE_genes_MOD2, *DE_genes_MOD1]

In [None]:
# Transcripts of the MOD1 / MOD2 marker genes (independent copies of the matching rows).
_transcript_gene = signal_coordinate_df['gene']
MOD1_signals = signal_coordinate_df.loc[_transcript_gene.isin(DE_genes_MOD1)].copy()
MOD2_signals = signal_coordinate_df.loc[_transcript_gene.isin(DE_genes_MOD2)].copy()

#### Segmentation Dataset

In [None]:
merfish_data = pd.read_csv(MERFISH_data_folder_path / "merfish_all_cells.csv").rename(
    columns={"Centroid_X": "x", "Centroid_Y": "y"}
)

# Remove the 'Fos' column and all 'Blank_*' columns.
merfish_data = merfish_data.drop(columns=[col for col in merfish_data.columns if col == 'Fos' or col.startswith('Blank_')])

# Keep non-ambiguous cells of animal 1 in the Bregma -0.24 section. One
# combined mask plus an explicit copy gives an independent frame, so the
# in-place coordinate shift below does not operate on a chained slice
# (which would raise SettingWithCopyWarning).
_keep_cell = (
    (merfish_data["Cell_class"] != "Ambiguous")
    & (merfish_data["Animal_ID"] == 1)
    & (merfish_data["Bregma"] == -0.24)
)
merfish_data = merfish_data.loc[_keep_cell].copy()

# Shift into the same coordinate frame as the transcripts.
merfish_data["x"] -= coordinate_x_m
merfish_data["y"] -= coordinate_y_m

# Attach the BANKSY cluster of each cell. The join key is the shifted (x, y)
# centroid, so it relies on both tables carrying identical coordinate values.
merfish_data = merfish_data.merge(
    banksy_result[["x", "y", "banksy_cluster"]],
    on=["x", "y"],
    how="left"
)
merfish_data = merfish_data.rename(columns={"banksy_cluster": "banksy"})

# Collapse numbered sub-classes into one label per cell class.
cell_class_m = {
    'Astrocyte': 'Astrocyte',
    'Endothelial 1': 'Endothelial',
    'Endothelial 2': 'Endothelial',
    'Endothelial 3': 'Endothelial',
    'Ependymal': 'Ependymal',
    'Excitatory': 'Excitatory',
    'Inhibitory': 'Inhibitory',
    'Microglia': 'Microglia',
    'OD Immature 1': 'OD Immature',
    'OD Immature 2': 'OD Immature',
    'OD Mature 1': 'OD Mature',
    'OD Mature 2': 'OD Mature',
    'OD Mature 3': 'OD Mature',
    'OD Mature 4': 'OD Mature',
    'Pericytes': 'Pericytes'
}

# Classes missing from the mapping are labelled "Other".
merfish_data["Cell_class"] = merfish_data["Cell_class"].map(cell_class_m).fillna("Other")

merfish_data = merfish_data.sort_values(by="Cell_class").copy()

#### Cell boundaries

In [None]:
def _shifted_boundary(raw, offset):
    """Parse a ';'-separated coordinate string and subtract ``offset`` from each value.

    Lists are shifted as they are; any other value is returned unchanged.
    """
    coords = [float(token) for token in raw.split(";")] if isinstance(raw, str) else raw
    if isinstance(coords, list):
        return [value - offset for value in coords]
    return coords


boundaries_df = pd.read_csv(
    MERFISH_data_folder_path/'cellboundaries_example_animal.csv'
).dropna(subset=["boundaryX", "boundaryY"])

# Restrict to the cells kept in ``merfish_data`` and attach their centroid,
# BANKSY cluster and cell class.
cell_ids = merfish_data["Cell_ID"]
boundaries_df = boundaries_df[boundaries_df["feature_uID"].isin(cell_ids)]
boundaries_df = boundaries_df.merge(
    merfish_data[["Cell_ID", "x", "y", "banksy", "Cell_class"]],
    left_on="feature_uID",
    right_on="Cell_ID",
    how="inner",
).drop(columns=["Cell_ID"])

# Convert the outline strings to float lists in the shifted coordinate frame.
boundaries_df["boundaryX"] = boundaries_df["boundaryX"].apply(_shifted_boundary, args=(coordinate_x_m,))
boundaries_df["boundaryY"] = boundaries_df["boundaryY"].apply(_shifted_boundary, args=(coordinate_y_m,))

boundaries_df = boundaries_df.copy()

In [None]:
# Cell outlines of the two BANKSY MOD clusters (8 -> MOD1, 7 -> MOD2).
_boundary_cluster = boundaries_df['banksy']
MOD1_boundaries = boundaries_df.loc[_boundary_cluster == 8].copy()
MOD2_boundaries = boundaries_df.loc[_boundary_cluster == 7].copy()

#### Matched Single-Cell RNA Sequencing

In [None]:
from scipy import sparse
from scipy.io import mmread
from sklearn.preprocessing import MaxAbsScaler
from pandas.api.types import CategoricalDtype

def load_scRNA_data(mtx_path, barcodes_path, genes_path, meta_path, cell_class_filter, neuron_cluster=False):
    """Load the scRNA-seq counts and return a filtered, normalised AnnData.

    Parameters
    ----------
    mtx_path
        Matrix Market file with the counts. It is transposed on load, so the
        file is presumably stored as genes x cells.
    barcodes_path
        Tab-separated file without header; column 0 holds the cell names.
    genes_path
        Tab-separated file without header; column 1 holds the gene names.
    meta_path
        Excel sheet with the per-cell metadata ("Cell name", "Cell class ...").
    cell_class_filter
        Mapping ``raw cell class -> label``. Only cells whose raw class is a
        key are kept. The order of first appearance of the values defines the
        ordered categories of ``obs["Cell_class"]``. Several raw classes may
        share one label.
    neuron_cluster
        If true, ``obs`` also keeps the "Neuronal_cluster" column.

    Returns
    -------
    AnnData with cells in ``obs`` and genes in ``var``. ``X`` is scaled to
    10,000 counts per cell, log1p-transformed and then scaled with
    ``MaxAbsScaler``.
    """
    X = mmread(mtx_path).tocsr()
    cell_ids = pd.read_csv(barcodes_path, sep="\t", header=None)[0].values
    gene_names = pd.read_csv(genes_path, sep="\t", header=None)[1].values

    adata = ad.AnnData(X=X.T)
    adata.var_names = gene_names
    adata.obs_names = cell_ids
    adata.var_names_make_unique()

    meta = pd.read_excel(meta_path).rename(columns={
        "Cell name": "Cell_name",
        "Sex": "Sex",
        "Replicate number": "Rep",
        "Cell class (determined from clustering of all cells)": "Cell_class",
        "Non-neuronal cluster (determined from clustering of all cells)": "Non_neuronal_cluster",
        "Neuronal cluster (determined from clustering of inhibitory or excitatory neurons)": "Neuronal_cluster"
    })
    meta = meta.set_index("Cell_name")
    meta = meta.loc[meta["Cell_class"].isin(cell_class_filter.keys())]

    # Keep only cells that have metadata and a requested cell class.
    adata = adata[adata.obs_names.isin(meta.index)].copy()
    obs_columns = ["Cell_class", "Neuronal_cluster"] if neuron_cluster else ["Cell_class"]
    adata.obs = meta.loc[adata.obs_names, obs_columns].copy()
    adata.obs["Cell_class"] = adata.obs["Cell_class"].map(cell_class_filter)

    # De-duplicate while keeping order: CategoricalDtype rejects repeated
    # categories, which would happen as soon as two raw classes map to the
    # same label.
    cell_class_order = list(dict.fromkeys(cell_class_filter.values()))
    cat_dtype = CategoricalDtype(categories=cell_class_order, ordered=True)
    adata.obs["Cell_class"] = adata.obs["Cell_class"].astype(cat_dtype)

    # QC 1: drop cells where genes whose name starts with "mt" make up 20 % or
    # more of the counts (presumably the mitochondrial genes). The small
    # constant avoids division by zero for empty cells.
    mt_mask = adata.var_names.str.startswith("mt")
    mt_fraction = np.array(adata[:, mt_mask].X.sum(axis=1)).flatten() / (np.array(adata.X.sum(axis=1)).flatten() + 1e-6)
    adata = adata[mt_fraction < 0.2, :].copy()

    # QC 2: keep cells with more than 1000 non-zero genes.
    nonzero_counts = np.array((adata.X != 0).sum(axis=1)).flatten()
    adata = adata[nonzero_counts > 1000, :].copy()

    # Remove "Blank*" features.
    blank_mask = ~adata.var_names.str.startswith("Blank")
    adata = adata[:, blank_mask].copy()

    # Scale every cell to 10,000 total counts (row-wise via a diagonal matrix).
    sc_total = np.array(adata.X.sum(axis=1)).flatten() + 1e-6
    normalizer = sparse.diags(10_000 / sc_total)
    adata.X = normalizer.dot(adata.X)

    adata.X = adata.X.log1p()

    scaler = MaxAbsScaler()
    adata.X = scaler.fit_transform(adata.X)

    return adata

In [None]:
data_path = Path("../data/mouse_hypothalamus/SingleCell/")

# Raw scRNA-seq cell class -> label used in this notebook. Only the listed
# classes are loaded; the order of the entries defines the category order.
cell_class_dict = {
    'Astrocytes': 'Astrocytes',
    'Microglia': 'Microglia',
    'Macrophage': 'Macrophage',
    'Excitatory': 'Excitatory',
    'Inhibitory': 'Inhibitory',
    'Immature oligodendrocyte': 'OD immature',
    'Newly formed oligodendrocyte': 'OD newly formed',
    'Mature oligodendrocyte': 'OD mature',
    'Ependymal': 'Ependymal',
    'Endothelial': 'Endothelial',
    'Mural': 'Mural',
    'Fibroblast': 'Fibroblast',
}

adata = load_scRNA_data(
    mtx_path=data_path / "GSE113576_matrix.mtx",
    barcodes_path=data_path / "GSE113576_barcodes.tsv",
    genes_path=data_path / "GSE113576_genes.tsv",
    meta_path=data_path / "aau5324_Moffitt_Table-S1.xlsx",
    cell_class_filter=cell_class_dict,
)

In [None]:
# Order the cells by cell class (category order).
_cells_by_class = adata.obs.sort_values("Cell_class").index
adata = adata[_cells_by_class]

# Expression of the BANKSY marker genes: MOD1 markers first, then MOD2 markers.
sc_data = adata.to_df()
sc_DE_MOD1_df = sc_data[DE_genes_MOD1]
sc_DE_MOD2_df = sc_data[DE_genes_MOD2]
sc_DE = pd.concat([sc_DE_MOD1_df, sc_DE_MOD2_df], axis=1)

sc_cell_class = adata.obs['Cell_class']

### Ovrlpy pipeline

In [None]:
# Value passed as ``n_components`` to ``ovrlpy.Ovrlp`` in the next cell.
n_components = 15

In [None]:
# Build the ovrlpy analysis object from the transcript table
# (columns x, y, z and gene, shifted to a zero-based origin above).
dataset = ovrlpy.Ovrlp(
    signal_coordinate_df,
    n_components=n_components,
    n_workers=8,
)

# Run the analysis; its results are read from ``dataset`` in the next cell.
dataset.analyse()

In [None]:
# Results of ``dataset.analyse()`` used by the remaining cells.
signal_integrity = dataset.integrity_map  # shown below as "vertical signal integrity"
signal_strength = dataset.signal_map  # compared against a signal threshold below
pseudocells = dataset.pseudocells  # its ``.obsm`` entries are read in the ROI section

# Table of detected doublets; rows are later selected by position and their
# "x"/"y" columns give the centre of each region of interest.
doublets = dataset.detect_doublets(min_signal=3, integrity_sigma=1)

### cell type map
Cell types are colored according to the original publication ([Moffitt et al., 2018](https://www.science.org/doi/10.1126/science.aau5324)).

In [None]:
# One colour per cell class, sampled from 'tab20b'. ``plt.get_cmap`` is used
# because ``matplotlib.cm.get_cmap`` was deprecated in Matplotlib 3.7 and
# removed in 3.9, whereas the pyplot function is still available.
all_labels = sorted(merfish_data['Cell_class'].unique())
colormap = plt.get_cmap('tab20b', len(all_labels))
label_to_color = {label: colormap(i) for i, label in enumerate(all_labels)}
colors = merfish_data['Cell_class'].map(label_to_color)

fig, ax = plt.subplots(figsize=(6*CM, 6*CM), dpi=600)

# One dot per segmented cell at its centroid.
ax.scatter(
    merfish_data["x"], merfish_data["y"],
    s=2, c=colors, edgecolors='none'
)

ax.set(
    xticks=[], yticks=[],
    xlim=(-10, 1810), ylim=(-10, 1810),
    aspect='equal'
)

for spine in ax.spines.values():
    spine.set_linewidth(0.3)

plt.title('mouse hypothalamus', fontsize=6, fontweight='light')

# Legend built from proxy markers, one per cell class.
handles = [
    mlines.Line2D(
        [0], [0], marker='.', linestyle='None',
        color='none',
        markerfacecolor=label_to_color[label],
        markeredgecolor='none',
        markersize=4,
        label=label
    )
    for label in all_labels
]

ax.legend(
    handles=handles, fontsize=4, loc='upper center',
    frameon=False, bbox_to_anchor=(0.5, 0), ncol=3
)

plt.subplots_adjust(right=0.8)
plt.tight_layout()
fig.savefig(fig_dir / "CellTypes.pdf")
plt.show()

### MOD marker transcripts
`BANKSY` ([Singhal et al., 2024](https://www.nature.com/articles/s41588-024-01664-3)) detects two subtypes of mature oligodendrocytes (MOD1 and MOD2) and their subtype-specific markers.

#### MOD1 marker transcripts

In [None]:
fig, ax = plt.subplots(figsize=(6*CM, 6*CM), dpi=600)

# MOD1 marker transcripts as filled dots ...
ax.scatter(MOD1_signals["x"], MOD1_signals["y"], s=0.15, c='salmon', edgecolors='none')

# ... with the centroids of the BANKSY MOD1 cells drawn on top as hollow rings.
ax.scatter(
    MOD1_df["x"], MOD1_df["y"],
    s=0.5, facecolors='none', edgecolors="#080808", linewidths=0.13,
)

label_to_color = {
    'MOD1 Marker Transcripts': 'salmon',
    'MOD1 Cells': '#080808',
}

# Legend proxies: transcripts are filled without an edge, cells are rings.
handles = []
for label, color in label_to_color.items():
    is_transcript = 'Marker' in label
    handles.append(
        mlines.Line2D(
            [0], [0],
            marker='o',
            linestyle='None',
            color='none',
            markerfacecolor=color if is_transcript else 'none',
            markeredgecolor=color,
            markeredgewidth=0 if is_transcript else 0.3,
            markersize=2,
            label=label,
        )
    )

ax.legend(handles=handles, fontsize=4, loc='upper center', frameon=False,
          bbox_to_anchor=(0.5, 0), ncol=2)

ax.set_xticks([])
ax.set_yticks([])
ax.spines[["top", "right", "left", "bottom"]].set_linewidth(0.3)
ax.set_aspect('equal')
ax.set_xlim(-10, 1810)
ax.set_ylim(-10, 1810)
ax.set_title("MOD1 cells & marker transcripts", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir /"MOD1marker.pdf")
plt.show()

#### MOD2 marker transcripts

In [None]:
fig, ax = plt.subplots(figsize=(6*CM, 6*CM), dpi=600)

# MOD2 marker transcripts as filled dots ...
ax.scatter(MOD2_signals["x"], MOD2_signals["y"], s=0.15, c='lightblue', edgecolors='none')

# ... with the centroids of the BANKSY MOD2 cells drawn on top as hollow rings.
ax.scatter(
    MOD2_df["x"], MOD2_df["y"],
    s=0.5, facecolors='none', edgecolors="#080808", linewidths=0.13,
)

label_to_color = {
    'MOD2 Marker Transcripts': 'lightblue',
    'MOD2 Cells': '#080808',
}

# Legend proxies: transcripts are filled without an edge, cells are rings.
handles = []
for label, color in label_to_color.items():
    is_transcript = 'Marker' in label
    handles.append(
        mlines.Line2D(
            [0], [0],
            marker='o',
            linestyle='None',
            color='none',
            markerfacecolor=color if is_transcript else 'none',
            markeredgecolor=color,
            markeredgewidth=0 if is_transcript else 0.3,
            markersize=2,
            label=label,
        )
    )

ax.legend(handles=handles, fontsize=4, loc='upper center', frameon=False,
          bbox_to_anchor=(0.5, 0), ncol=2)

ax.set_xticks([])
ax.set_yticks([])
ax.spines[["top", "right", "left", "bottom"]].set_linewidth(0.3)
ax.set_xlim(-10, 1810)
ax.set_ylim(-10, 1810)
ax.set_aspect('equal')
ax.set_title("MOD2 cells & marker transcripts", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir /"MOD2marker.pdf")
plt.show()

### VSI distribution within MOD subtypes
VSI values of pixels within MOD1 and MOD2 cell boundaries.

In [None]:
from shapely.geometry import Polygon
from matplotlib.path import Path as mPath

def extract_cell_vsi(
    boundary_df,
    integrity,
    strength,
    integrity_size=1800
):
    """
    Copy the integrity and strength values that lie inside cell outlines.

    Parameters
    ----------
    boundary_df
        DataFrame whose 'boundaryX' / 'boundaryY' columns hold one polygon
        outline per row (sequences of x and y coordinates).
    integrity, strength
        2-D maps indexed as ``[y, x]``.
    integrity_size
        Side length of the square output arrays. Pass ``None`` to use the
        shape of ``integrity`` instead.

    Returns
    -------
    (cell_integrity, cell_strength)
        Arrays that equal the input maps at pixels covered by a valid outline
        and are zero everywhere else.

    Rows with fewer than three finite vertices, with an invalid polygon, or
    lying completely outside the maps are skipped.
    """
    out_shape = integrity.shape[:2] if integrity_size is None else (integrity_size, integrity_size)

    cell_integrity = np.zeros(out_shape)
    cell_strength = np.zeros(out_shape)

    # Largest index window that is valid for the outputs AND both input maps.
    # Clipping to the output size alone breaks when a map is smaller than it.
    row_limit = min(out_shape[0], integrity.shape[0], strength.shape[0])
    col_limit = min(out_shape[1], integrity.shape[1], strength.shape[1])

    for _, row in boundary_df.iterrows():
        x_coords = np.array(row['boundaryX'])
        y_coords = np.array(row['boundaryY'])

        valid_mask = ~np.isnan(x_coords) & ~np.isnan(y_coords)
        x_coords = x_coords[valid_mask]
        y_coords = y_coords[valid_mask]

        if len(x_coords) < 3:
            continue

        polygon = Polygon(zip(x_coords, y_coords))
        if not polygon.is_valid:
            continue

        # Integer bounding box of the outline, clipped to the usable window.
        x_min, x_max = int(np.floor(polygon.bounds[0])), int(np.ceil(polygon.bounds[2]))
        y_min, y_max = int(np.floor(polygon.bounds[1])), int(np.ceil(polygon.bounds[3]))

        x_min, x_max = max(0, x_min), min(col_limit, x_max)
        y_min, y_max = max(0, y_min), min(row_limit, y_max)

        # Nothing left after clipping: the cell lies outside the maps.
        if x_min >= x_max or y_min >= y_max:
            continue

        y_indices, x_indices = np.meshgrid(range(y_min, y_max), range(x_min, x_max), indexing='ij')
        points = np.column_stack([x_indices.ravel(), y_indices.ravel()])

        # Point-in-polygon test for every pixel of the bounding box; the
        # radius gives pixels on the outline some tolerance.
        path = mPath(np.column_stack((x_coords, y_coords)))
        epsilon = 0.5
        mask = path.contains_points(points, radius=epsilon)
        mask = mask.reshape(y_indices.shape)

        window = (slice(y_min, y_max), slice(x_min, x_max))

        # Overwrite only the pixels inside this outline; keep earlier cells.
        cell_integrity[window] = np.where(mask, integrity[window], cell_integrity[window])
        cell_strength[window] = np.where(mask, strength[window], cell_strength[window])

    return cell_integrity, cell_strength

In [None]:
# Integrity / strength maps restricted to the pixels inside MOD1 and MOD2 cell outlines.
MOD1_int, MOD1_str = extract_cell_vsi(MOD1_boundaries, signal_integrity, signal_strength)
MOD2_int, MOD2_str = extract_cell_vsi(MOD2_boundaries, signal_integrity, signal_strength)

In [None]:
def plot_histogram(ax, cell_integrity, cell_strength, signal_threshold, cmap, label, ylim=(1e-1,32), title=None):
    """Draw a density histogram of integrity values on ``ax``.

    Only pixels whose ``cell_strength`` exceeds ``signal_threshold`` are
    counted. The 50 bins cover [0, 1]; each bar is coloured with ``cmap`` at
    its relative position. The y axis is log2-scaled and limited to ``ylim``.
    ``label`` becomes the x-axis label and ``title`` the axes title.
    """
    strong_pixels = cell_strength > signal_threshold
    _, _, bars = ax.hist(
        cell_integrity[strong_pixels],
        bins=50,
        range=(0, 1),
        density=True,
        edgecolor='black',
        linewidth=0.3,
        alpha=0.8,
        rasterized=True
    )

    # Colour each bar by its position along the x axis.
    n_bars = len(bars)
    for position, bar in enumerate(bars):
        bar.set_facecolor(cmap(position / n_bars))

    # Limits first, then the log scale (same order as before).
    ax.set_xlim(0, 1)
    ax.set_ylim(ylim)
    ax.set_yscale('log', base=2)

    ax.set_title(title, fontsize=6)
    ax.set_xlabel(label, fontsize=5)
    ax.set_ylabel("Density", fontsize=5)

    ax.spines[["top", "right"]].set_visible(False)
    ax.spines[['left','bottom']].set_linewidth(0.3)
    for axis in (ax.xaxis, ax.yaxis):
        axis.set_tick_params(labelsize=5, width=0.3, length=1)

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(9*CM, 4*CM), dpi=600)

# VSI distribution of pixels inside MOD1 cells with signal strength above 3.
plot_histogram(
    ax,
    cell_integrity=MOD1_int,
    cell_strength=MOD1_str,
    signal_threshold=3,
    cmap=BIH_CMAP,
    label="Vertical Signal Integrity",
    ylim=(1e-1, 64),
    title="Vertical Signal Integrity within MOD1",
)
plt.savefig(fig_dir /"VSImod1.pdf")
plt.show()

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(9*CM, 4*CM), dpi=600)

# VSI distribution of pixels inside MOD2 cells with signal strength above 3.
plot_histogram(
    ax,
    cell_integrity=MOD2_int,
    cell_strength=MOD2_str,
    signal_threshold=3,
    cmap=BIH_CMAP,
    label="Vertical Signal Integrity",
    ylim=(1e-1, 64),
    title="Vertical Signal Integrity within MOD2",
)
plt.savefig(fig_dir /"VSImod2.pdf")
plt.show()

### Marker expression in non-oligodendrocyte cells (scRNA-seq)

In [None]:
# Marker expression plus cell class, without any oligodendrocyte class
# (labels starting with "OD").
noOD_sc_data = pd.concat([sc_DE, sc_cell_class], axis=1)
noOD_sc_data = noOD_sc_data[~noOD_sc_data['Cell_class'].str.startswith("OD")]

noOD_sc_cell_class = noOD_sc_data['Cell_class']
noOD_sc_data = noOD_sc_data.drop('Cell_class', axis=1)

# Group id per gene column of ``sc_DE``: 0 for the MOD1 markers, 1 for the
# MOD2 markers. It is derived from the marker lists so that it cannot drift
# out of sync with them (``sc_DE`` holds the MOD1 columns first, then MOD2).
Gene_Group = pd.DataFrame(
    [0] * len(DE_genes_MOD1) + [1] * len(DE_genes_MOD2),
    columns=['cluster'],
)

In [None]:
from scipy.cluster.hierarchy import linkage, leaves_list
import seaborn as sns

# Default colormap of ``plot_annotate_heatmap`` (reversed red-yellow-blue palette).
HEATMAP_CMAP = sns.color_palette("RdYlBu_r", as_cmap=True)

In [None]:
def plot_annotate_heatmap(cluster_data, cluster_labels, gene_groups=None, zscore=True, cmap=HEATMAP_CMAP,
                          cluster_text_y=-1.2, show_cluster=True,
                          show_cluster_lines=True, DE_g_line=True):
    """Plot a cells x genes expression heatmap grouped by cell class.

    Parameters
    ----------
    cluster_data
        DataFrame with one row per cell and one column per gene.
    cluster_labels
        Cell class per row of ``cluster_data``; rows are grouped by it.
    gene_groups
        Optional DataFrame with a 'cluster' column giving one group id per
        gene column (by position). Genes are ordered group by group and
        clustered within each group. If ``None`` all genes are clustered
        together.
    zscore
        If true, every gene is z-scored across cells and the colour range is
        [-3, 3]; otherwise raw values are shown with range [0, 5].
    cmap
        Colormap of the heatmap.
    cluster_text_y
        Horizontal position (x coordinate) of the cell-class labels.
    show_cluster, show_cluster_lines
        Toggle the cell-class labels / the dashed lines between classes.
    DE_g_line
        If true, draw a dashed vertical line between consecutive gene groups.
        Without ``gene_groups`` there is no group boundary and no line.

    Raises
    ------
    ValueError
        If ``gene_groups`` does not have one entry per plotted gene column.

    The figure is saved as ``fig_dir / "heatmap.pdf"`` and shown; nothing is
    returned.
    """
    cluster_data = cluster_data.copy()
    cluster_data['Cell_class'] = cluster_labels

    cluster_data = cluster_data.sort_values(by='Cell_class')
    cell_class_col = cluster_data['Cell_class']

    # Keep numeric columns without missing values and drop columns whose
    # values duplicate an earlier column.
    numeric_data = cluster_data.drop(columns=['Cell_class']).apply(pd.to_numeric, errors='coerce')
    numeric_data = numeric_data.dropna(axis=1).loc[:, ~numeric_data.T.duplicated()]

    cluster_data = pd.concat([cell_class_col, numeric_data], axis=1)
    expression_data = cluster_data.drop('Cell_class', axis=1)
    expression_data = expression_data.loc[:, ~expression_data.columns.duplicated()]

    cluster_labels_sorted = cluster_data['Cell_class'].values
    unique_labels = pd.Series(cluster_labels_sorted).unique()

    if zscore:
        expression_data = expression_data.apply(lambda x: (x - x.mean()) / x.std(), axis=0)
        # A gene with zero variance yields 0/0 = NaN, which makes the
        # hierarchical clustering below fail; show it as "no deviation".
        expression_data = expression_data.fillna(0.0)
        vmin, vmax = -3, 3
    else:
        vmin, vmax = 0, 5

    # Order the cells: classes stay contiguous, cells are clustered within a class.
    new_order = []
    for label in unique_labels:
        indices = np.where(cluster_labels_sorted == label)[0]
        subset = expression_data.iloc[indices, :]
        if subset.shape[0] > 1:
            linkage_matrix = linkage(subset, method='ward')
            sorted_indices = indices[leaves_list(linkage_matrix)]
        else:
            sorted_indices = indices
        new_order.extend(sorted_indices)

    reordered_expression_data = expression_data.iloc[new_order, :]
    reordered_cluster_labels = cluster_labels_sorted[new_order]

    # Order the genes and remember where one gene group ends and the next starts.
    group_boundaries = []
    if gene_groups is not None:
        group_of_gene = np.asarray(gene_groups['cluster'])
        if len(group_of_gene) != reordered_expression_data.shape[1]:
            raise ValueError(
                f"gene_groups has {len(group_of_gene)} entries but "
                f"{reordered_expression_data.shape[1]} gene columns are plotted"
            )
        new_gene_order = []
        for label in sorted(set(group_of_gene)):
            gene_indices = np.flatnonzero(group_of_gene == label)
            subset = reordered_expression_data.iloc[:, gene_indices]
            if subset.shape[1] > 1:
                linkage_matrix = linkage(subset.T, method='average')
                sorted_gene_indices = gene_indices[leaves_list(linkage_matrix)]
            else:
                sorted_gene_indices = gene_indices
            new_gene_order.extend(sorted_gene_indices)
            group_boundaries.append(len(new_gene_order))
        # The end of the last group is the right edge, not a separator.
        group_boundaries = group_boundaries[:-1]
    else:
        new_gene_order = leaves_list(linkage(reordered_expression_data.T, method='average'))

    reordered_expression_data = reordered_expression_data.iloc[:, new_gene_order]

    fig, ax = plt.subplots(figsize=(6*CM, 8*CM), dpi=600)
    img = sns.heatmap(
        reordered_expression_data,
        vmin=vmin, vmax=vmax,
        cmap=cmap,
        xticklabels=True,
        yticklabels=False,
        cbar=True,
        ax=ax,
        cbar_kws={"shrink": 0.5}
    )

    ax.set_xticklabels(ax.get_xticklabels(), fontsize=5, rotation=90)
    ax.xaxis.set_tick_params(width=0.3, length=2)
    ax.set_ylabel("")
    ax.set_xlabel("")
    for spine in ax.spines.values():
        spine.set_linewidth(0.3)

    cbar = img.collections[0].colorbar
    cbar.ax.tick_params(labelsize=4, width=0.2, length=0.7)
    for spine in cbar.ax.spines.values():
        spine.set_linewidth(0.3)

    # Heatmap row i spans [i, i + 1], so a class covering rows start..last
    # occupies [start, last + 1].
    cluster_boundaries = []
    for label in unique_labels:
        indices = np.where(reordered_cluster_labels == label)[0]
        if len(indices) == 0:
            continue
        start, end = indices[0], indices[-1] + 1
        y_pos = (start + end) / 2
        if show_cluster:
            ax.text(cluster_text_y, y_pos, str(label), ha='center', va='center', rotation=0, fontsize=4)
        cluster_boundaries.append(end)

    if show_cluster_lines:
        for boundary in cluster_boundaries[:-1]:
            ax.axhline(y=boundary, color='black', linestyle='--', linewidth=0.3)

    if DE_g_line and group_boundaries:
        y_low, y_high = ax.get_ylim()
        ax.vlines(x=group_boundaries, ymin=y_low, ymax=y_high, color='black', linestyle='--', linewidth=0.3)

    ax.set_title("heatmap for markers in scRNA-seq data", fontsize=6)
    fig.savefig(fig_dir /"heatmap.pdf")
    plt.show()


In [None]:
# Marker expression in all non-oligodendrocyte scRNA-seq cells, genes grouped by MOD1 / MOD2 markers.
plot_annotate_heatmap(cluster_data = noOD_sc_data, cluster_labels=noOD_sc_cell_class, gene_groups=Gene_Group, cluster_text_y=-2.3)

### Overlapping ROIs
Examples of MOD2 cells spatially overlapping with annotated excitatory and inhibitory neurons  

In [None]:
# Side length of the square region of interest drawn around a doublet.
windowsize = 60
# NOTE: despite the similar name this is the HALF side length; it is used as
# the +/- offset around a doublet centre.
window_size = windowsize / 2

# Signal strength that scales the opacity of the VSI maps below.
signal_threshold=3.0
# Shared scatter style of the top / bottom ROI plots.
roi_scatter_kwargs = dict(marker=".", alpha=0.8, s=5)

# Per-pseudocell arrays read from ``pseudocells.obsm``
# (NOTE(review): presumably filled by ``dataset.analyse()`` - confirm).
embedding = pseudocells.obsm["2D_UMAP"]
RGB = pseudocells.obsm["RGB"]
spatial = pseudocells.obsm["spatial"]

#### UMAP

In [None]:
fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)

# 2-D UMAP embedding of the pseudocells, coloured with their RGB values.
umap_x, umap_y = embedding[:, 0], embedding[:, 1]
ax.scatter(umap_x, umap_y, c=RGB, alpha=0.5, marker=".", edgecolors='none', s=2)
ax.set(aspect='equal', adjustable='box')

ax.set_xlim(-8, 17)
ax.set_ylim(-8, 17)

for spine in ax.spines.values():
    spine.set_linewidth(0.3)

ax.set_xticks([])
ax.set_yticks([])
ax.set_title("UMAP", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir /"UMAP.pdf")
plt.show()


#### Cell Map

In [None]:
fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)

# Pseudocells at their spatial position, coloured with their RGB values.
ax.scatter(
    spatial[:, 0], spatial[:, 1],
    c=RGB, alpha=0.8, marker=".", edgecolors='none', s=2,
)
ax.set(aspect="equal")

# Outline the two regions of interest (doublets 49 and 158) shown in detail below.
for doublet_index in (49, 158):
    x_center, y_center = doublets["x", "y"].row(doublet_index)
    x0 = x_center - window_size
    y0 = y_center - window_size
    square = Rectangle(
        (x0, y0),
        width=windowsize,
        height=windowsize,
        linewidth=0.4,
        fill=False,
        edgecolor="k",
    )
    ax.add_patch(square)

ax.set_xticks([])
ax.set_yticks([])
for spine in ax.spines.values():
    spine.set_linewidth(0.3)
ax.set_xlim(-30, 1830)
ax.set_ylim(-30, 1830)
ax.set_aspect('equal')
ax.set_title("cell type", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir /"CellTypeOvrlpy.pdf")
plt.show()


#### Doublet1: Inhibitory

Doublet index `doublet_to_show = 49`, located at `x, y = 292, 924`.

In [None]:
# Centre of the selected doublet and the square ROI around it:
# ((x_min, x_max), (y_min, y_max)).
doublet_to_show = 49
x, y = doublets["x", "y"].row(doublet_to_show)
roi = tuple((center - window_size, center + window_size) for center in (x, y))

In [None]:
# Transcripts inside the square window around the doublet, sorted by z.
ROI_in  = dataset.transcripts.filter(
            pl.col("x").is_between(x - window_size, x + window_size)
            & pl.col("y").is_between(y - window_size, y + window_size)
        ).clone().sort("z")

# Second return value is stored as one "RGB" entry per transcript; the first is unused.
# NOTE(review): presumably the colours come from the fitted embedding - confirm in the ovrlpy docs.
_, embedding_color = dataset.transform_transcripts(ROI_in)
ROI_in = ROI_in.with_columns(RGB=embedding_color)

In [None]:
# VSI map
fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)

ax.set_facecolor("black")
img = ax.imshow(
    signal_integrity,
    cmap=BIH_CMAP,
    alpha=((signal_strength / signal_threshold).clip(0, 1) ** 2),
    vmin=0,
    vmax=1,
)
ax.invert_yaxis()
ax.set(xlim=roi[0], ylim=roi[1])

cbar = fig.colorbar(img, ax=ax, shrink=0.8)
cbar.ax.tick_params(labelsize=6, width=0.3)
for spine in cbar.ax.spines.values():
    spine.set_linewidth(0.3)

_plot_scalebar(ax, dx=1, units="um", location="lower left", length_fraction=0.4, fontsize=5, color="white")

ax.set(xticks=[], yticks=[])
ax.set_title("vertical signal integrity", fontsize=6)
ax.set_aspect('equal')

plt.savefig(fig_dir / "INvsi.pdf")
plt.show()

In [None]:
# Transcripts above the local z centre.
roi_top = ROI_in.filter(pl.col("z") > pl.col("z_center"))
(x_min, x_max), (y_min, y_max) = roi

fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)
ax.scatter(
    roi_top["x"],
    roi_top["y"],
    c=roi_top["RGB"].to_numpy(),
    edgecolors='none',
    **roi_scatter_kwargs,
)
ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max))

# Cells whose centroid lies inside the ROI (bounds inclusive).
_centroid_in_roi = boundaries_df['x'].between(x_min, x_max) & boundaries_df['y'].between(y_min, y_max)
filtered_df = boundaries_df[_centroid_in_roi]

ax.spines[["top", "right"]].set_visible(True)
for outline_x, outline_y in zip(filtered_df['boundaryX'], filtered_df['boundaryY']):
    ax.plot(outline_x, outline_y, c='#2C2C2C', linewidth=0.7)

ax.set_xticks([])
ax.set_yticks([])
ax.set_aspect('equal')

for spine in ax.spines.values():
    spine.set_linewidth(0.3)

ax.set_title("top", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir / "INtop.pdf")
plt.show()

In [None]:
# Transcripts below the local z centre, in reversed row order.
roi_bottom = ROI_in.filter(pl.col("z") < pl.col("z_center"))[::-1]
(x_min, x_max), (y_min, y_max) = roi

fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)
ax.scatter(
    roi_bottom["x"],
    roi_bottom["y"],
    c=roi_bottom["RGB"].to_numpy(),
    edgecolors='none',
    **roi_scatter_kwargs,
)
ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max))

# Cells whose centroid lies inside the ROI (bounds inclusive).
_centroid_in_roi = boundaries_df['x'].between(x_min, x_max) & boundaries_df['y'].between(y_min, y_max)
filtered_df = boundaries_df[_centroid_in_roi]

for outline_x, outline_y in zip(filtered_df['boundaryX'], filtered_df['boundaryY']):
    ax.plot(outline_x, outline_y, c='#2C2C2C', linewidth=0.7)

ax.set_xticks([])
ax.set_yticks([])
ax.set_aspect('equal')

for spine in ax.spines.values():
    spine.set_linewidth(0.3)

ax.set_title("bottom", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir /"INbottom.pdf")
plt.show()


#### Doublet2: Excitatory

Doublet index `doublet_to_show = 158`.

In [None]:
# Centre of the selected doublet and the square ROI around it:
# ((x_min, x_max), (y_min, y_max)).
doublet_to_show = 158
x, y = doublets["x", "y"].row(doublet_to_show)
roi = tuple((center - window_size, center + window_size) for center in (x, y))

In [None]:
# Transcripts inside the square window around the doublet, sorted by z.
ROI_ex  = dataset.transcripts.filter(
            pl.col("x").is_between(x - window_size, x + window_size)
            & pl.col("y").is_between(y - window_size, y + window_size)
        ).clone().sort("z")

# Second return value is stored as one "RGB" entry per transcript; the first is unused.
# NOTE(review): presumably the colours come from the fitted embedding - confirm in the ovrlpy docs.
_, embedding_color = dataset.transform_transcripts(ROI_ex)
ROI_ex = ROI_ex.with_columns(RGB=embedding_color)

In [None]:
# VSI map
fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)

ax.set_facecolor("black")
img = ax.imshow(
    signal_integrity,
    cmap=BIH_CMAP,
    alpha=((signal_strength / signal_threshold).clip(0, 1) ** 2),
    vmin=0,
    vmax=1,
)
ax.invert_yaxis()
ax.set(xlim=roi[0], ylim=roi[1])

cbar = fig.colorbar(img, ax=ax, shrink=0.8)
cbar.ax.tick_params(labelsize=6, width=0.3)
for spine in cbar.ax.spines.values():
    spine.set_linewidth(0.3)

ax.set(xticks=[], yticks=[])
ax.set_title("vertical signal integrity", fontsize=6)
ax.set_aspect('equal')

plt.savefig(fig_dir / "EXvsi.pdf")
plt.show()

In [None]:
# Transcripts above the local z centre.
roi_top = ROI_ex.filter(pl.col("z") > pl.col("z_center"))
(x_min, x_max), (y_min, y_max) = roi

fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)
ax.scatter(
    roi_top["x"],
    roi_top["y"],
    c=roi_top["RGB"].to_numpy(),
    edgecolors='none',
    rasterized=True,
    **roi_scatter_kwargs,
)

# Cells whose centroid lies inside the ROI (bounds inclusive).
_centroid_in_roi = boundaries_df['x'].between(x_min, x_max) & boundaries_df['y'].between(y_min, y_max)
filtered_df = boundaries_df[_centroid_in_roi]

ax.spines[["top", "right"]].set_visible(True)
for outline_x, outline_y in zip(filtered_df['boundaryX'], filtered_df['boundaryY']):
    ax.plot(outline_x, outline_y, c='#2C2C2C', linewidth=0.7)

# Limits are fixed after plotting so the outlines cannot change them.
ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max))
ax.set(xticks=[], yticks=[])
ax.set_aspect('equal')

for spine in ax.spines.values():
    spine.set_linewidth(0.3)

ax.set_title("top", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir / "EXtop.pdf")
plt.show()

In [None]:
# Transcripts below the local z centre, in reversed row order.
roi_bottom = ROI_ex.filter(pl.col("z") < pl.col("z_center"))[::-1]
(x_min, x_max), (y_min, y_max) = roi

fig, ax = plt.subplots(figsize=(3*CM, 3*CM), dpi=600)
ax.scatter(
    roi_bottom["x"],
    roi_bottom["y"],
    c=roi_bottom["RGB"].to_numpy(),
    edgecolors='none',
    **roi_scatter_kwargs,
)

# Cells whose centroid lies inside the ROI (bounds inclusive).
_centroid_in_roi = boundaries_df['x'].between(x_min, x_max) & boundaries_df['y'].between(y_min, y_max)
filtered_df = boundaries_df[_centroid_in_roi]

ax.spines[["top", "right"]].set_visible(True)
for outline_x, outline_y in zip(filtered_df['boundaryX'], filtered_df['boundaryY']):
    ax.plot(outline_x, outline_y, c='#2C2C2C', linewidth=0.7)

# Limits are fixed after plotting so the outlines cannot change them.
ax.set(xlim=(x_min, x_max), ylim=(y_min, y_max))
ax.set(xticks=[], yticks=[])
ax.set_aspect('equal')

for spine in ax.spines.values():
    spine.set_linewidth(0.3)

ax.set_title("bottom", fontsize=6)
plt.tight_layout()
plt.savefig(fig_dir / "EXbottom.pdf")
plt.show()