In [None]:
import SpatialDE

import numpy as np
import scipy
import pandas as pd
import scanpy as sc
import anndata as ad

from tqdm.auto import trange, tqdm

from pathlib import Path

# Figure export configuration.
DPI = 300  # resolution passed to scanpy as dpi_save
# NOTE(review): FONTSIZE is not referenced in the visible cells; the trailing
# "42" looks like an alternative value that was tried — confirm before removing.
FONTSIZE = 20  # 42
sc.settings.set_figure_params(
    scanpy=True,
    dpi=100,
    dpi_save=DPI,
    vector_friendly=True,
    transparent=True,
)
from matplotlib import rcParams

# Embed fonts as Type 42 (TrueType) in PDF output.
rcParams["pdf.fonttype"] = 42

Use SpatialDE2 together with cell-type count estimates obtained from computational deconvolution workflows, which leverage reference scRNA-seq datasets to estimate cell-type abundance.

In [None]:
# Sample identifier; it is interpolated into both the cell2location input path
# and the results directory used below.
SAMPLE_NAME = "concat_withWu2022"

In [None]:
# Results directory for this sample (site-specific absolute path: change it
# when running on another machine).
results_dir = f"/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cell2loc_spatialde2/{SAMPLE_NAME}"
DIR2SAVE = Path(results_dir)
# Create the directory, including missing parents; no error if it already exists.
DIR2SAVE.mkdir(exist_ok=True, parents=True)

In [None]:
# Figures live in a "figures" sub-directory of the results directory.
FIG2SAVE = DIR2SAVE / "figures/"
FIG2SAVE.mkdir(exist_ok=True, parents=True)
# Point scanpy's global figure directory at it so that all saved plots land there.
sc.settings.figdir = FIG2SAVE

### Load cell2location output

In [None]:
# Spatial AnnData written by the cell2location mapping run for this sample
# (site-specific absolute path).
cell2loc_h5ad = f"/data/BCI-CRC/nasrine/data/CRC/spatial/CRC_LM_VISIUM/CRC_LM_VISIUM_04_08_09_11/cell2location/{SAMPLE_NAME}/cell2location_map-no_cycling_TME/sp.h5ad"
cell2loc_data = sc.read_h5ad(cell2loc_h5ad)

In [None]:
# Display the summary of the object loaded from the cell2location output file.
cell2loc_data

### Add mRNA abundance as a DataFrame to `adata.obsm`

We use the absolute amount of mRNA contributed by each cell type to each spot. We leverage the 5th percentile (5% quantile) of the posterior distribution of this parameter (mRNA counts), which represents the number of mRNA molecules confidently assigned to each cell type.

In [None]:
# Wrap the q05 posterior mRNA abundance stored under .uns["mod"] in a DataFrame
# indexed by the observation names, with one prefixed column per factor name,
# and store it in .obsm.
mod_results = cell2loc_data.uns["mod"]
cell2loc_data.obsm["q05_mRNA_abundance_u_sf"] = pd.DataFrame(
    data=mod_results["post_sample_q05"]["u_sf_mRNA_factors"],
    columns=[
        f"q05_mRNA_abundance_u_sf_{name}" for name in mod_results["factor_names"]
    ],
    index=cell2loc_data.obs_names,
)

In [None]:
# Inspect the q05 mRNA abundance table stored in .obsm.
cell2loc_data.obsm["q05_mRNA_abundance_u_sf"]

## Prepare input to SpatialDE2 using cell type counts provided by cell2location
#### Create an AnnData object from the mRNA abundance

mRNA abundance is scaled by the total RNA content of each cell type, which is computed from the reference cell-type signatures provided to the model and scaled by the difference between technologies.

* add the mRNA abundance as `X` (rounded to integers, because the model needs counts)
* add the spot barcodes as the `.obs` index
* add the cell type names as the `.var` index
* copy the remaining annotations (`.uns`, `.obsm`) from the Visium slide

In [None]:
# Build the SpatialDE2 input: an AnnData whose "genes" are the cell2location
# factors and whose counts are the integer-rounded q05 mRNA abundances.
factor_names = cell2loc_data.uns["mod"]["factor_names"]

# Count matrix for X, taken from the table stored in .obsm.
abundance_counts = (
    cell2loc_data.obsm["q05_mRNA_abundance_u_sf"].to_numpy().round().astype(np.int32)
)

# The same rounded abundances are also exposed as .obs columns, one per factor.
obs_counts = pd.DataFrame(
    data=cell2loc_data.uns["mod"]["post_sample_q05"]["u_sf_mRNA_factors"]
    .round()
    .astype(np.int32),
    columns=[f"{name}" for name in factor_names],
    index=cell2loc_data.obs_names,
)

cell2loc_input = ad.AnnData(
    X=abundance_counts,
    obs=obs_counts,
    var=pd.DataFrame(index=factor_names),
    # .uns and .obsm are passed through from the loaded object.
    uns=cell2loc_data.uns,
    obsm=cell2loc_data.obsm,
)

In [None]:
# Inspect .obs: the integer-rounded abundance columns, one per factor name.
cell2loc_input.obs

### Tissue segmentation

*Tissue segmentation aims to assign a cluster label to each location based on its gene expression profile and the identity of its neighboring locations, with the underlying assumption that neighboring locations likely have the same label, i.e. the segmentation should be spatially smooth.*

In [None]:
# Candidate smoothness factors to sweep over in the segmentation loop.
smoothness_param = [
    0.1, 0.2, 0.5, 0.7,
    1, 1.2, 1.5,
    2, 2.2, 2.3, 2.5,
    3,
]

In [None]:
# Sweep the candidate smoothness factors and print, for each one, the number of
# spots per segmentation label.
for s in smoothness_param:
    print(f"-- Segmentation with s={s}")
    segm_params = SpatialDE.TissueSegmentationParameters(smoothness_factor=s)
    # The generator is re-created with the same seed on every iteration.
    c2l_segm, _ = SpatialDE.tissue_segmentation(
        adata=cell2loc_input,
        params=segm_params,
        rng=np.random.default_rng(seed=42),
    )

    # Optional visual check:
    # sc.pl.spatial(cell2loc_input, color=f"segmentation_labels")

    # NOTE(review): labels are read back from .obs after each call, so they are
    # presumably overwritten in place on every iteration — confirm.
    print(cell2loc_input.obs["segmentation_labels"].value_counts())

### Choose optimal parameter

In [None]:
# Smoothness factor used for the final segmentation and in the output file name.
# NOTE(review): the trailing range presumably marks the s values (1.2 to 2.3)
# judged acceptable in the sweep — confirm.
optimal_s = 1.2 # from 1.2 to 2.3

In [None]:
# Final segmentation with the chosen smoothness factor, using the same seed as
# the parameter sweep.
final_segm_params = SpatialDE.TissueSegmentationParameters(
    smoothness_factor=optimal_s
)
c2l_segm, _ = SpatialDE.tissue_segmentation(
    adata=cell2loc_input,
    params=final_segm_params,
    rng=np.random.default_rng(seed=42),
)

#### Number of spots per region

In [None]:
# Number of spots assigned to each segmentation label.
cell2loc_input.obs["segmentation_labels"].value_counts()

### Save file

In [None]:
# Persist the segmented object; the file name records the smoothness factor used.
segmentation_h5ad = DIR2SAVE / f"sp_segmentation_smoothness{optimal_s}.h5ad"
cell2loc_input.write(segmentation_h5ad)

In [None]:
# Echo the smoothness factor that was used for the saved segmentation.
optimal_s