# Analysis of _multimodal mouse liver regeneration atlas_

We apply SiFT to the mouse data of the pre-processed _Multimodal liver regeneration atlas_ [[1]](https://doi.org/10.1101/2023.02.24.529873).

This notebook uses the AnnData objects created in `1_spatialregenration_liver-preprocessing.ipynb`.

[[1] Matchett, Kylie P., John W. Wilson-Kanamori, Jordan R. Portman, Andreas Kapourani, Frederic Fercoq, Stephanie May, John BG Mackey et al. "Multimodal decoding of human liver regeneration." bioRxiv (2023): 2023-02.](https://doi.org/10.1101/2023.02.24.529873)


## Import packages

In [1]:
# IPython magics: enable the autoreload extension in mode 2 so that edited
# modules are re-imported before each cell is executed.
%load_ext autoreload
%autoreload 2

In [2]:
import os 
import sys

import pandas as pd
import numpy as np
import pickle as pkl
import scanpy as sc
import squidpy as sq
import decoupler as dc
import scFates as scf
from dython.nominal import associations
import warnings
import scipy

import matplotlib.pyplot as plt
from matplotlib import rcParams, cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
import seaborn as sns

import sift

In [3]:
# Silence UserWarning and FutureWarning messages for the rest of the session.
for _category in (UserWarning, FutureWarning):
    warnings.simplefilter("ignore", _category)

In [4]:
# Alternative kept for reference: import the locations from a project-level module.
# sys.path.append("../../")
# from paths import DATA_DIR, FIG_DIR

# Absolute input/output locations. Both keep their trailing slash because the
# cells below build file names by plain string concatenation.
_PROJECT_DIR = "/cs/labs/mornitzan/zoe.piran/research/projects/SiFT_analysis/spatial-regenration_liver/"
DATA_DIR = _PROJECT_DIR + "data/"
FIG_DIR = _PROJECT_DIR + "figures/"

## Helper functions

In [5]:
def process_adata(
    adata,
    hvg=False,
    n_top_genes = 2000,
    use_rep = "X_pca",
    key = "sample",
    pseudocount = "layer",
    resolution=1
):
    """Recompute PCA, neighbours, UMAP and Leiden clusters on ``adata`` in place.

    Stale embeddings, clustering metadata and ``*score*`` / ``*leiden*``
    columns are removed first so that nothing from a previous run survives.

    Parameters
    ----------
    adata
        Annotated data matrix; modified in place.
    hvg
        If true, subset ``adata`` to highly variable genes computed on the
        ``"counts"`` layer with the ``seurat_v3`` flavor.
    n_top_genes
        Number of highly variable genes to keep when ``hvg`` is true.
        (Previously this argument was ignored and 3000 genes were always used.)
    use_rep
        Key in ``adata.obsm`` used to build the neighbour graph. Passing
        ``"X_pca_harmony"`` additionally runs Harmony integration on ``X_pca``
        with ``key`` as the batch variable.
    key
        Categorical column of ``adata.obs`` identifying the batch; used for
        Harmony and for the per-batch shift described below.
    pseudocount
        ``"layer"``: store a copy of ``.X`` in ``adata.layers["pseudocount"]``
        in which the minimum of each batch has been subtracted.
        Any other truthy value: subtract the per-batch minimum from ``.X``
        itself. Falsy: do nothing.
    resolution
        Resolution passed to ``sc.tl.leiden``.

    Returns
    -------
    None
    """
    # Drop results of earlier runs so they cannot be mistaken for fresh ones.
    for key_ in ["X_pca", "X_umap"]:
        if key_ in adata.obsm:
            del adata.obsm[key_]
    for key_ in ["leiden", "neighbors", "pca", "umap"]:
        if key_ in adata.uns:
            del adata.uns[key_]

    # Collect the column names first: a column matching both patterns must be
    # deleted only once (two separate `del` statements raised KeyError).
    stale_columns = [
        col for col in adata.obs.columns if "score" in col or "leiden" in col
    ]
    for col in stale_columns:
        del adata.obs[col]

    if hvg:
        print("hvg")
        if "hvg" in adata.uns:
            del adata.uns["hvg"]
        sc.pp.highly_variable_genes(
            adata,
            n_top_genes=n_top_genes,
            subset=True,
            layer="counts",
            flavor="seurat_v3",
        )

    print("pca")
    sc.tl.pca(adata)

    if use_rep == "X_pca_harmony":
        print("pca_harmony")
        sc.external.pp.harmony_integrate(adata, key, basis="X_pca", adjusted_basis="X_pca_harmony")

    print("nns")
    sc.pp.neighbors(adata, use_rep=use_rep)

    print("umap")
    sc.tl.umap(adata)

    print("leiden")
    sc.tl.leiden(adata, resolution = resolution)

    if pseudocount == "layer":
        print("add pseudocount layer")
        adata.layers["pseudocount"] = adata.X.copy()
        for batch in adata.obs[key].cat.categories:
            in_batch = adata.obs[key].isin([batch])
            # The minimum is always taken from the untouched `.X`.
            adata.layers["pseudocount"][in_batch, :] -= adata.X[in_batch].min()
    elif pseudocount:
        print("add pseudocount to `.X`")
        for batch in adata.obs[key].cat.categories:
            in_batch = adata.obs[key].isin([batch])
            adata.X[in_batch, :] -= adata.X[in_batch].min()
        

In [6]:
def rank_genes_groups(
    adata,
    groups=None,
    n_genes = 20,
    gene_symbols = None,
    bold_genes = None,
    key = 'rank_genes_groups',
    fontsize = 8,
    ncols = 4,
    sharey = True,
    title = None,
    show = None,
    ax = None,
    **kwds,
):
    """\
    Plot the top-ranked genes of every group, one panel per group.

    Modelled on the gene-ranking plot of scanpy, with the addition that
    selected gene names can be printed in bold. A new figure is always
    created; nothing is shown, saved or returned.

    Parameters
    ----------
    adata
        Annotated data matrix. ``adata.uns[key]`` must provide the ``names``,
        ``scores`` and ``params`` entries read below.
    groups
        The groups for which to show the gene ranking. Defaults to all groups
        found in ``adata.uns[key]['names']``.
    n_genes
        Number of genes to show (at least 1).
    gene_symbols
        Key for field in `.var` that stores gene symbols if you do not want to
        use `.var_names`.
    bold_genes
        Gene names to print in bold. Defaults to no genes.
    key
        Key in ``adata.uns`` holding the ranking to plot.
    fontsize
        Fontsize for gene names.
    ncols
        Number of panels shown per row. ``n_panels_per_row`` in ``kwds`` takes
        precedence when given.
    sharey
        Controls if the y-axis of each panels should be shared. But passing
        `sharey=False`, each panel has its own y-axis range.
    title, show, ax
        Accepted for signature compatibility; currently ignored.
    **kwds
        Only ``n_panels_per_row`` is read.

    Raises
    ------
    NotImplementedError
        If ``n_genes`` is smaller than 1.
    """
    if bold_genes is None:
        bold_genes = []
    n_panels_per_row = kwds.get('n_panels_per_row', ncols)
    if n_genes < 1:
        raise NotImplementedError(
            "Specifying a negative number for n_genes has not been implemented for "
            f"this plot. Received n_genes={n_genes}."
        )

    reference = str(adata.uns[key]['params']['reference'])
    group_names = adata.uns[key]['names'].dtype.names if groups is None else groups
    # one panel for each group
    # set up the figure
    n_panels_x = min(n_panels_per_row, len(group_names))
    n_panels_y = np.ceil(len(group_names) / n_panels_x).astype(int)

    from matplotlib import gridspec

    fig = plt.figure(
        figsize=(
            n_panels_x * rcParams['figure.figsize'][0],
            n_panels_y * rcParams['figure.figsize'][1],
        )
    )
    gs = gridspec.GridSpec(nrows=n_panels_y, ncols=n_panels_x, wspace=0.22, hspace=0.3)

    # Text options shared by every label; bold labels only add `weight`.
    label_kwargs = dict(
        rotation='vertical',
        verticalalignment='bottom',
        horizontalalignment='center',
        fontsize=fontsize,
    )

    ax0 = None
    # `np.inf` instead of the `np.Inf` alias, which NumPy 2.0 removed.
    ymin = np.inf
    ymax = -np.inf
    for count, group_name in enumerate(group_names):
        gene_names = adata.uns[key]['names'][group_name][:n_genes]
        scores = adata.uns[key]['scores'][group_name][:n_genes]

        # Setting up axis, calculating y bounds
        if sharey:
            # Track the global range; the limits are applied once after the loop.
            ymin = min(ymin, np.min(scores))
            ymax = max(ymax, np.max(scores))

            if ax0 is None:
                ax = fig.add_subplot(gs[count])
                ax0 = ax
            else:
                ax = fig.add_subplot(gs[count], sharey=ax0)
        else:
            ymin = np.min(scores)
            ymax = np.max(scores)
            # Head-room so the vertical gene labels fit inside the panel.
            ymax += 0.3 * (ymax - ymin)

            ax = fig.add_subplot(gs[count])
            ax.set_ylim(ymin, ymax)

        ax.set_xlim(-0.9, n_genes - 0.1)

        # Mapping to gene_symbols
        if gene_symbols is not None:
            if adata.raw is not None and adata.uns[key]['params']['use_raw']:
                gene_names = adata.raw.var[gene_symbols][gene_names]
            else:
                gene_names = adata.var[gene_symbols][gene_names]

        # Making labels
        for ig, gene_name in enumerate(gene_names):
            if gene_name in bold_genes:
                ax.text(ig, scores[ig], gene_name, weight="bold", **label_kwargs)
            else:
                ax.text(ig, scores[ig], gene_name, **label_kwargs)

        ax.set_title('{} vs. {}'.format(group_name, reference))
        if count >= n_panels_x * (n_panels_y - 1):
            ax.set_xlabel('ranking')

        # print the 'score' label only on the first panel per row.
        if count % n_panels_x == 0:
            ax.set_ylabel('score')

    # With a shared y-axis, setting the limits on one panel updates all of them.
    if sharey is True and ax0 is not None:
        ymax += 0.3 * (ymax - ymin)
        ax.set_ylim(ymin, ymax)

In [7]:
def rank_genes_group(
    adata,
    group_name,
    n_genes = 20,
    gene_symbols = None,
    bold_genes = None,
    key = 'rank_genes_groups',
    fontsize=8,
    title = None,
    show = None,
    ax = None,
    **kwds,
):
    """\
    Plot the top-ranked genes of a single group on one axes.

    Parameters
    ----------
    adata
        Annotated data matrix. ``adata.uns[key]`` must provide the ``names``,
        ``scores`` and ``params`` entries read below.
    group_name
        The group whose gene ranking is shown.
    n_genes
        Number of genes to show (at least 1).
    gene_symbols
        Key for field in `.var` that stores gene symbols if you do not want to
        use `.var_names`.
    bold_genes
        Gene names to print in bold. Defaults to no genes.
    key
        Key in ``adata.uns`` holding the ranking to plot.
    fontsize
        Fontsize for gene names.
    title
        Panel title. Defaults to ``"<group_name> vs. <reference>"``.
    show
        Accepted for signature compatibility; currently ignored.
    ax
        Axes to draw on. Defaults to the current matplotlib axes.
    **kwds
        Ignored.

    Raises
    ------
    NotImplementedError
        If ``n_genes`` is smaller than 1.
    """
    if bold_genes is None:
        bold_genes = []
    if n_genes < 1:
        raise NotImplementedError(
            "Specifying a negative number for n_genes has not been implemented for "
            f"this plot. Received n_genes={n_genes}."
        )
    # The default `ax=None` used to be dereferenced directly and crashed.
    if ax is None:
        ax = plt.gca()

    reference = str(adata.uns[key]['params']['reference'])
    gene_names = adata.uns[key]['names'][group_name][:n_genes]
    scores = adata.uns[key]['scores'][group_name][:n_genes]

    # The y-axis starts at zero; 30% head-room leaves space for the labels.
    ymin = 0
    ymax = np.max(scores)
    ymax += 0.3 * (ymax - ymin)
    ax.set_ylim(ymin, ymax)
    ax.set_xlim(-0.9, n_genes - 0.1)

    # Mapping to gene_symbols
    if gene_symbols is not None:
        if adata.raw is not None and adata.uns[key]['params']['use_raw']:
            gene_names = adata.raw.var[gene_symbols][gene_names]
        else:
            gene_names = adata.var[gene_symbols][gene_names]

    # Text options shared by every label; bold labels only add `weight`.
    label_kwargs = dict(
        rotation='vertical',
        verticalalignment='bottom',
        horizontalalignment='center',
        fontsize=fontsize,
    )

    # Making labels
    for ig, gene_name in enumerate(gene_names):
        if gene_name in bold_genes:
            ax.text(ig, scores[ig], gene_name, weight="bold", **label_kwargs)
        else:
            ax.text(ig, scores[ig], gene_name, **label_kwargs)

    ax.set_title(title if title is not None else '{} vs. {}'.format(group_name, reference))

    ax.set_xlabel('ranking')
    ax.set_ylabel('score')

## Set Parameters


In [8]:
import mplscience

# Apply the mplscience matplotlib style, then use a single marker per
# scatter entry in legends.
mplscience.set_style()
plt.rcParams.update({"legend.scatterpoints": 1})

In [9]:
# Marker gene sets, keyed by programme / cell-type name. Each list is scored
# per cell further down (one `<name>_score` column per key); the "migration"
# list is also used to highlight genes in the ranking plots.
# NOTE(review): the first four lists are written in upper case (presumably
# human-style symbols) and are converted to capitalised form in the next cell;
# confirm that this yields valid mouse symbols for every entry.
gene_groups ={
    "migration": [
        "AKAP12","ITGB1-DT","FMNL2","ITGA2","TNFAIP8","PLOD2","ITGAV","BICC1","TPM1","KLHL29","CREB5","DTNA","MIR4435-2HG","GMDS",
        "ANXA2","NDRG1","GNA13","DGKH","AL359313.1",
        "RAB3B","MKL1","PRKCA","AL138828.1","PRAG1","SLC38A1","CYTOR","HKDC1","PTPRM","ASAP1","FLNB","DUSP5","SAMD4A",
        "FAT1","UBASH3B","MAP4K4","TXNRD1","LPGAT1","LUCAT1",
        "MYH9","BCL2L1","SERPINE1","PALLD","CBLB","PKN2-AS1","HIF1A","ASPH","ARHGAP12","FAM13A","RELN","ITGB5"
    ],
    "cycling": [
        "MCM5", "PCNA", "TYMS", "FEN1", "MCM7","MCM4", "RRM1", "UNG", "GINS2", "MCM6", "CDCA7", "DTL", "PRIM1", "UHRF1",
        "CENPU", "HELLS", "RFC2", "POLR1B","NASP","RAD51AP1", "GMNN", "WDR76", "SLBP", "CCNE2", "UBR7", "POLD3", "MSH2", "ATAD2", "RAD51", "RRM2", "CDC45", "CDC6", "EXO1", "TIPIN", "DSCC1",
        "BLM", "CASP8AP2", "USP1","CLSPN", "POLA1", "CHAF1B", "MRPL36", "E2F8", "HMGB2","CDK1", "NUSAP1", "UBE2C","BIRC5","TPX2", "TOP2A","NDC80" ,
        "CKS2", "NUF2", "CKS1B", "MKI67","TMPO", "CENPF","TACC3","PIMREG", "SMC4", "CCNB2", "CKAP2L", "CKAP2", "AURKB", "BUB1", "KIF11","ANP32E", "TUBB4B", "GTSE1", 
        "KIF20B", "HJURP", "CDCA3","JPT1", "CDC20", "TTK", 
        "CDC25C", "KIF2C", "RANGAP1", "NCAPD2", "DLGAP5", "CDCA2", "CDCA8", "ECT2", "KIF23", "HMMR", "AURKA","PSRC1",
        "ANLN", "LBR","CKAP5","CENPE", "CTCF", "NEK2", "G2E3", "GAS2L3", "CBX5", "CENPA"
    ],
    "hepatocytes": [
        "TTR","TF","HP","CYP2A6", "CYP2E1","CYP3A4", "HAL"
    ],
    "myofibroblasts": [
        "ACTA2","COL1A1","COL1A2","COL3A1"
    ],
    # The "portal" and "central" scores are later min-max scaled and combined
    # into the zonation score.
    "portal": [
        "Pigr","Hsd17b13","Gls2","Hal","Sds","Aldob","Rida","Hpx","Cyp2f2","Uox","Serpina12","Mup20"
    ],
    "central" : [
       "Aldh2", "Aldh1a1","Cyp2e1","Gsta3","Cyb5a","Cyp3a11","Cyp2c29","Elovl3","Ang",
        "Slco1b2","Akr1c6","Car3","Rgn","Gstm1","Mup11","Sult2a8","Csad",
        "Rnase4","Pon1","Lect2","Cyp2d9","Mgst1","Cyp2c67"]
}

In [10]:
# Normalise every symbol to "first letter upper-case, rest lower-case".
for gr, symbols in gene_groups.items():
    gene_groups[gr] = list(map(str.capitalize, symbols))

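Use `R` to export the PROGENy `mouse` gene set to a text file (the resulting `progeny_mouse_geneset.txt` must be available in `DATA_DIR`, where the next cell reads it):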

```{R}
library(progeny)
model_data <- progeny::model_mouse_full

write.table(model_data ,'./progeny_mouse_geneset.txt')
```

In [11]:
# Keep the `n_genes` most significant targets of every pathway (smallest p-value).
n_genes = 100

gene_set = (
    pd.read_csv(DATA_DIR + "progeny_mouse_geneset.txt", sep=" ")
    # Rename to the source/target/weight column names passed to decoupler below.
    .rename(columns={'gene': 'target', 'pathway': 'source', 'p.value': 'p_value'})
    .reindex(columns=['source', 'target', 'weight', 'p_value'])
    .sort_values('p_value')
    .groupby('source')
    .head(n_genes)
    .sort_values(['source', 'p_value'])
    .reset_index()
)

In [12]:
# Display order of the samples: (time point in hours, number of slides).
order = [
    f"APAP_{hours}h_S{slide}_mouse"
    for hours, n_slides in ((24, 2), (36, 2), (48, 4))
    for slide in range(1, n_slides + 1)
]

In [13]:
# Palette for the Leiden clusters of the SiFT-filtered data.
sift_leiden_colors = [
    "#F0F0C9",
    "#124E78",
    "#F2BB05",
    "#D74E09",
    "#6E0E0A",
]
# One colour per sample, used by the stacked fraction bar plots.
sample_colors = [
    "#a6cee3",
    "#1f78b4",
    "#b2df8a",
    "#33a02c",
    "#995e24",
    "#FF9E3D",
    "#ffc48a",
    "#FFE7CF",
]

## Load data

In [None]:
# Load every per-slide mouse APAP object and pre-compute, per slide, Leiden
# clusters and the spatial interaction matrix between those clusters.
adatas = {}
# `os.listdir` returns names in arbitrary order; sorting makes the dictionary
# order (and therefore the order of the concatenation below) reproducible.
for filename in sorted(os.listdir(DATA_DIR)):
    if "mouse.h5ad" not in filename or "APAP" not in filename:
        continue
    # "adata_<sample>.h5ad" -> "<sample>"
    key = filename.split("adata_")[1].split(".")[0]
    adatas[key] = sc.read(DATA_DIR + filename)
    sc.tl.leiden(adatas[key], resolution = 0.5)
    sq.gr.spatial_neighbors(adatas[key])
    sq.gr.interaction_matrix(adatas[key], cluster_key="leiden")

In [None]:
# Spatial map of the per-slide Leiden clusters, one panel per sample.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))
for idx, key in enumerate(order):
    i, j = np.unravel_index(idx, (2, 4))
    # squidpy's plotting module is `sq.pl`; `sq.plt` does not exist.
    sq.pl.spatial_scatter(
        adatas[key],
        color="leiden",
        size=6,
        title=key.split("APAP_")[1].split("_mouse")[0],
        ax=axs[i, j],
    )

plt.tight_layout()
plt.show()

In [None]:
# Interaction matrix of the per-slide Leiden clusters, one panel per sample.
fig, axs = plt.subplots(2, 4, figsize=(12, 4))
for idx, key in enumerate(order):
    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    panel_title = key.split("APAP_")[1].split("_mouse")[0]
    sq.pl.interaction_matrix(
        adatas[key],
        cluster_key="leiden",
        title=panel_title,
        ax=axs[i, j],
    )

plt.tight_layout()
plt.show()

In [None]:
# Example slide coloured by Anxa2 expression.
fig, axs = plt.subplots(figsize=(3, 3))

example_slide = adatas["APAP_36h_S1_mouse"]
sq.pl.spatial_scatter(example_slide, color="Anxa2", size=2, title="", ax=axs)
plt.tight_layout()
plt.savefig(
    FIG_DIR + "spatial_example.png",
    dpi=300,
    bbox_inches="tight",
    transparent=True,
)

plt.show()

## Create merge

In [None]:
# Stack all slides into one object; the dictionary key of each slide is
# recorded in `.obs["sample"]`.
adatas_APAP = sc.concat(dict(adatas), label="sample")

In [None]:
# Time point in hours, parsed from the sample name ("APAP_24h_S1_mouse" -> 24.0)
# and stored as a categorical column.
hours_per_cell = [
    float(label.split("_")[1].split("h")[0]) for label in adatas_APAP.obs["sample"]
]
adatas_APAP.obs["time"] = hours_per_cell
adatas_APAP.obs["time"] = adatas_APAP.obs["time"].astype("category")

In [None]:
# Embed and cluster the merged object on the Harmony-corrected PCA, then rank
# marker genes per cluster.
process_adata(adatas_APAP, use_rep="X_pca_harmony", resolution=0.5, pseudocount=False)
sc.tl.rank_genes_groups(adatas_APAP, groupby="leiden", use_raw=False)
sc.pl.rank_genes_groups(adatas_APAP)

# One `<group>_score` column per marker gene set.
for gr, genes in gene_groups.items():
    print(f"scoring {gr} genes.")
    sc.tl.score_genes(adatas_APAP, gene_list=genes, score_name=f"{gr}_score")

# Pathway activities from the gene-set table; every column of the estimate
# table is copied into `.obs` as `<column>_score`.
dc.run_mlm(
    mat=adatas_APAP,
    net=gene_set,
    source='source',
    target='target',
    weight='weight',
    verbose=False,
    use_raw=False,
)
mlm_estimates = adatas_APAP.obsm["mlm_estimate"]
for col in mlm_estimates:
    adatas_APAP.obs[f"{col}_score"] = mlm_estimates[col]

In [None]:
# Min-max scale the central and portal scores to [0, 1], then express zonation
# as the central share of the two scaled scores.
for zone_key in ("central_score", "portal_score"):
    zone_min = adatas_APAP.obs[zone_key].min()
    zone_max = adatas_APAP.obs[zone_key].max()
    adatas_APAP.obs[zone_key] = (adatas_APAP.obs[zone_key] - zone_min) / (zone_max - zone_min)

central_scaled = adatas_APAP.obs["central_score"]
portal_scaled = adatas_APAP.obs["portal_score"]
adatas_APAP.obs["zonation_specifity_score"] = central_scaled / (central_scaled + portal_scaled)

In [None]:
# Put the sample categories into the display order defined in `order`, and
# keep a copy of the merged-object Leiden labels under a second name.
sample_labels = adatas_APAP.obs["sample"]
adatas_APAP.obs["sample"] = sample_labels.cat.reorder_categories(order)
adatas_APAP.obs["leiden_samples"] = adatas_APAP.obs["leiden"].copy()

In [None]:
# Map the Leiden clusters of the merged object back onto every slide, compute
# their spatial interaction matrix, and draw one panel per sample (no legend).
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    slide = adatas[sample]
    in_sample = adatas_APAP.obs["sample"].isin([sample])
    slide.obs["leiden_orig"] = adatas_APAP[in_sample].obs["leiden"].copy()
    sq.gr.spatial_neighbors(slide)
    sq.gr.interaction_matrix(slide, cluster_key="leiden_orig")
    # Keep both interaction matrices (per-slide and merged clustering) on the merged object.
    adatas_APAP.uns[f"leiden_sample_interactions_{sample}"] = slide.uns["leiden_interactions"].copy()
    adatas_APAP.uns[f"leiden_orig_interactions_{sample}"] = slide.uns["leiden_orig_interactions"].copy()

    # del adatas[sample].uns["leiden_orig_colors"]
    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    sq.pl.spatial_scatter(
        slide,
        color="leiden_orig",
        size=6,
        title=sample.split("APAP_")[1].split("_mouse")[0],
        ax=axs[i, j],
        palette="Dark2",
        legend_loc=None,
    )

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_orig.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# Same figure as the legend-free version, drawn with the default legend. The
# per-slide labels and interaction matrices are recomputed as well.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    slide = adatas[sample]
    in_sample = adatas_APAP.obs["sample"].isin([sample])
    slide.obs["leiden_orig"] = adatas_APAP[in_sample].obs["leiden"].copy()
    sq.gr.spatial_neighbors(slide)
    sq.gr.interaction_matrix(slide, cluster_key="leiden_orig")
    adatas_APAP.uns[f"leiden_sample_interactions_{sample}"] = slide.uns["leiden_interactions"].copy()
    adatas_APAP.uns[f"leiden_orig_interactions_{sample}"] = slide.uns["leiden_orig_interactions"].copy()

    # del adatas[sample].uns["leiden_orig_colors"]
    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    sq.pl.spatial_scatter(
        slide,
        color="leiden_orig",
        size=6,
        title=sample.split("APAP_")[1].split("_mouse")[0],
        ax=axs[i, j],
        palette="Dark2",
    )

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_orig_legend.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# Heat map of the cluster-by-cluster interaction matrix of every sample
# (merged-object clustering), without colour bars.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    clusters = adatas[sample].obs["leiden_orig"].cat.categories
    df = pd.DataFrame(
        adatas[sample].uns["leiden_orig_interactions"].copy(),
        index=clusters,
        columns=clusters,
    )

    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    sns.heatmap(df, ax=axs[i, j], cmap="coolwarm", cbar=False)
    axs[i, j].set_title(sample.split("APAP_")[1].split("_mouse")[0])

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_orig_spatial_corr.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# Same heat maps with colour bars and without panel titles.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    clusters = adatas[sample].obs["leiden_orig"].cat.categories
    df = pd.DataFrame(
        adatas[sample].uns["leiden_orig_interactions"].copy(),
        index=clusters,
        columns=clusters,
    )

    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    sns.heatmap(df, ax=axs[i, j], cmap="coolwarm")

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_orig_spatial_corr_cbar.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# UMAP overview of the merged object: one panel per annotation, filled row by row.
fig, axs = plt.subplots(2, 3, figsize=(18, 6))
umap_panels = [
    "sample",
    "leiden",
    "leiden_samples",
    "migration_score",
    "zonation_specifity_score",
    "Anxa2",
]
for panel, color_key in zip(axs.flat, umap_panels):
    sc.pl.umap(adatas_APAP, color=[color_key], ax=panel, show=False, frameon=False)


plt.tight_layout()
plt.show()

In [None]:
# Persist the merged, annotated object.
merged_path = DATA_DIR + "adatas_mouse_APAP.h5ad"
adatas_APAP.write(merged_path)

In [None]:
# Persist the dictionary of per-slide objects next to the merged file.
with open(DATA_DIR + "adatas_mouse_APAP.pkl", "wb") as pickle_file:
    pkl.dump(adatas, pickle_file)

## Load processed

In [14]:
# Reload the merged object written by the section above.
merged_path = DATA_DIR + "adatas_mouse_APAP.h5ad"
adatas_APAP = sc.read(merged_path)

In [None]:
# Reload the per-slide objects. The pickle is produced by this notebook;
# do not point this at files from an untrusted source.
with open(DATA_DIR + "adatas_mouse_APAP.pkl", "rb") as pickle_file:
    adatas = pkl.load(pickle_file)

## Apply `SiFT`

In [None]:
# Per-slide SiFT results, keyed like `adatas`.
adatas_sift = dict()

Filter the control signal

In [None]:
# Run SiFT on every slide with an RBF kernel over the "spatial" key and keep
# the input expression matrix next to the result.
for key, adata in adatas.items():
    filtered = sift.sifter(
        adata=adata,
        kernel_key="spatial",
        metric="rbf",
        embedding_key="X",
        pseudocount=False,
        kernel_params=dict(ignore_self=True, length_scale=1),
        copy=True,
    )
    # Unfiltered expression of the input object, for later comparisons.
    filtered.layers["X_orig"] = adata.X.copy()
    adatas_sift[key] = filtered

In [None]:
# Stack the SiFT-filtered slides; the dictionary key of each slide is
# recorded in `.obs["sample"]`.
adatas_APAP_sift = sc.concat(dict(adatas_sift), label="sample")

In [None]:
# Embed and cluster the SiFT-filtered object on plain PCA; the per-sample
# shifted copy of `.X` is stored in the "pseudocount" layer and used for the
# marker gene ranking.
process_adata(adatas_APAP_sift, use_rep="X_pca", resolution=0.5, pseudocount="layer")

sc.tl.rank_genes_groups(adatas_APAP_sift, groupby="leiden", layer="pseudocount")
sc.pl.rank_genes_groups(adatas_APAP_sift)

# One `<group>_score` column per marker gene set.
for gr, genes in gene_groups.items():
    print(f"scoring {gr} genes.")
    sc.tl.score_genes(
        adatas_APAP_sift,
        gene_list=genes,
        score_name=f"{gr}_score",
        use_raw=False,
    )

# Pathway activities; every column of the estimate table is copied into `.obs`.
dc.run_mlm(
    mat=adatas_APAP_sift,
    net=gene_set,
    source='source',
    target='target',
    weight='weight',
    verbose=False,
    use_raw=False,
)
mlm_estimates_sift = adatas_APAP_sift.obsm["mlm_estimate"]
for col in mlm_estimates_sift:
    adatas_APAP_sift.obs[f"{col}_score"] = mlm_estimates_sift[col]

# Min-max scale the central and portal scores, then take the central share.
for zone_key in ("central_score", "portal_score"):
    zone_min = adatas_APAP_sift.obs[zone_key].min()
    zone_max = adatas_APAP_sift.obs[zone_key].max()
    adatas_APAP_sift.obs[zone_key] = (adatas_APAP_sift.obs[zone_key] - zone_min) / (zone_max - zone_min)

central_scaled = adatas_APAP_sift.obs["central_score"]
portal_scaled = adatas_APAP_sift.obs["portal_score"]
adatas_APAP_sift.obs["zonation_specifity_score"] = central_scaled / (central_scaled + portal_scaled)

In [None]:
# Time point in hours, parsed from the sample name and stored as a category.
sift_hours = [
    float(label.split("_")[1].split("h")[0]) for label in adatas_APAP_sift.obs["sample"]
]
adatas_APAP_sift.obs["time"] = sift_hours
adatas_APAP_sift.obs["time"] = adatas_APAP_sift.obs["time"].astype("category")

# Give both objects the same pair of explicitly named cluster columns
# ("leiden_orig" from the unfiltered data, "leiden_sift" from the filtered data).
sift_clusters = adatas_APAP_sift.obs["leiden"]
orig_clusters = adatas_APAP.obs["leiden"]

adatas_APAP.obs["leiden_sift"] = sift_clusters.copy()
adatas_APAP.obs["leiden_orig"] = orig_clusters.copy()

adatas_APAP_sift.obs["leiden_sift"] = sift_clusters.copy()
adatas_APAP_sift.obs["leiden_orig"] = adatas_APAP.obs["leiden_orig"].copy()

# Drop the ambiguous "leiden" entries from `.obs` and `.uns` of both objects.
for merged in (adatas_APAP_sift, adatas_APAP):
    del merged.obs["leiden"]

for merged in (adatas_APAP_sift, adatas_APAP):
    del merged.uns["leiden"]

In [None]:
# Marker genes of the original clustering, computed on the unfiltered
# expression ("X_orig" layer) and stored under a separate key.
orig_ranking_kwargs = dict(
    groupby="leiden_orig",
    layer="X_orig",
    key_added="rank_genes_groups_orig",
)
sc.tl.rank_genes_groups(adatas_APAP_sift, **orig_ranking_kwargs)


In [None]:
# Top 20 genes per original cluster; migration genes are printed in bold.
migration_genes = gene_groups["migration"]
rank_genes_groups(
    adatas_APAP_sift,
    n_genes=20,
    key="rank_genes_groups_orig",
    bold_genes=migration_genes,
)
plt.tight_layout()
plt.savefig(
    FIG_DIR + "rank_genes_orig.png",
    dpi=300,
    bbox_inches="tight",
    transparent=True,
)

plt.show()

In [None]:
# Top 20 genes per SiFT cluster; migration genes are printed in bold.
migration_genes = gene_groups["migration"]
rank_genes_groups(
    adatas_APAP_sift,
    n_genes=20,
    key="rank_genes_groups",
    bold_genes=migration_genes,
)
plt.tight_layout()
plt.savefig(
    FIG_DIR + "rank_genes_sift.png",
    dpi=300,
    bbox_inches="tight",
    transparent=True,
)

plt.show()

In [None]:
# Single-panel ranking for SiFT cluster "1" with Anxa2 highlighted.
fig, axs = plt.subplots(figsize=(4, 3.5))
rank_genes_group(
    adatas_APAP_sift,
    group_name="1",
    title="migrating vs. rest",
    bold_genes=["Anxa2"],
    fontsize=10,
    ax=axs,
    show=False,
)

plt.tight_layout()
plt.savefig(
    FIG_DIR + "rank_genes_migrating_sift.png",
    dpi=300,
    bbox_inches="tight",
    transparent=True,
)

plt.show()

In [None]:
# Discrete colormap used as the palette of the SiFT cluster plots.
newcmp = ListedColormap(colors=sift_leiden_colors)

In [None]:
# Preview the SiFT cluster colours (rendered as the cell output).
sns.color_palette(palette=sift_leiden_colors)

In [None]:
# Map the Leiden clusters of the SiFT-filtered object back onto every slide,
# compute their spatial interaction matrix, and draw one panel per sample
# (no legend).
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    adatas[sample].obs["leiden_sift"] = adatas_APAP_sift[adatas_APAP_sift.obs["sample"].isin([sample])].obs["leiden_sift"].copy()
    sq.gr.spatial_neighbors(adatas[sample])
    sq.gr.interaction_matrix(adatas[sample], cluster_key="leiden_sift")

    adatas_APAP.uns[f"leiden_sample_interactions_{sample}"] = adatas[sample].uns["leiden_interactions"].copy()
    # Key fixed: it used to be spelled "lleiden_sift_interactions_<sample>",
    # which did not match the key written by the legend version of this figure.
    adatas_APAP.uns[f"leiden_sift_interactions_{sample}"] = adatas[sample].uns["leiden_sift_interactions"].copy()

    # del adatas[sample].uns["leiden_sift_colors"]
    i,j = np.unravel_index(idx, (2,4))
    sq.pl.spatial_scatter(adatas[sample],
                          color="leiden_sift",
                          size=6,
                          title=sample.split("APAP_")[1].split("_mouse")[0], ax=axs[i,j],
                          palette=newcmp,
                          legend_loc=None
                         )

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_sift.png",  dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# Same figure as the legend-free version, drawn with the default legend.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    adatas[sample].obs["leiden_sift"] = adatas_APAP_sift[adatas_APAP_sift.obs["sample"].isin([sample])].obs["leiden_sift"].copy()
    sq.gr.spatial_neighbors(adatas[sample])
    sq.gr.interaction_matrix(adatas[sample], cluster_key="leiden_sift")

    adatas_APAP.uns[f"leiden_sample_interactions_{sample}"] = adatas[sample].uns["leiden_interactions"].copy()
    adatas_APAP.uns[f"leiden_sift_interactions_{sample}"] = adatas[sample].uns["leiden_sift_interactions"].copy()

    # Drop colours stored by an earlier plot, presumably so that the palette
    # passed below is applied afresh. `pop` with a default keeps the cell
    # runnable when no colours have been stored yet (a plain `del` raised
    # KeyError in that case).
    adatas[sample].uns.pop("leiden_sift_colors", None)
    i,j = np.unravel_index(idx, (2,4))
    sq.pl.spatial_scatter(adatas[sample],
                          color="leiden_sift",
                          size=6,
                          title=sample.split("APAP_")[1].split("_mouse")[0], ax=axs[i,j],
                          palette=newcmp,
                         )

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_sift_legend.png",  dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# Heat map of the cluster-by-cluster interaction matrix of every sample
# (SiFT clustering), without colour bars.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    clusters = adatas[sample].obs["leiden_sift"].cat.categories
    df = pd.DataFrame(
        adatas[sample].uns["leiden_sift_interactions"].copy(),
        index=clusters,
        columns=clusters,
    )

    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    sns.heatmap(df, ax=axs[i, j], cmap="coolwarm", cbar=False)
    axs[i, j].set_title(sample.split("APAP_")[1].split("_mouse")[0])

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_sift_spatial_corr.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# Same heat maps with colour bars and without panel titles.
fig, axs = plt.subplots(2, 4, figsize=(12, 6))

for idx, sample in enumerate(adatas_APAP.obs["sample"].cat.categories):
    clusters = adatas[sample].obs["leiden_sift"].cat.categories
    df = pd.DataFrame(
        adatas[sample].uns["leiden_sift_interactions"].copy(),
        index=clusters,
        columns=clusters,
    )

    i, j = divmod(idx, 4)  # row-major position in the 2 x 4 grid
    sns.heatmap(df, ax=axs[i, j], cmap="coolwarm")

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_sift_spatial_corr_cbar.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()

In [None]:
# One slide per time point for the side-by-side comparison figure.
slides = [
    "APAP_24h_S1_mouse",
    "APAP_36h_S2_mouse",
    "APAP_48h_S2_mouse",
]

In [None]:
# Side-by-side comparison: original clustering (left column) against SiFT
# clustering (right column), one row per selected slide.
fig, axs = plt.subplots(3, 2, figsize=(6, 8))

column_specs = (("leiden_orig", "Dark2"), ("leiden_sift", newcmp))

for i, sample in enumerate(slides):
    for j, (cluster_key, cluster_palette) in enumerate(column_specs):
        sq.pl.spatial_scatter(
            adatas[sample],
            color=cluster_key,
            size=6,
            title="",
            ax=axs[i, j],
            palette=cluster_palette,
            legend_loc=None,
        )
        axs[i, j].set_xlabel("")
        axs[i, j].set_ylabel("")

    # Label each row with its time point, e.g. "24h".
    axs[i, 0].set_ylabel(sample.split("APAP_")[1].split("_")[0])

axs[0, 0].set_title("Original")
axs[0, 1].set_title("SiFT")
plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_cmp.png", dpi=300, bbox_inches="tight", transparent=True)

plt.show()



## Leiden distribution analysis

In [None]:
# Sample columns in stacking order: (time point in hours, number of slides).
cols = [
    f"APAP_{hours}h_S{slide}_mouse"
    for hours, n_slides in ((24, 2), (36, 2), (48, 4))
    for slide in range(1, n_slides + 1)
]

In [None]:
# Per-cell table with both clusterings plus sample and time annotations.
leiden_columns = ["leiden_orig", "leiden_sift", "sample", "time"]
df_leiden = adatas_APAP_sift.obs[leiden_columns]

In [None]:
# Cells per (original cluster, sample), reshaped to clusters x samples and
# normalised so that every cluster row sums to one.
res = df_leiden.groupby(["leiden_orig", "sample"]).size()
res_orig = (
    res.reset_index()
    .pivot(index="leiden_orig", columns="sample", values=0)
    .reset_index()
    .set_index("leiden_orig")
)
sum_ = res_orig.sum(axis=1)
res_orig = res_orig.div(sum_, axis=0)

In [None]:
# Running total of the sample fractions in `cols` order, as needed for the
# overlaid (stacked) bar plot below.
# `cumsum` replaces the manual `sum_prev += ...` accumulation: `sum_prev` was
# bound to `res_orig[col]` itself, so the in-place addition could write through
# into the first column of `res_orig` (on pandas without copy-on-write).
# Missing cluster/sample combinations count as a fraction of zero.
cumulative_orig = res_orig[cols].fillna(0).cumsum(axis=1)
res_orig_sum = res_orig.copy()
for col in cols:
    res_orig_sum[col] = cumulative_orig[col]

In [None]:
# Stacked horizontal bars: sample composition of every original cluster.
# The cumulative fractions are drawn from the largest to the smallest so
# that each shorter bar covers the left part of the longer ones.
fig, axs = plt.subplots(1, 1, figsize=(3, 4))

bar_data = res_orig_sum.reset_index()
for i, col in enumerate(cols[::-1]):
    sns.barplot(
        x=col,
        y="leiden_orig",
        data=bar_data,
        label=col.split("_")[1].split("_mouse")[0],
        color=sample_colors[-(i+1)],
        alpha = 1 - i*0.1,
        ax=axs
    )


# Add a legend and informative axis label
handles, labels = plt.gca().get_legend_handles_labels()

#add legend to plot
leg = axs.legend(
    handles[::-1],
    labels[::-1],
    ncol=2,
    loc="lower right",
    bbox_to_anchor=(1.1, 1.05),
    columnspacing=.2,
    handlelength=1,
    handletextpad=0.6,
    frameon=True
)
# `Legend.legendHandles` was renamed to `legend_handles` in newer Matplotlib
# releases; support both spellings.
legend_handles = getattr(leg, "legend_handles", None)
if legend_handles is None:
    legend_handles = leg.legendHandles
for lh in legend_handles:
    lh.set_alpha(1)


axs.set(xlim=(0, 1))
axs.set_xlabel("fraction", fontsize=14)
axs.set_ylabel("cluster", fontsize=14)

# Fix the tick positions before relabelling them.
ticks_loc = axs.get_xticks().tolist()
axs.set_xticks(ticks_loc)
axs.set_xticklabels([f"{x:.1f}" for x in ticks_loc],  fontsize=10)
axs.set_yticklabels(axs.get_yticklabels(), fontsize=10)
sns.despine(left=True, bottom=True)

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_orig_fractions.png",  dpi=300, bbox_inches="tight", transparent=True)
plt.show()

In [None]:
# Count observations per (leiden_sift cluster, sample) and reshape to a
# cluster x sample table.
res = df_leiden.groupby(["leiden_sift", "sample"]).size()
res_sift = (
    res.reset_index()
    .pivot(index="leiden_sift", columns="sample", values=0)
    .reset_index()
    .set_index("leiden_sift")
)
# Normalise every row so each cluster's sample fractions sum to 1.
sum_ = res_sift.sum(axis=1)
res_sift = res_sift.div(sum_, axis=0)

In [None]:
# Running (cumulative) per-sample fractions for each cluster, accumulated in the
# order given by `cols`.
res_sift_sum = res_sift.copy()
sum_prev = None
for col in cols:
    # Build a new Series at every step. The previous `sum_prev += ...` was an
    # in-place add on a column taken directly from `res_sift`, which (without
    # pandas copy-on-write) can write the running total back into `res_sift`
    # and makes re-running this cell give different results.
    if sum_prev is None:
        sum_prev = res_sift[col].copy()
    else:
        sum_prev = sum_prev + res_sift[col]
    res_sift_sum[col] = sum_prev

In [None]:
# Horizontal bar chart of the cumulative per-sample fractions of every SiFT
# Leiden cluster. Samples are drawn in reverse `cols` order so the bar with the
# largest cumulative value is painted first and the smaller ones overlay it.
fig, axs = plt.subplots(1, 1, figsize=(3, 3.5))

plot_df = res_sift_sum.reset_index()  # loop-invariant, so build it once
for i, col in enumerate(cols[::-1]):
    sns.barplot(
        x=col,
        y="leiden_sift",
        data=plot_df,
        label=col.split("_")[1],  # second "_"-separated token of the sample name
        color=sample_colors[-(i + 1)],
        alpha=1 - i * 0.1,
        ax=axs,
    )

# Legend entries are listed in the original (non-reversed) sample order.
handles, labels = axs.get_legend_handles_labels()
leg = axs.legend(
    handles[::-1],
    labels[::-1],
    ncol=2,
    loc="lower right",
    bbox_to_anchor=(1.1, 1.05),
    columnspacing=.2,
    handlelength=1,
    handletextpad=0.6,
    frameon=True,
)
# Matplotlib 3.7 renamed `Legend.legendHandles` to `legend_handles` and 3.9
# removed the old attribute; support both so the cell runs on either version.
legend_handles = leg.legend_handles if hasattr(leg, "legend_handles") else leg.legendHandles
for lh in legend_handles:
    lh.set_alpha(1)

axs.set(xlim=(0, 1))
axs.set_xlabel("fraction", fontsize=14)
axs.set_ylabel("cluster", fontsize=14)

# Fix the tick positions before assigning formatted tick labels.
ticks_loc = axs.get_xticks().tolist()
axs.set_xticks(ticks_loc)
axs.set_xticklabels([f"{x:.1f}" for x in ticks_loc], fontsize=10)
axs.set_yticklabels(axs.get_yticklabels(), fontsize=10)
sns.despine(left=True, bottom=True)

plt.tight_layout()
plt.savefig(FIG_DIR + "leiden_sift_fractions.png", dpi=300, bbox_inches="tight", transparent=True)
plt.show()

In [None]:
# Export, for every SiFT Leiden cluster, the ranked genes stored in
# `uns["rank_genes_groups"]` to one CSV per cluster.
# NOTE(review): assumes `rank_genes_groups` was computed with
# groupby="leiden_sift" in an earlier cell — confirm.
for cluster in adatas_APAP_sift.obs["leiden_sift"].cat.categories:
    ind_genes_names = adatas_APAP_sift.uns["rank_genes_groups"]["names"][cluster]
    ind_genes_pval = adatas_APAP_sift.uns["rank_genes_groups"]["pvals_adj"][cluster]
    ind_genes_scores = adatas_APAP_sift.uns["rank_genes_groups"]["scores"][cluster]
    # Rows are (gene, pvalue, scores); note that "pvalue" holds the *adjusted* p-values.
    df_pvalue = pd.DataFrame([ind_genes_names, ind_genes_pval, ind_genes_scores], index=["gene", "pvalue", "scores"]).T
    # Only genes with adjusted p-value below 0.05 are written.
    df_pvalue[df_pvalue["pvalue"] < 0.05].to_csv(DATA_DIR + f"rank_APAP_{cluster}_rbf.csv")
        

In [None]:
# Persist the annotated object; it is read back from the same path in the
# "Analyze time trajectory" section.
adatas_APAP_sift.write(DATA_DIR + "adatas_APAP_sift.h5ad")

## `GO` analysis plot

In [None]:
def extract_df_padj(fnames):
    """Load enrichment tables and combine them into one frame for plotting.

    Parameters
    ----------
    fnames
        Mapping from a group key containing ``"clust<N>"`` (e.g. ``"clust1"``)
        to the path of a CSV file with at least the columns ``"p.adjust"``,
        ``"NES"`` and ``"Description"``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all tables (columns shared by every file), each sorted
        by the signed score in descending order, with the extra columns:

        * ``"$-np.log(p_{adj})$"``: ``-log10(p.adjust) * sign(NES)``. The label
          says ``np.log`` but the value is a base-10 logarithm; the label is
          kept unchanged because callers select the column by this exact name.
        * ``"cluster group"``: the key from ``fnames``.
        * ``"cluster"``: the integer following ``"clust"`` in the key.

        ``"Description"`` values with more than three words get a line break
        inserted after the third word.

    Raises
    ------
    ValueError
        If ``fnames`` is empty (nothing to concatenate) or the text after
        ``"clust"`` in a key is not an integer.
    IndexError
        If a key does not contain ``"clust"``.
    """
    score_col = r"$-np.log(p_{adj})$"

    tables = {}
    for key, file in fnames.items():
        table = pd.read_csv(file)
        table[score_col] = -1 * np.log10(table["p.adjust"]) * np.sign(table["NES"])
        tables[key] = table.sort_values(by=score_col, ascending=False)

    # Concatenating a dict yields a two-level index (key, original row); both
    # levels are moved into columns named "level_0" and "level_1".
    dfs_padj = pd.concat(tables, join="inner")
    dfs_padj = dfs_padj.reset_index(level=[0, 1])
    dfs_padj = dfs_padj.rename(columns={"level_0": "cluster group"})
    dfs_padj["cluster"] = [int(x.split("clust")[1]) for x in dfs_padj["cluster group"]]

    # Break long descriptions after the third word so axis labels stay narrow.
    wrapped = []
    for description in dfs_padj["Description"]:
        words = description.split(" ")
        if len(words) > 3:
            wrapped.append(" ".join(words[:3] + ["\n"] + words[3:]))
        else:
            wrapped.append(description)
    dfs_padj["Description"] = wrapped
    return dfs_padj

In [None]:
# Enrichment table for cluster 1. The key must contain "clust<N>" because
# `extract_df_padj` parses the integer cluster id from it. The path is built
# from DATA_DIR (same directory as the previously hard-coded absolute path) so
# it follows the configured data location like the rest of the notebook.
fnames = {"clust1": DATA_DIR + "APAP_clust1_ego.csv"}
dfs_padj = extract_df_padj(fnames)

In [None]:
# Strip plot of the signed enrichment score for the first 8 rows of `dfs_padj`
# (the table is sorted by that score in `extract_df_padj`).
fig, axs = plt.subplots(1, 1, figsize=(5, 4.5))

sns.stripplot(
    x=r"$-np.log(p_{adj})$",
    y="Description", 
    hue="cluster",
    data=dfs_padj[:8], 
    orient="h",
    ax=axs, 
    palette=[sift_leiden_colors[1]]
)

# A single cluster is shown, so the hue legend is dropped.
axs.legend().remove()
axs.set_xlabel(r"$-np.log(p_{adj})$", fontsize=14)
axs.set_ylabel("")

plt.tight_layout()
plt.savefig(FIG_DIR + f"go_enrich_migrating.png",  dpi=300, transparent=True)

plt.show()

## Analyze time trajectory

In [None]:
# Reload the object saved earlier under the same path, so this section can be
# run without recomputing the cells above.
adatas_APAP_sift = sc.read(DATA_DIR + "adatas_APAP_sift.h5ad")

In [None]:
# SiFT Leiden cluster to inspect; `cat` is reused by the subsetting cell below.
cat = "1"
# UMAP restricted to that cluster, coloured by time and two score columns.
sc.pl.umap(adatas_APAP_sift[adatas_APAP_sift.obs["leiden_sift"].isin([cat])], 
           color=["time","migration_score", "portal_score"], 
           frameon=False, 
           legend_loc="on data")
    

In [None]:
# Independent copy (not a view) of the observations in SiFT Leiden cluster `cat`,
# so later in-place processing does not touch `adatas_APAP_sift`.
adatas_APAP_migrating = adatas_APAP_sift[adatas_APAP_sift.obs["leiden_sift"].isin([cat])].copy()
adatas_APAP_migrating

In [None]:
# Re-run the shared preprocessing helper on the migrating subset, then rank
# genes between the resulting Leiden groups.
process_adata(
    adatas_APAP_migrating,
    use_rep="X_pca",
    resolution=0.3,
    pseudocount=None,
)
sc.tl.rank_genes_groups(adatas_APAP_migrating, groupby="leiden", layer="pseudocount")
sc.pl.rank_genes_groups(adatas_APAP_migrating)

# Gene-set scores: one "<group>_score" obs column per entry of `gene_groups`.
for gr, genes in gene_groups.items():
    print(f"scoring {gr} genes.")
    sc.tl.score_genes(
        adatas_APAP_migrating,
        gene_list=genes,
        score_name=f"{gr}_score",
        use_raw=False,
    )

# decoupler MLM estimates are copied into obs as "<source>_score" columns
# (overwriting a score_genes column of the same name, if any).
dc.run_mlm(
    mat=adatas_APAP_migrating,
    net=gene_set,
    source='source',
    target='target',
    weight='weight',
    verbose=False,
    use_raw=False,
)
mlm_estimate = adatas_APAP_migrating.obsm["mlm_estimate"]
for col in mlm_estimate:
    adatas_APAP_migrating.obs[f"{col}_score"] = mlm_estimate[col]

# Min-max scale the central and portal scores to [0, 1] ...
for score_key in ("central_score", "portal_score"):
    raw_score = adatas_APAP_migrating.obs[score_key]
    score_min = raw_score.min()
    score_max = raw_score.max()
    adatas_APAP_migrating.obs[score_key] = (raw_score - score_min) / (score_max - score_min)

# ... and express the central share of the two scaled scores.
central_scaled = adatas_APAP_migrating.obs["central_score"]
portal_scaled = adatas_APAP_migrating.obs["portal_score"]
adatas_APAP_migrating.obs["zonation_specifity_score"] = central_scaled / (central_scaled + portal_scaled)

In [None]:
# 2 x 3 overview of the migrating subset: categorical annotations on the top
# row, continuous scores and one gene on the bottom row.
fig, axs = plt.subplots(2, 3, figsize=(12, 6))

# (feature to colour by, extra keyword arguments) in row-major panel order.
panels = [
    ("leiden", {"legend_loc": "on data"}),
    ("time", {"legend_loc": "on data"}),
    ("sample", {}),
    ("central_score", {}),
    ("portal_score", {}),
    ("Anxa2", {}),
]
for panel_ax, (feature, extra_kwargs) in zip(axs.flat, panels):
    sc.pl.umap(
        adatas_APAP_migrating,
        color=[feature],
        ax=panel_ax,
        show=False,
        frameon=False,
        **extra_kwargs,
    )

plt.tight_layout()
plt.show()

In [None]:
# Diagnostic plot of the variance ratio per principal component.
sc.pl.pca_variance_ratio(adatas_APAP_migrating)

In [None]:
# Fit a scFates curve with 30 nodes on the first 2 dimensions of the UMAP embedding.
scf.tl.curve(adatas_APAP_migrating, Nodes=30, use_rep="X_umap", ndims_rep=2)

In [None]:
# Show the fitted graph on the UMAP embedding.
scf.pl.graph(adatas_APAP_migrating, basis="umap")

In [None]:
# UMAP coloured by time, for visual comparison with the fitted graph.
sc.pl.umap(adatas_APAP_migrating, color="time")

In [None]:
# Plot the first 20 columns of obsm["X_R"] on the UMAP, one panel per column.
# Each column is also stored as obs["node<n>"] (a later cell uses "node0").
n_rows, n_cols = 5, 4
fig, axs = plt.subplots(n_rows, n_cols, figsize=(8, 10))
for n, node_ax in enumerate(axs.ravel()):  # row-major panel order
    node_key = f"node{n}"
    adatas_APAP_migrating.obs[node_key] = adatas_APAP_migrating.obsm["X_R"][:, n]
    sc.pl.umap(adatas_APAP_migrating, color=node_key, cmap="Reds", ax=node_ax, show=False)

plt.tight_layout()
plt.show()

In [None]:
# Ensure observation names are unique before the trajectory steps below.
# NOTE(review): presumably needed because several samples were concatenated — confirm.
adatas_APAP_migrating.obs_names_make_unique()

In [None]:
# Select the root using the "node0" column written to obs by the node-plotting cell.
# NOTE(review): how scFates turns this column into a root node is not visible
# here — confirm against the scFates documentation.
scf.tl.root(adatas_APAP_migrating, "node0")
# NOTE(review): the positional arguments 1 and 1000 are presumably scFates'
# sigma and lambda parameters — TODO confirm.
scf.tl.convert_to_soft(adatas_APAP_migrating, 1, 1000)
# Pseudotime computed with 100 mappings on 20 workers; the seed is fixed for
# reproducibility.
scf.tl.pseudotime(adatas_APAP_migrating, n_jobs=20, n_map=100, seed=42)

In [None]:
# Draw the trajectory on the UMAP with direction arrows.
scf.pl.trajectory(adatas_APAP_migrating, basis="umap", arrows=True, arrow_offset=3)

In [None]:
# Test features for association with the trajectory (20 parallel jobs).
scf.tl.test_association(adatas_APAP_migrating, n_jobs=20)

In [None]:
# Re-apply the significance filters with A_cut=.6.
# NOTE(review): presumably reuses the statistics from the previous call rather
# than re-running the test — confirm against the scFates documentation.
scf.tl.test_association(adatas_APAP_migrating, reapply_filters=True, A_cut=.6)
scf.pl.test_association(adatas_APAP_migrating)
plt.show()

In [None]:
# Fit on a copy so `adatas_APAP_migrating` itself is left unchanged by the fit.
adatas_APAP_migrating_fit = adatas_APAP_migrating.copy()
scf.tl.fit(adatas_APAP_migrating_fit, n_jobs=20)

In [None]:
# Cluster the fitted features using a correlation metric at resolution 0.5.
# The cells below read the resulting labels from var["cluters"] (spelling as
# used throughout this notebook).
scf.tl.cluster(adatas_APAP_migrating_fit, metric="correlation", resolution=0.5)

In [None]:
# Store the "seg" obs column with categorical dtype.
adatas_APAP_migrating_fit.obs["seg"] = adatas_APAP_migrating_fit.obs["seg"].astype("category")

In [None]:
# For every feature cluster: export its significant genes (p_val < 0.05) with
# their "A" values to CSV and plot the fitted trends of all its genes.
for c in adatas_APAP_migrating_fit.var["cluters"].unique():
    in_cluster = adatas_APAP_migrating_fit.var.cluters == c
    cluster_var = adatas_APAP_migrating_fit.var.loc[in_cluster]

    ind_genes_names = adatas_APAP_migrating_fit.var_names[in_cluster]
    ind_genes_pval = cluster_var["p_val"]
    ind_genes_scores = cluster_var["A"]

    # Rows are (gene, pvalue, scores); transpose to one row per gene.
    df_pvalue = pd.DataFrame(
        [ind_genes_names, ind_genes_pval, ind_genes_scores],
        index=["gene", "pvalue", "scores"],
    ).T
    significant = df_pvalue["pvalue"] < 0.05
    df_pvalue[significant].to_csv(DATA_DIR + f"rank_APAP_migrating_{c}.csv")

    print(f"cluster {c}")
    scf.pl.trends(
        adatas_APAP_migrating_fit,
        features=adatas_APAP_migrating_fit.var_names[in_cluster],
        basis="umap",
    )
    plt.show()

In [None]:
# Feature clusters "0" and "2" are exported together as the "late" gene set.
late_mask = adatas_APAP_migrating_fit.var.cluters.isin(["0", "2"])
late_var = adatas_APAP_migrating_fit.var.loc[late_mask]

ind_genes_names = adatas_APAP_migrating_fit.var_names[late_mask]
ind_genes_pval = late_var["p_val"]
ind_genes_scores = late_var["A"]

# Rows are (gene, pvalue, scores); transpose to one row per gene.
df_pvalue = pd.DataFrame(
    [ind_genes_names, ind_genes_pval, ind_genes_scores],
    index=["gene", "pvalue", "scores"],
).T
significant = df_pvalue["pvalue"] < 0.05
df_pvalue[significant].to_csv(DATA_DIR + "rank_APAP_migrating_late.csv")
  

In [None]:
# Feature cluster "1" is exported as the "early" gene set.
early_mask = adatas_APAP_migrating_fit.var.cluters.isin(["1"])
early_var = adatas_APAP_migrating_fit.var.loc[early_mask]

ind_genes_names = adatas_APAP_migrating_fit.var_names[early_mask]
ind_genes_pval = early_var["p_val"]
ind_genes_scores = early_var["A"]

# Rows are (gene, pvalue, scores); transpose to one row per gene.
df_pvalue = pd.DataFrame(
    [ind_genes_names, ind_genes_pval, ind_genes_scores],
    index=["gene", "pvalue", "scores"],
).T
significant = df_pvalue["pvalue"] < 0.05
df_pvalue[significant].to_csv(DATA_DIR + "rank_APAP_migrating_early.csv")
  

In [None]:
# Export every gene of the fitted object with p_val < 0.05, regardless of its
# feature cluster.
fit_var = adatas_APAP_migrating_fit.var

ind_genes_names = adatas_APAP_migrating_fit.var_names
ind_genes_pval = fit_var["p_val"]
ind_genes_scores = fit_var["A"]

# Rows are (gene, pvalue, scores); transpose to one row per gene.
df_pvalue = pd.DataFrame(
    [ind_genes_names, ind_genes_pval, ind_genes_scores],
    index=["gene", "pvalue", "scores"],
).T
significant = df_pvalue["pvalue"] < 0.05
df_pvalue[significant].to_csv(DATA_DIR + "rank_APAP_migrating.csv")


In [None]:
# Two-way gene label: feature cluster "1" is "early", everything else "late".
is_early = adatas_APAP_migrating_fit.var.cluters.isin(["1"])
adatas_APAP_migrating_fit.var["cluster"] = "late"
adatas_APAP_migrating_fit.var.loc[is_early, "cluster"] = "early"

In [None]:
# Keep a copy of `uns`; the next cell deletes it from the object before writing.
uns_migrating = adatas_APAP_migrating_fit.uns.copy()

In [None]:
# Drop `uns` before writing the h5ad file.
# NOTE(review): presumably some `uns` entries cannot be serialised to h5ad —
# confirm; the content is saved separately as a pickle below.
del adatas_APAP_migrating_fit.uns
adatas_APAP_migrating_fit.write(DATA_DIR + "adatas_APAP_migrating.h5ad")

In [None]:
# Put the saved `uns` back on the in-memory object after it was written to disk.
adatas_APAP_migrating_fit.uns = uns_migrating

In [None]:
# Store `uns` next to the h5ad file as a pickle, since the h5ad was written
# without it.
with open(DATA_DIR + "uns_migrating.pkl", "wb") as handle:
    pkl.dump(uns_migrating, handle)

In [None]:
# Threshold on the min-max-scaled fitted value; used in the next cell to decide
# in which cells a feature counts as "high" when ordering features.
ord_thre = 0.7

In [None]:
# Build the heatmap matrix: fitted values with features as rows and cells as
# columns (note the transpose).
fitted = pd.DataFrame(
    adatas_APAP_migrating_fit.layers["fitted"], 
    index=adatas_APAP_migrating_fit.obs_names, 
    columns=adatas_APAP_migrating_fit.var_names
).T.copy(deep=True)

# Cells ordered by ascending obs["t"].
# NOTE(review): "t" is presumably the pseudotime written by scFates — confirm.
cell_order = adatas_APAP_migrating_fit.obs["t"].sort_values().index
cell_time = adatas_APAP_migrating_fit.obs["t"].sort_values()

clusters = adatas_APAP_migrating_fit.var["cluster"]
fitted = fitted.loc[:, cell_order]

# Order features within each group ("early" first, then "late"):
#   1. the start feature is the one whose mean "t", taken over the cells where
#      its min-max-scaled fitted value exceeds `ord_thre`, is smallest;
#   2. the group's features are then sorted by decreasing Pearson correlation
#      with that start feature.
feature_order = []
for clust in ["early", "late"]:
    fitted_cur = fitted.loc[clusters.isin([clust]), :]
    start_feature = (
        fitted_cur.apply(
                    lambda x: adatas_APAP_migrating_fit.obs.t[fitted_cur.columns][
                        (x - x.min()) / (x.max() - x.min()) > ord_thre
                    ].mean(),
                    axis=1,
                )
                .sort_values()
                .index[0]
            )
    feature_order.append(
                fitted_cur.T.corr(method="pearson")
                .loc[start_feature, :]
                .sort_values(ascending=False)
                .index
            )
# Concatenate the two per-group orderings and reorder rows and labels to match.
feature_order = feature_order[0].append(feature_order[1])
fitted_sorted = fitted.loc[feature_order, :]
clusters = clusters.loc[feature_order]

# fitted_sorted = fitted_sorted.T
# fitted_sorted["t"] = cell_time
# fitted_sorted = fitted_sorted.T

In [None]:
# Display the cell ordering for inspection.
cell_order

In [None]:
# Display the sorted "t" values for inspection.
cell_time

In [None]:
# Column colour strip: one reversed-viridis colour per distinct "t" value, in
# order of first appearance in the sorted `cell_time`.
labels = cell_time.unique()

label_pal = sns.color_palette("viridis", labels.size)[::-1]
# Values are matched through their string representation.
label_lut = dict(zip(map(str, labels), label_pal))
col_colors = pd.Series(cell_time.values.astype(str), index=cell_time.index, name="").map(label_lut)

In [None]:
# Row colour strip: fixed colours for the "early" and "late" gene groups.
# Note that `labels`, `label_pal` and `label_lut` from the previous cell are
# overwritten here.
labels = ['early', 'late']

label_pal = ["#f8e61f", "#460b5d"]
label_lut = dict(zip(map(str, labels), label_pal))
row_colors = pd.Series(clusters.values, index=clusters.index, name="").map(label_lut)

In [None]:
# fig, axs = plt.subplots(1, 1, figsize=(4, 6))

# Heatmap of the ordered fitted values. Clustering is disabled on both axes so
# the precomputed feature and cell orderings are kept; the colour scale is
# clipped to [-1, 1].
pl = sns.clustermap(fitted_sorted, cmap="RdBu_r", vmin=-1, vmax=1, 
                    col_cluster=False, 
                    row_cluster=False,
                    col_colors=col_colors,
                    row_colors=row_colors,
                    figsize=(4,4),
                    yticklabels=0,
                    xticklabels=0,
                   )

# Remove ticks from the auxiliary axes.
# NOTE(review): axes are addressed by position in `pl.figure.axes`; which panel
# each index refers to depends on seaborn's layout — confirm after upgrades.
cbar = pl.figure.axes[-1]
cbar.set_yticks([])
pl.figure.axes[3].set_yticks([])
pl.figure.axes[3].set_xticks([])
pl.figure.axes[-4].set_xticks([])


plt.tight_layout()

plt.savefig(FIG_DIR + "pseudotime_migrating.png",  dpi=300, transparent=True)
plt.show()

## `GO` analysis plot

In [None]:
def extract_df_padj_enrich(fnames):
    """Load over-representation tables and combine them for plotting.

    Parameters
    ----------
    fnames
        Mapping from a group key containing ``"clust<label>"`` (e.g.
        ``"clustearly"``) to the path of a CSV file with at least the columns
        ``"GeneRatio"`` (text of the form ``"k/n"``) and ``"Description"``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of all tables (columns shared by every file), each sorted
        by ``"GeneRatio"`` in descending order, where:

        * ``"GeneRatio"`` is converted from ``"k/n"`` to the float ``k / n``;
        * ``"cluster group"`` holds the key from ``fnames``;
        * ``"cluster"`` holds the text following ``"clust"`` in the key;
        * ``"Description"`` values with more than three words get a line break
          inserted after the third word.

    Raises
    ------
    ValueError
        If ``fnames`` is empty (nothing to concatenate) or a ratio part is not
        numeric.
    IndexError
        If a key does not contain ``"clust"`` or a ratio has no ``"/"``.
    """
    tables = {}
    for key, file in fnames.items():
        table = pd.read_csv(file)
        table["GeneRatio"] = [
            float(parts[0]) / float(parts[1])
            for parts in (ratio.split("/") for ratio in table["GeneRatio"])
        ]
        tables[key] = table.sort_values(by="GeneRatio", ascending=False)

    # Concatenating a dict yields a two-level index (key, original row); both
    # levels are moved into columns named "level_0" and "level_1".
    dfs_padj = pd.concat(tables, join="inner")
    dfs_padj = dfs_padj.reset_index(level=[0, 1])
    dfs_padj = dfs_padj.rename(columns={"level_0": "cluster group"})
    dfs_padj["cluster"] = [(x.split("clust")[1]) for x in dfs_padj["cluster group"]]

    # Break long descriptions after the third word so axis labels stay narrow.
    wrapped = []
    for description in dfs_padj["Description"]:
        words = description.split(" ")
        if len(words) > 3:
            wrapped.append(" ".join(words[:3] + ["\n"] + words[3:]))
        else:
            wrapped.append(description)
    dfs_padj["Description"] = wrapped

    return dfs_padj

In [None]:
# Map the third "_"-separated token of every matching file name in DATA_DIR
# (expected to look like "clust<label>") to the file's full path.
fnames = {
    filename.split("_")[2]: DATA_DIR + filename
    for filename in os.listdir(DATA_DIR)
    if "migrating_clust" in filename
}

dfs_padj = extract_df_padj_enrich(fnames)

In [None]:
# Colours for the "early" and "late" groups (same hex values as the heatmap row
# strip); the second line only displays the palette in the notebook.
pal = ["#f8e61f", "#460b5d"]
sns.color_palette(pal)

In [None]:
# One strip plot per gene group showing the first 15 rows of that group's
# enrichment table (sorted by GeneRatio in `extract_df_padj_enrich`).
for k, clust in enumerate(["early", "late"]):
    fig, axs = plt.subplots(1, 1, figsize=(6, 6))
    sns.stripplot(
            x="GeneRatio",
            y="Description", 
            hue="cluster",
            data=dfs_padj[dfs_padj["cluster"].isin([clust])][:15], 
            orient="h",
            ax=axs, 
            palette=[pal[k]]
    )
    # The legend is created and immediately removed, so no legend is shown.
    axs.legend(ncol=1, frameon=True, fontsize=16, loc="upper left").remove()
    axs.set_xlabel("GeneRatio", fontsize=16)
    
    plt.tight_layout()
    plt.savefig(FIG_DIR + f"go_enrich_clusters_{clust}.png",  dpi=300, bbox_inches="tight", transparent=True)

    plt.show()