In [None]:
# python libs
from pathlib import Path

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import scanpy as sc
import liana as li
from liana.method import rank_aggregate
import decoupler as dc

import session_info

In [None]:
# Setting up R dependencies
import anndata2ri
import rpy2
from rpy2.robjects import r
import random

# Register the %%R cell magic used by the R cells of this notebook.
# Equivalent to `%load_ext rpy2.ipython`; without it a freshly started kernel
# stops at the first %%R cell because the magic is unknown.
get_ipython().run_line_magic("load_ext", "rpy2.ipython")

# Enable anndata2ri's conversion between AnnData and R objects
anndata2ri.activate()


In [None]:
%%R
# Attach the general-purpose R packages used by the R cells, in this order,
# while silencing their start-up banners.
suppressPackageStartupMessages(
    invisible(lapply(
        c("reticulate", "ggplot2", "tidyr", "dplyr", "purrr", "tibble"),
        library,
        character.only = TRUE
    ))
)

In [None]:
%%R
# Attach nichenetr.
# The library directory used to be hard-coded to one user's environment. It is
# still the default, but can be overridden through the NICHENETR_LIB
# environment variable; when the directory does not exist on this machine we
# fall back to R's regular library paths.
nichenetr_lib <- Sys.getenv(
    "NICHENETR_LIB",
    unset = "/home/d/danilina/mambaforge/envs/scanpy_r/lib/R/library"
)
if (dir.exists(nichenetr_lib)) {
    # `lib.loc` spelled out in full (the original relied on partial matching of `lib`)
    library("nichenetr", lib.loc = nichenetr_lib)
} else {
    library("nichenetr")
}

In [None]:
# figure settings
# All options go into ONE call: set_figure_params applies its own defaults for
# every argument that is not passed (e.g. dpi=80, frameon=True), so a sequence
# of separate calls silently reverts the dpi/frameon chosen by earlier calls.
sc.set_figure_params(dpi=200, frameon=False, facecolor="white", figsize=(5, 5))

In [None]:
# Name of the input file; later cells reuse it (minus the extension) to label
# their outputs, so the read path is derived from it rather than repeated.
file = "merged_data.h5ad"
adata = sc.read("../../../data/" + file)
adata

In [None]:
# Work out which labels this dataset uses for the treated and the control
# group: the first candidate (in priority order) that is a category of
# adata.obs["condition"].
condition_labels = list(adata.obs["condition"].cat.categories)
condition = next((x for x in ['bleomycin', 'bleo', 'Bleo', 'asbestos'] if x in condition_labels), None)
control = next((x for x in ['saline', 'healthy', 'UT', 'control'] if x in condition_labels), None)
if condition is None or control is None:
    # Fail with an explanation instead of a bare IndexError from `[...][0]`
    raise ValueError(
        "Could not identify the treatment and/or control label in "
        f"adata.obs['condition']; available categories: {condition_labels}"
    )

# Store the counts for later use
# NOTE: this cell modifies `adata` in place; running it a second time would
# copy the already-normalized matrix into "counts" and normalize again.
adata.layers["counts"] = adata.X.copy()
# log1p normalize the data
sc.pp.normalize_total(adata)
sc.pp.log1p(adata)
# make sure the format is correct
adata.obs["batch"] = adata.obs["batch"].astype("category")
adata.obs["manual_celltype_annotation"] = adata.obs["manual_celltype_annotation"].astype("category")

In [None]:
# Run the LIANA consensus (rank_aggregate) separately for the treated and the
# control cells, then save the result table, the annotated AnnData and a dotplot.
adatas = dict()
dataset = Path(file).stem  # input file name without its extension
results_dir = Path("./results")
liana_adata_dir = Path("../../../data/liana_anndatas")
# Create the output folders up front so that a missing directory cannot fail
# the cell only after the (slow) rank_aggregate step has finished.
for out_dir in (results_dir, liana_adata_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

for cond in [condition, control]:
    adatas[cond] = adata[adata.obs["condition"] == cond].copy()
    prefix = f"{dataset}_{cond}_liana"

    # run liana consensus
    print(f"Running rank_aggregate on {dataset}, {cond}")
    rank_aggregate(
        adatas[cond], groupby="manual_celltype_annotation", resource_name='mouseconsensus',
        return_all_lrs=True, use_raw=False, verbose=True)

    # One row per ligand-receptor pair, ordered by magnitude then specificity rank.
    # NOTE(review): drop_duplicates runs BEFORE the sort and keeps the first row
    # it meets for each pair, which is not necessarily the best-ranked
    # source/target combination - confirm this is intended.
    liana_res = (
        adatas[cond].uns["liana_res"]
        .drop_duplicates(["ligand_complex", "receptor_complex"])
        .sort_values(["magnitude_rank", "specificity_rank"])
    )
    liana_res.to_csv(results_dir / f"{prefix}.csv")
    adatas[cond].write(liana_adata_dir / f"{prefix}.h5ad", compression='gzip')

    fig = li.pl.dotplot(
        adata=adatas[cond],
        colour="magnitude_rank",
        size="specificity_rank",
        # both scales are inverted because small rank values are the strong
        # ones and should show up as the prominent dots
        inverse_colour=True,
        inverse_size=True,
        # since the rank_aggregate can also be interpreted as a probability distribution
        # we can again filter them according to their specificity significance
        # yet here the interactions are filtered according to
        # how consistently highly-ranked is their specificity across the methods
        filterby="specificity_rank",
        filter_lambda=lambda x: x <= 0.05,
        # again, we can also further order according to magnitude
        orderby="magnitude_rank",
        orderby_ascending=True,  # prioritize those with lowest values
        top_n=20,  # and we want to keep only the top 20 interactions
        figure_size=(46, 26),
        return_fig=True
    )
    fig.save(results_dir / f"{prefix}.png", dpi=500, limitsize=False)

## NicheNet

In [None]:
%%R
# Give the downloads below more time: raise R's timeout to 600 seconds
options(timeout = 600)

# NicheNet prior-knowledge (PK) resources for mouse, read straight from Zenodo
ligand_target_url <- "https://zenodo.org/record/7074291/files/ligand_target_matrix_nsga2r_final_mouse.rds"
lr_network_url <- "https://zenodo.org/record/7074291/files/lr_network_mouse_21122021.rds"

ligand_target_matrix <- readRDS(url(ligand_target_url))
lr_network <- readRDS(url(lr_network_url))

In [None]:
# The same three populations are used both as ligand senders and as receivers.
niche_celltypes = ("Fibroblasts", "Interstitial_macrophages", "Epithelial")
sender_celltypes = list(niche_celltypes)
receiver_celltypes = list(niche_celltypes)

In [None]:
# Helper function to obtain sufficiently expressed genes
from functools import reduce


def get_expressed_genes(adata, cell_type, expr_prop, groupby="manual_celltype_annotation"):
    """Return the genes detected in at least ``expr_prop`` of a cell type's cells.

    A gene counts as detected in a cell when its entry in ``adata.X`` is
    non-zero (for sparse matrices: when it is a stored entry).

    Parameters
    ----------
    adata
        AnnData-like object providing ``obs``, ``X`` and ``var_names``.
    cell_type
        Label in ``adata.obs[groupby]`` selecting the cells to inspect.
    expr_prop
        Minimum fraction of the selected cells in which a gene must be
        detected (inclusive threshold).
    groupby
        Column of ``adata.obs`` holding the cell-type labels.

    Returns
    -------
    numpy.ndarray
        Names of the genes passing the threshold, sorted by name. Empty (with
        a warning) when no cell carries the label ``cell_type``.
    """
    import warnings  # local import keeps this helper cell self-contained

    in_group = (adata.obs[groupby] == cell_type).to_numpy()
    n_cells = int(in_group.sum())
    if n_cells == 0:
        # Avoid a silent 0/0 division; a misspelled label should be noticed.
        warnings.warn(
            f"No cells with {groupby} == {cell_type!r}; returning no expressed genes."
        )
        return np.array([], dtype=object)

    temp = adata[in_group, :]
    # calculate proportions (sparse matrices expose getnnz, dense arrays do not)
    if hasattr(temp.X, "getnnz"):
        n_detected = temp.X.getnnz(axis=0)
    else:
        n_detected = np.count_nonzero(np.asarray(temp.X), axis=0)
    props = np.asarray(n_detected).ravel() / n_cells

    stats = pd.DataFrame({"genes": temp.var_names, "props": props}).sort_values("genes")

    # obtain expressed genes
    return stats.loc[stats["props"] >= expr_prop, "genes"].values

In [None]:
# Genes detected in at least 10% of the cells of each sender population,
# merged into a single sorted array of unique gene names.
sender_gene_sets = [
    get_expressed_genes(adata, cell_type=sender, expr_prop=0.1)
    for sender in sender_celltypes
]
sender_expressed = reduce(np.union1d, sender_gene_sets)

# Same computation for the receiver populations.
receiver_gene_sets = [
    get_expressed_genes(adata, cell_type=receiver, expr_prop=0.1)
    for receiver in receiver_celltypes
]
receiver_expressed = reduce(np.union1d, receiver_gene_sets)

In [None]:
%%R -i sender_expressed -i receiver_expressed
# All ligands ("from") and receptors ("to") listed in the ligand-receptor resource
ligands <- unique(lr_network$from)
receptors <- unique(lr_network$to)

# Restrict both to genes that are expressed in the data
expressed_ligands <- intersect(ligands, sender_expressed)
expressed_receptors <- intersect(receptors, receiver_expressed)

# Candidate ligands: expressed ligands with at least one expressed receptor
# in the resource
expressed_pairs <- lr_network %>%
  filter(from %in% expressed_ligands, to %in% expressed_receptors)
potential_ligands <- unique(expressed_pairs$from)

In [None]:
# Differential-expression results for ALL tested genes (not only the
# significant ones); the following cell reads its "name", "pvals" and "logFCs"
# columns.
# TODO: point deg_table_path at that table. The original cell was an unfinished
# placeholder (`deg = # all genes from table`) and did not parse.
deg_table_path = None  # assumed to be a CSV file - confirm the actual format
if deg_table_path is None:
    raise ValueError(
        "Set deg_table_path to the differential-expression table "
        "(columns: 'name', 'pvals', 'logFCs') before running this cell."
    )
deg = pd.read_csv(deg_table_path)

In [None]:
# Background set: every gene listed in the DE table, taken before filtering.
# NOTE: `deg` is overwritten below, so re-running this cell alone would
# shrink the background to the already filtered genes.
background_genes = deg["name"].values

# Gene set of interest: significant (p <= 0.05) genes with logFC above 1
is_significant = deg["pvals"] <= 0.05
is_upregulated = deg["logFCs"] > 1
deg = deg.loc[is_significant & is_upregulated]
geneset_oi = deg["name"].values