## PF atlas: niche signals imprinting cell type identites

In [1]:
suppressPackageStartupMessages(library(nichenetr))
suppressPackageStartupMessages(library(Seurat))
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(patchwork))
suppressPackageStartupMessages(library(data.table))
suppressPackageStartupMessages(library(Matrix))
suppressPackageStartupMessages(library(igraph))
suppressPackageStartupMessages(library(gridExtra))
suppressPackageStartupMessages(library(plotrix))
suppressPackageStartupMessages(library(ggsci))

“package ‘plotrix’ was built under R version 4.0.5”
“package ‘ggsci’ was built under R version 4.0.5”


In [2]:
## set working directory
setwd('/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo/02_figures/NicheNet/')

### Path to required input data

In [3]:
geneset_dir = '/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo/01_data/NicheNet_inputs/221029_PF_atlas_celltype_marker_table_OVERALL_cell_circuits.csv'
table_dir = '/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo/01_data/NicheNet_inputs/'
results_dir = '/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo/01_data/NicheNet_outputs/ligand_activities_cell_circuit/INVIVO_'

### Load NicheNet models and networks

In [4]:
## load NicheNet models and networks ##
ligand_target_matrix <- readRDS('/home/niklas/data/nichenet_models/ligand_target_matrix_HUMAN.rds')
lr_network <- readRDS('/home/niklas/data/nichenet_models/ligand_receptor_network_HUMAN.rds')
weighted_networks_lr <- readRDS('/home/niklas/data/nichenet_models/weighted_ligand_receptor_network_HUMAN.rds')

### Function to perform NicheNet Ligand activity analysis

In [5]:
## function to perform ligand activity analysis ##
ligand_activity_analysis <- function(sender_ct, receiver_ct, receiver_background, geneset_oi, 
                            geneset_title,
                            pct_expr_table, pct_thresh = 0.10,
                            pearson_thresh = 0.05){
    
    
    ## retrieve genes expressed by receiver
    expr_genes_receiver = rownames(pct_expr_table[pct_expr_table[, receiver_background] > pct_thresh, ])
    background_expr_genes = expr_genes_receiver %>% .[. %in% rownames(ligand_target_matrix)]
    
    ## retrieve genes expressed by sender: in this case marker genes
    #list_expr_genes_sender = lapply(sender_ct, function(x){rownames(pct_expr_table[pct_expr_table[, x] > pct_thresh, ])})
    list_expr_genes_sender = marker %>% filter(cell_type == sender_ct)
    # only significant genes: pval_adj < 0.05
    list_expr_genes_sender = list_expr_genes_sender %>% filter(pval_adj < 0.05)
    #list_expr_genes_sender = list_expr_genes_sender %>% filter(pct_background < 0.25)
    expr_genes_sender = list_expr_genes_sender %>% unlist() %>% unique()
    
    ## status message ##
    print(paste0("Using ", length(geneset_oi), " genes differently regulated genes in ", receiver_ct, " (",
                 geneset_title, ")"))
    
    ### STEP1: Ligand activity analysis ###
    ## Define a set of potential ligands and receptors 
    # retrieve ligands and receptors
    ligands = lr_network %>% pull(from) %>% unique()
    receptors = lr_network %>% pull(to) %>% unique()
    # ligands expressed by sender celltypes
    expr_ligands = intersect(ligands, expr_genes_sender) 
    # receptor expressed by receiver celltypes
    expr_receptors = intersect(receptors, expr_genes_receiver)
    ### status messages ###
    print(paste0("Expressed Ligands ", length(expr_ligands)))
    print(paste0("Expressed Receptors ", length(expr_receptors)))
    
    ## filter ligands
    # only consider ligands with matching receptors (according to NicheNets databases)
    potential_ligands = lr_network %>% filter(from %in% expr_ligands & to %in% expr_receptors) %>%
                        pull(from) %>% unique()
    ### status message ###
    print(paste0("Potential Ligands ", length(potential_ligands)))
    
    ## predict ligand activities
    ligand_activities = predict_ligand_activities(geneset = geneset_oi,
                                                  background_expressed_genes = background_expr_genes,
                                                  ligand_target_matrix = ligand_target_matrix,
                                                  potential_ligands = potential_ligands)

    ## rank ligands by pearson correlation coefficient
    ligand_activities = ligand_activities %>% arrange(-pearson) %>% mutate(rank = rank(desc(pearson)))
    
    # filter consider ligands with pearson's correlation >= pearson_tresh
    ligand_activities = ligand_activities %>% filter(pearson >= pearson_thresh)
    
    ### status message ###
    print(paste0("Top ranked ligands ", length(ligand_activities$test_ligand)))
    
    return(ligand_activities)  
}

### Function to perform target gene prediction

In [6]:
target_gene_prediction <- function(best_upstream_ligands, geneset_oi, target_thresh = 0.33, n_targets = 500){
    
    ## identify ligand targets
    active_ligand_target_links_df = best_upstream_ligands %>% 
                                    lapply(get_weighted_ligand_target_links,geneset = geneset_oi, ligand_target_matrix = ligand_target_matrix, n = n_targets) %>% bind_rows() %>% drop_na()
    active_ligand_target_links = prepare_ligand_target_visualization(ligand_target_df = active_ligand_target_links_df, ligand_target_matrix = ligand_target_matrix, cutoff = target_thresh)
    
    ## reformat data
    order_ligands = intersect(best_upstream_ligands, colnames(active_ligand_target_links)) %>% rev() %>% make.names()
    order_targets = active_ligand_target_links_df$target %>% unique() %>% 
                    intersect(rownames(active_ligand_target_links)) %>% make.names()
    rownames(active_ligand_target_links) = rownames(active_ligand_target_links) %>% make.names() 
    colnames(active_ligand_target_links) = colnames(active_ligand_target_links) %>% make.names() # make.names() for heatmap visualization of genes like H2-T23
    
    ## final ligand-target heatmap
    vis_ligand_target = active_ligand_target_links[order_targets,order_ligands] %>% t()
    
    ## output: ligand-target matrix
    return(vis_ligand_target) 
    
}

In [7]:
cell_type_names <- c('Aberrant Basaloid','Myofibroblasts','ectopic EC', 'SMC/Pericytes_ILD', 'Profibrotic Macrophages')
cell_type_labels <- c('Aberrant_Basaloid','Myofibroblasts','ectopic_EC', 'SMC_Pericytes_ILD', 'Profibrotic_Macrophages')

### Run NicheNet iteratively to detect crosstalk between all cells for FC vs CC

In [8]:
## read tables
avg_expr <- read.csv(paste0(table_dir, '221029_PF_atlas_OVERALL_cell_circuits_avg_expr_SCALED_cell_type_level.csv'), row.names = 1, check.names = F, header = T)
pct_expr <- read.csv(paste0(table_dir, '221029_PF_atlas_OVERALL_cell_circuits_pct_expr_cell_type_level.csv'), row.names = 1, check.names = F, header = T)
marker <- read.csv(geneset_dir, header = T)

In [9]:
for(i in 1:length(cell_type_names)){
    
    ## STEP 1: define RECEIVER and SENDER cells
    receiver_ct <- cell_type_names[i]
    receiver_label <- cell_type_labels[i]
    
    all_senders <- cell_type_names[! cell_type_names %in% cell_type_names[i]]
    all_sender_labels <- cell_type_labels[! cell_type_labels %in% cell_type_labels[i]]

    
    ## STEP 2: define geneset of interest
    # read dge table
    geneset <- marker %>% filter(cell_type == receiver_ct)
    # only significant genes: pval_adj < 0.5 and log2FC > threshold
    geneset = geneset %>% filter(pval_adj < 0.05)
    geneset = geneset %>% filter(pct_cell_type > 0.5)
    geneset = geneset %>% top_n(200, logfoldchange)
    # only genes expressed in at least 25% per group
    geneset_oi <- geneset$gene
    geneset_title <- paste0(receiver_ct, ' - tissue identity')
    print(paste0(receiver_ct, ': Proceeding with geneset of interest of length: ', length(geneset_oi)))
    
    ## STEP 3: run ligand activity analysis
    top_ligands_table <- ligand_activity_analysis(all_senders,
                                                  receiver_ct,
                                                  receiver_background = receiver_ct,
                                                  geneset_oi = geneset_oi,
                                                  geneset_title = geneset_title,
                                                  pct_expr_table = pct_expr,
                                                  pearson_thresh = 0.00)
    
    ## save ligand activity analysis
    print(paste0('Writing ligand activity table to:  ', results_dir, 'nichenet_ligand_act_to_', receiver_label, '.csv'))
    write.csv(top_ligands_table, paste0(results_dir, 'nichenet_ligand_act_to_', receiver_label, '.csv'))
    
    ## STEP 5: target gene prediction
    ligand_target_prediction <- target_gene_prediction(best_upstream_ligands = top_ligands_table$test_ligand, geneset_oi = geneset_oi)
    print(paste0('Writing ligand target matrix table to:  ', results_dir, 'nichenet_ligand_target_matrix_to_', receiver_label, '.csv'))
    write.csv(ligand_target_prediction, paste0(results_dir, 'nichenet_ligand_target_matrix_to_', receiver_label, '.csv'))

}

[1] "Aberrant Basaloid: Proceeding with geneset of interest of length: 200"
[1] "Using 200 genes differently regulated genes in Aberrant Basaloid (Aberrant Basaloid - tissue identity)"
[1] "Expressed Ligands 22"
[1] "Expressed Receptors 188"
[1] "Potential Ligands 16"
[1] "Top ranked ligands 16"
[1] "Writing ligand activity table to:  /home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo/01_data/NicheNet_outputs/ligand_activities_cell_circuit/INVIVO_nichenet_ligand_act_to_Aberrant_Basaloid.csv"
[1] "Writing ligand target matrix table to:  /home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo/01_data/NicheNet_outputs/ligand_activities_cell_circuit/INVIVO_nichenet_ligand_target_matrix_to_Aberrant_Basaloid.csv"
[1] "Myofibroblasts: Proceeding with geneset of interest of length: 92"
[1] "Using 92 genes differently regulated genes in Myofibroblasts (Myofibroblasts - tissue identity)"
[1] "Expressed Ligands 25"
[1] "Expressed Receptors 123"
[1] "Potential Ligands 22"
[1] "T

In [10]:
#for(i in 1:length(sender_names)){
#    
#    ## STEP 1: define SENDER cells
#    sender_ct <- sender_names[i]
#    sender_label <- sender_labels[i]
#    
#    ## loop over all RECEIVER cells
#    for(i in 1:length(cell_type_names)){
#        
#        ## STEP 2: define RECEIVER cells
#        receiver_ct <- cell_type_names[i]
#        receiver_label <- cell_type_labels[i]
#        
#        ## STEP 3: define geneset of interest
#        # read dge table
#        geneset <- marker %>% filter(cell_type == receiver_ct)
#        # only significant genes: pval_adj < 0.5 and log2FC > threshold
#        geneset = geneset %>% filter(pval_adj < 0.05)
#        geneset = geneset %>% filter(pct_cell_type > 0.5)
#        geneset = geneset %>% top_n(logfoldchange, 200)
#        # only genes expressed in at least 25% per group
#        #geneset = geneset %>% filter(paste0('pct.',receiver_ct) > 0.25)
#        geneset_oi <- geneset$gene
#        geneset_title <- paste0(receiver_ct, ' - tissue identity')
#        print(paste0(receiver_ct, ': Proceeding with geneset of interest of length: ', length(geneset_oi)))
#        
#        ## STEP 4: run ligand activity analysis
#        top_ligands_table <- ligand_activity_analysis(sender_ct,
#                                                      receiver_ct,
#                                                      geneset_oi = geneset_oi,
#                                                      geneset_title = geneset_title,
#                                                      pct_expr_table = pct_expr,
#                                                      pearson_thresh = 0.05)
#        
#        ## save ligand activity analysis
#        print(paste0('Writing ligand activity table to:  ', results_dir, 'nichenet_ligand_act_', sender_label, '_to_', receiver_label, '.csv'))
#        write.csv(top_ligands_table, paste0(results_dir, 'nichenet_ligand_act_', sender_label, '_to_', receiver_label, '.csv'))
#        
#        
#        ## STEP 5: target gene prediction
#        ligand_target_prediction <- target_gene_prediction(best_upstream_ligands = top_ligands_table$test_ligand, geneset_oi = geneset_oi)
#        print(paste0('Writing ligand target matrix table to:  ', results_dir, 'nichenet_ligand_target_matrix_', sender_label, '_to_', receiver_label, '.csv'))
#        write.csv(ligand_target_prediction, paste0(results_dir, 'nichenet_ligand_target_matrix_', sender_label, '_to_', receiver_label, '.csv'))
#    }
#}