# Single-sample gene regulatory network inference with PANDA and LIONESS
This workflow generates single-sample PANDA-LIONESS gene regulatory networks (GRNs) using the R implementations of the tools from the Network Zoo (`netZooR`) and can be used to reproduce the single-sample networks used in (Pop et al., 2024). Because the networks take a long time to compute and are computationally demanding, we also provide the pre-computed indegrees and outdegrees used for the analysis in the Zenodo repository.

First, we load necessary libraries and set some global parameters.

In [None]:
rm(list=ls())
library(netZooR)
library(data.table)
library(biomaRt)

In [None]:
# Set the working directory. Every path below is relative to it, so the
# directory is created first when it does not exist yet (setwd() would
# otherwise stop with "cannot change working directory").
wd <- "results"
if (!dir.exists(wd)) {
    dir.create(wd, recursive = TRUE)
}
setwd(wd)

# data directories (relative to the working directory set above)
data_tcga <- "../data/TCGA"
data_gep <- "../data/GEPliver"
data_net <- "../data/network_priors"

# cancer types for which to generate networks; each name is used below as a
# sub-directory of data_tcga
cancers_tcga <- c("aml", "breast", "colon", "gbm", "kidney", "liver", "lung",
                  "melanoma", "ovarian", "sarcoma")

# Preparing data for PANDA
PANDA generates GRNs by passing messages between three networks: a prior network of protein-protein interactions (PPI) between TFs, a prior network of TF binding motifs at gene promoters, and a gene co-expression network. We subset our data to only include information common to both the motif prior and the gene expression data (same set of genes) and to both the motif prior and the PPI (same set of TFs).

In [None]:
# defining functions
#' @name map_gene_ids
#' @description Function that uses biomaRt to map gene IDs.
#' @param genes A vector with gene IDs to be mapped.
#' @param mappings A character vector of length 2 containing the
#' filter/attribute pair to use for mapping: `mappings[1]` is passed to
#' `getBM()` as the filter and `mappings[2]` as the attribute to retrieve.
#' These should be valid biomaRt filters/attributes
#' (e.g. c("entrezgene_id", "hgnc_symbol") to convert Entrez IDs to HGNC
#' symbols).
#' @param mart A character string indicating what mart to use. This must be a
#' valid biomaRt mart. Default "ensembl".
#' @param dataset Which dataset to use. This must be a valid biomaRt dataset.
#' Default is "hsapiens_gene_ensembl".
#'
#' @returns The result of `getBM()` for the requested attribute. Only
#' `mappings[2]` is requested, so the input IDs themselves are not part of
#' the result.
#'
#' @seealso \code{\link[biomaRt]{useMart}}
#' @seealso \code{\link[biomaRt]{listFilters}}
#' @seealso \code{\link[biomaRt]{listAttributes}}

map_gene_ids <- function(genes, mappings, mart = "ensembl",
                         dataset = "hsapiens_gene_ensembl") {
    # fail early with a readable message instead of an opaque biomaRt error
    if (length(mappings) != 2L) {
        stop("`mappings` must have length 2: c(<filter>, <attribute>).",
             call. = FALSE)
    }

    # connect to the requested mart/dataset (kept in its own variable so the
    # `mart` argument is not overwritten)
    ensembl <- useMart(biomart = mart, dataset = dataset)

    # query the mart: filter on mappings[1], retrieve mappings[2]
    getBM(attributes = mappings[2], filters = mappings[1],
          values = genes, mart = ensembl)
}

#' @name remove_after_character
#' @description Removes all characters from a string after the first
#' occurrence of a given character, including that character. Strings that do
#' not contain the character (and NA values) are returned unchanged.
#' @param string String (or character vector) to be edited.
#' @param character Character to be used. It is interpreted as a regular
#' expression, so special characters must be escaped (e.g. "\\." or "\\|").
#'
#' @returns The edited string(s), same length as `string`.

remove_after_character <- function(string, character) {
    # position of the first match in every element (-1 when there is none)
    pos <- regexpr(character, string)
    hit <- !is.na(pos) & pos > 0L

    # keep only what precedes the match; this also handles a match at the
    # very end of the string, which a strsplit()-based check would miss
    string[hit] <- substr(string[hit], 1L, pos[hit] - 1L)

    string
}

In [None]:
# load panda priors; both files are read from the network prior directory
prior <- data.frame(fread(file.path(data_net, "panda_prior.txt")))
ppi <- data.frame(fread(file.path(data_net, "ppi2015_freeze.txt")))

for (i in cancers_tcga) {
    # load expression
    exp <- data.frame(fread(file.path(data_tcga, i, "log_exp")))

    # remove everything but the gene symbols: drop what follows the first "."
    # and then what follows the first "|"
    genes <- exp$probe
    genes <- vapply(genes, remove_after_character, character(1),
                    character = "\\.", USE.NAMES = FALSE)
    genes <- vapply(genes, remove_after_character, character(1),
                    character = "\\|", USE.NAMES = FALSE)

    exp$probe <- genes

    # only keep unique genes (the first row of every symbol is retained)
    exp <- exp[!duplicated(exp$probe), ]
    genes <- exp$probe

    # make gene names row names and drop the probe column; the column is
    # removed by name so the table stays a data.frame even with one sample
    rownames(exp) <- exp$probe
    exp$probe <- NULL

    # intersect with prior (second prior column is matched against the genes)
    idx <- intersect(genes, unique(prior[, 2]))
    exp <- exp[idx, , drop = FALSE]
    prior2 <- prior[prior[, 2] %in% idx, ]

    # intersect priors: IDs from the first prior column that occur in both
    # PPI columns
    idx_pri <- intersect(prior2[, 1], ppi[, 1])
    idx_pri <- intersect(idx_pri, ppi[, 2])

    # keep PPI edges whose two endpoints are BOTH in that set (the second
    # filter must be applied on top of the first, not to the full table)
    ppi2 <- ppi[ppi[, 1] %in% idx_pri & ppi[, 2] %in% idx_pri, ]

    sampleorder <- colnames(exp)

    # output files; the expression table is written with the gene symbols as
    # row names and without a header, the sample names go to sampleorder.txt
    write.table(prior2,
                file = file.path(data_tcga, i, "prior.txt"),
                quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")
    write.table(ppi2,
                file = file.path(data_tcga, i, "ppi.txt"),
                quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")
    write.table(exp,
                file = file.path(data_tcga, i, "log_exp_fil.txt"),
                quote = FALSE, row.names = TRUE, col.names = FALSE, sep = "\t")
    write.table(sampleorder,
                file = file.path(data_tcga, i, "sampleorder.txt"),
                quote = FALSE, row.names = FALSE, col.names = FALSE, sep = "\t")
}

# Generating networks
We use the `lioness` function from `netZooR` to create single sample networks with PANDA and LIONESS. We then calculate the indegree and outdegree of the resulting networks.

In [None]:
for (i in cancers_tcga) {
    # load the data; these files are written with col.names = FALSE, so
    # fread is told explicitly that there is no header line
    expr <- data.frame(fread(file.path(data_tcga, i, "log_exp_fil.txt"),
                             header = FALSE))
    motif <- data.frame(fread(file.path(data_tcga, i, "prior.txt"),
                              header = FALSE))
    ppi <- data.frame(fread(file.path(data_tcga, i, "ppi.txt"),
                            header = FALSE))
    # one sample name per line -> plain character vector
    sampleorder <- readLines(file.path(data_tcga, i, "sampleorder.txt"))

    # the first column holds the gene symbols that were written as row names;
    # move them back so that expr only contains the per-sample values
    # NOTE(review): netZooR is expected to identify genes via rownames(expr)
    # - confirm against the installed netZooR version
    rownames(expr) <- expr[[1]]
    expr <- expr[, -1, drop = FALSE]

    # check before the long computation that every sample has a name
    stopifnot(ncol(expr) == length(sampleorder))

    # run lioness
    net <- lioness(expr = expr,
                   motif = motif,
                   ppi = ppi,
                   network.inference.method = "panda",
                   ncores = 10) # adjust depending on the resources available

    # label the per-sample networks (a character vector is required here)
    names(net) <- sampleorder

    # save
    save(net, file = file.path(data_tcga, i, "net.RData"))

    # calculate degree for every network in `net`:
    # indegree = column sums, outdegree = row sums
    # (presumably rows are TFs and columns are target genes - TODO confirm)
    indegree <- data.frame(lapply(net, colSums))

    outdegree <- data.frame(lapply(net, rowSums))

    save(indegree, file = file.path(data_tcga, i, "indegree.RData"))
    save(outdegree, file = file.path(data_tcga, i, "outdegree.RData"))
}