diff --git a/DESCRIPTION b/DESCRIPTION index 29593462..4e8a8718 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Package: netZooR Type: Package Title: Unified methods for the inference and analysis of gene regulatory networks -Version: 1.1.15 +Version: 1.2.1 Date: 2022-07-07 Authors@R: c(person("Marouen", "Ben Guebila", email = "benguebila@hsph.harvard.edu", role = c("aut","cre"), comment = c(ORCID = "0000-0001-5934-966X")), diff --git a/NAMESPACE b/NAMESPACE index b73dc999..29f527a0 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,6 +31,7 @@ export(monsterTransitionNetworkPlot) export(monsterTransitionPCAPlot) export(monsterdTFIPlot) export(otter) +export(pandaDiffEdges) export(pandaPy) export(pandaToAlpaca) export(pandaToCondorObject) diff --git a/R/PANDA.R b/R/PANDA.R index 40f9c3f3..976a32fe 100644 --- a/R/PANDA.R +++ b/R/PANDA.R @@ -65,7 +65,7 @@ #' -pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu", precision="double",save_memory=FALSE, save_tmp=TRUE, keep_expression_matrix=FALSE, modeProcess="union", remove_missing=FALSE){ +pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu", precision="double",save_memory=FALSE, save_tmp=TRUE, keep_expression_matrix=FALSE, modeProcess="union", remove_missing=FALSE, with_header=FALSE){ if(missing(expr_file)){ stop("Please provide the path of gene expression data file to 'expr_file' variable") } @@ -106,6 +106,13 @@ pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu", keepexpression.str <- "keep_expression_matrix=True" } else{ keepexpression.str <- "keep_expression_matrix=False" } + # with header option + if(with_header==FALSE){ + withheader.str <- "with_header=False" + }else if (with_header==TRUE){ + withheader.str <- "with_header=True" + } + # when pre-processing mode is legacy if(modeProcess == "legacy"){ @@ -130,7 +137,9 @@ pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu", 
reticulate::source_python(pandapath,convert = TRUE) # invoke Python script to create a Panda object - obj.str <- paste("panda_obj=Panda(", expr.str, ",", motif.str,",", ppi.str, ",", computing.str, ",", precision.str, ",", savememory.str, ",", savetmp.str, "," , keepexpression.str, ",", mode.str, ")", sep ='') + obj.str <- paste("panda_obj=Panda(", expr.str, ",", motif.str,",", ppi.str, ",", + computing.str, ",", precision.str, ",", savememory.str, ",", savetmp.str, "," , + keepexpression.str, ",", mode.str, "," , withheader.str, ")", sep ='') # run Python code py_run_string(obj.str) diff --git a/R/SPIDER.R b/R/SPIDER.R new file mode 100644 index 00000000..7ec15aed --- /dev/null +++ b/R/SPIDER.R @@ -0,0 +1,308 @@ +#' Seeding PANDA Interactions to Derive Epigenetic Regulation +#' +#' This function runs the SPIDER algorithm +#' +#' @param motif A motif dataset, a data.frame, matrix or exprSet containing 3 columns. +#' Each row describes an motif associated with a transcription factor (column 1) a +#' gene (column 2) and a score (column 3) for the motif. +#' @param epifilter A binary matrix that is of the same size as motif that will be used as a mask to filter motif +#' for open chromatin region. Motif interactions that fall in open chromatin region will be kept and the others are removed. +#' @param expr An expression dataset, as a genes (rows) by samples (columns) data.frame +#' @param ppi A Protein-Protein interaction dataset, a data.frame containing 3 columns. +#' Each row describes a protein-protein interaction between transcription factor 1(column 1), +#' transcription factor 2 (column 2) and a score (column 3) for the interaction. +#' @param alpha value to be used for update variable, alpha (default=0.1) +#' @param hamming value at which to terminate the process based on hamming distance (default 10^-3) +#' @param iter sets the maximum number of iterations SPIDER can run before exiting. 
+#' @param progress Boolean to indicate printing of output for algorithm progress. +#' @param output a vector containing which networks to return. Options include "regulatory", +#' "coregulatory", "cooperative". +#' @param zScale Boolean to indicate use of z-scores in output. False will use [0,1] scale. +#' @param randomize method by which to randomize gene expression matrix. Default "None". Must +#' be one of "None", "within.gene", "by.gene". "within.gene" randomization scrambles each row +#' of the gene expression matrix, "by.gene" scrambles gene labels. +#' @param cor.method Correlation method, default is "pearson". +#' @param scale.by.present Boolean to indicate scaling of correlations by percentage of positive samples. +#' @param remove.missing.ppi Boolean to indicate whether TFs in the PPI but not in the motif data should be +#' removed. Only when mode=='legacy'. +#' @param remove.missing.motif Boolean to indicate whether genes targeted in the motif data but not the +#' expression data should be removed. Only when mode=='legacy'. +#' @param remove.missing.genes Boolean to indicate whether genes in the expression data but lacking +#' information from the motif prior should be removed. Only when mode=='legacy'. +#' @param edgelist Boolean to indicate if edge lists instead of matrices should be returned. +#' @param mode The data alignment mode. The mode 'union' takes the union of the genes in the expression matrix and the motif +#' and the union of TFs in the ppi and motif and fills the matrices with zeros for nonintersecting TFs and genes, 'intersection' +#' takes the intersection of genes and TFs and removes nonintersecting sets, 'legacy' is the old behavior with version 1.19.3. +#' Parameters remove.missing.ppi, remove.missing.motif, remove.missing.genes work only with mode=='legacy'. 
#' @details
#' \code{epifilter} is required: it must have one row per row of \code{motif}, with the same
#' transcription factor (column 1) and gene (column 2) in the same order, and a mask value in
#' column 3 that is multiplied into the motif score. When \code{expr} is \code{NULL} the gene list
#' is taken from the motif data and the co-regulation network is initialized to the identity
#' matrix; when \code{ppi} is \code{NULL} the cooperativity network is initialized to the identity
#' matrix.
#' @keywords keywords
#' @importFrom matrixStats rowSds
#' @importFrom matrixStats colSds
#' @importFrom Biobase assayData
#' @importFrom reshape melt.array
#' @export
#' @return An object of class "panda" containing matrices describing networks achieved by convergence
#' with SPIDER algorithm.\cr
#' "regNet" is the regulatory network\cr
#' "coregNet" is the coregulatory network\cr
#' "coopNet" is the cooperative network
#' @examples
#' data(pandaToyData)
#' # toy accessibility mask: same TF/gene pairs as the motif prior, 0/1 in column 3
#' epifilter <- pandaToyData$motif
#' epifilter[,3] <- rbinom(nrow(epifilter), 1, 0.8)
#' spiderRes <- spider(pandaToyData$motif, expr=pandaToyData$expression,
#'     epifilter=epifilter, ppi=pandaToyData$ppi, hamming=.1, progress=TRUE)
#' @references
#' Sonawane, Abhijeet Rajendra, et al. "Constructing gene regulatory networks using epigenetic data." npj Systems Biology and Applications 7.1 (2021): 1-13.
spider <- function(motif,expr=NULL,epifilter=NULL,ppi=NULL,alpha=0.1,hamming=0.001,
                   iter=NA,output=c('regulatory','coexpression','cooperative'),
                   zScale=TRUE,progress=FALSE,randomize=c("None", "within.gene", "by.gene"),cor.method="pearson",
                   scale.by.present=FALSE,edgelist=FALSE,remove.missing.ppi=FALSE,
                   remove.missing.motif=FALSE,remove.missing.genes=FALSE,mode="union"){

  randomize <- match.arg(randomize)
  # Fail early on an unknown mode instead of hitting "object 'num.genes' not found" later.
  mode <- match.arg(mode, c("union", "intersection", "legacy"))
  if(progress)
    print('Initializing and validating')

  ## Validate the chromatin accessibility mask ##
  if (is.null(epifilter)){
    stop("Please provide the chromatin accessibility mask to the 'epifilter' variable")
  }
  # The mask is applied row by row, so it must list the same (TF, gene) pairs as the
  # motif prior in the same order. isTRUE() keeps the condition scalar even with NAs.
  sameEdges <- nrow(epifilter)==nrow(motif) &&
    isTRUE(all(as.character(epifilter[,1])==as.character(motif[,1]))) &&
    isTRUE(all(as.character(epifilter[,2])==as.character(motif[,2])))
  if (!sameEdges){
    stop('Chromatin accessibility data does not match motif data size and order.')
  }
  # Motif prior restricted to open chromatin. Kept row-aligned with `motif`, which is
  # left unmasked because it is handed to prepResult() at the end.
  maskedMotif <- motif
  maskedMotif[,3] <- motif[,3]*epifilter[,3]

  # inherits() stays scalar for matrices, whose class() has length 2.
  if(inherits(expr, "ExpressionSet"))
    expr <- assayData(expr)[["exprs"]]

  ## Expression data: legacy filtering ##
  if (is.null(expr)){
    # Use only the motif data here for the gene list
    num.conditions <- 0
    if (randomize!="None"){
      warning("Randomization ignored because gene expression is not used.")
      randomize <- "None"
    }
  } else if (mode=='legacy'){
    if(remove.missing.genes){
      # remove genes from expression data that are not in the motif data
      n <- nrow(expr)
      expr <- expr[which(rownames(expr)%in%motif[,2]), , drop=FALSE]
      message(sprintf("%s genes removed that were not present in motif", n-nrow(expr)))
    }
    if(remove.missing.motif){
      # remove genes from motif data that are not in the expression data;
      # one index filters both tables so the mask stays aligned with the prior
      n <- nrow(motif)
      keep <- which(motif[,2]%in%rownames(expr))
      motif <- motif[keep,]
      maskedMotif <- maskedMotif[keep,]
      message(sprintf("%s motif edges removed that targeted genes missing in expression data", n-nrow(motif)))
    }
    # Keep everything sorted alphabetically
    expr <- expr[order(rownames(expr)), , drop=FALSE]
  }

  ## TF and gene universes ##
  motifGenes <- unique(motif[,2])
  motifTFs <- unique(motif[,1])
  if (mode=='legacy'){
    tf.names <- sort(motifTFs)
    gene.names <- if (is.null(expr)) sort(motifGenes) else sort(unique(rownames(expr)))
  } else {
    # A missing data source contributes no extra names: fall back to the motif's own.
    exprGenes <- if (is.null(expr)) motifGenes else rownames(expr)
    ppiTFs <- if (is.null(ppi)) motifTFs else unique(ppi[,1])
    if (mode=='union'){
      gene.names <- unique(union(exprGenes, motifGenes))
      tf.names <- unique(union(ppiTFs, motifTFs))
    } else {
      gene.names <- unique(intersect(exprGenes, motifGenes))
      tf.names <- unique(intersect(ppiTFs, motifTFs))
    }
    # Names are used as character row indices below; never index by factor codes.
    gene.names <- as.character(gene.names)
    tf.names <- as.character(tf.names)
  }
  num.TFs <- length(tf.names)
  num.genes <- length(gene.names)

  # Bad data checking
  if (num.genes==0){
    stop("Error validating data. No matched genes.\n Please ensure that gene names in expression data match gene names in motif data")
  }

  ## Align and optionally randomize the expression matrix ##
  if (!is.null(expr)){
    if (mode!='legacy'){
      # One row per gene in gene.names; genes without expression data stay at zero.
      aligned <- as.data.frame(matrix(0,num.genes,ncol(expr)))
      rownames(aligned) <- gene.names
      shared <- gene.names[which(gene.names%in%rownames(expr))]
      aligned[shared,] <- expr[shared, , drop=FALSE]
      expr <- aligned
    }
    num.conditions <- ncol(expr)
    if (randomize=='within.gene'){
      expr <- t(apply(expr, 1, sample))
      if(progress)
        print("Randomizing by reordering each gene's expression")
    } else if (randomize=='by.gene'){
      rownames(expr) <- sample(rownames(expr))
      expr <- expr[order(rownames(expr)), , drop=FALSE]
      if(progress)
        print("Randomizing by reordering each gene labels")
    }
  }

  ## Prior networks for 'union' / 'intersection' ##
  # Built before the duplicate-edge aggregation below, as in the original
  # implementation (for these modes the last duplicate edge wins).
  if (mode!='legacy'){
    if (is.null(ppi)){
      # If no ppi data is given, we use the identity matrix
      tfCoopNetwork <- diag(num.TFs)
      dimnames(tfCoopNetwork) <- list(tf.names, tf.names)
    } else {
      tfCoopNetwork <- matrix(0, num.TFs, num.TFs, dimnames=list(tf.names, tf.names))
      # Symmetric fill; edges touching a TF outside tf.names are skipped.
      tfCoopNetwork <- spiderFillEdges(tfCoopNetwork, ppi[,1], ppi[,2], ppi[,3])
      tfCoopNetwork <- spiderFillEdges(tfCoopNetwork, ppi[,2], ppi[,1], ppi[,3])
    }
    regulatoryNetwork <- matrix(0, num.TFs, num.genes, dimnames=list(tf.names, gene.names))
    regulatoryNetwork <- spiderFillEdges(regulatoryNetwork, maskedMotif[,1],
                                         maskedMotif[,2], maskedMotif[,3])
  }

  ## Gene co-regulation network ##
  if(num.conditions==0) {
    warning('No expression data given. SPIDER will run based on an identity co-regulation matrix')
    geneCoreg <- diag(num.genes)
  } else if(num.conditions<3) {
    warning('Not enough expression conditions detected to calculate correlation. Co-regulation network will be initialized to an identity matrix.')
    geneCoreg <- diag(num.genes)
  } else {
    if(scale.by.present){
      num.positive <- (expr>0)%*%t((expr>0))
      geneCoreg <- cor(t(expr), method=cor.method, use="pairwise.complete.obs")*(num.positive/num.conditions)
    } else {
      geneCoreg <- cor(t(expr), method=cor.method, use="pairwise.complete.obs")
    }
    if(progress)
      print('Verified sufficient samples')
  }
  if (any(is.na(geneCoreg))){ #check for NA and replace them by zero
    diag(geneCoreg) <- 1
    geneCoreg[is.na(geneCoreg)] <- 0
  }

  if (any(duplicated(motif))) {
    warning("Duplicate edges have been found in the motif data. Weights will be summed.")
    # Same grouping for both tables, so their rows stay aligned after aggregation.
    maskedMotif <- aggregate(maskedMotif[,3], by=list(maskedMotif[,1], maskedMotif[,2]), FUN=sum)
    motif <- aggregate(motif[,3], by=list(motif[,1], motif[,2]), FUN=sum)
  }

  ## Prior networks for 'legacy' ##
  if(mode=='legacy'){
    # Unmatched names are NOT skipped here: as before, legacy mode expects the caller
    # to use the remove.missing.* switches to make the inputs consistent.
    Idx1 <- match(maskedMotif[,1], tf.names)
    Idx2 <- match(maskedMotif[,2], gene.names)
    Idx <- (Idx2-1)*num.TFs+Idx1
    regulatoryNetwork <- matrix(data=0, num.TFs, num.genes)
    # The prior is the motif score masked by chromatin accessibility, as in the other modes.
    regulatoryNetwork[Idx] <- maskedMotif[,3]
    colnames(regulatoryNetwork) <- gene.names
    rownames(regulatoryNetwork) <- tf.names
    # PPI data
    # If no ppi data is given, we use the identity matrix
    tfCoopNetwork <- diag(num.TFs)
    # Else we convert our two-column data.frame to a matrix
    if (!is.null(ppi)){
      if(any(duplicated(ppi))){
        warning("Duplicate edges have been found in the PPI data. Weights will be summed.")
        ppi <- aggregate(ppi[,3], by=list(ppi[,1], ppi[,2]), FUN=sum)
      }
      if(remove.missing.ppi){
        # remove edges in the PPI data that target TFs not in the motif
        n <- nrow(ppi)
        ppi <- ppi[which(ppi[,1]%in%tf.names & ppi[,2]%in%tf.names),]
        message(sprintf("%s PPI edges removed that were not present in motif", n-nrow(ppi)))
      }
      Idx1 <- match(ppi[,1], tf.names)
      Idx2 <- match(ppi[,2], tf.names)
      Idx <- (Idx2-1)*num.TFs+Idx1
      tfCoopNetwork[Idx] <- ppi[,3]
      Idx <- (Idx1-1)*num.TFs+Idx2
      tfCoopNetwork[Idx] <- ppi[,3]
    }
    colnames(tfCoopNetwork) <- tf.names
    rownames(tfCoopNetwork) <- tf.names
  }

  ## Run SPIDER ##
  tic <- proc.time()[3]

  # adjusting degree distribution
  regulatoryNetwork <- degreeAdjust(regulatoryNetwork)

  if(progress)
    print('Normalizing networks...')
  regulatoryNetwork <- normalizeNetwork(regulatoryNetwork)
  tfCoopNetwork <- normalizeNetwork(tfCoopNetwork)
  geneCoreg <- normalizeNetwork(geneCoreg)

  if(progress)
    print('Learning Network...')

  minusAlpha <- 1-alpha
  step <- 0
  hamming_cur <- 1
  if(progress)
    print("Using tanimoto similarity")
  while(hamming_cur>hamming){
    if ((!is.na(iter))&&step>=iter){
      print(paste("Reached maximum iterations, iter =",iter))
      break
    }
    Responsibility <- tanimoto(tfCoopNetwork, regulatoryNetwork)
    Availability <- tanimoto(regulatoryNetwork, geneCoreg)
    RA <- 0.5*(Responsibility+Availability)

    # Mean absolute change of the regulatory network; drives the stopping rule.
    hamming_cur <- sum(abs(regulatoryNetwork-RA))/(num.TFs*num.genes)
    regulatoryNetwork <- minusAlpha*regulatoryNetwork + alpha*RA

    tfCoopUpdate <- tanimoto(regulatoryNetwork, t(regulatoryNetwork))
    tfCoopUpdate <- update.diagonal(tfCoopUpdate, num.TFs, alpha, step)
    tfCoopNetwork <- minusAlpha*tfCoopNetwork + alpha*tfCoopUpdate

    CoReg2 <- tanimoto(t(regulatoryNetwork), regulatoryNetwork)
    CoReg2 <- update.diagonal(CoReg2, num.genes, alpha, step)
    geneCoreg <- minusAlpha*geneCoreg + alpha*CoReg2

    if(progress)
      message("Iteration ", step, ": hamming distance = ", round(hamming_cur,5))
    step <- step+1
  }

  toc <- proc.time()[3] - tic
  if(progress)
    message("Successfully ran SPIDER on ", num.genes, " Genes and ", num.TFs, " TFs.\nTime elapsed: ", round(toc,2), " seconds.")
  prepResult(zScale, output, regulatoryNetwork, geneCoreg, tfCoopNetwork, edgelist, motif)
}

#' Write edge weights into a named adjacency matrix
#'
#' Edges whose endpoint is not among the row/column names of \code{net} are skipped.
#' When an edge is listed more than once, the last weight wins.
#'
#' @param net Numeric matrix with row and column names.
#' @param from Row name of each edge.
#' @param to Column name of each edge.
#' @param weight Weight of each edge.
#' @return \code{net} with the matched cells overwritten.
#' @noRd
spiderFillEdges <- function(net, from, to, weight){
  rowIdx <- match(from, rownames(net))
  colIdx <- match(to, colnames(net))
  matched <- !is.na(rowIdx) & !is.na(colIdx)
  net[cbind(rowIdx[matched], colIdx[matched])] <- weight[matched]
  net
}

#' Function to adjust the degree so that the hub nodes are not penalized in z-score transformation
#'
#' Each entry \code{A[i,j]} is multiplied by \code{sqrt(k1[j]^2 + k2[i]^2)}, where \code{k1} is
#' the vector of column sums divided by the number of rows and \code{k2} the vector of row sums
#' divided by the number of columns.
#'
#' @param A Input adjacency matrix
#' @return A matrix with the same dimensions (and dimnames) as \code{A}.
degreeAdjust <- function(A){
  k1 <- colSums(A)/nrow(A)
  k2 <- rowSums(A)/ncol(A)
  # B[i,j] = k2[i]^2 + k1[j]^2
  B <- outer(k2^2, k1^2, "+")
  A * sqrt(B)
}

# --- End of R/SPIDER.R. The patch continues with the README.md hunk, kept verbatim below. ---
# \ No newline at end of file
# diff --git a/README.md b/README.md index 0fe8904c..c141a136 100644 --- a/README.md +++ b/README.md
# @@ -17,27 +17,57 @@ netZooR is an R package to reconstruct, analyse, and plot biological networks. netZooR currently integrates: +
+PANDA * **PANDA** (Passing Attributes between Networks for Data Assimilation) [[Glass et al. 2013]](http://journals.plos.org/plosone/article?id=10.1371/journal.pone.0064832): PANDA is a method for estimating bipartite gene regulatory networks (GRNs) consisting of two types of nodes: transcription factors (TFs) and genes. An edge between TF $i$ and gene $j$ indicates that gene $j$ is regulated by TF $i$. The edge weight represents the strength of evidence for this regulatory relationship obtained by integrating three types of biological data: gene expression data, protein-protein interaction (PPI) data, and transcription factor binding motif (TFBM) data. PANDA is an iterative approach that begins with a seed GRN estimated from TFBMs and uses message passing between data types to refine the seed network to a final GRN that is consistent with the information contained in gene expression, PPI, and TFBM data. +
+
+CONDOR * **CONDOR** (COmplex Network Description Of Regulators) [[Platig et al. 2016]](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1005033): CONDOR is a tool for community detection in bipartite networks. Many community detection methods for unipartite networks are based on the concept of maximizing a modularity metric that compares the weight of edges within communities to the weight of edges between communities, prioritizing community assignments with higher values of the former relative to the latter. CONDOR extends this concept to bipartite networks by optimizing a bipartite version of modularity defined by [[Barber (2007)]](https://pubmed.ncbi.nlm.nih.gov/18233893/). To enable bipartite community detection on large networks such as gene regulatory networks, CONDOR uses a fast unipartite modularity maximization method on one of the two unipartite projections of the bipartite network. In Platig et al. (2016), CONDOR is applied to bipartite networks of single nucleotide polymorphisms (SNPs) and gene expression, where a network edge from a SNP node to a gene node is indicative of an association between the SNP and the gene expression level, commonly known as an expression quantitative trait locus (eQTL). Communities detected with CONDOR contained local hub nodes ("core SNPs") enriched for association with disease, suggesting that functional eQTL relationships are encoded at the community level. +
+
+LIONESS * **LIONESS** (Linear Interpolation to Obtain Network Estimates for Single Samples) [[Kuijjer et al. 2019]](https://doi.org/10.1016/j.isci.2019.03.021): LIONESS is a flexible method for single-sample network integration. The machinery behind LIONESS is a leave-one-out approach. To construct a single-sample network for sample $i$, a first network is estimated on the full dataset and a second network is estimated on the dataset with sample $i$ withheld. The single-sample network is then estimated based on the difference between these two networks. Any method that can be used to estimate a network can be used with LIONESS to estimate single-sample networks. Two common use cases are the use of LIONESS to generate single-sample GRNs based on PANDA and the use of LIONESS to generate single-sample Pearson correlation networks. +
+
+ALPACA * **ALPACA** (ALtered Partitions Across Community Architectures) [[Padi and Quackenbush 2018]](https://www.nature.com/articles/s41540-018-0052-5): ALPACA is a method for differential network analysis that is based on a novel approach to comparison of network community structures. Comparisons of community structure have typically been accomplished by assessing which nodes switch community membership between networks ("community comparison") or by computing the edge weight differences by subtracting the adjacency matrices of two networks and then performing community detection on the resulting differential network ("edge subtraction"). Both these approaches have important limitations. Community comparison is subject to a resolution limit and cannot detect differences smaller than the average community size in a network. Edge subtraction transfers noise from both of the original networks to the differential network, leading to an imprecise estimator. Moreover, positive and negative edge differences cannot be distinguished in the subsequent community detection performed on the differential network. In contrast to community comparison and edge subtraction, ALPACA compares the community structure of two networks by optimizing a new metric: "differential modularity". In the ALPACA algorithm, one network is defined as the reference network and the second is defined as the perturbed network. The differential modularity metric measures the extent to which edges in a community in the perturbed network differ from those that would be expected by random chance according to a null distribution based on the reference network. Community structure of the perturbed network is determined by maximizing this differential modularity. The resulting communities are "differential modules" that show how the perturbed network differs from the reference network at the community level. +
+
+SAMBAR * **SAMBAR** (Subtyping Agglomerated Mutations By Annotation Relations) [[Kuijjer et al.]](https://www.nature.com/articles/s41416-018-0109-7): SAMBAR is a tool for studying cancer subtypes based on patterns of somatic mutations in curated biological pathways. Rather than characterize cancer according to mutations at the gene level, SAMBAR agglomerates mutations within pathways to define a pathway mutation score. To avoid bias based on pathway representation, these pathway mutation scores correct for the number of genes in each pathway as well as the number of times each gene is represented in the universe of pathways. By taking a pathway rather than gene-by-gene lens, SAMBAR both de-sparsifies somatic mutation data and incorporates important prior biological knowledge. Kuijjer et al. (2018) demonstrate that SAMBAR is capable of outperforming other methods for cancer subtyping, producing subtypes with greater between-subtype distances; the authors use SAMBAR for a pan-cancer subtyping analysis that identifies four diverse pan-cancer subtypes linked to distinct molecular processes. +
+
+MONSTER * **MONSTER** (Modeling Network State Transitions from Expression and Regulatory data) [[Schlauch et al.]](https://doi.org/10.1186/s12918-017-0517-y): MONSTER is a method for estimating transitions between network states by modeling the adjacency matrix of one state as a linear transformation of the adjacency matrix of another. Like LIONESS, MONSTER is a flexible method that does not require a particular type of network structure. MONSTER models the perturbation of an initial network A into a perturbed network B according to a matrix product B = AT. T is a transition matrix encoding the changes that map A to B. When A and B are gene regulatory networks, i.e., bipartite networks between TFs and genes, the MONSTER framework leads naturally to the definition of TF involvement as the sum of the off-diagonal weights for a transcription factor $i$ in the transition matrix T. This perspective enables MONSTER to identify differentially involved TFs that contribute to network transitions differently between different conditions. This dimension cannot be captured from a traditional differential expression analysis of TFs, which will not detect TFs that have the same concentration between conditions. +
+
+OTTER * **OTTER** (Optimization to Estimate Regulation) [[Weighill et al.]](https://www.biorxiv.org/content/10.1101/2020.06.23.167999v2.abstract): OTTER is a GRN inference method based on the idea that observed biological data (PPI data and gene co-expression data) are projections of a bipartite GRN between TFs and genes. Specifically, PPI data represent the projection of the GRN onto the TF-TF space and gene co-expression data represent the projection of the GRN onto the gene-gene space. OTTER reframes the problem of GRN inference as a problem of relaxed graph matching and finds a GRN that has optimal agreement with the observed PPI and coexpression data. The OTTER objective function is tunable in two ways: first, one can prioritize matching the PPI data or the coexpression data more heavily depending on one's confidence in the data source; second, there is a regularization parameter that can be applied to induce sparsity on the estimated GRN. The OTTER objective function can be solved using spectral decomposition techniques and gradient descent; the latter is shown to be closely related to the PANDA message-passing approach (Glass et al. 2013). +
+
+CRANE * **CRANE** (Constrained Random Alteration of Network Edges) [[Lim et al.]](https://doi.org/10.3389/fgene.2020.603264): CRANE is a method for determining statistical significance of structural differences between networks. Analysis with CRANE is a four-phase process. The first step of CRANE is to estimate two networks: a reference network and a perturbed network. In the same spirit as LIONESS, CRANE is flexible: any network inference method (e.g., correlation, partial correlation, PANDA) can be used at this stage. In the second step, differential features are determined by comparing the reference and perturbed networks. Here, CRANE is again flexible: such differential features could arise from simple measures such as a comparison of node degree or centrality, or from more nuanced techniques such as differential module detection with ALPACA. Third, a large number of constrained random networks are developed based on the network structure of the reference network. By comparing each random network with the original reference network, a set of null differential measures is obtained. Fourth, the observed differential features from step two can be compared with the null distribution from step three to generate empirical p-values. A typical workflow for applying CRANE in NetZooR would involve fitting PANDA networks in step one and using ALPACA to estimate differential modules in step two. +
+
+EGRET * **EGRET** (Estimating the Genetic Regulatory effects on TFs) [[Weighill et al.]](https://www.genome.org/cgi/doi/10.1101/gr.275107.120): EGRET incorporates genetic variants as a fourth data type in the PANDA message-passing framework, enabling the estimation of genotype-specific GRNs. Genetic variants can alter transcription factor binding by affecting the composition of motif sites on the DNA. Not every genetic variant has such an effect; EGRET incorporates only genetic variants which have (1) been shown to be associated with gene expression (expression quantitative trait loci, or eQTL), and (2) are predicted to affect transcription factor binding based on a tool called QBiC (Martin et al. 2019). This information is used in combination with TFBM predictions as input to the PANDA message-passing framework. The resulting EGRET network is a genotype-specific bipartite GRN that is similar to a PANDA network but incorporates the information contained by individual genetic variation. +
+
+YARN * **YARN** (Yet Another RNa-seq package) [[Paulson et al.]](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/s12859-017-1847-x): YARN is a package that combines quality control, gene filtering, and normalization steps to streamline the preprocessing of large-scale, multi-tissue gene expression data from resources such as the Genotype-Tissue Expression (GTEx) project. Among other steps, YARN uses principal coordinate analysis (PCoA) to determine if samples collected from different sites on the same tissue (for example, transverse and sigmoid colon) can be treated as "transcriptionally indistinguishable" and grouped together to increase power for downstream analyses. Paulson et al. (2017) demonstrate the use of YARN to develop a pan-cancer RNA-seq dataset for 30,333 genes from 9435 samples across 38 tissues from the GTEx dataset. +
netZooR also integrates additional functions to: @@ -103,6 +133,14 @@ BiocManager::install("netZooR") For more details please refer to the [documentation website](https://netzoo.github.io/netZooR/). +#### Using bioconda + +netZooR is also available through [Bioconda](https://bioconda.github.io/recipes/bioconductor-netzoor/README.html#package-bioconductor-netzoor) + +```bash +conda install bioconductor-netzoor +``` + ### Python binding This package will invoke Python programming language in R environment through [reticulate](https://rstudio.github.io/reticulate/) package, by default setting there is no additional configuration needed. diff --git a/docs/404.html b/docs/404.html index 40cee443..8ddfd96c 100644 --- a/docs/404.html +++ b/docs/404.html @@ -71,7 +71,7 @@ netZooR - 1.0.4 + 1.1.16 @@ -130,6 +130,9 @@ YARN: Robust Multi-Tissue RNA-Seq Preprocessing and Normalization + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog
  • + +
  • + Changelog