Skip to content

Commit

Permalink
1.2.1 (#271)
Browse files Browse the repository at this point in the history
* add bioc information

* update website

* add crane and docs

* update crane

* update worflow

* update crane

* update workflows

* update workflows

* update workfolw

* update workfolw

* update workfolw

* update workfolw

* Update main.yml

* Update main.yml

* Update main.yml

* fix lioness py

* udpate lioness in R

* include calculations python

* update panda rm missing

* update panda

* update lioness joblib

* revert commits

* ALPACA fix to NAs in community assignement

* version bump

* revert lioness

* update crane with alpaca changes

* update lioness

* update lioness python

* fix test

* update workflows

* put back github actions

* Fixed typo

* Added LIONESS-pearson option + unit test

* changed param labels in LIONESS example

* updated docs for lioness() function

* Update main.yml

* Update main.yml

* Update DESCRIPTION

* fix crane issue #240

* expanded PANDA and LIONESS description

* update crane fix

* added ALPACA, EGRET, and OTTER descriptions

* Update README.md

* typo in na.rm

* update workflow

* update workflow2

* fix-actions-3

* add r lib v2

* add r lib v2

* update condor test

* update condor test

* update condor test

* update condor test

* show testthat output

* add pandoc and use cache

* use built in check

* use built in check

* update actions with built in check

* bump version

* use built in coverage

* use built in coverage

* remove sudo

* update actions

* update actions

* removed sudo from actions

* ugrade to R42

* remove virtualenv

* remove virtualenv

* remove virtualenv

* modify condor test

* update condor test

* update condor test

* adding retiucualte to covr

* adding retiucualte to covr

* add panda test to coverignore

* update actions

* remove lioness tests from coverage

* remove lioness tests from coverage

* added MONSTER and SAMBAR

* remove lionessR test

* Added CONDOR and YARN

* Update README.md

* put back otter in rbuild

* remove condor test

* remove condor test

* remove lioness docs

* add lioness to buildingore

* donttest lionesss

* remove coverage from ubuntu test

* remove rbuilignore

* update rbuildignore

* update rbuildignore

* update rbuildignore

* update rbuildignore

* update rbuildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* add back buildignore

* put back tests

* add padanedge diff test

* add back egeret test

* add monster test

* fix monster test

* tolerance in expect equal

* tolerance in expect equal

* tolerance in expect equal

* tolerance in expect equal

* add sourceppi

* fixed PANDA tests

* modify panda test

* update lioness test

* fix expect message

* update tests

* update tests

* update tests

* update tests

* update tests

* update tests

* reduced lioness test data

* reduced lioness test data

* reduced lioness test data

* reduced lioness test data

* reduce test size

* updating bioc (#250)

* updating bioc

* remove empty lines

* remove empty lines

* update citation

* update citation

* Update README.md

* Update README.md

* Update main.yml

* Update main.yml

* Update .Rbuildignore

* Update .Rbuildignore

* update lioness test

* removed two last lioness tests

* removed two last lioness tests

* removed two last lioness tests

* no saving of lioness networks

* no saving of lioness networks

* no saving of lioness networks

* no saving of lioness networks

* update lioness test

* lower sizeof test

* update test data

* update test data

* update test data

* reduce panda tests

* remove old test data

* remove old test data

* Update main.yml

* 1.1.16

update pandapy with header argument (#260)

* trying collapse readme (#261)

* trying collapse readme

* collapse animal description

* Update README.md

* update biconda (#265)

* add spider (#269)

* add spider

* udpate spider

* udpate spider

* update spuder with degreeadjust

* fix spider bug

Co-authored-by: Kate Hoff Shutta <kshutta@umass.edu>
Co-authored-by: katehoffshutta <43797774+katehoffshutta@users.noreply.github.com>
  • Loading branch information
3 people committed Dec 19, 2022
1 parent e36310c commit 7c5b781
Show file tree
Hide file tree
Showing 119 changed files with 19,564 additions and 889 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: netZooR
Type: Package
Title: Unified methods for the inference and analysis of gene regulatory networks
Version: 1.1.15
Version: 1.2.1
Date: 2022-07-07
Authors@R: c(person("Marouen", "Ben Guebila",
email = "benguebila@hsph.harvard.edu", role = c("aut","cre"), comment = c(ORCID = "0000-0001-5934-966X")),
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ export(monsterTransitionNetworkPlot)
export(monsterTransitionPCAPlot)
export(monsterdTFIPlot)
export(otter)
export(pandaDiffEdges)
export(pandaPy)
export(pandaToAlpaca)
export(pandaToCondorObject)
Expand Down
13 changes: 11 additions & 2 deletions R/PANDA.R
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@
#'


pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu", precision="double",save_memory=FALSE, save_tmp=TRUE, keep_expression_matrix=FALSE, modeProcess="union", remove_missing=FALSE){
pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu", precision="double",save_memory=FALSE, save_tmp=TRUE, keep_expression_matrix=FALSE, modeProcess="union", remove_missing=FALSE, with_header=FALSE){

if(missing(expr_file)){
stop("Please provide the path of gene expression data file to 'expr_file' variable") }
Expand Down Expand Up @@ -106,6 +106,13 @@ pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu",
keepexpression.str <- "keep_expression_matrix=True"
} else{ keepexpression.str <- "keep_expression_matrix=False" }

# with header option
if(with_header==FALSE){
withheader.str <- "with_header=False"
}else if (with_header==TRUE){
withheader.str <- "with_header=True"
}

# when pre-processing mode is legacy
if(modeProcess == "legacy"){

Expand All @@ -130,7 +137,9 @@ pandaPy <- function(expr_file, motif_file=NULL, ppi_file=NULL, computing="cpu",
reticulate::source_python(pandapath,convert = TRUE)

# invoke Python script to create a Panda object
obj.str <- paste("panda_obj=Panda(", expr.str, ",", motif.str,",", ppi.str, ",", computing.str, ",", precision.str, ",", savememory.str, ",", savetmp.str, "," , keepexpression.str, ",", mode.str, ")", sep ='')
obj.str <- paste("panda_obj=Panda(", expr.str, ",", motif.str,",", ppi.str, ",",
computing.str, ",", precision.str, ",", savememory.str, ",", savetmp.str, "," ,
keepexpression.str, ",", mode.str, "," , withheader.str, ")", sep ='')

# run Python code
py_run_string(obj.str)
Expand Down
308 changes: 308 additions & 0 deletions R/SPIDER.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
#' Seeding PANDA Interactions to Derive Epigenetic Regulation
#'
#' This function runs the SPIDER algorithm
#'
#' @param motif A motif dataset, a data.frame, matrix or exprSet containing 3 columns.
#' Each row describes an motif associated with a transcription factor (column 1) a
#' gene (column 2) and a score (column 3) for the motif.
#' @param epifilter A binary matrix that is of the same size as motif that will be used as a mask to filter motif
#' for open chromatin region. Motif interactions that fall in open chromatin region will be kept and the others are removed.
#' @param expr An expression dataset, as a genes (rows) by samples (columns) data.frame
#' @param ppi A Protein-Protein interaction dataset, a data.frame containing 3 columns.
#' Each row describes a protein-protein interaction between transcription factor 1(column 1),
#' transcription factor 2 (column 2) and a score (column 3) for the interaction.
#' @param alpha value to be used for update variable, alpha (default=0.1)
#' @param hamming value at which to terminate the process based on hamming distance (default 10^-3)
#' @param iter sets the maximum number of iterations SPIDER can run before exiting.
#' @param progress Boolean to indicate printing of output for algorithm progress.
#' @param output a vector containing which networks to return. Options include "regulatory",
#' "coregulatory", "cooperative".
#' @param zScale Boolean to indicate use of z-scores in output. False will use [0,1] scale.
#' @param randomize method by which to randomize gene expression matrix. Default "None". Must
#' be one of "None", "within.gene", "by.genes". "within.gene" randomization scrambles each row
#' of the gene expression matrix, "by.gene" scrambles gene labels.
#' @param cor.method Correlation method, default is "pearson".
#' @param scale.by.present Boolean to indicate scaling of correlations by percentage of positive samples.
#' @param remove.missing.ppi Boolean to indicate whether TFs in the PPI but not in the motif data should be
#' removed. Only when mode=='legacy'.
#' @param remove.missing.motif Boolean to indicate whether genes targeted in the motif data but not the
#' expression data should be removed. Only when mode=='legacy'.
#' @param remove.missing.genes Boolean to indicate whether genes in the expression data but lacking
#' information from the motif prior should be removed. Only when mode=='legacy'.
#' @param edgelist Boolean to indicate if edge lists instead of matrices should be returned.
#' @param mode The data alignment mode. The mode 'union' takes the union of the genes in the expression matrix and the motif
#' and the union of TFs in the ppi and motif and fills the matrics with zeros for nonintersecting TFs and gens, 'intersection'
#' takes the intersection of genes and TFs and removes nonintersecting sets, 'legacy' is the old behavior with version 1.19.3.
#' #' Parameters remove.missing.ppi, remove.missingmotif, remove.missing.genes work only with mode=='legacy'.
#' @keywords keywords
#' @importFrom matrixStats rowSds
#' @importFrom matrixStats colSds
#' @importFrom Biobase assayData
#' @importFrom reshape melt.array
#' @export
#' @return An object of class "panda" containing matrices describing networks achieved by convergence
#' with SPIDER algorithm.\cr
#' "regNet" is the regulatory network\cr
#' "coregNet" is the coregulatory network\cr
#' "coopNet" is the cooperative network
#' @examples
#' data(pandaToyData)
#' spiderRes <- spider(pandaToyData$motif, pandaToyData$epifilter
#' pandaToyData$expression,pandaToyData$ppi,hamming=.1,progress=TRUE)
#' @references
#' Sonawane, Abhijeet Rajendra, et al. "Constructing gene regulatory networks using epigenetic data." npj Systems Biology and Applications 7.1 (2021): 1-13.
spider <- function(motif,expr=NULL,epifilter=NULL,ppi=NULL,alpha=0.1,hamming=0.001,
iter=NA,output=c('regulatory','coexpression','cooperative'),
zScale=TRUE,progress=FALSE,randomize=c("None", "within.gene", "by.gene"),cor.method="pearson",
scale.by.present=FALSE,edgelist=FALSE,remove.missing.ppi=FALSE,
remove.missing.motif=FALSE,remove.missing.genes=FALSE,mode="union"){

randomize <- match.arg(randomize)
if(progress)
print('Initializing and validating')

if(epifilter[c(1,2),] != motif[c(1,2),]){
stop('Chromatin accessibility data does not match motif data size and order.')
}

if(class(expr)=="ExpressionSet")
expr <- assayData(expr)[["exprs"]]

if (is.null(expr)){
# Use only the motif data here for the gene list
num.conditions <- 0
if (randomize!="None"){
warning("Randomization ignored because gene expression is not used.")
randomize <- "None"
}
} else {
if(mode=='legacy'){
if(remove.missing.genes){
# remove genes from expression data that are not in the motif data
n <- nrow(expr)
expr <- expr[which(rownames(expr)%in%motif[,2]),]
message(sprintf("%s genes removed that were not present in motif", n-nrow(expr)))
}
if(remove.missing.motif){
# remove genes from motif data that are not in the expression data
n <- nrow(motif)
motif <- motif[which(motif[,2]%in%rownames(expr)),]
epifilter <- epifilter[which(motif[,2]%in%rownames(expr)),]
message(sprintf("%s motif edges removed that targeted genes missing in expression data", n-nrow(motif)))
}
# Use the motif data AND the expr data (if provided) for the gene list
# Keep everything sorted alphabetically
expr <- expr[order(rownames(expr)),]
}else if(mode=='union'){
gene.names=unique(union(rownames(expr),unique(motif[,2])))
tf.names =unique(union(unique(ppi[,1]),unique(motif[,1])))
num.TFs <- length(tf.names)
num.genes <- length(gene.names)
# gene expression matrix
expr1=as.data.frame(matrix(0,num.genes,ncol(expr)))
rownames(expr1)=gene.names
expr1[which(gene.names%in%rownames(expr)),]=expr[]
expr=expr1
#PPI matrix
tfCoopNetwork <- matrix(0,num.TFs,num.TFs)
colnames(tfCoopNetwork)=tf.names
rownames(tfCoopNetwork)=tf.names
Idx1 <- match(ppi[,1], tf.names);
Idx2 <- match(ppi[,2], tf.names);
Idx <- (Idx2-1)*num.TFs+Idx1;
tfCoopNetwork[Idx] <- ppi[,3];
Idx <- (Idx1-1)*num.TFs+Idx2;
tfCoopNetwork[Idx] <- ppi[,3];
#Motif matrix
regulatoryNetwork=matrix(0,num.TFs,num.genes)
colnames(regulatoryNetwork)=gene.names
rownames(regulatoryNetwork)=tf.names
Idx1=match(motif[,1], tf.names);
Idx2=match(motif[,2], gene.names);
Idx=(Idx2-1)*num.TFs+Idx1;
regulatoryNetwork[Idx]=motif[,3]*epifilter[,3]
}else if(mode=='intersection'){
gene.names=unique(intersect(rownames(expr),unique(motif[,2])))
tf.names =unique(intersect(unique(ppi[,1]),unique(motif[,1])))
num.TFs <- length(tf.names)
num.genes <- length(gene.names)
# gene expression matrix
expr1=as.data.frame(matrix(0,num.genes,ncol(expr)))
rownames(expr1)=gene.names
interGeneNames=gene.names[which(gene.names%in%rownames(expr))]
expr1[interGeneNames,]=expr[interGeneNames,]
expr=expr1
#PPI matrix
tfCoopNetwork <- matrix(0,num.TFs,num.TFs)
colnames(tfCoopNetwork)=tf.names
rownames(tfCoopNetwork)=tf.names
Idx1 <- match(ppi[,1], tf.names);
Idx2 <- match(ppi[,2], tf.names);
Idx <- (Idx2-1)*num.TFs+Idx1;
indIdx=!is.na(Idx)
Idx=Idx[indIdx] #remove missing TFs
tfCoopNetwork[Idx] <- ppi[indIdx,3];
Idx <- (Idx1-1)*num.TFs+Idx2;
indIdx=!is.na(Idx)
Idx=Idx[indIdx] #remove missing TFs
tfCoopNetwork[Idx] <- ppi[indIdx,3];
#Motif matrix
regulatoryNetwork=matrix(0,num.TFs,num.genes)
colnames(regulatoryNetwork)=gene.names
rownames(regulatoryNetwork)=tf.names
Idx1=match(motif[,1], tf.names);
Idx2=match(motif[,2], gene.names);
Idx=(Idx2-1)*num.TFs+Idx1;
indIdx=!is.na(Idx)
Idx=Idx[indIdx] #remove missing genes
regulatoryNetwork[Idx]=motif[indIdx,3]*epifilter[indIdx,3];
}
num.conditions <- ncol(expr)
if (randomize=='within.gene'){
expr <- t(apply(expr, 1, sample))
if(progress)
print("Randomizing by reordering each gene's expression")
} else if (randomize=='by.gene'){
rownames(expr) <- sample(rownames(expr))
expr <- expr[order(rownames(expr)),]
if(progress)
print("Randomizing by reordering each gene labels")
}
}

if (mode=='legacy'){
# Create vectors for TF names and Gene names from motif dataset
tf.names <- sort(unique(motif[,1]))
gene.names <- sort(unique(rownames(expr)))
num.TFs <- length(tf.names)
num.genes <- length(gene.names)
}

# Bad data checking
if (num.genes==0){
stop("Error validating data. No matched genes.\n Please ensure that gene names in expression data match gene names in motif data")
}

if(num.conditions==0) {
warning('No expression data given. SPIDER will run based on an identity co-regulation matrix')
geneCoreg <- diag(num.genes)
} else if(num.conditions<3) {
warning('Not enough expression conditions detected to calculate correlation. Co-regulation network will be initialized to an identity matrix.')
geneCoreg <- diag(num.genes)
} else {

if(scale.by.present){
num.positive=(expr>0)%*%t((expr>0))
geneCoreg <- cor(t(expr), method=cor.method, use="pairwise.complete.obs")*(num.positive/num.conditions)
} else {
geneCoreg <- cor(t(expr), method=cor.method, use="pairwise.complete.obs")
}
if(progress)
print('Verified sufficient samples')
}
if (any(is.na(geneCoreg))){ #check for NA and replace them by zero
diag(geneCoreg)=1
geneCoreg[is.na(geneCoreg)]=0
}

if (any(duplicated(motif))) {
warning("Duplicate edges have been found in the motif data. Weights will be summed.")
motif <- aggregate(motif[,3], by=list(motif[,1], motif[,2]), FUN=sum)
}

# Prior Regulatory Network
if(mode=='legacy'){
Idx1=match(motif[,1], tf.names);
Idx2=match(motif[,2], gene.names);
Idx=(Idx2-1)*num.TFs+Idx1;
regulatoryNetwork=matrix(data=0, num.TFs, num.genes);
regulatoryNetwork[Idx]=motif[,3]
colnames(regulatoryNetwork) <- gene.names
rownames(regulatoryNetwork) <- tf.names
# PPI data
# If no ppi data is given, we use the identity matrix
tfCoopNetwork <- diag(num.TFs)
# Else we convert our two-column data.frame to a matrix
if (!is.null(ppi)){
if(any(duplicated(ppi))){
warning("Duplicate edges have been found in the PPI data. Weights will be summed.")
ppi <- aggregate(ppi[,3], by=list(ppi[,1], ppi[,2]), FUN=sum)
}
if(remove.missing.ppi){
# remove edges in the PPI data that target TFs not in the motif
n <- nrow(ppi)
ppi <- ppi[which(ppi[,1]%in%tf.names & ppi[,2]%in%tf.names),]
message(sprintf("%s PPI edges removed that were not present in motif", n-nrow(ppi)))
}
Idx1 <- match(ppi[,1], tf.names);
Idx2 <- match(ppi[,2], tf.names);
Idx <- (Idx2-1)*num.TFs+Idx1;
tfCoopNetwork[Idx] <- ppi[,3];
Idx <- (Idx1-1)*num.TFs+Idx2;
tfCoopNetwork[Idx] <- ppi[,3];
}
colnames(tfCoopNetwork) <- tf.names
rownames(tfCoopNetwork) <- tf.names
}

## Run SPIDER ##
tic=proc.time()[3]

# adjusting degree distribution
regulatoryNetwork = degreeAdjust(regulatoryNetwork)

if(progress)
print('Normalizing networks...')
regulatoryNetwork = normalizeNetwork(regulatoryNetwork)
tfCoopNetwork = normalizeNetwork(tfCoopNetwork)
geneCoreg = normalizeNetwork(geneCoreg)

if(progress)
print('Learning Network...')

minusAlpha = 1-alpha
step=0
hamming_cur=1
if(progress)
print("Using tanimoto similarity")
while(hamming_cur>hamming){
if ((!is.na(iter))&&step>=iter){
print(paste("Reached maximum iterations, iter =",iter),sep="")
break
}
Responsibility=tanimoto(tfCoopNetwork, regulatoryNetwork)
Availability=tanimoto(regulatoryNetwork, geneCoreg)
RA = 0.5*(Responsibility+Availability)

hamming_cur=sum(abs(regulatoryNetwork-RA))/(num.TFs*num.genes)
regulatoryNetwork=minusAlpha*regulatoryNetwork + alpha*RA

ppi=tanimoto(regulatoryNetwork, t(regulatoryNetwork))
ppi=update.diagonal(ppi, num.TFs, alpha, step)
tfCoopNetwork=minusAlpha*tfCoopNetwork + alpha*ppi

CoReg2=tanimoto(t(regulatoryNetwork), regulatoryNetwork)
CoReg2=update.diagonal(CoReg2, num.genes, alpha, step)
geneCoreg=minusAlpha*geneCoreg + alpha*CoReg2

if(progress)
message("Iteration", step,": hamming distance =", round(hamming_cur,5))
step=step+1
}

toc=proc.time()[3] - tic
if(progress)
message("Successfully ran SPIDER on ", num.genes, " Genes and ", num.TFs, " TFs.\nTime elapsed:", round(toc,2), "seconds.")
prepResult(zScale, output, regulatoryNetwork, geneCoreg, tfCoopNetwork, edgelist, motif)
}

#' Function to adjust the degree so that the hub nodes are not penalized in z-score transformation
#'
#' @param A Input adjacency matrix
degreeAdjust <- function(A){
k1 <- colSums(A)/dim(A,1)
k2 <- rowSums(A)/dim(A,2)
B <- (matrix(replicate(dim(A,2),k1),nrow=dim(A,1)))^2
B <- B + (matrix(t(replicate(dim(A,2),k2)),nrow=dim(A,1)))^2
A <- A * sqrt(B);
}
Loading

0 comments on commit 7c5b781

Please sign in to comment.