-
Notifications
You must be signed in to change notification settings - Fork 1
/
functional_analysis.R
73 lines (59 loc) · 2.59 KB
/
functional_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#' Over-representation analysis from statistical results
#'
#' Performs an over-representation analysis from a data frame containing the
#' results of the statistical analysis of omic data. Rows annotated as not
#' changing are removed, the remaining rows are split by the annotation column
#' and the over-representation analysis is run on the features of each group.
#'
#' @param df Data frame with results to analyze.
#' @param funCatList A list of character vectors with the functional categories to analyze.
#' @param statusCol Column containing the annotation of features.
#' @param noChangeLabel Label used to indicate not changing features. Rows with
#'   a statusCol value equal to this, or with a missing statusCol value, will
#'   be removed.
#' @param featCol Column containing the feature to submit for the
#'   over-representation analysis (i.e gene symbol).
#' @param ... Other parameters passed to \link[biokit]{overRepresentationAnalysis}.
#'
#' @return A tidy data frame with the results.
#'
#' @export
#'
oraFromStats <- function(df, funCatList, statusCol, noChangeLabel = "No change",
                         featCol = "feature", ...) {
  # `[[` returns NULL for an unknown column name instead of failing, so check
  # the requested names explicitly to keep a loud error on typos.
  for (colName in list(statusCol, featCol)) {
    if (is.character(colName) && !all(colName %in% colnames(df))) {
      stop("Column not found in df: ", paste(colName, collapse = ", "),
           call. = FALSE)
    }
  }

  # Remove rows annotated as "not changing". Standard indexing is used instead
  # of subset(): subset() evaluates its condition inside `df` first, so a
  # column called e.g. "noChangeLabel" would shadow the function argument.
  # Rows with a missing status are dropped, exactly as subset() would do.
  status <- df[[statusCol]]
  keepRow <- !is.na(status) & status != noChangeLabel
  df <- df[keepRow, , drop = FALSE]

  # Split the data frame by status, run the ORA on each piece and return the
  # merged result produced by splitFunMerge.
  splitFunMerge(df = df, splitCol = statusCol, fun = function(x) {
    # `[[` yields a plain vector for data frames and tibbles alike
    toOra <- as.character(x[[featCol]])
    overRepresentationAnalysis(features = toOra, funCatList = funCatList, ...)
  })
}
#' Gene set enrichment analysis from statistical results
#'
#' Splits the results by `splitCol`, runs a pre-ranked gene set enrichment
#' analysis with \link[fgsea]{fgseaSimple} on each group and binds the
#' per-group results into a single data frame.
#'
#' @param df Data frame with results to analyze.
#' @param funCatList A list of character vectors with the functional categories to analyze.
#' @param rankCol Column used to perform the pre-ranked gsea analysis.
#' @param featCol Column used to name the rankCol values.
#' @param splitCol Column used to split df. One analysis is run per group and
#'   the group name is stored in a column of the same name in the output.
#' @param seed Seed set before each analysis to avoid reproducibility problems
#'   between runs.
#' @param nPerm Number of permutations, passed as `nperm` to
#'   \link[fgsea]{fgseaSimple}.
#' @param ... Rest of arguments passed to \link[fgsea]{fgseaSimple}.
#'
#' @return A tidy data frame with the results.
#'
#' @export
#'
#' @importFrom fgsea fgseaSimple
#' @importFrom dplyr bind_rows
#'
gseaFromStats <- function(df, funCatList, rankCol, featCol = "feature",
                          splitCol = "comparison", seed = 149, nPerm = 1000,
                          ...) {
  # `[[` returns NULL for an unknown column name instead of failing, so check
  # the requested names explicitly to keep a loud error on typos.
  for (colName in list(rankCol, featCol, splitCol)) {
    if (is.character(colName) && !all(colName %in% colnames(df))) {
      stop("Column not found in df: ", paste(colName, collapse = ", "),
           call. = FALSE)
    }
  }

  # drop = TRUE discards unused factor levels so that no empty group is sent
  # to fgsea.
  splitted <- split(df, df[[splitCol]], drop = TRUE)

  outDfList <- lapply(splitted, function(intDf) {
    # Named vector of ranking statistics, sorted from highest to lowest.
    # `[[` yields a plain vector for data frames and tibbles alike.
    toGsea <- intDf[[rankCol]]
    names(toGsea) <- intDf[[featCol]]
    toGsea <- toGsea[order(toGsea, decreasing = TRUE)]

    # Set seed to gain reproducibility between analyses.
    # NOTE: this resets the session RNG and it is not restored afterwards.
    set.seed(seed)
    fgsea::fgseaSimple(pathways = funCatList, stats = toGsea, nperm = nPerm,
                       ...)
  })

  # Stack the per-group results; the group name goes into a `splitCol` column.
  dplyr::bind_rows(outDfList, .id = splitCol)
}