# support_metabolite_enrichment.R
############## Supporting functions for ORA based on metabolites ###############
#Copyright (C) 2021 Caroline Lohoff, Aurelien Dugourd
#Contact : aurelien.dugourd@bioquant.uni-heidelberg.de
#This program is free software: you can redistribute it and/or modify
#it under the terms of the GNU General Public License as published by
#the Free Software Foundation, either version 3 of the License, or
#(at your option) any later version.
#This program is distributed in the hope that it will be useful,
#but WITHOUT ANY WARRANTY; without even the implied warranty of
#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#GNU General Public License for more details.
#You should have received a copy of the GNU General Public License
#along with this program. If not, see <http://www.gnu.org/licenses/>.
#' \code{rearrange_dataframe}
#'
#' Function to rearrange the data frame to obtain an enzyme/metabolite df
#' instead of a source/target df, i.e. all enzymes are in one column
#' and all metabolites in the second column.
#'
#' A node is treated as a metabolite when its identifier contains the
#' literal text "cpd:"; every other node is treated as an enzyme. Rows that
#' do not pair exactly one enzyme with one metabolite (both nodes are
#' metabolites, both are enzymes, or one of them is NA) are removed.
#'
#' @param network source/target network containing enzymes and metabolites;
#'   a data frame with the columns \code{source} and \code{target}
#' @return data frame with the columns \code{enzymes} and \code{metabolites}
#'   (both character), keeping the row names of the retained \code{network}
#'   rows
#' @export
#' @import sjmisc
rearrange_dataframe <- function(network) {
  stopifnot(
    is.data.frame(network),
    all(c("source", "target") %in% colnames(network))
  )

  # as.character() makes factor columns contribute their labels rather than
  # their integer level codes.
  source_ids <- as.character(network$source)
  target_ids <- as.character(network$target)

  # Fixed (non-regex) match; NA identifiers count as "not a metabolite".
  source_is_metab <- grepl("cpd:", source_ids, fixed = TRUE)
  target_is_metab <- grepl("cpd:", target_ids, fixed = TRUE)

  n_rows <- nrow(network)
  enzymes <- rep(NA_character_, n_rows)
  metabolites <- rep(NA_character_, n_rows)

  # Source is written first and target second, so when both nodes of a row
  # fall into the same class the target value wins and the other column
  # stays NA (the row is then dropped below).
  metabolites[source_is_metab] <- source_ids[source_is_metab]
  enzymes[!source_is_metab] <- source_ids[!source_is_metab]
  metabolites[target_is_metab] <- target_ids[target_is_metab]
  enzymes[!target_is_metab] <- target_ids[!target_is_metab]

  metabolites_df <- data.frame(
    enzymes = enzymes,
    metabolites = metabolites,
    stringsAsFactors = FALSE
  )
  # Keep the caller's row names so retained rows can be traced back.
  row.names(metabolites_df) <- attr(network, "row.names")

  # Remove rows where either the enzyme or the metabolite is missing.
  na.omit(metabolites_df)
}
#' \code{get_pure_kegg_ids}
#'
#' Function to remove the compartment information and "cpd:" from a column with
#' metabolites identifiers in order to get pure KEGG ids. The compartment
#' information is taken to be a trailing underscore followed by exactly one
#' character (e.g. "cpd:C00001_c" becomes "C00001").
#'
#' @param metabolites_col column in a data frame containing metabolite ids
#' @return character vector of the same length as \code{metabolites_col}
#'   containing KEGG ids; NA entries stay NA and empty input gives an empty
#'   character vector
#' @export
get_pure_kegg_ids <- function(metabolites_col) {
  # gsub() is already vectorised, so no per-element apply is needed. Calling
  # it directly also keeps empty input as character(0) instead of NULL, which
  # would delete the column when assigned back into a data frame.
  kegg_ids <- gsub("_.$", "", metabolites_col)
  gsub("cpd:", "", kegg_ids, fixed = TRUE)
}
#'\code{map_pathways_to_metabolites}
#'
#'Function to map pathways to enzymes and subsequently to metabolites.
#'
#'Enzyme identifiers are cleaned first: everything from the first ">" onwards
#'and every "_reverse" is removed. Identifiers that still contain "_" are
#'treated as complexes and split, so each member enzyme is paired with the
#'metabolite of its complex. Empty fragments (e.g. from a leading or
#'trailing "_") are discarded.
#'
#'@param metab_df data frame with metabolites and their target/source enzymes
#' (columns \code{enzymes} and \code{metabolites})
#'@param pathways the pathway collection to be used to create a
#' metabolite/pathway table; its two columns are read positionally as
#' enzyme/gene identifier and pathway term. by default the pathway ontology
#' of redHuman is used.
#'@return data frame with the columns \code{metabolites} and \code{pathway},
#' one row per unique metabolite/pathway pair, ordered by metabolite
#'@export
#'@import sjmisc
#'@import dplyr
map_pathways_to_metabolites <- function(metab_df, pathways = redHuman_pathways) {
  stopifnot(all(c("enzymes", "metabolites") %in% colnames(metab_df)))

  enzymes <- gsub(">.*", "", as.character(metab_df$enzymes)) # drop ">" and rest
  enzymes <- gsub("_reverse", "", enzymes, fixed = TRUE)
  metabolites <- as.character(metab_df$metabolites)

  ## Split complexes: every member enzyme gets its own row, paired with the
  ## metabolite of the complex. Done in one vectorised step instead of
  ## growing the data frame with rbind() inside a loop.
  members <- strsplit(enzymes, split = "_", fixed = TRUE)
  enzyme_metab <- data.frame(
    enzymes = as.character(unlist(members, use.names = FALSE)),
    metabolites = rep(metabolites, times = lengths(members)),
    stringsAsFactors = FALSE
  )
  # Empty fragments cannot name an enzyme.
  enzyme_metab <- enzyme_metab[nzchar(enzyme_metab$enzymes), , drop = FALSE]

  ## Reorder rows by 1. column "enzymes", 2. column "metabolites" and keep
  ## only unique pairs (a complex may list the same enzyme more than once)
  row_order <- order(enzyme_metab$enzymes, enzyme_metab$metabolites)
  enzyme_metab <- unique(enzyme_metab[row_order, , drop = FALSE])

  names(pathways) <- c("gene", "term")

  ## Attach pathways by enzyme; enzymes without a pathway get NA here and
  ## are removed further down
  metab_pathway_df <- merge(
    enzyme_metab,
    pathways[, c("gene", "term")],
    by.x = "enzymes",
    by.y = "gene",
    all.x = TRUE
  )
  colnames(metab_pathway_df)[colnames(metab_pathway_df) == "term"] <- "pathway"
  metab_pathway_df$enzymes <- NULL # only metabolite/pathway pairs are returned

  ## Reorder rows by column "metabolites"
  metab_order <- order(metab_pathway_df$metabolites)
  metab_pathway_df <- metab_pathway_df[metab_order, , drop = FALSE]
  metab_pathway_df <- na.omit(metab_pathway_df) # remove rows containing NA
  metab_pathway_df <- unique(metab_pathway_df) # keep only unique rows
  rownames(metab_pathway_df) <- NULL
  metab_pathway_df
}
#'\code{plot_significant_pathways}
#'
#'Function to plot the interesting pathways after metabolite enrichment analysis
#'
#'@param enrichmentDF Data frame containing all significant pathways; the
#' columns \code{pathway} and \code{score} are used
#'@param score Threshold on the absolute enrichment score: a pathway is
#' plotted when its score is <= -score or >= score. Pathways with a missing
#' score are not plotted.
#'@return Bar plot (ggplot object) of the most significant pathways by
#' enrichment score, bars sorted by score and coloured on a diverging scale
#' centred at 0
#'@export
#'@import ggplot2
plot_significant_pathways <- function(enrichmentDF, score) {
  ## Filter interesting pathways by using the enrichment score. NA scores are
  ## excluded explicitly: an NA in a logical row index would otherwise add an
  ## all-NA row to the plot data.
  scores <- enrichmentDF$score
  keep <- !is.na(scores) & abs(scores) >= score
  top_pathways <- enrichmentDF[keep, , drop = FALSE]

  ## Sort pathways by score; unique() because factor() rejects duplicated
  ## levels when a pathway name occurs more than once
  score_order <- order(top_pathways$score, decreasing = FALSE)
  ordered_names <- as.character(top_pathways$pathway[score_order])
  top_pathways$pathway <- factor(top_pathways$pathway,
                                 levels = unique(ordered_names))

  pathway_plot <- ggplot2::ggplot(
    top_pathways,
    ggplot2::aes(x = pathway, y = score, fill = score)
  ) +
    ggplot2::geom_bar(stat = "identity") +
    ggplot2::coord_flip() +
    ggplot2::theme_minimal() +
    ggplot2::ggtitle("Pathways Metabolite Enrichment") +
    ggplot2::scale_fill_gradient2(low = "darkblue", mid = "whitesmoke",
                                  high = "indianred", midpoint = 0)
  return(pathway_plot)
}