/
static.R
216 lines (202 loc) · 6.47 KB
/
static.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
#!/usr/bin/env Rscript
#
# This file is part of the `OmnipathR` R package
#
# Copyright
# 2018-2024
# Saez Lab, Uniklinik RWTH Aachen, Heidelberg University
#
# File author(s): Alberto Valdeolivas
# Dénes Türei (turei.denes@gmail.com)
# Attila Gábor
#
# Distributed under the MIT (Expat) License.
# See accompanying file `LICENSE` or find a copy at
# https://directory.fsf.org/wiki/License:Expat
#
# Website: https://r.omnipathdb.org/
# Git repo: https://github.com/saezlab/OmnipathR
#
#' List the static tables available from OmniPath
#'
#' A few resources and datasets are also available as plain TSV files and
#' can be accessed without TLS. The purpose of these tables is to make the
#' most often used OmniPath data available on computers with configuration
#' issues. These tables are not the recommended way to access OmniPath
#' data, and a warning is issued each time they are accessed.
#'
#' @return A data frame listing the available tables, one row per file,
#'     including the columns \code{query}, \code{resource}, \code{organism},
#'     \code{fname}, \code{date}, \code{time}, \code{size} and \code{url}.
#'
#' @examples
#' static_tables()
#'
#' @importFrom magrittr %>% extract extract2
#' @importFrom curl curl_fetch_memory
#' @importFrom stringi stri_split_lines1
#' @importFrom tibble tibble
#' @importFrom tidyr separate_wider_regex separate_wider_delim
#' @importFrom stringr str_extract
#' @importFrom lubridate dmy hm
#' @importFrom dplyr mutate
#' @export
#' @seealso \code{\link{static_table}}
static_tables <- function() {

    # NSE vs. R CMD check workaround
    fname <- organism <- time <- size <- NULL

    # Resolved once: used both to fetch the listing and to build file URLs.
    base_url <- url_parser('omnipath_static')

    base_url %>%
    curl_fetch_memory %>%
    extract2('content') %>%
    rawToChar %>%
    stri_split_lines1 %>%
    # Keep lines 6 .. (n - 2) of the listing. This is exactly what the
    # former `8L:length(.) - 2L` selected, because `:` binds tighter than
    # `-`. Presumably this drops the HTML header and the trailing footer
    # lines of the directory index -- confirm against the server output.
    extract(seq(6L, length(.) - 2L)) %>%
    tibble(fname = .) %>%
    # Each remaining line is expected to look like:
    # <a href="...">FILE.tsv.gz</a>   DATE TIME   SIZE
    separate_wider_regex(
        fname,
        c(
            '<a [^>]+>',
            # `[A-Za-z]` instead of `[A-z]` (which also matches the
            # punctuation between `Z` and `a`); dots are matched literally.
            fname = '[A-Za-z_\\d]+\\.tsv\\.gz',
            '</a>\\s+',
            date = '[^\\s]+',
            ' ',
            time = '[\\d:]+', '\\s+',
            size = '\\d+'
        )
    ) %>%
    # File names follow the pattern `<query>_<resource>_<organism>.tsv.gz`;
    # the original `fname` column is kept for building the URL below.
    separate_wider_delim(
        fname,
        '_',
        names = c('query', 'resource', 'organism'),
        cols_remove = FALSE
    ) %>%
    mutate(
        # strip the `.tsv.gz` suffix left over after splitting on `_`
        organism = str_extract(organism, '^\\d+'),
        date = dmy(date),
        time = hm(time),
        size = as.integer(size),
        url = paste0(base_url, fname)
    )

}
#' Retrieve a static table from OmniPath
#'
#' A few resources and datasets are also available as plain TSV files and
#' can be accessed without TLS. The purpose of these tables is to make the
#' most often used OmniPath data available on computers with configuration
#' issues. These tables are not the recommended way to access OmniPath
#' data, and a warning is issued each time they are accessed.
#'
#' @param query Character: a query type such as "annotations" or
#'     "interactions".
#' @param resource Character: name of the resource or dataset, such as
#'     "CollecTRI" or "PROGENy".
#' @param organism Integer: NCBI Taxonomy ID of the organism: 9606 for
#'     human, 10090 for mouse and 10116 for rat.
#' @param strict_evidences Logical: restrict the evidences to the queried
#'     datasets and resources. If set to FALSE, the directions and effect
#'     signs and references might be based on other datasets and resources.
#' @param wide Logical: convert the annotation table to wide format, which
#'     corresponds more or less to the original resource. If the data comes
#'     from more than one resource a list of wide tables will be returned.
#'     See examples at \code{\link{pivot_annotations}}.
#' @param dorothea_levels Vector detailing the confidence levels of the
#'     interactions to be downloaded. In DoRothEA, every TF-target
#'     interaction has a confidence score ranging from A to E, A being the
#'     most reliable.
#'     By default here we take A, B and C level interactions
#'     (\code{c("A", "B", "C")}).
#'     Note that E interactions are not available in OmnipathR.
#'
#' @return A data frame (tibble) with the requested resource.
#'
#' @examples
#' static_table("annotations", "PROGENy")
#'
#' @importFrom logger log_warn log_error
#' @importFrom magrittr %>% %<>% extract
#' @importFrom stringr str_to_lower str_detect
#' @importFrom dplyr pull filter
#' @importFrom readr read_tsv cols
#' @export
#' @seealso \code{\link{static_tables}}
static_table <- function(
    query,
    resource,
    organism = 9606L,
    strict_evidences = TRUE,
    wide = TRUE,
    dorothea_levels = c('A', 'B', 'C')
) {

    # NSE vs. R CMD check workaround
    dorothea_level <- .env <- NULL

    log_warn(
        sprintf('Accessing `%s` as a static table: this is not ', resource),
        'the recommended way to access OmniPath data; it is only a backup ',
        'plan for situations when our server or your computer is ',
        'experiencing issues.'
    )

    # The listing stores the organism as a string of digits, hence the
    # round trip through integer (normalisation) to character (comparison).
    organism %<>% as.integer %>% as.character
    query_l <- str_to_lower(query)
    resource_l <- str_to_lower(resource)

    # DoRothEA and CollecTRI are passed on as `datasets`, everything else
    # as `resources`; exactly one of the two is non-NULL.
    datasets <- `if`(
        resource_l %in% c('dorothea', 'collectri'),
        resource_l,
        NULL
    )
    resources <- `if`(is.null(datasets), resource_l, NULL)

    static_tables() %>%
    # `query`, `resource` and `organism` on the left hand sides are columns
    # of the listing; `.env$organism` is the function argument.
    filter(
        query == query_l &
        str_to_lower(resource) == resource_l &
        organism == .env$organism
    ) %>%
    {`if`(
        nrow(.) < 1L,
        {
            # `organism` is a character vector at this point, so it must be
            # formatted with `%s`: `%i` makes `sprintf` itself fail and the
            # message below would never be logged or raised.
            msg <-
                paste0(
                    'Static table not available for query `%s`, resource ',
                    '`%s` and organism `%s`. For a list of available tables ',
                    'see `static_tables()`.'
                ) %>%
                sprintf(query, resource, organism)
            log_error(msg)
            stop(msg)
        },
        .
    )} %>%
    pull(url) %>%
    # in case of multiple matches, only the first one is downloaded
    extract(1L) %>%
    omnipath_download(
        read_tsv,
        col_types = cols(),
        progress = FALSE,
        show_col_types = FALSE
    ) %>%
    # NOTE(review): relies on `omnipath_download` attaching the source URL
    # as the `url` attribute of its result -- confirm in its definition.
    omnipath_post_download(
        url = attr(., 'url'),
        strict_evidences = strict_evidences,
        param =
            list(
                json_param = list(),
                query_type = query_l,
                datasets = datasets,
                resources = resources
            ) %>%
            add_qt_message
    ) %>%
    # only annotation tables are converted to wide format
    {`if`(
        query_l == 'annotations' && wide,
        pivot_annotations(.),
        .
    )} %>%
    # for DoRothEA, keep rows whose `dorothea_level` contains any of the
    # requested confidence levels (matched as a character class, e.g. `[ABC]`)
    {`if`(
        resource_l == 'dorothea',
        filter(
            .,
            str_detect(
                dorothea_level,
                dorothea_levels %>% paste(collapse = '') %>% sprintf('[%s]', .)
            )
        ),
        .
    )}

}