/
decoupleR-pre.R
231 lines (210 loc) · 7.52 KB
/
decoupleR-pre.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#' Show methods
#'
#' Prints the methods available in decoupleR. The first column correspond to
#' the function name in decoupleR and the second to the method's full name.
#'
#' @export
#' @examples
#' show_methods()
show_methods <- function(){
db <- tools::Rd_db("decoupleR")
db <- db[grep("run_*", names(db), value = TRUE)]
get_Rd_metadata <- utils::getFromNamespace (".Rd_get_metadata", "tools")
dplyr::bind_rows(lapply(db, function(fun){
name <- get_Rd_metadata(fun, 'name')
title <- get_Rd_metadata(fun, 'title')
tibble::tibble(Function=name, Name=title)
}))
}
#' Intersect network target features with input matrix.
#'
#' Keep only edges which its target features belong to the input matrix.
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param minsize Minimum number of targets per source allowed.
#'
#' @return Filtered tibble.
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#' intersect_regulons(mat, net, source, target, minsize=4)
intersect_regulons <- function(mat,
network,
.source,
.target,
minsize
) {
.source<- as.name(substitute(.source))
.target<- as.name(substitute(.target))
.source <- enquo(.source)
.target <- enquo(.target)
targets <- rownames(mat)
network %>%
filter(!!.target %in% targets) %>%
group_by(!!.source) %>%
filter(n() >= minsize)
}
#' Filter sources with minsize targets
#'
#' Filter sources of a net with less than minsize targets
#'
#' @param mat_f_names Feature names of mat.
#' @inheritParams .decoupler_network_format
#' @param minsize Integer indicating the minimum number of targets per source.
#'
#' @return Filtered network.
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#' net <- rename_net(net, source, target, mor)
#' filt_minsize(rownames(mat), net, minsize = 4)
filt_minsize <- function(mat_f_names, network, minsize = 5){
# NSE vs. R CMD check workaround
n <- source <- target <- NULL
# Find shared targets
shared_targets <- intersect(
mat_f_names,
network$target
)
# Find sizes of sources after intersect and filter by minsize
sources <- network %>%
dplyr::filter(target %in% shared_targets) %>%
dplyr::group_by(source) %>%
dplyr::summarise(n=dplyr::n()) %>%
dplyr::filter(n >= minsize) %>%
dplyr::pull(source)
# Filter sources
network <- network %>%
dplyr::filter(source %in% sources)
if (nrow(network) == 0) {
stop(stringr::str_glue('Network is empty after intersecting it with mat and
filtering it by sources with at least {minsize} targets. Make sure mat and
network have shared target features or reduce the number assigned to minsize'))
}
return(network)
}
#' Pre-processing for methods that fit networks.
#'
#' - If `center` is true, then the expression values are centered by the
#' mean of expression across the samples.
#'
#' @inheritParams .decoupler_mat_format
#' @inheritParams .decoupler_network_format
#' @param sparse Deprecated parameter.
#' @param na.rm Should missing values (including NaN) be omitted from the
#' calculations of [base::rowMeans()]?
#' @param center Logical value indicating if `mat` must be centered by
#' [base::rowMeans()].
#'
#' @return A named list of matrices to evaluate in methods that fit models, like
#' `.mlm_analysis()`.
#' - mat: Features as rows and samples as columns.
#' - mor_mat: Features as rows and columns as source.
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#' mat <- readRDS(file.path(inputs_dir, "mat.rds"))
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#' net <- rename_net(net, source, target, mor)
#' .fit_preprocessing(net, mat, center = FALSE, na.rm = FALSE, sparse = FALSE)
.fit_preprocessing <- function(network, mat, center, na.rm, sparse) {
# Create empty mor_mat from original feature universe from mat, then fill in
sources <- unique(network$source)
targets <- rownames(mat)
mor_mat <- matrix(0, ncol = length(sources), nrow=nrow(mat))
colnames(mor_mat) <- sources
rownames(mor_mat) <- targets
weights <- network$mor * network$likelihood
for (i in 1:nrow(network)) {
.source <- network$source[[i]]
.target <- network$target[[i]]
.weight <- weights[[i]]
if (.target %in% targets) {
mor_mat[[.target,.source]] <- .weight
}
}
if (center) {
mat <- mat - rowMeans(mat, na.rm)
}
if (is.null(colnames(mat))){
colnames(mat) <- 1:ncol(mat)
}
list(mat = mat, mor_mat = mor_mat)
}
#' Check correlation (colinearity)
#'
#' Checks the correlation across the regulators in a network.
#' @inheritParams .decoupler_network_format
#'
#' @return Correlation pairs tibble.
#' @export
#' @examples
#' inputs_dir <- system.file("testdata", "inputs", package = "decoupleR")
#' net <- readRDS(file.path(inputs_dir, "net.rds"))
#' check_corr(net, .source='source')
check_corr <- function(network,
.source = "source",
.target = "target",
.mor = "mor",
.likelihood = NULL){
# NSE vs. R CMD check workaround
correlation <- likelihood <- weight <- NULL
source <- as.symbol(.source)
target <- as.symbol(.target)
mor <- as.symbol(.mor)
network <- network %>%
dplyr::mutate(likelihood=1) %>%
dplyr::mutate(weight = (!!mor)*likelihood) %>%
dplyr::select(!!source, !!target, weight)
network_wide <- network %>%
tidyr::pivot_wider(names_from = !!target, values_from = weight, values_fill = 0) %>%
tibble::column_to_rownames(.source)
cor_source <- stats::cor(t(network_wide))
cor_source[lower.tri(cor_source, diag = TRUE)] <- NA
cor_source <- cor_source %>%
as.data.frame() %>%
tibble::rownames_to_column(.source) %>%
tidyr::pivot_longer(!(!!source), names_to = paste0(.source, ".2"), values_to = "correlation") %>%
dplyr::filter(!is.na(correlation)) %>%
dplyr::arrange(desc(correlation))
cor_source
}
#' Generate a toy `mat` and `network`.
#'
#' @param n_samples Number of samples to simulate.
#' @param seed A single value, interpreted as an integer, or NULL for random
#' number generation.
#'
#' @return List containing `mat` and `network`.
#' @export
#' @examples
#' data <- get_toy_data()
#' mat <- data$mat
#' network <- data$network
get_toy_data <- function(n_samples = 24, seed = 42){
network <- tibble::tibble(
source = c('T1','T1','T1','T2','T2','T2','T3','T3','T3','T3'),
target = c('G01','G02','G03','G06','G07','G08','G06','G07','G08','G11'),
mor = c(1,1,0.7,1,0.5,1,-0.5,-3,-1,1)
)
n_features <- 12
n <- round(n_samples/2)
res = n_samples %% 2
r1 <- c(8,8,8,8,0,0,0,0,0,0,0,0)
r2 <- c(0,0,0,0,8,8,8,8,0,0,0,0)
rep(matrix(r1, ncol=12),n)
matrix(c(rep(r1, n), ncol=12, nrow=12))
matrix(c(rep(r1, n),rep(r2, n)), ncol = 24)
mat <- matrix(c(rep(r1,n),rep(r2,n)), nrow=12)
withr::with_seed(seed, {
rand <- matrix(abs(stats::rnorm(dim(mat)[1]*dim(mat)[2])), nrow=12)
})
mat <- mat + rand
rownames(mat) <- c('G01','G02','G03','G04','G05','G06','G07','G08','G09','G10','G11','G12')
colnames(mat) <- lapply(1:dim(mat)[2], function(i){paste0('S',sprintf("%02d", i))})
return(list(mat=mat, network=network))
}