-
Notifications
You must be signed in to change notification settings - Fork 1
/
plot.density.profile.R
581 lines (488 loc) · 31.1 KB
/
plot.density.profile.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
#' @title Plot of NGS density signal at specific regions from deepTools matrices.
#'
#' @description Plots the density profile of NGS data signals, using as input a score matrix computed by deeptools's computeMatrix function or by \link{computeMatrix.deeptools} from this package.
#'
#' @param matrix.file A single string indicating a full path to a matrix.gz file generated by \code{deepTools/computeMatrix} or by \link{computeMatrix.deeptools}, or a list generated by the function \link{read.computeMatrix.file}.
#' @param plot.by.group Logical value to define whether plot by group of regions or by sample. By default \code{TRUE}.
#' @param missing.data.as.zero Logical value to define whether treat missing data as 0. If set as \code{FALSE} missing data will be converted to \code{NA} and will be excluded from the computations of the signal. By default \code{TRUE}.
#' @param sample.names Samples names could be defined by a string vector. If set as \code{NULL} sample names will be get automatically by the matrix file. By default \code{NULL}. \cr Example: \code{c("sample1", "sample2", "sample3")}
#' @param region.names Region names could be defined by a string vector. If set as \code{NULL} sample names will be get automatically by the matrix file. By default \code{NULL}. \cr Example: \code{c("regionA", "regionB")}
#' @param signal.type String indicating the signal to be computed and plotted. Available parameters are "mean", "median" and "sum". By default "mean".
#' @param error.type String indicating the type of error to be computed and plotted. Available parameters are "sem" and "sd", standard error mean and standard deviation respectively. By default "sem". Parameter considered only when \code{plot.error = TRUE)}.
#' @param plot.error Logical value to define whether to plot the error around the signal. By default \code{TRUE}.
#' @param error.transparency Numeric value to define the alpha/transparency of the error. By default 0.125. Parameter considered only when \code{plot.error = TRUE)}.
#' @param title Title of each plot could be defined by a string vector. If set as \code{NULL} titles will be generated automatically. By default \code{NULL}. \cr Example: \code{c("Title1", "Title2")}
#' @param x.lab Single string or string vector to define the X-axis label for all the plots. By default \code{NULL}, the label will be defined automatically.
#' @param y.lab Single string or string vector to define the Y-axis label for all the plots. By default \code{NULL}, the label will be defined automatically.
#' @param line.type Vector to define each line type. Both numeric and string codes are accepted. If only one element is given this will be applied to all the lines. By default "solid". \cr Example 1: \code{c("solid", "dashed")}. \cr Example 2: \code{c(1, 2)}
#' @param line.width Numeric value to define the line width for all the plots. By default 0.5.
#' @param x.lim List of numeric vectors with two elements each to define the range of the X-axis. To set only one side use NA for the side to leave automatic. If only one range is given this one will be applied to all the plots. By default \code{NULL}, the range will be defined automatically. \cr Example \code{list(c(0, 20), c(NA, 30), c(0, NA), c(NA, NA))}.,
#' @param y.lim List of numeric vectors with two elements each to define the range of the Y-axis. To set only one side use NA for the side to leave automatic. If only one range is given this one will be applied to all the plots. By default \code{NULL}, the range will be defined automatically. \cr Example \code{list(c(0, 20), c(NA, 30), c(0, NA), c(NA, NA))}.,
#' @param y.identical.auto Logical value to define whether use the same Y-axis range for all the plots automatically depending on the values. Not used when \code{y.lim} is not \code{NULL}. By default \code{TRUE}.
#' @param y.ticks.interval A number indicating the interval/bin spacing two ticks on the Y-axis. By default \code{NULL}: ticks are assigned automatically. Active only when \code{y.identical.auto = TRUE} and \code{y.lim != NULL}.
#' @param y.digits A numeric value to define the number of digits to use for the y.axis values. By default \code{1} (eg. 1.5).
#' @param text.size Numeric value to define the size of the text for the labels of all the plots. By default 12.
#' @param legend.position Any ggplot supported value for the legend position (eg. "none", top", "bottom", "left", "right", c(fraction.x, fraction.y)). By default \code{c(0.2, 0.85)}.
#' @param plot.vertical.lines Logical value to define whether to plot a dashed gray vertical line in correspondence of the reference points of each plot. By default \code{TRUE}.
#' @param write.reference.points Logical value to define whether to indicate the reference points on each plot. Applied only when \code{x.lim} is \code{NULL}. By default \code{TRUE}.
#' @param colors Vector to define the line and error area colors. If only one value is provided it will applied to all the samples/groups. If the number of values is lower than the the required one, a random set of colors will be generated. All standard R.colors values are accepted. By default \code{c("#00A5CF", "#F8766D", "#AC88FF", "#E08B00", "#00BA38", "#BB9D00", "#FF61C9", "gray30")}.
#' @param n.row.multiplot Numeric value to define the number of rows in the final multiplot.
#' @param multiplot.export.file If a string with the name of a PDF file is provided the multiplot will be exported. By default \code{NULL}.
#' @param real.width.single.plot Numeric value, in inches, to define the real width of each plot in the multiplot exported, if required. By default 2.9 inches.
#' @param real.height.single.plot Numeric value, in inches, to define the real height of each plot in the multiplot exported, if required. By default 3.5 inches.
#' @param by.row Logical value to define whether the plots should be arranged by row. By default \code{TRUE}.
#' @param print.multiplot Logical value to define whether to print the multiplot once created. By default \code{FALSE}.
#'
#' @return The functions returns a list containing:
#' \itemize{
#' \item \code{data.table} with the computed values used for the plot;
#' \item \code{metadata} table with the information gotten from the matrix_file.gz;
#' \item \code{plot.list} with a plot for each list element;
#' \item \code{multiplot} with the image of all the plots together.
#' }
#'
#' @examples
#' plot.density.profile(
#' matrix.file = "/path.to/matrix.file.gz", plot.by.group = TRUE,
#' missing.data.as.zero = NULL, sample.names = NULL, region.names = NULL,
#' signal.type = "mean", error.type = "sem", plot.error = TRUE,
#' error.transparency = 0.125, title = NULL, x.lab = NULL, y.lab = NULL,
#' line.type = "solid", line.width = 0.5, x.lim = NULL, y.lim = NULL,
#' y.identical.auto = TRUE, y.ticks.number = 5, text.size = 12,
#' plot.vertical.lines = TRUE, colors = c("red", "blue", "#00BA38"),
#' n.row.multiplot = 1, multiplot.export.file = "/path.to/multiplot.pdf",
#' real.width.single.plot = 2.5, real.height.single.plot = 3,
#' print.multiplot = FALSE)
#'
#' @details To know more about the deepTools's function \code{computeMatrix} see the package manual at the following link: \cr \url{https://deeptools.readthedocs.io/en/develop/content/tools/computeMatrix.html}.
#'
#' @export plot.density.profile
#'
# @import dplyr
# @import ggplot2
# @importFrom data.table fread
# @importFrom stringr str_split
# @importFrom robustbase colMedians
# @importFrom matrixStats colSds
# @importFrom purrr reduce
# @importFrom cowplot plot_grid
# @importFrom tools toTitleCase
# @importFrom labeling extended
plot.density.profile = function(
matrix.file,
plot.by.group = T,
missing.data.as.zero = NULL,
sample.names = NULL,
region.names = NULL,
signal.type = "mean", # type of signal computation c("mean", "median", "sum")
error.type = "sem", # type of error available only of signal.type %in% c("mean", "median"). Possible values c("sem", "sd")
plot.error = T,
error.transparency = 0.125,
title = NULL, # string vector
x.lab = NULL, # single string vector
y.lab = NULL, # string vector
line.type = "solid", # can be a vector for different lines
line.width = 0.5, # can be a vector for different lines
x.lim = NULL, # list of vectors c(min, max) or single vector
y.lim = NULL, # list of vectors c(min, max) or single vector
y.identical.auto = T, # subordinated to y.lim which has the priority
y.ticks.interval = NULL, # only for 'y.identical.auto'
y.digits = 1,
text.size = 12,
legend.position = c(0.2, 0.85),
plot.vertical.lines = T,
write.reference.points = T,
colors = c("#00A5CF", "#F8766D", "#AC88FF", "#E08B00", "#00BA38", "#BB9D00", "#FF61C9", "gray30"),
n.row.multiplot = 1,
multiplot.export.file = NULL,
real.width.single.plot = 2.9, #inch
real.height.single.plot = 3.5, #inch
by.row = TRUE,
print.multiplot = F) {
##############################################################################
## ------------------------- BEGIN OF THE FUNCTION ------------------------ ##
##############################################################################
# Load all libraries
require(dplyr)
require(ggplot2)
# require(data.table)
# require(stringr)
# require(robustbase)
# require(matrixStats)
# require(purrr)
# require(cowplot)
# require(tools)
# require(labeling)
# Check if Rseb is up-to-date #
Rseb::actualize(update = F, verbose = F)
##############################################################################
## Convert and check x.lim and y.lim and title
### Check that the parameters are vectors or lists
if (!is.null(x.lim) & !(class(x.lim) %in% c("numeric", "list"))) {
return(warning("The 'x.lim' parameter must be a vector or list of numeric vectors c(min, max) or NULL. \nTo set just one boundary replace a number with NA."))
}
if (!is.null(y.lim) & !(class(y.lim) %in% c("numeric", "list"))) {
return(warning("The 'y.lim' parameter must be a vector or list of numeric vectors c(min, max) or NULL. \nTo set just one boundary replace a number with NA."))
}
### Check whether parameters are single vectors and convert them to 1-element lists eventually
if (!is.null(x.lim) & class(x.lim) == "numeric") {x.lim = list(x.lim)}
if (!is.null(y.lim) & class(y.lim) == "numeric") {y.lim = list(y.lim)}
### Check x.lab and y.lab class
if (!is.null(title) & class(title) != "character") {return(warning("The 'title' parameter must be a single string or a string vector."))}
if (!is.null(x.lab) & class(x.lab) != "character") {return(warning("The 'x.lab' parameter must be a single string or a string vector."))}
if (!is.null(y.lab) & class(y.lab) != "character") {return(warning("The 'y.lab' parameter must be a single string or a string vector."))}
### Check the size and type of each element in the lists
if (!is.null(x.lim)) {
for (i in 1:length(x.lim)) {
if (length(x.lim[[i]]) != 2 | class(x.lim[[i]]) != "numeric") {
return(warning("One ore more vectors of the 'x.lim' list do not have exactly 2 elements and/or are not numeric vectors."))}
}
}
if (!is.null(y.lim)) {
for (i in 1:length(y.lim)) {
if (length(y.lim[[i]]) != 2 | class(y.lim[[i]]) != "numeric") {
return(warning("One ore more vectors of the 'y.lim' list do not have exactly 2 elements and/or are not numeric vectors."))}
}
}
### Check title vector
if (!is.null(title) & class(title) != "character") {return(warning("The 'title' parameter must be a string vector or NULL."))}
### Check that the signal method is allowed
if (class(signal.type) != "character" | !(signal.type %in% c("mean", "median", "sum")) | length(signal.type) != 1) {return(warning("The 'siganl.type' parameter must be one among [mean, median, sum]."))}
##############################################################################
# Import/read the matrix.gz file
if (class(matrix.file) == "character" & length(matrix.file) == 1) {
m = Rseb::read.computeMatrix.file(matrix.file)
metadata = m$metadata
matrix.data = m$matrix.data
} else if (class(matrix.file) == "list" & length(matrix.file) == 3 & unique(names(matrix.file) == c("metadata", "matrix.data", "original.file.path"))) {
metadata = matrix.file$metadata
matrix.data = matrix.file$matrix.data
} else {
return(warning("The 'matrix.file' must be a single string indicating a full path to a matrix.gz file generated by deepTools/computeMatrix or by Rseb::computeMatrix.deeptools(), or a list generated by the function Rseb::read.computeMatrix.file."))
}
# Generate metadata variables
sample_start_column = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "sample_boundaries")$values, pattern = ",")[[1]]))
sample_names = as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "sample_labels")$values, pattern = ",")[[1]])
group_start_row = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "group_boundaries")$values, pattern = ",")[[1]]))
group_names = as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "group_labels")$values, pattern = ",")[[1]])
##############################################################################
# Get the number at which the matrix data starts (after 'chr' 'start' 'end' 'strand' etc.)
number.info.columns = ncol(matrix.data) - sample_start_column[length(sample_start_column)]
# Remove NaN values and substitute them with 0 if missing.data.as.zero = TRUE or NA if FALSE
missing.data.as.zero.automatic = as.logical(toupper(filter(.data = metadata, parameters == "missing_data_as_zero")$values))
if (is.null(missing.data.as.zero)) {
missing.data.as.zero = missing.data.as.zero.automatic
} else if (is.logical(missing.data.as.zero)) {
if (missing.data.as.zero != missing.data.as.zero.automatic) {
message(paste("The parameter 'missing.data.as.zero' set in this function as ", as.character(missing.data.as.zero),
", differs from the automatic obtained from the matrix file which is set as ", as.character(missing.data.as.zero.automatic),
".\nMissing data will be treated as set in this function: 'missing.data.as.zero = ", as.character(missing.data.as.zero), "'.", sep = ""))}
} else {return(warning("The parameter 'missing.data.as.zero' must be a logical value or NULL."))}
is.nan.data.frame = function(data.frame) do.call(cbind, lapply(data.frame, is.nan))
matrix.data[is.nan(matrix.data)] = ifelse(test = missing.data.as.zero,
yes = 0,
no = NA)
message(ifelse(test = missing.data.as.zero,
yes = "Missing data have been converted to 0 since the parameter 'missing.data.as.zero' is TRUE.",
no = "Missing data have been converted to NA since the parameter 'missing.data.as.zero' is FALSE. \nNA values will be excluded from the statistical computations."))
##############################################################################
# Generate a list with a table per sample
samples.table.list = list()
# Change sample and group names by custom ones
if (!is.null(sample.names)) {
if (length(unique(sample.names)) == length(sample_names)) {
sample_names = sample.names
} else {message("Default sample.names have been used because custom valus are not unique and/or incomplete")}
}
if (!is.null(region.names)) {
if (length(unique(region.names)) == length(group_names)) {
group_names = region.names
} else {message("Default region.names have been used because custom valus are not unique and/or incomplete")}
}
for (i in 1:length(sample_names)) {
# define the limits of the sample keeping the first lines
start.col = sample_start_column[i] + number.info.columns + 1
end.col = sample_start_column[i+1] + number.info.columns
sample.table =
matrix.data %>%
select(c(1:number.info.columns, start.col:end.col)) %>%
# adding a column with the groups names
mutate(group = rep(group_names,
times = c(group_start_row[-1] - group_start_row[-length(group_start_row)])))
# Add the single table to a list
samples.table.list[[i]] = sample.table
}
names(samples.table.list) = sample_names
##############################################################################
# Generate the statistical tables
sample.stat.table.list = list()
for (s in 1:length(sample_names)) {
group.stats.table.list = list()
# Computations of the stats per group in order to then merge the different group tables
for (g in 1:length(group_names)) {
group.table =
samples.table.list[[s]] %>%
filter(group == group_names[g]) %>%
select(-c(1:number.info.columns), -group)
group.mean = colMeans(group.table, na.rm = T)
group.median = robustbase::colMedians(as.matrix(group.table), na.rm = T)
group.sum = colSums(group.table, na.rm = T)
group.sd = matrixStats::colSds(as.matrix(group.table), na.rm = T)
group.sem = group.sd / sqrt(nrow(group.table))
group.stats.table.list[[g]] =
data.frame(group = group_names[g],
mean = group.mean,
median = group.median,
sum = group.sum,
sd = group.sd,
sem = group.sem)
} # 'g' For end
# x-axes values, distance from center
upstream = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "upstream")$values, pattern = ",")[[1]]))[s]
downstream = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "downstream")$values, pattern = ",")[[1]]))[s]
body = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "body")$values, pattern = ",")[[1]]))[s]
bin = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "bin_size")$values, pattern = ",")[[1]]))[s]
distance.range = seq(from = -upstream, to = (body + downstream), by = bin)
distance.range = distance.range[distance.range != 0]
sample.stat.table.list[[s]] =
purrr::reduce(.x = group.stats.table.list, .f = rbind) %>%
mutate(distance = rep(distance.range, length(group_names)),
sample = sample_names[s])
} # 's' For end
# Merge of all the tables in a single one
full.stat.table = purrr::reduce(.x = sample.stat.table.list, .f = rbind)
##############################################################################
# Generation of the plots
plot.list = list()
# Remove error plotting option when using the 'sum' mode, since it means nothing for a sum
if (signal.type == "sum") {
plot.error = F
message("The error will not be plotted since mode 'sum' is used.")
}
# Function to check the length of x.lim, y.lim, title
check.parameter.length =
function(parameter, parameter.name, plots.number) {
if (!is.null(parameter) & length(parameter) != plots.number & length(parameter) > 1) {
message(paste("The parameter", parameter.name, "has been set as NULL since the number of elements was different from the number of plots required."))
return(NULL)}
else {return(parameter)}
}
###########
### Generate a plot by group (each plot is a group, each signal/line is a sample)
if (plot.by.group == T) {
# Define the colors for the samples
if ((length(colors) > 1 & length(colors) < length(sample_names)) | length(colors) == 0) {colors = rainbow(length(sample_names))}
for (i in 1:length(group_names)) {
# Check if plot the center line
regionEnd = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "body")$values, pattern = ",")[[1]]))
if (length(regionEnd) == 1) {regionEnd = rep(regionEnd, length(group_names))}
if (plot.vertical.lines == T) {
center.line.value = unique(c(0, regionEnd[i]))} else {
center.line.value = NULL}
# Repeat the value of x.lim, y.lim, title parameters if their length is 1
if (!is.null(x.lim) & length(x.lim) == 1) {x.lim = lapply(1:length(group_names), function(j) x.lim[[1]])}
if (!is.null(y.lim) & length(y.lim) == 1) {y.lim = lapply(1:length(group_names), function(j) y.lim[[1]])}
if (!is.null(title) & length(title) == 1) {title = rep(title, length(group_names))}
if (!is.null(x.lab) & length(x.lab) == 1) {x.lab = rep(x.lab, length(group_names))}
if (!is.null(y.lab) & length(y.lab) == 1) {y.lab = rep(y.lab, length(group_names))}
# Check the length of the x.lim, y.lim, title parameters
x.lim = check.parameter.length(parameter = x.lim, parameter.name = "'x.lim'", plots.number = length(group_names))
y.lim = check.parameter.length(parameter = y.lim, parameter.name = "'y.lim'", plots.number = length(group_names))
title = check.parameter.length(parameter = title, parameter.name = "'title'", plots.number = length(group_names))
x.lab = check.parameter.length(parameter = x.lab, parameter.name = "'x.lab'", plots.number = length(group_names))
y.lab = check.parameter.length(parameter = y.lab, parameter.name = "'y.lab'", plots.number = length(group_names))
# Select only current group subtable
current.table = full.stat.table %>% filter(group == group_names[i])
# Define which value to use for the scores
if (signal.type == "mean") {
score = current.table$mean} else if (signal.type == "median") {
score = current.table$median} else if (signal.type == "sum") {
score = current.table$sum} else {
return(warning("The signal.type must be one among 'mean', 'median', 'sum'."))
}
# Define which value to use for error
if (error.type == "sem") {
error = current.table$sem} else if (error.type == "sd") {
error = current.table$sd} else {
return(warning("The error.type must be one between 'sem' or 'sd'."))
}
# Plot saving in the list
plot.list[[i]] =
Rseb::density_plot(
samples = current.table$sample,
scores = score,
positions = current.table$distance,
variance_scores = error,
ylab = ifelse(test = is.null(y.lab),
yes = ifelse(test = plot.error == T,
yes = paste(tools::toTitleCase(signal.type), "\u00b1", toupper(error.type), "of density signal"),
no = paste(tools::toTitleCase(signal.type), "of density signal")),
no = y.lab[i]),
xlab = ifelse(test = is.null(x.lab),
yes = "Distance from reference point [bp]",
no = x.lab[i]),
line_type = line.type,
x_lim = x.lim[[i]],
y_lim = y.lim[[i]],
x_intercept = center.line.value,
colors = colors,
title = ifelse(test = is.null(title),
yes = group_names[i],
no = title[i]),
text_size = text.size,
variance = plot.error,
print_plot = F,
line_width = line.width,
variance_opacity = error.transparency) +
theme(legend.position = legend.position,
legend.background = element_blank())
names(plot.list)[i] = group_names[i]
} # for loop end
########
### Generate a plot by sample (each plot is a sample, each signal/line is a group)
} else { # if end
# Define the colors for each group
if ((length(colors) > 1 & length(colors) < length(group_names)) | length(colors) == 0) {colors = rainbow(length(group_names))}
for (i in 1:length(sample_names)) {
# Check if plot the center line
regionEnd = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "body")$values, pattern = ",")[[1]]))
if (length(regionEnd) == 1) {regionEnd = rep(regionEnd, length(sample_names))}
if (plot.vertical.lines == T) {
center.line.value = unique(c(0, regionEnd[i]))} else {
center.line.value = NULL}
# Repeat the value of x.lim, y.lim, title parameters if their length is 1
if (!is.null(x.lim) & length(x.lim) == 1) {x.lim = lapply(1:length(sample_names), function(j) x.lim[[1]])}
if (!is.null(y.lim) & length(y.lim) == 1) {y.lim = lapply(1:length(sample_names), function(j) y.lim[[1]])}
if (!is.null(title) & length(title) == 1) {title = rep(title, length(sample_names))}
if (!is.null(x.lab) & length(x.lab) == 1) {x.lab = rep(x.lab, length(sample_names))}
if (!is.null(y.lab) & length(y.lab) == 1) {y.lab = rep(y.lab, length(sample_names))}
# Check the length of the x.lim, y.lim, title parameters
x.lim = check.parameter.length(parameter = x.lim, parameter.name = "'x.lim'", plots.number = length(sample_names))
y.lim = check.parameter.length(parameter = y.lim, parameter.name = "'y.lim'", plots.number = length(sample_names))
title = check.parameter.length(parameter = title, parameter.name = "'title'", plots.number = length(sample_names))
x.lab = check.parameter.length(parameter = x.lab, parameter.name = "'x.lab'", plots.number = length(sample_names))
y.lab = check.parameter.length(parameter = y.lab, parameter.name = "'y.lab'", plots.number = length(sample_names))
# Select only current group subtable
current.table = full.stat.table %>% filter(sample == sample_names[i])
# Define which value to use for the scores
if (signal.type == "mean") {
score = current.table$mean} else if (signal.type == "median") {
score = current.table$median} else if (signal.type == "sum") {
score = current.table$sum} else {
return(warning("The signal.type must be one among 'mean', 'median', 'sum'"))
}
# Define which value to use for error
if (error.type == "sem") {
error = current.table$sem} else if (error.type == "sd") {
error = current.table$sd} else {
return(warning("The error.type must be one between 'sem' or 'sd'"))
}
# Plot saving in the list
plot.list[[i]] =
Rseb::density_plot(
samples = current.table$group,
scores = score,
positions = current.table$distance,
variance_scores = error,
ylab = ifelse(test = is.null(y.lab),
yes = ifelse(test = plot.error == T,
yes = paste(tools::toTitleCase(signal.type), "\u00b1", toupper(error.type), "of density signal"),
no = paste(tools::toTitleCase(signal.type), "of density signal")),
no = y.lab[i]),
xlab = ifelse(test = is.null(x.lab),
yes = "Distance from reference point [bp]",
no = x.lab[i]),
line_type = line.type,
x_lim = x.lim[[i]],
y_lim = y.lim[[i]],
x_intercept = center.line.value,
colors = colors,
title = ifelse(test = is.null(title),
yes = sample_names[i],
no = title[i]),
text_size = text.size,
variance = plot.error,
print_plot = F,
line_width = line.width,
variance_opacity = error.transparency) +
theme(legend.position = legend.position,
legend.background = element_blank())
# Add a name to the plot, element in the list
names(plot.list)[i] = sample_names[i]
} # for end
} # else end
# Set the same Y-axes for all the plots, if required by 'y.identical.auto = T'
if (y.identical.auto == T & is.null(y.lim) & length(plot.list) > 1) {
plot.list = Rseb::uniform.y.axis(plot.list = plot.list, ticks.each = y.ticks.interval, digits = y.digits)
}
# if (y.identical.auto == T & is.null(y.lim) & length(plot.list) > 1) {
# y.min_list = c()
# y.max_list = c()
#
# for (i in 1:length(plot.list)) {
# y.min_list = c(y.min_list, ggplot_build(plot.list[[i]])$layout$panel_params[[1]]$y$limits[1])
# y.max_list = c(y.max_list, ggplot_build(plot.list[[i]])$layout$panel_params[[1]]$y$limits[2])
# }
#
# breaks = labeling::extended(dmin = floor(min(y.min_list)),
# dmax = ceiling(max(y.max_list)),
# m = y.ticks.number)
#
# for (i in 1:length(plot.list)) {
# plot.list[[i]] =
# plot.list[[i]] +
# scale_y_continuous(breaks = breaks,
# limits = c(min(y.min_list), max(y.max_list)))
# }
# }
# Reshape the X-axis to include the reference points, if required by 'write.reference.points = T'
if (write.reference.points == T & is.null(x.lim)) {
reference_point = as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "ref_point")$values, pattern = ",")[[1]])
body = as.numeric(as.vector(stringr::str_split(string = filter(.data = metadata, parameters == "body")$values, pattern = ",")[[1]]))
if (length(reference_point) < length(plot.list)) {reference_point = rep(reference_point[1], length(plot.list))}
if (length(body) < length(plot.list)) {body = rep(body[1], length(plot.list))}
for (i in 1:length(plot.list)) {
labels = ggplot_build(plot.list[[i]])$layout$panel_params[[1]]$x$get_labels()
# Substitute the 0 value
labels = gsub(pattern = "^0$",
replacement = ifelse(test = reference_point[i] == "null",
yes = "start",
no = reference_point[i]),
x = labels)
# Substitute the 'end region' value when the plot is in scale-regions mode
if (reference_point[i] == "null") {
for (k in 1:length(labels)) {
# check if the current label could be a number
if (grepl("[-]?[0-9]+[.]?[0-9]*|[-]?[0-9]+[L]?|[-]?[0-9]+[.]?[0-9]*[eE][0-9]+", labels[k])) {
if (as.numeric(labels[k]) >= body[i]) {labels[k] = as.character(as.numeric(labels[k]) - body[i])}
}
}
labels = gsub(pattern = "^0$", replacement = "end", x = labels)
}
plot.list[[i]] = plot.list[[i]] + scale_x_continuous(labels = labels)
} # end For
} # end If
##############################################################################
# generate the multiplot
multiplot = cowplot::plot_grid(plotlist = plot.list, nrow = n.row.multiplot, byrow = by.row)
# Print multiplot if required
if (print.multiplot == T) {print(multiplot)}
# Export the file if required
if (!is.null(multiplot.export.file)) {
pdf(file = multiplot.export.file,
height = real.height.single.plot * n.row.multiplot,
width = real.width.single.plot * ceiling(length(plot.list)/n.row.multiplot))
print(multiplot)
message("Multiplot exported as: ", multiplot.export.file)
dev.off()
}
##############################################################################
# RETURN a list of data
return(list(data.table = full.stat.table,
metadata = metadata,
plot.list = plot.list,
multiplot = multiplot))
} # End function
################################################################################
#----------------------------- END OF FUNCTION --------------------------------#
################################################################################