-
Notifications
You must be signed in to change notification settings - Fork 5
/
mmnetwork.R
229 lines (217 loc) · 9.04 KB
/
mmnetwork.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
#' @title Network plot of mate-pair or paired-end connected scaffold reads
#'
#' @description Plots all connected scaffolds in a network plot. Scaffolds can then be highlighted and extracted using the locator and selection features.
#'
#' @param mm (\emph{required}) A dataframe loaded with \code{\link{mmload}}.
#' @param network (\emph{required}) Paired-end or mate-pair connections between scaffolds in long format. The first and second columns must contain all connected scaffold pairs and the third column the number of connections.
#' @param min_connections Filter all scaffold pairs with equal to or less than this number of connections. (\emph{Default: } \code{2})
#' @param color_by Color the scaffolds by a variable in \code{mm}. (\emph{Default: } \code{NULL})
#' @param color_scale_log10 (\emph{Logical}) Log10-scale the color gradient when \code{color_by} is set and the variable is continuous (\emph{Default: } \code{FALSE})
#' @param locator (\emph{Logical}) When \code{TRUE}, left-clicks in the plot are captured and the exact x/y-coordinates of the mouse clicks are captured and returned. These coordinates can be used to highlight a selection of scaffolds in the plot, and to extract all scaffolds within the selection. (\emph{Default: } \code{FALSE})
#' @param selection A 2-column dataframe with the x and y coordinates of points with which to draw a polygon onto the plot to highlight a selected region. A selection can be obtained by using the locator feature (by \code{locator = TRUE}). (\emph{Default: } \code{NULL})
#' @param highlight_labels A dataframe or vector of scaffold names whose labels to highlight in the plot (colored by \code{highlight_color}). (\emph{Default: } \code{NULL})
#' @param highlight_color The color with which to highlight the scaffold labels defined by \code{highlight_labels}. (\emph{Default: } \code{"darkred"})
#' @param links_scale A factor to scale the sizes of the links plotted between scaffolds. (\emph{Default: } \code{1})
#' @param scaffold_labels Add labels with the scaffold names of \emph{all} scaffolds.
#' @param print_nolinks (\emph{Logical}) Print the names of all scaffolds with no links to other scaffolds to the console. (\emph{Default: } \code{FALSE})
#' @param seed Network plots are based on Random Number Generation, and this is used to set a specific random seed (with \code{set.seed}) allowing for reproducible network plots. Set to NULL to generate a unique network plot with each run of \code{mmnetwork}. (\emph{Default: } \code{0})
#'
#' @export
#'
#' @return A ggplot object. Note that mmgenome2 hides all warnings produced by ggplot objects.
#'
#' @section Using the locator with mmnetwork:
#' Unlike a plot generated by \code{\link{mmplot}}, the coordinates of points in each dimension in a \code{\link{mmnetwork}} plot is not contained within the provided dataframe \code{mm} itself. As such \code{\link{mmnetwork}} does not have a corresponding extract function like \code{\link{mmextract}} is for \code{\link{mmplot}}. Instead, a subset of \code{mm} containing all scaffolds within the selection polygon is available in the returned ggplot object with \code{plot$data_in_selection}.
#'
#' @import ggplot2
#' @importFrom igraph graph.data.frame layout_with_fr V
#' @importFrom dplyr filter
#' @importFrom sp point.in.polygon
#' @importFrom tibble as_tibble
#' @importFrom ggrepel geom_text_repel
#'
#' @examples
#' library(mmgenome2)
#' data(mmgenome2)
#' data(paired_ends)
#' mmgenome2
#' selection <- data.frame(
#' cov_C13.11.25 = c(7.2, 16.2, 25.2, 23.3, 10.1),
#' cov_C14.01.09 = c(47, 77, 52.8, 29.5, 22.1)
#' )
#' mmgenome2_extraction <- mmextract(mmgenome2, selection = selection)
#' mmgenome2_extraction
#' p <- mmnetwork(mmgenome2_extraction,
#' network = paired_ends,
#' min_connections = 1,
#' color_by = "taxonomy"
#' )
#' p
#' @author Kasper Skytte Andersen \email{ksa@@bio.aau.dk}
#' @author Soren M. Karst \email{smk@@bio.aau.dk}
#' @author Mads Albertsen \email{MadsAlbertsen85@@gmail.com}
mmnetwork <- function(mm,
network,
min_connections = 1,
color_by = NULL,
color_scale_log10 = FALSE,
locator = FALSE,
selection = NULL,
highlight_labels = NULL,
highlight_color = "darkred",
links_scale = 1,
scaffold_labels = FALSE,
print_nolinks = FALSE,
seed = 0) {
# Checks and error messages before anything else
if (isTRUE(locator) & !is.null(selection)) {
stop("Using the locator and highlighting a selection at the same time is not supported.", call. = FALSE)
}
## Subset the network, if no connections return error
snetwork <- dplyr::filter(network, network[[1]] %in% mm[[1]] & network[[2]] %in% mm[[1]] & network[["connections"]] >= min_connections)
if (!nrow(snetwork) > 0) {
stop("There are no connections between any of the scaffolds.", call. = FALSE)
}
## Convert to graph
g <- igraph::graph.data.frame(snetwork, directed = F)
## Calculate a layout
if (!is.null(seed)) set.seed(seed)
layout <- igraph::layout_with_fr(g)
## Extract layout coordinates
gpoints <- merge(data.frame("scaffold" = igraph::V(g)$name, "x" = layout[, 1], "y" = layout[, 2]), mm, by = 1)
## Extract link coordinates
links <- merge(snetwork, gpoints[, 1:3], by.x = "scaffold1", by.y = "scaffold")
links <- merge(links, gpoints[, 1:3], by.x = "scaffold2", by.y = "scaffold")
colnames(links)[4:7] <- c("x", "y", "xend", "yend")
p <- ggplot(
data = gpoints,
aes_string(
x = "x",
y = "y",
size = "length"
)
) +
geom_segment(
data = links,
aes_string(
x = "x",
y = "y",
xend = "xend",
yend = "yend"
),
color = "darkgrey",
size = log10(links[["connections"]]) * links_scale
) +
scale_size_area(name = "Scaffold length", max_size = 20)
if (!is.null(color_by)) {
if (is.factor(mm[[color_by]]) | is.character(mm[[color_by]])) {
p <- p +
geom_point(alpha = 0.1, color = "black") +
geom_point(
data = subset(gpoints, gpoints[[color_by]] != "NA"),
aes_string(color = color_by),
shape = 1
) +
guides(colour = guide_legend(override.aes = list(alpha = 1, size = 5, shape = 19)))
} else if (is.numeric(mm[[color_by]])) {
p <- p +
geom_point(alpha = 0.7, aes_string(color = color_by))
if (isTRUE(color_scale_log10)) {
p <- p +
scale_colour_gradientn(colours = c("blue", "green", "red"), trans = "log10")
} else {
p <- p +
scale_colour_gradientn(colours = c("blue", "green", "red"))
}
}
} else {
p <- p +
geom_point(alpha = 0.1, color = "black")
}
p <- p +
theme(
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.text = element_blank(),
axis.title = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank(),
panel.border = element_blank(),
panel.background = element_blank(),
legend.key = element_blank()
)
##### Highlight selected scaffolds #####
if (!is.null(highlight_labels)) {
if (is.data.frame(highlight_labels)) {
highlight_labels <- highlight_labels[[1]]
}
scaffolds <- as.character(highlight_labels)
d <- dplyr::filter(gpoints, scaffold %in% scaffolds)
p <- p +
geom_text(
data = d,
color = highlight_color,
size = 4,
label = d[["scaffold"]]
)
}
##### label all scaffolds #####
if (isTRUE(scaffold_labels)) {
p <- p +
ggrepel::geom_text_repel(
label = gpoints[[1]],
size = 4,
color = "black"
)
}
if (isTRUE(print_nolinks)) {
nolinks <- mm[["scaffold"]][!(mm[["scaffold"]] %in% gpoints$scaffold)]
cat("The following scaffolds have no links to other scaffolds:\n")
cat(paste0(nolinks, collapse = ", "))
}
##### Locator and selection #####
if (isTRUE(locator)) {
points <- mmlocator(p)
}
if (isTRUE(locator) | !is.null(selection)) {
if (!isTRUE(locator) & !is.null(selection)) {
points <- selection
}
p$selection <- points
in_polygon <- sp::point.in.polygon(
point.x = p$data[[colnames(points)[1]]],
point.y = p$data[[colnames(points)[2]]],
pol.x = points[[1]],
pol.y = points[[2]],
mode.checked = TRUE
)
scaffolds <- as.character(p$data$scaffold[which(in_polygon > 0)])
p$data_in_selection <- tibble::as_tibble(mm[which(mm[[1]] %in% scaffolds), ])
p <- p +
geom_point(
data = points,
aes_(
x = points[[1]],
y = points[[2]]
),
color = "black",
size = 2,
inherit.aes = FALSE,
na.rm = TRUE
) +
geom_polygon(
data = points,
aes_(
x = points[[1]],
y = points[[2]]
),
fill = NA,
size = 0.5,
lty = 2,
color = "darkred",
inherit.aes = FALSE,
na.rm = TRUE
)
}
return(p)
}