-
Notifications
You must be signed in to change notification settings - Fork 1
/
dot-plot.R
406 lines (360 loc) · 11.6 KB
/
dot-plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
#' Extract a Branch from a Panicle Graph
#'
#' This function returns a `igraph` with the branch
#' developing from a selected node.
#'
#' @param panicle A graph of class `igraph`
#' @param vert A number, the vertex of `panicle`` from which to start
pull_branch <- function(panicle,
vert)
{
# The secondary branch is composed by all vertexes
# downstream the branch node
sub_verts <-
panicle %>%
igraph::subcomponent(vert,
mode = "out")
sub_graph <-
panicle %>%
igraph::induced_subgraph(sub_verts)
}
#' Make a Tibble with the Vertex ID of the Longest Path
#'
#' This function takes a branch `igraph` as object and
#' a starting `vertex` as object, finds the longest path
#' that starts from that vertex.
#'
#' The longest path is assumend to be the main branch axis
#'
#' This function returns a character vector that
#' stores `type` of the vertexes (Secondary, Spikelet, etc.)
#' along that axis ordered from the starting vertex
#' (supplied in `vert_rank`) going outward.
#'
#' @param branch an `igraph` object storing data for a
#' panicle branch.
#' @param vert_rank a number indicating the `rank` attribute
#' that identifies the staring vertex.
make_idline <- function(branch,
vert_rank)
{
# since this is a new graph, I have to identify
# the generating vertex by the attribute rank.
# Because that attribute is carried in from the
# original panicle graph
start_node <-
branch %>%
igraph::vertex_attr() %>%
{which(.$rank == vert_rank)}
# I assume and that the end of the branch axis
# correspond to the furthest vertex, and use this
# trick to identify it
top_node <-
branch %>%
igraph::distances(v = start_node,
mode = "out") %>%
which.max()
# and use that node to estimate the path that
# contains all the nodes on the main axis
main_path <-
branch %>%
igraph::shortest_paths(from = start_node,
to = top_node,
mode = "out")
# I am interested in the types of the nodes in the
# branch...
# and in their original rank
node_types <-
branch %>%
igraph::vertex_attr() %>%
{dplyr::tibble(type = .$type, original_rank = .$rank)}
# ...ranked along the nodes in the main axis of the
# branch
# this returns a character vector of ordered nodes
out <-
main_path$vpath %>%
purrr::flatten_int() %>%
node_types[., ] %>%
dplyr::mutate(branchwise_rank = main_path$vpath %>%
purrr::flatten_int())
# get the nodes that are downstream
# secondary branches and not in the
# main path
get_down_secondary <- function(vert_id) {
# the ids of vertexes direcltly downstream
down_vert_ids <-
igraph::neighbors(graph = branch,
v = vert_id,
mode = "out")
# the number of vertexes downstream
verts_down <- 0
for(i in down_vert_ids) {
if(! i %in% vpath){
subcomps <-
branch %>%
igraph::subcomponent(i,
mode = "out") %>%
length()
verts_down <- verts_down + subcomps
}
}
return(verts_down)
}
out <-
out %>%
dplyr::mutate(nodes_downstream = list(
igraph::neighbors(
graph = branch,
v = .data$branchwise_rank,
mode = "out")
))
# scaffold?
vpath <- main_path$vpath %>%
purrr::flatten_int()
# Save nodes downstream in the output of out
out <-
out %>%
dplyr::mutate(nodes_downstream = .data$branchwise_rank %>%
purrr::map_dbl(get_down_secondary))
return(out)
}
#' Identify Start and End Generating node
#'
#' This function select the start generating node
#' as the one with the highest x coordinate and the
#' end as the node with the lowest
#'
#' This approach is prone to error is the panicle
#' is not orientated from right to left in the
#' P-TRAP picture.
#'
#' This function is used and improved by `get_base()`,
#' which select the base node as the generating
#' node with no parents
#'
#' @param panicle a panicle graph.
get_generating <- function(panicle) {
panicle %>%
igraph::as_long_data_frame() %>%
dplyr::filter(.data$from_type == "Generating") %>%
dplyr::select(.data$from_x, .data$from_rank) %>%
dplyr::distinct() %>%
dplyr::arrange(
dplyr::desc(.data$from_x)
) %>%
dplyr::pull(.data$from_rank)
}
#' Get the node at the base of the panicle
#'
#' This function returns the rank of the node (vertex) at the
#' base of the panicle. We assume that the base node
#' is the node with type: `Generating` with no
#' children
#'
#' @param panicle a panicle graph
get_base <- function(panicle) {
# the base node is one of the generating
gens <- get_generating(panicle)
# that have no parent nodes in its neighbors
parents <-
gens %>%
purrr::map(
~igraph::neighbors(panicle,
v = .,
mode = "in")
)
gens[which(
parents %>% purrr::map_int(length) == 0
)]
}
#' Turn a Panicle Graph in a Tibble useful for a dotplot
#'
#' @param panicle a panicle graph.
#' @param silently logical, if `FALSE` prints intermediate data
#' while producing the panicle tibble. Defaults to `TRUE`.
#'
#' @export
panicle_tibble <- function(panicle,
silently = TRUE)
{
# get the generating node
gens <- get_generating(panicle)
# the base node is the one with no parents
start <- get_base(panicle)
# the other is the top node
to <- gens[which(gens != start)]
# we assume that the main axis of the rachis
# is the shortest (and only!) path between
# the two generating nodes
main_path <-
panicle %>%
igraph::shortest_paths(from = start,
to = to,
mode = "out") %>%
.$vpath %>%
purrr::flatten_int()
# I need a way to select neighbours
# with any other attribute than primary
not_primary <- function(v) {
#
# get the neighborough nodes
nb <- igraph::neighbors(graph = panicle,
v = v,
mode = "out")
# and its attributes
nb_attr <- igraph::vertex_attr(graph = panicle,
index = nb)
keep <- nb$type != "Primary" & nb$type != "Generating"
nb[keep] %>% as.numeric()
}
# All nodes where a primary branch starts
branch_starts <-
main_path %>%
purrr::map(not_primary)
# sometimes two branches start from the same node
# in case, First, print a note
if(!silently) {
double_branches <-
branch_starts %>%
purrr::map_dbl(length) %>%
{which(. > 1)}
if(length(double_branches > 0))
paste("Multiple branches detected on node:", double_branches,
"\n This node was split in multiple nodes") %>%
message()
}
# Then solve it by arbitrarily splitting that node
# in two
branch_starts <-
branch_starts %>%
purrr::flatten_dbl()
# pull a subgraph from every primary node
tb <- tibble::tibble(vert_rank = branch_starts,
branch = branch_starts %>%
purrr::map(
~pull_branch(panicle = panicle,
vert = .))
)
if(!silently) {print(tb)}
# new version
tb_list <-
tb %>%
purrr::pmap(make_idline) %>%
purrr::map(
~dplyr::mutate(., node_rank = 1:dplyr::n())
)
tb_list <-
1:length(tb_list) %>%
purrr::map(~dplyr::mutate(tb_list[[.]], primary_rank = .)) %>%
purrr::reduce(dplyr::bind_rows)
# Two columns: original_rank and branchwise_rank are confusing
# and are useful only for internal calculations:
# remove them from the final output
tb_list <-
tb_list %>%
dplyr::select(-.data$original_rank, -.data$branchwise_rank) %>%
# and rename one in a clearer way
dplyr::rename(secondary_rank = "node_rank")
}
#' Plot the Output of `panicle_tibble()`
#'
#' Returns a `ggplot2` object.
#'
#' This is a small utility plot function. You can use it
#' on the output of `panicle_tibble()` to represent it as a
#' tileplot.
#'
#' Although we provide this function as utility, we suggest that you design
#' your own plotting function, because only in this way you will reach the
#' versatlity required for exploratory data analysis. We provide ideas
#' on how to achieve this in the vignettes.
#'
#' This tileplot is inspired by the plots in
#' https://www.nature.com/articles/s41598-018-30395-9
#'
#' @param pan_tbl A tibble, the output of `panicle_tibble()`
#' @param draw.plot Logical. Should the function draw a plot on the graphic device?
#' Defaults to `FALSE`.
#'
#' @export
panicle_tileplot <- function(pan_tbl, draw.plot = FALSE)
{
p <-
pan_tbl %>%
ggplot2::ggplot(
ggplot2::aes_string(
x = "secondary_rank",
y = "primary_rank",
fill = "type")) +
ggplot2::geom_tile(
colour = "grey80",
size = 2) +
ggplot2::geom_text(data = . %>%
dplyr::filter(.data$type == "Seconday"),
ggplot2::aes_string(label = "nodes_downstream"),
colour = "grey20")
if(draw.plot) print(p)
return(p)
}
#' Plot the Output of `panicle_tibble()`
#'
#' This function is the same as `panicle_tileplot()`, with extra customization
#' of the output plot.
#'
#' Returns a `ggplot2` object.
#'
#' This is a small utility plot function. You can use it
#' on the output of `panicle_tibble()` to represent it as a
#' tileplot.
#'
#' Although we provide this function as utility, we suggest that you design
#' your own plotting function, because only in this way you will reach the
#' versatlity required for exploratory data analysis. We provide ideas
#' on how to achieve this in the vignettes.
#'
#' This tileplot is inspired by the plots in
#' https://www.nature.com/articles/s41598-018-30395-9
#'
#' @param pan_tbl A tibble, the output of `panicle_tibble()`
#' @param draw.plot Logical. Should the function draw a plot on the graphic device?
#' Defaults to `FALSE`.
#'
#' @export
panicle_tileplot2 <- function(pan_tbl, draw.plot = FALSE)
{
p <-
pan_tbl %>%
ggplot2::ggplot(
ggplot2::aes(
x = secondary_rank %>% as.character() %>% forcats::as_factor(),
y = primary_rank %>% as.character() %>% forcats::as_factor(),
fill = nodes_downstream)) + # nodes on 2ndary branches to fill colours +
ggplot2::geom_tile(colour = "grey80",
size = 1.5) +
ggplot2::geom_text(data = . %>%
dplyr::filter(type == "Seconday"), # Show the number for secondary nodes
ggplot2::aes(label = nodes_downstream),
colour = "grey30",
fontface = "bold",
size = 5) +
# fixed xy ratio, each tile is a square
ggplot2::coord_fixed() +
# set scale of colours and
# proper labels for the colour scales
ggplot2::scale_fill_viridis_c(
breaks = function(limits) c(0, 2:max(limits)),
labels = function(breaks) dplyr::case_when(breaks == 0 ~ "1\n[Spikelet]",
TRUE ~ as.character(breaks)),
guide = ggplot2::guide_legend(nrow = 1,
# keyheight = unit(7, units = "mm"),
# keywidth = unit(7, units = "mm"),
override.aes = list(size = 0))) +
# fill guide on top
ggplot2::theme(legend.position = "top") +
# Clear axis names
ggplot2::labs(x = "Nodes rank along primary branches",
y = "Rank along the rachis",
fill = "Nodes on\n secondary branch")
if(draw.plot) print(p)
return(p)
}