/
nest.R
167 lines (147 loc) · 5.22 KB
/
nest.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
#' Obtain a nested parse table from a character vector
#'
#' Parses `text` to a flat parse table and subsequently changes its
#' representation into a nested parse table with [nest_parse_data()].
#' @param text A character vector to parse.
#' @return A nested parse table. See [tokenize()] for details on the columns
#' of the parse table.
#' @importFrom purrr when
#' @keywords internal
compute_parse_data_nested <- function(text) {
parse_data <- tokenize(text) %>%
add_terminal_token_before() %>%
add_terminal_token_after()
parse_data$child <- rep(list(NULL), length(parse_data$text))
pd_nested <- parse_data %>%
nest_parse_data() %>%
flatten_operators() %>%
when(any(parse_data$token == "EQ_ASSIGN") ~ relocate_eq_assign(.), ~.)
pd_nested
}
#' Enhance the mapping of text to the token "SPECIAL"
#'
#' Map text corresponding to the token "SPECIAL" to a (more) unique token
#' description.
#' @param pd A parse table.
#' @keywords internal
enhance_mapping_special <- function(pd) {
pipes <- pd$token == "SPECIAL" & pd$text == "%>%"
pd$token[pipes] <- special_and("PIPE")
ins <- pd$token == "SPECIAL" & pd$text == "%in%"
pd$token[ins] <- special_and("IN")
others <- pd$token == "SPECIAL" & !(pipes | ins)
pd$token[others] <- special_and("OTHER")
pd
}
special_and <- function(text) {
paste0("SPECIAL-", text)
}
#' Add information about previous / next token to each terminal
#'
#' @param pd_flat A flat parse table.
#' @name add_token_terminal
#' @keywords internal
NULL
#' @rdname add_token_terminal
#' @keywords internal
add_terminal_token_after <- function(pd_flat) {
terminals <- pd_flat %>%
filter(terminal) %>%
arrange(pos_id)
tibble(pos_id = terminals$pos_id, token_after = lead(terminals$token, default = "")) %>%
left_join(pd_flat, ., by = "pos_id")
}
#' @rdname add_token_terminal
#' @keywords internal
add_terminal_token_before <- function(pd_flat) {
terminals <- pd_flat %>%
filter(terminal) %>%
arrange(pos_id)
tibble(
id = terminals$id,
token_before = lag(terminals$token, default = "")
) %>%
left_join(pd_flat, ., by = "id")
}
#' @describeIn add_token_terminal Removes column `terimnal_token_before`. Might
#' be used to prevent the use of invalidated information, e.g. if tokens were
#' added to the nested parse table.
#' @keywords internal
remove_terminal_token_before_and_after <- function(pd_flat) {
pd_flat$token_before <- NULL
pd_flat$token_after <- NULL
pd_flat
}
#' Helper for setting spaces
#'
#' @param spaces_after_prefix An integer vector with the number of spaces
#' after the prefix.
#' @param force_one Whether spaces_after_prefix should be set to one in all
#' cases.
#' @return An integer vector of length spaces_after_prefix, which is either
#' one (if `force_one = TRUE`) or `space_after_prefix` with all values
#' below one set to one.
#' @keywords internal
set_spaces <- function(spaces_after_prefix, force_one) {
if (force_one) {
n_of_spaces <- rep(1, length(spaces_after_prefix))
} else {
n_of_spaces <- pmax(spaces_after_prefix, 1L)
}
n_of_spaces
}
#' Nest a flat parse table
#'
#' `nest_parse_data` groups `pd_flat` into a parse table with tokens that are
#' a parent to other tokens (called internal) and such that are not (called
#' child). Then, the token in child are joined to their parents in internal
#' and all token information of the children is nested into a column "child".
#' This is done recursively until we are only left with a nested tibble that
#' contains one row: The nested parse table.
#' @param pd_flat A flat parse table including both terminals and non-terminals.
#' @seealso [compute_parse_data_nested()]
#' @return A nested parse table.
#' @importFrom purrr map2
#' @keywords internal
nest_parse_data <- function(pd_flat) {
if (all(pd_flat$parent <= 0)) return(pd_flat)
pd_flat$internal <- with(pd_flat, (id %in% parent) | (parent <= 0))
split_data <- split(pd_flat, pd_flat$internal)
child <- split_data$`FALSE`
internal <- split_data$`TRUE`
internal$internal_child <- internal$child
internal$child <- NULL
child$parent_ <- child$parent
joined <-
child %>%
nest_(., "child", setdiff(names(.), "parent_")) %>%
left_join(internal, ., by = c("id" = "parent_"))
nested <- joined
nested$child <- map2(nested$child, nested$internal_child, combine_children)
nested <- nested[, setdiff(names(nested), "internal_child")]
nest_parse_data(nested)
}
#' Combine child and internal child
#'
#' Binds two parse tables together and arranges them so that the tokens are in
#' the correct order.
#' @param child A parse table or `NULL`.
#' @param internal_child A parse table or `NULL`.
#' @details Essentially, this is a wrapper around [dplyr::bind_rows()], but
#' returns `NULL` if the result of [dplyr::bind_rows()] is a data frame with
#' zero rows.
#' @keywords internal
combine_children <- function(child, internal_child) {
bound <- bind_rows(child, internal_child)
if (nrow(bound) == 0) return(NULL)
bound[order(bound$pos_id), ]
}
#' Get the start right
#'
#' On what line does the first token occur?
#' @param pd_nested A nested parse table.
#' @return The line number on which the first token occurs.
#' @keywords internal
find_start_line <- function(pd_nested) {
pd_nested$line1[1]
}