/
parse_help.R
228 lines (206 loc) · 8.13 KB
/
parse_help.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
#' Parses commandline help options to return vector of valid flag names
#'
#' When using cmdfun to write lazy shell wrappers, the user can easily mistype
#' a commandline flag since there is not text completion. Some programs behave
#' unexpectedly when flags are typed incorrectly, and for this reason return uninformative error messages.
#'
#' `cmd_help_parse_flags` tries to grab flags from --help documentation which
#' can be used for error checking. It will try to parse flags following "-" or
#' "--" while ignoring hyphenated words in help text. Although this should cover
#' most use-cases, it may be necessary to write a custom help-text parser for
#' nonstandard tools. Inspect this output **carefully** before proceeding. Most
#' often, characters are leftover at the **end** of parsed names, which will
#' require additional parsing.
#'
#' @seealso \code{\link{cmd_help_flags_similar}} \code{\link{cmd_help_flags_suggest}}
#'
#' @param help_lines `character` vector containing the output of "command
#' --help", or similar output. Optional: pass either `stdout`, or `stderr` output from
#' processx::run(), must set `processx = TRUE`.
#' @param split_newline `logical(1)` if set to TRUE will split string on "\\n" before
#' parsing (useful when parsing output from `processx`).
#'
#' @return character vector of flag names parsed from help text
#' @export
#'
#' @examples
#' if (.Platform$OS.type == "unix" & file.exists("/bin/tar")) {
#' # below are two examples parsing the --help method of GNU tar
#'
#' # with processx
#' if (require(processx)) {
#' out <- processx::run("tar", "--help", error_on_status = FALSE)
#' fn_flags <- cmd_help_parse_flags(out$stdout, split_newline = TRUE)
#' }
#'
#' # with system2
#' lines <- system2("tar", "--help", stderr = TRUE)
#' fn_flags <- cmd_help_parse_flags(lines)
#'
#' # NOTE: some of the "tar" flags contain the extra characters: "\\[", "\\)", and ";"
#' # ie "one-top-level\[" which should be "one-top-level"
#' # These can be additionally parsed using
#' gsub("[\\[;\\)]", "", fn_flags)
#' }
#'
cmd_help_parse_flags <- function(help_lines, split_newline = FALSE){
stopifnot(is.logical(split_newline))
if (split_newline){
help_lines <- strsplit(help_lines, "\n")[[1]]
}
help_lines %>%
help_flags_all() %>%
help_flag_names
}
#' Get flag names from parsed lines
#'
#' @param lines parsed flag lines where 1st word on each line is the flag name
#'
#' @return character vector of flag names
#' @noRd
help_flag_names <- function(lines){
strsplit(lines, " ") %>%
purrr::map_chr(~{
.x[[1]]
}) %>%
unique
}
#' Get vector of help lines for short (-) and long (--) flag definitions
#'
#' @param lines unprocessed help lines (with newlines trimmed if needed)
#'
#' @return a vector where each entry is a line where the first word is the flag name
#' @noRd
help_flags_all <- function(lines){
# Preprocess lines
parsed_lines <- lines %>%
# drop leading whitespace
gsub("^ +", "", .) %>%
# grab lines beginning with flag prefix
grep("^-{1,2}[^-]", ., value = TRUE) %>%
# parse - and -- flag entries, and put at beginning of line
# combine into single vector for further processing
{
c(help_flags_long(.), help_flags_short(.))
} %>%
# remove flag prefix
gsub("^-+", "", .) %>%
# remove leading whitespace
# in case help file uses an unusual prefix
# I've seen this for some windows CMD help pages.
gsub("^ +", "", .) %>%
# Drop empty lines
gsub("^$", "", .) %>%
# Drop text after =
gsub("=.+", "", .) %>%
# Remove commas
gsub(",", "", .)
return(parsed_lines)
}
#' Return "short" (-) flag definition lines
#'
#' @param cleaned_lines vector of preprocessed help lines
#'
#' @return vector of lines where first word is a flag defined with -
#' @noRd
help_flags_short <- function(cleaned_lines){
cleaned_lines %>%
# remove up to flag prefix (-)
gsub("^-{2}.+ -", "-", .) %>%
# Keep any single - flags
grep("^-{1}[^-]", ., value = TRUE)
}
#' Return "long" (--) flag definition lines
#'
#' @param cleaned_lines
#'
#' @return vector of lines where first word is a flag defined with --
#' @noRd
help_flags_long <- function(cleaned_lines){
cleaned_lines %>%
# remove up to flag prefix (--)
gsub(".+ --", "--", .) %>%
# Drop any single - flags
grep("^-{1}[^-]", ., invert = TRUE, value = TRUE)
}
#' Suggest alternative name by minimizing Levenshtein edit distance between valid and invalid arguments
#'
#' @param command_flag_names character vector of valid names (can be output of \code{\link{cmd_help_parse_flags}})
#' @param flags a vector names correspond to values to be checked against `command_flag_names`
#' @param .fun optional function to apply to `command_flag_names` and `flags`
#' before checking their values. If using a function to rename flags after
#' `cmd_list_interp`, use that same function here. Can be useful for parsing help
#' lines into R-friendly variable names for user-convenience. Can be function
#' or `rlang`-style formula definition (ie `.fun = ~{foo(.x)}` is the same as
#' `.fun = function(x){foo(x)}`). Note: if command_flag_names need additional
#' parsing after \code{\link{cmd_help_parse_flags}}, it is best to do that
#' preprocessing before passing them to this function.
#' @param distance_cutoff Levenshtein edit distance beyond which to suggest
#' ??? instead of most similar argument (default = 3). Setting this too
#' liberally will result in nonsensical suggestions.
#'
#' @return named vector where names are names from `flags` and their values are the suggested best match from `command_flag_names`
#' @export
#'
#' @importFrom utils adist
#'
#' @examples
#' # with a flagsList, need to pass names()
#' flagsList <- list("output" = "somevalue", "missplld" = "anotherValue")
#' cmd_help_flags_similar(c("output", "misspelled"), names(flagsList))
#'
#' command_flags <- c("long-flag-name")
#' flags <- c("long_flag_naee")
#' cmd_help_flags_similar(command_flags, flags, .fun = ~{gsub("-", "_", .x)})
#'
#' # returns NULL if no errors
#' cmd_help_flags_similar(c("test"), "test")
cmd_help_flags_similar <- function(command_flag_names, flags, .fun = NULL, distance_cutoff = 3L){
if (!is.null(.fun)){
if (class(.fun) == "formula"){.fun <- rlang::as_function(.fun)}
stopifnot(is.function(.fun))
command_flag_names <- .fun(command_flag_names)
flags <- .fun(flags)
}
bad_flags <- flags[!flags %in% command_flag_names]
if (length(bad_flags) == 0) {return(NULL)}
flag_dist <- adist(bad_flags, command_flag_names)
# Only suggest names similar enough to existing flag,
# otherwise return ??? for match.
# distance_cutoff is the levenshtein edit distance threshold
# drop_distance is a special value for things to be dropped. Because I minimize edit distance,
# drop_distance needs to be a value larger than the cutoff (as low as distance_cutoff + 1)
drop_distance <- distance_cutoff + 1L
flag_dist[flag_dist > distance_cutoff] <- drop_distance
i <- apply(flag_dist, 1, function(x) {which(x == min(x))[1]})
drop <- apply(flag_dist, 1, function(x) {which(min(x) == drop_distance)[1]})
suggest_flags <- command_flag_names[i]
names(suggest_flags) <- bad_flags
suggest_flags[!is.na(drop)] <- "???"
return(suggest_flags)
}
#' Error & Suggest different flag name to user
#'
#' @param suggest_names named character vector, names correspond to original
#' value, values correspond to suggested replacement.
#'
#' @return error message suggesting alternatives to user
#' @export
#'
#' @examples
#' user_flags <- list("output", "inpt")
#' valid_flags <- c("output", "input")
#' suggestions <- cmd_help_flags_similar(valid_flags, user_flags)
#' \dontrun{
#' # Throws error
#' cmd_help_flags_suggest(suggestions)
#' }
cmd_help_flags_suggest <- function(suggest_names){
if (is.null(suggest_names)){return(NULL)}
quote_name <- function(name) paste0("\"", name, "\"")
suggestString <- paste(quote_name(suggest_names),
quote_name(names(suggest_names)),
sep = " instead of: ",
collapse = "\n")
usethis::ui_stop(paste0("\nInvalid flags. Did you mean:\n", suggestString))
}