This repository has been archived by the owner on Feb 12, 2022. It is now read-only.
/
pack.R
57 lines (54 loc) · 1.41 KB
/
pack.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#' Pack tokenized output into a data.frame
#'
#' Convenience front end for \code{RcppKagome::pack_list} and
#' \code{RcppKagome::pack_df}: data.frames are handed to \code{pack_df},
#' every other object is handed to \code{pack_list}.
#'
#' @param obj Object to pack. A data.frame is routed to \code{pack_df};
#'   anything else is routed to \code{pack_list}.
#' @param ... Other arguments, forwarded unchanged to the selected function.
#' @return data.frame
#'
#' @family pack-fn
#' @export
pack <- function(obj, ...) {
  # Anything that is not a data.frame takes the list route.
  if (!is.data.frame(obj)) {
    return(pack_list(obj, ...))
  }
  pack_df(obj, ...)
}
#' Pack list output of kagome
#'
#' For every element of \code{list}, extracts the \code{Surface} entry of each
#' item and joins those strings into a single text, yielding one row per
#' element.
#'
#' @param list Output of \code{RcppKagome::kagome}.
#' @param .collapse This argument is passed to \code{stringi::stri_join()}.
#' @return data.frame with columns \code{doc_id} and \code{text}.
#'   \code{doc_id} is the element name when \code{list} is named and the
#'   element position otherwise.
#'
#' @family pack-fn
#' @export
pack_list <- function(list, .collapse = " ") {
  lapply(list, function(elem) {
    # Collect the Surface string of every item, then join them into one text.
    elem %>%
      purrr::map(~ purrr::pluck(., "Surface")) %>%
      purrr::flatten_chr() %>%
      stringi::stri_join(collapse = .collapse)
  }) %>%
    # imap supplies the element name (or index) as .y, which becomes doc_id.
    purrr::imap_dfr(~ data.frame(doc_id = .y, text = .x))
}
#' Pack prettified output
#'
#' Groups \code{df} by its \code{doc_id} column and joins the values of one
#' column into a single text per group.
#'
#' @param df Output of \code{RcppKagome::prettify}. Must contain a
#'   \code{doc_id} column.
#' @param pull Column name to be packed into data.frame. Default value is
#'   \code{"token"}.
#' @param .collapse This argument is passed to \code{stringi::stri_join()}.
#' @return data.frame with columns \code{doc_id} and \code{text}, one row per
#'   group.
#'
#' @family pack-fn
#' @export
pack_df <- function(df, pull = "token", .collapse = " ") {
  # NOTE(review): the list returned by group_map() appears to be unnamed, so
  # imap_dfr() would fill `doc_id` with the position of each group rather than
  # the original `doc_id` value -- confirm this is the intended behaviour.
  df %>%
    dplyr::group_by(.data$doc_id) %>%
    dplyr::group_map(
      ~ dplyr::pull(.x, {{ pull }}) %>%
        stringi::stri_join(collapse = .collapse)
    ) %>%
    purrr::imap_dfr(~ data.frame(doc_id = .y, text = .x))
}