/
getArtigosPublicados.R
executable file
·61 lines (55 loc) · 2.24 KB
/
getArtigosPublicados.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#' @title getArtigosPublicados
#' @description Extract published papers from a 'Lattes' XML file.
#' @param curriculo 'Lattes' XML document, as imported by
#'   `xml2::read_xml()`.
#' @return A data frame built by row-binding one tibble per
#'   `ARTIGO-PUBLICADO` node. Its columns are the attributes of
#'   `DADOS-BASICOS-DO-ARTIGO` and `DETALHAMENTO-DO-ARTIGO` (names
#'   normalised by `janitor::clean_names()`), a list-column `autores` holding
#'   a tibble with the attributes of the article's `AUTORES` nodes, and `id`,
#'   the value returned by `getId(curriculo)`.
#' @details An error is raised when `curriculo` does not inherit from
#'   `xml_document`.
#'
#'   No branch of this function returns `NULL`: the result is always the
#'   output of `dplyr::bind_rows()` followed by `dplyr::mutate()`. A
#'   curriculum without `ARTIGO-PUBLICADO` nodes is therefore expected to
#'   yield an empty data frame. NOTE(review): earlier documentation stated
#'   that `NULL` is returned in that case; confirm which contract callers
#'   rely on.
#' @examples
#' if(interactive()) {
#'
#' # to import from one curriculum
#' # curriculo <- xml2::read_xml('file.xml')
#' # getArtigosPublicados(curriculo)
#'
#' }
#' @seealso
#' \code{\link[xml2]{xml_find_all}},\code{\link[xml2]{xml_attr}}
#' \code{\link[purrr]{map}},\code{\link[purrr]{map2}}
#' \code{\link[dplyr]{bind}},\code{\link[dplyr]{mutate}}
#' \code{\link[janitor]{clean_names}}
#' \code{\link[tibble]{tibble}}
#' @rdname getArtigosPublicados
#' @export
#' @importFrom xml2 xml_find_all xml_attrs
#' @importFrom purrr map map2 pmap
#' @importFrom dplyr bind_rows bind_cols mutate
#' @importFrom janitor clean_names
getArtigosPublicados <- function(curriculo) {
  if (!inherits(curriculo, "xml_document")) {
    stop(
      "The input file must be XML, imported from `xml2` package.",
      call. = FALSE
    )
  }

  # Locate the articles once and reuse the node set for every child lookup.
  artigos <- xml2::xml_find_all(curriculo, ".//ARTIGO-PUBLICADO")

  # For each article, gather the attributes of the descendants matching
  # `xpath` into a tibble (one row per matching node) with cleaned names.
  extrair_atributos <- function(xpath) {
    purrr::map(artigos, \(artigo) {
      artigo |>
        xml2::xml_find_all(xpath) |>
        xml2::xml_attrs() |>
        dplyr::bind_rows() |>
        janitor::clean_names()
    })
  }

  dados_basicos <- extrair_atributos(".//DADOS-BASICOS-DO-ARTIGO")
  detalhamento <- extrair_atributos(".//DETALHAMENTO-DO-ARTIGO")
  autores <- extrair_atributos(".//AUTORES")

  # Basic data and details are placed side by side for each article.
  artigos_base <- purrr::map2(dados_basicos, detalhamento, dplyr::bind_cols)

  # Authors are kept as a nested tibble so that each article's columns are
  # not repeated once per author.
  purrr::map2(
    artigos_base,
    autores,
    \(artigo, autoria) tibble::tibble(artigo, autores = list(autoria))
  ) |>
    dplyr::bind_rows() |>
    # getId() is defined elsewhere in the package; its result is stored as
    # the `id` column of every row.
    dplyr::mutate(id = getId(curriculo))
}