-
Notifications
You must be signed in to change notification settings - Fork 0
/
gcite_paper_df.R
49 lines (48 loc) · 1.38 KB
/
gcite_paper_df.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#' Get Paper Data Frame from Title URLs
#'
#' Reads the citation page for every URL with
#' \code{\link{gcite_citation_page}}, row-binds the per-paper results
#' (filling columns missing from some papers with \code{NA}), and
#' reorders the columns: columns whose names are not numeric come first,
#' in their original order, followed by columns whose names are numeric,
#' sorted by increasing numeric value.
#'
#' @param urls A character vector of urls, from
#' \code{all_papers$title_link}
#' @param verbose Print diagnostic messages
#' @param force Passed to \code{\link{gcite_citation_page}}; controls
#' whether a failure when reading a URL returns \code{NULL} instead of
#' stopping
#' @param sleeptime time in seconds to wait before each http request,
#' to avoid Google Scholar rate limit
#' @param ... Additional arguments passed to
#' \code{\link{gcite_citation_page}} and, from there, to \code{\link{GET}}
#'
#' @return A \code{data.frame} of authors, journal, and citations, with
#' one row per paper that returned data.  The result is always a
#' \code{data.frame}, even when it has a single column.
#' @export
#' @examples
#' if (!is_travis()) {
#' L = gcite_user_info(user = "uERvKpYAAAAJ",
#' read_citations = FALSE)
#' urls = L$all_papers$title_link
#' paper_df = gcite_paper_df(urls = urls, force = TRUE)
#' }
gcite_paper_df = function(
  urls,
  verbose = TRUE,
  force = FALSE,
  sleeptime = 0,
  ...) {
  if (verbose) {
    message("Reading citation pages")
  }
  paper_info = pbapply::pblapply(
    urls,
    function(x) {
      # Pause before each request to stay under the rate limit
      Sys.sleep(sleeptime)
      # `...` is resolved lexically from the enclosing function
      gcite_citation_page(x, force = force, ...)
    }
  )
  # fill = TRUE: papers need not share the same set of columns
  paper_df = data.table::rbindlist(paper_info, fill = TRUE)
  paper_df = as.data.frame(paper_df)

  # Split column names into non-numeric and numeric ones.  as.numeric()
  # warns on every non-numeric name, which is expected here.
  cn = colnames(paper_df)
  suppressWarnings({
    num_cn = as.numeric(cn)
  })
  is_num = !is.na(num_cn)
  # Order the *original* name strings by their numeric value rather than
  # converting the numbers back to text, which could yield a string that
  # no longer matches the column name (e.g. "100000" -> "1e+05").
  cn = c(cn[!is_num], cn[is_num][order(num_cn[is_num])])
  # drop = FALSE keeps a one-column result as a data.frame
  paper_df[, cn, drop = FALSE]
}