-
Notifications
You must be signed in to change notification settings - Fork 14
/
issn_title_collect.R
38 lines (29 loc) · 998 Bytes
/
issn_title_collect.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
library(rcrossref)
library(dplyr)
# Build the ISSN -> journal title lookup saved as data/issn_title.rda.
#
# Pages through the records returned by cr_journals() and keeps a vector of
# titles named by the `issn` column of the results. Needs network access.
# The output path is relative, so run this from the directory that contains
# the data/ folder.

# Number of records requested per call; also the step between page offsets.
page_size <- 1000

# A zero-row request is enough to read the total record count.
probe <- cr_journals(limit = 0)
total <- probe$meta$total_results

# Fail loudly on an unusable count rather than overwrite the saved data set
# with an empty or partial result.
stopifnot(
  is.numeric(total),
  length(total) == 1L,
  !is.na(total),
  total > 0
)

# Start offset of every page. Ending the sequence at total - 1 (instead of
# total) works for totals smaller than one page and avoids a trailing empty
# request when the total is an exact multiple of the page size.
offsets <- seq(0, total - 1, by = page_size)

# Preallocate one slot per page instead of growing the list in the loop.
pages <- vector("list", length(offsets))
for (i in seq_along(offsets)) {
  cat(i, "\n")  # progress indicator: index of the page being fetched
  pages[[i]] <- cr_journals(limit = page_size, offset = offsets[i])
}

# Stack the `data` element of every page into a single data frame.
journals <- dplyr::bind_rows(lapply(pages, function(page) page$data))

# Titles named by ISSN; entries whose ISSN is missing cannot be looked up by
# name, so they are dropped.
issn_title <- stats::setNames(journals$title, journals$issn)
issn_title <- issn_title[!is.na(names(issn_title))]
# length(issn_title)
# issn_title[1:10]

# save to data/issn_title.rda
save(issn_title, file = "data/issn_title.rda", compress = "xz")
# Cleanup non-ASCII characters
# issn_title_badascii = lapply(unname(issn_title), tools::showNonASCII)
# non_asciis <- which(vapply(issn_title_badascii, function(z) length(z) == 1, logical(1)))
# issn_title[[non_asciis[1]]]
# for (i in seq_along(issn_title)) {
# if (i %in% non_asciis) {
# issn_title[[i]] <- stringi::stri_escape_unicode(issn_title[[i]])
# }
# }
# issn_title2 <- issn_title
# save(issn_title, file = "data/issn_title.rda", compress = "xz")