/
epmc_details.r
140 lines (137 loc) · 4.29 KB
/
epmc_details.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
#' Get details for individual records
#'
#' This function returns parsed metadata for a given publication ID
#' including abstract, full text links, author details including ORCID and affiliation,
#' MeSH terms, chemicals, grants.
#'
#' @param ext_id character, publication identifier
#' @param data_src character, data source, by default Pubmed/MedLine index will
#' be searched.
#' Other sources Europe PubMed Central supports are:
#' \describe{
#' \item{agr}{Agricola is a bibliographic database of citations to the
#' agricultural literature created by the US National Agricultural Library
#' and its co-operators.}
#' \item{cba}{Chinese Biological Abstracts}
#' \item{ctx}{CiteXplore}
#' \item{eth}{EthOs Theses, i.e. PhD theses (British Library)}
#' \item{hir}{NHS Evidence}
#' \item{med}{PubMed/Medline NLM}
#' \item{pat}{Biological Patents}
#' \item{pmc}{PubMed Central}
#' \item{ppr}{Preprint records}
#' }
#'
#' @return list of data frames
#' @export
#'
#' @examples
#' \dontrun{
#' epmc_details(ext_id = "26980001")
#' epmc_details(ext_id = "24270414")
#'
#' # PMC record
#' epmc_details(ext_id = "PMC4747116", data_src = "pmc")
#'
#' # Other sources:
#' # Agricolo
#' epmc_details("IND43783977", data_src = "agr")
#' # Biological Patents
#' epmc_details("EP2412369", data_src = "pat")
#' # Chinese Biological Abstracts
#' epmc_details("583843", data_src = "cba")
#' # CiteXplore
#' epmc_details("C6802", data_src = "ctx")
#' # NHS Evidence
#' epmc_details("338638", data_src = "hir")
#' # Theses
#' epmc_details("409323", data_src = "eth")
#' # Preprint
#' epmc_details("PPR158112", data_src = "ppr")
#' }
epmc_details <- function(ext_id = NULL, data_src = "med") {
if (is.null(ext_id))
stop("Please provide a publication id")
if (!tolower(data_src) %in% supported_data_src)
stop(
paste0(
"Data source '",
data_src,
"' not supported. Try one of the
following sources: ",
paste0(supported_data_src, collapse = ", ")
)
)
# build request
path <- paste0(rest_path(), "/search")
if (data_src == "pmc") {
q <- list(
query = paste0("PMCID:", ext_id),
format = "json",
resulttype = "core"
)
} else {
q <- list(
query = toupper(paste0("ext_id:", ext_id, "%20src:", data_src)),
format = "json",
resulttype = "core"
)
}
req <- rebi_GET(path = path, query = q)
doc <- jsonlite::fromJSON(req)
if (doc$hitCount == 0) {
message("nothing found, please check your query")
NULL
} else {
res <- doc$resultList$result
out <- list(
basic = res[,!names(res) %in% fix_list(res)],
author_details = parse_aut(res),
journal_info = parse_jn(res),
ftx = plyr::rbind.fill(res$fullTextUrlList$fullTextUrl),
chemical = plyr::rbind.fill(res$chemicalList$chemical),
mesh_topic = plyr::rbind.fill(res$meshHeadingList$meshHeading)[-3],
mesh_qualifiers = get_mesh_subheadings(res = res),
comments = plyr::rbind.fill(res$commentCorrectionList$commentCorrection),
grants = plyr::rbind.fill(res$grantsList$grant)
)
lapply(out, tibble::as_tibble)
}
}
#' parsing MeSH subheadings to be returned as data.frame
#' @param res json results node
#' @noRd
get_mesh_subheadings <- function(res) {
# no mesh
if (is.null(res$meshHeadingList$meshHeading))
return(NULL)
# no mesh qualifiers in nested list
if (ncol(res$meshHeadingList$meshHeading[[1]]) == 2)
return(NULL)
mesh_qualifier <-
res$meshHeadingList$meshHeading[[1]][3]$meshQualifierList$meshQualifier
names(mesh_qualifier) <-
unlist(res$meshHeadingList$meshHeading[[1]][2])
dplyr::bind_rows(plyr::compact(mesh_qualifier), .id = "descriptorName")
}
#' get nested author infos
#' @param res json results node
#' @noRd
parse_aut <- function(res) {
if (!is.null(res$authorList$author)) {
out <- plyr::rbind.fill(res$authorList$author) %>%
jsonlite::flatten()
} else {
out <- tibble::tibble()
}
if("authorAffiliationDetailsList.authorAffiliation" %in% colnames(out))
out <- tidyr::unnest(out, cols = c("authorAffiliationDetailsList.authorAffiliation"))
return(out)
}
#' get nested journal info
#' @param res json results node
#' @noRd
parse_jn <- function(res) {
if (!is.null(res$journalInfo))
jsonlite::flatten(res$journalInfo)
}