Added \dontrun{}s to examples

commit 814ffa02976e541981364ae0c074724c6dd1d07d 1 parent d961fec
@DASpringate authored
1  DESCRIPTION
@@ -20,3 +20,4 @@ Collate:
'rpubmed_io.R'
'rpubmed_textsearch.R'
'rpubmed_locations.R'
+ 'rpubmed_mesh.R'
1  NAMESPACE
@@ -5,6 +5,7 @@ export(geocode_address)
export(geocode_addresses)
export(get_article_location_data)
export(get_articles_by_terms)
+export(get_mesh_headings)
export(pubmed_fetch)
export(record_counts_by_year)
export(write_JSON_file)
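The newly exported get_mesh_headings() is defined in the added rpubmed_mesh.R (not shown in this diff). A minimal sketch of how the text-search helpers below call it, assuming `article` is a single parsed Pubmed record, e.g. one element of the list returned by fetch_in_chunks():

# Extract the MeSH headings from one fetched record; mesh_to_text()
# below flattens the result to a single string with unlist() and paste():
headings <- get_mesh_headings(article)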
129 R/rpubmed_fetch.R
@@ -1,66 +1,63 @@
-#equire(XML)
-#require(RCurl)
-
-#' Downloads abstracts and Metadata from Pubmed, storing as R objects
-#' Splits large id vectors into a list of smaller chunks, so as not to hammer the entrez server!
-#' If you are making large bulk downloads, consider setting a delay so the downloading starts at off-peak USA times.
-#'
-#'
-#' @export
-#' @import XML RCurl
-#' @param ids integer Pubmed ID's to get abstracts and metadata from
-#' @param chunk_size Number of articles to be pulled with each call to pubmed_fetch (optional)
-#' @param delay Integer Number of hours to wait before downloading starts
-#' @param \dots character Additional terms to add to the request
-#' @return list containing abstratcs and metadata for each ID
-#' @examples
-#'
-#' # Get IDs via rentrez_search:
-#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
-#' plasticity_records <- fetch_in_chunks(plasticity_ids)
-#'
-
-
-fetch_in_chunks <- function(ids, chunk_size = 500, delay = 0, ...){
- Sys.sleep(delay * 3600) # Wait for appropriate time for the server.
- chunks <- chunker(ids, chunk_size)
- Reduce(append, lapply(chunks, function(x) pubmed_fetch(x, ...)))
-}
-
-#' Download data from Pubmed
-#'
-#'
-#'
-#' @export
-#' @param ids integer Pubmed ID's to get abstracts and metadata from
-#' @param file_format character Format in which to get data (eg, fasta, xml...) default = "xml"
-#' @param as_r_object boolean if TRUE, parses returned xml to R objects (nested lists), else returns xml
-#' @param \dots character Additional terms to add to the request
-#' @return list or character string containing abstratcs and metadata for each ID (see as_r_object)
-#' @examples
-#'
-#' # Get IDs via rentrez_search:
-#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids[1:100]
-#' plasticity_records <- pubmed_fetch(plasticity_ids)
-#'
-pubmed_fetch <- function(ids, file_format = "xml", as_r_object = TRUE, ...){
-
- args <- c(id = paste(ids, collapse = ","), db = "pubmed", rettype = file_format,
- email = entrez_email, tool = entrez_tool, ...)
-
- url_args <- paste(paste(names(args), args, sep="="), collapse = "&")
- base_url <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=full"
- url_string <- paste(base_url, url_args, sep = "&")
- records <- getURL(url_string)
- #NCBI limits requests to three per second
- Sys.sleep(0.33)
- if(as_r_object){
- return(xmlToList(xmlTreeParse(records, useInternalNodes = TRUE)))
- } else return(records)
-}
-
-#' Helper function to split a vector v into list of chunks of chunk_size
-chunker <- function(v, chunk_size){
- split(v, ceiling(seq_along(v)/chunk_size))
-}
-
+#' Downloads abstracts and Metadata from Pubmed, storing as R objects
+#' Splits large id vectors into a list of smaller chunks, so as not to hammer the entrez server!
+#' If you are making large bulk downloads, consider setting a delay so the downloading starts at off-peak USA times.
+#'
+#'
+#' @export
+#' @import XML RCurl
+#' @param ids integer Pubmed IDs to get abstracts and metadata from
+#' @param chunk_size Number of articles to be pulled with each call to pubmed_fetch (optional)
+#' @param delay Integer Number of hours to wait before downloading starts
+#' @param \dots character Additional terms to add to the request
+#' @return list containing abstracts and metadata for each ID
+#' @examples \dontrun{
+#' # Get IDs via rentrez_search:
+#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
+#' plasticity_records <- fetch_in_chunks(plasticity_ids)
+#' }
+
+
+
+fetch_in_chunks <- function(ids, chunk_size = 500, delay = 0, ...){
+ Sys.sleep(delay * 3600) # Wait for appropriate time for the server.
+ chunks <- chunker(ids, chunk_size)
+ Reduce(append, lapply(chunks, function(x) pubmed_fetch(x, ...)))
+}
+
+#' Download data from Pubmed
+#'
+#'
+#'
+#' @export
+#' @param ids integer Pubmed IDs to get abstracts and metadata from
+#' @param file_format character Format in which to get data (eg, fasta, xml...) default = "xml"
+#' @param as_r_object boolean if TRUE, parses returned xml to R objects (nested lists), else returns xml
+#' @param \dots character Additional terms to add to the request
+#' @return list or character string containing abstracts and metadata for each ID (see as_r_object)
+#' @examples \dontrun{
+#' # Get IDs via rentrez_search:
+#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids[1:100]
+#' plasticity_records <- pubmed_fetch(plasticity_ids)
+#' }
+
+pubmed_fetch <- function(ids, file_format = "xml", as_r_object = TRUE, ...){
+
+ args <- c(id = paste(ids, collapse = ","), db = "pubmed", rettype = file_format,
+ email = entrez_email, tool = entrez_tool, ...)
+
+ url_args <- paste(paste(names(args), args, sep="="), collapse = "&")
+ base_url <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=full"
+ url_string <- paste(base_url, url_args, sep = "&")
+ records <- getURL(url_string)
+ #NCBI limits requests to three per second
+ Sys.sleep(0.33)
+ if(as_r_object){
+ return(xmlToList(xmlTreeParse(records, useInternalNodes = TRUE)))
+ } else return(records)
+}
+
+#' Helper function to split a vector v into list of chunks of chunk_size
+chunker <- function(v, chunk_size){
+ split(v, ceiling(seq_along(v)/chunk_size))
+}
+
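Taken together, the two exported fetchers support a simple bulk-download workflow. A minimal sketch, assuming the rentrez, XML and RCurl packages are installed and that entrez_email and entrez_tool are set as the package expects:

library(rentrez) # entrez_search(), as in the examples above
library(XML)     # pubmed_fetch() parses the returned XML
library(RCurl)   # getURL() performs the efetch request

# Search Pubmed, then download the matching records in chunks of 200,
# delaying 8 hours so the bulk download starts at off-peak times:
ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
records <- fetch_in_chunks(ids, chunk_size = 200, delay = 8)

# chunker() simply splits the ID vector into equal-sized pieces:
chunker(1:7, chunk_size = 3) # list of 1:3, 4:6, and 7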
192 R/rpubmed_locations.R
@@ -1,96 +1,96 @@
-# Tools for geocoding addresses affiliated with Pubmed Records
-# Geocoder still needs some work - Not a good enough hit rate...
-
-#' Returns a data frame of geocoded addresses with longitude and latitudes
-#' Uses the Google Maps geocode API
-#' @export
-#' @param addresses A character vector of addresses for geocoding
-#' @param sleeper numeric Number of seconds between calls to the geocoding server
-#' @param depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
-#' @return data frame of addresses, latitudes and longitudes
-#' @examples
-#'
-#' # get a list of articles pulled from pubmed:
-#' abstracts <- fromJSON("Test/plasticity_abstracts.json")
-#'
-#' # Extract affiliated addresses from article metadata:
-#' affil_addresses <- get_article_location_data(abstracts)
-#'
-#' # Get coordinates:
-#' coords <- geocode_addresses(affil_addresses, depth = 4)
-#'
-#' # plot coordinates on a map:
-#'
-#' map("world", col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05)
-#' points(coords$long, coords$lat, col = "red", pch = 20)
-#'
-
-geocode_addresses <- function(addresses, sleeper = 0.33, depth = 3){
- coords <- t(sapply(addresses,
- function(addr){
- as.numeric(geocode_address(addr, depth = depth))
- }))
- data.frame(address = row.name(coords), lat = coords[,1], long = coords[,2])
-}
-
-
-#' Extracts addresses of affiliated departments from Pubmed metadata
-#' email addresses are cleaned out.
-#' @export
-#' @param abstracts A list of Pubmed records. e.g. from fetch_in_chunks()
-#' @return character vector of addresses
-#' @examples
-#' # Extract affiliated addresses from article metadata:
-#' affil_addresses <- get_article_location_data(abstracts)
-#'
-#'
-get_article_location_data <- function(abstracts){
- raw_locations <- as.character(do.call(rbind,
- lapply(abstracts,
- function(x) x$MedlineCitation$Article$Affiliation)))
- locations <- gsub(pattern= "[[:alnum:][:punct:]]+@+[[:alnum:][:punct:]]+", "", raw_locations)
- locations
-}
-
-#' Function to get coordinates from a supplied address
-#' If no match is found, it recursively calls itself on the address minus the first line of the address
-#' @export
-#' @param address string
-#' @param depth depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
-#' @return vector of address, lat, long
-#'
-#' @examples
-#'
-#' x <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
-#' geocode_address(x)
-#'
-geocode_address <- function(address, depth = 3){
- coords <- geocode(address)
- if(!is.null(names(coords)) & is.na(coords[1]) & depth > 0){
- address <- sub(pattern="[[:alnum:][:punct:][:space:]][^,]*, ?", "", address)
- return(get_geocode(address, depth = depth -1))
- }
- coords
-}
-
-
-#' Helper function for geocode_address
-geocode <- function(address){
- gcStr <- gsub(' ','%20', address) #Encode URL Parameters
- #Open Connection
- connectStr <- paste('http://maps.google.com/maps/api/geocode/json?sensor=false&address=',gcStr, sep="")
- con <- url(connectStr)
- tryCatch({
- data.json <- fromJSON(paste(readLines(con), collapse=""))
- close(con)
- #Flatten the received JSON
- data.json <- unlist(data.json)
- lat <- data.json["results.geometry.location.lat"]
- lng <- data.json["results.geometry.location.lng"]
- gcodes <- c(lat, lng)
- names(gcodes) <- c("Lat", "Lng")
- #print(paste(address, gcodes$Lat, gcodes$Lng))
- return (gcodes)
- }, error = function(e) return(c(NA,NA)))
-}
-
+# Tools for geocoding addresses affiliated with Pubmed Records
+# Geocoder still needs some work - Not a good enough hit rate...
+
+#' Returns a data frame of geocoded addresses with longitudes and latitudes
+#' Uses the Google Maps geocode API
+#' @export
+#' @param addresses A character vector of addresses for geocoding
+#' @param sleeper numeric Number of seconds between calls to the geocoding server
+#' @param depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
+#' @return data frame of addresses, latitudes and longitudes
+#' @examples \dontrun{
+#' # get a list of articles pulled from pubmed:
+#' abstracts <- fromJSON("Test/plasticity_abstracts.json")
+#'
+#' # Extract affiliated addresses from article metadata:
+#' affil_addresses <- get_article_location_data(abstracts)
+#'
+#' # Get coordinates:
+#' coords <- geocode_addresses(affil_addresses, depth = 4)
+#'
+#' # plot coordinates on a map:
+#'
+#' map("world", col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05)
+#' points(coords$long, coords$lat, col = "red", pch = 20)
+#' }
+
+
+geocode_addresses <- function(addresses, sleeper = 0.33, depth = 3){
+ coords <- t(sapply(addresses,
+ function(addr){
+ Sys.sleep(sleeper) # pause between calls so the geocoding server is not hammered
+ as.numeric(geocode_address(addr, depth = depth))
+ }))
+ data.frame(address = rownames(coords), lat = coords[,1], long = coords[,2])
+}
+
+
+#' Extracts addresses of affiliated departments from Pubmed metadata
+#' email addresses are cleaned out.
+#' @export
+#' @param abstracts A list of Pubmed records. e.g. from fetch_in_chunks()
+#' @return character vector of addresses
+#' @examples \dontrun{
+#' # Extract affiliated addresses from article metadata:
+#' affil_addresses <- get_article_location_data(abstracts)
+#' }
+
+get_article_location_data <- function(abstracts){
+ raw_locations <- as.character(do.call(rbind,
+ lapply(abstracts,
+ function(x) x$MedlineCitation$Article$Affiliation)))
+ locations <- gsub(pattern= "[[:alnum:][:punct:]]+@+[[:alnum:][:punct:]]+", "", raw_locations)
+ locations
+}
+
+#' Function to get coordinates from a supplied address
+#' If no match is found, it recursively calls itself on the address minus the first line of the address
+#' @export
+#' @param address string
+#' @param depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
+#' @return vector of address, lat, long
+#'
+#' @examples \dontrun{
+#' x <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
+#' geocode_address(x)
+#' }
+
+geocode_address <- function(address, depth = 3){
+ coords <- geocode(address)
+ if(!is.null(names(coords)) && is.na(coords[1]) && depth > 0){
+ address <- sub(pattern="[[:alnum:][:punct:][:space:]][^,]*, ?", "", address)
+ return(geocode_address(address, depth = depth - 1))
+ }
+ coords
+}
+
+
+#' Helper function for geocode_address
+geocode <- function(address){
+ gcStr <- gsub(' ','%20', address) #Encode URL Parameters
+ #Open Connection
+ connectStr <- paste('http://maps.google.com/maps/api/geocode/json?sensor=false&address=',gcStr, sep="")
+ con <- url(connectStr)
+ tryCatch({
+ data.json <- fromJSON(paste(readLines(con), collapse=""))
+ close(con)
+ #Flatten the received JSON
+ data.json <- unlist(data.json)
+ lat <- data.json["results.geometry.location.lat"]
+ lng <- data.json["results.geometry.location.lng"]
+ gcodes <- c(lat, lng)
+ names(gcodes) <- c("Lat", "Lng")
+ #print(paste(address, gcodes$Lat, gcodes$Lng))
+ return (gcodes)
+ }, error = function(e) return(c(NA,NA)))
+}
+
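For context, the three exported functions above form a single pipeline. A sketch under the assumption that plasticity_records is a list of fetched Pubmed records (as in the earlier examples), and that the rjson package — whose fromJSON() the geocode() helper relies on — and the maps package are installed:

library(rjson) # fromJSON(), used by the geocode() helper
library(maps)  # map(), for plotting

# Affiliation strings with email addresses stripped out:
addresses <- get_article_location_data(plasticity_records)
coords <- geocode_addresses(addresses, sleeper = 0.5, depth = 3)

# Plot the geocoded affiliations on a world map:
map("world", col = "#f2f2f2", fill = TRUE, bg = "white", lwd = 0.05)
points(coords$long, coords$lat, col = "red", pch = 20)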
213 R/rpubmed_textsearch.R
@@ -1,106 +1,107 @@
-#' Returns a list of articles matching the termlist
-#' items in the termlist can be strings or character vectors, concatenated to an "or" regex
-#' e.g list(c("gprd", "diabetes")) returns all articles mentioning either gprd or diabetes.
-#' different items in the list recursively filter the list
-#' e.g. list("gprd", "diabetes") returns articles mentioning gprd and diabtes
-#'
-#' @export
-#' @param corpus list of downloaded Pubmed records, e.g. from rpubmed_fetch_in_chunks
-#' @param term_list list of character vectors giving the search terms. list elements are searched for reductively (using &). Elements of internal charater vectors are combined into 'or' terms
-#' @param where A predicate function referring to a search in an area of the record. Choose from in_abstract_p, in_mesh_p or in_mesh_abstract_p
-#' @param case_sensitive boolean is the search case sensitive?
-#' @return list containing abstracts and metadata for each ID matching the search criteria
-#' @examples
-#'
-#' plasticity_records <- fetch_in_chunks(plasticity_ids)
-#'
-#' # Search for articles with "plant" and "fish" in the abstract
-#' get_articles_by_terms(plasticity_records, list("plant", "fish"), where = in_abstract_p)
-#'
-#' # Search for records with "plant" or "fish" in the abstract or MeSH headings:
-#' get_articles_by_terms(plasticity_records, list(c("plant", "fish")), where = in_mesh_abstract_p)
-
-
-get_articles_by_terms <- function(corpus, term_list, where, case_sensitive = FALSE){
- corpus <- lapply(corpus,
- function(article){
- if(where(article, term_list[[1]], case_sensitive)) article
- })
- corpus <- corpus[!sapply(corpus, is.null)]
- if(length(term_list) <= 1){
- return(corpus)
- } else {
- return(get_articles_by_terms(corpus, term_list[2:length(term_list)], where = where))
- }
-}
-
-#' Gives a breakdown of records per year in a corpus of Pubmed Records
-#'
-#' @export
-#' @param corpus a list of Pubmed records e.g. as given by fetch_in_chunks()
-#' @param year_min integer representing the earliest year to be included in the counts
-#' @param year_max integer representing the latest year to be included in the counts. Frequencies are calculated after the dataset is truncated.
-#' @return dataframe with year, records and freq columns
-#'
-#'
-record_counts_by_year <- function(corpus, year_min = FALSE, year_max = FALSE){
- years_table <- table(sapply(corpus,
- function(x) as.numeric(x$PubmedData$History$PubMedPubDate$Year),
- simplify = TRUE))
- years_df <- data.frame(years_table)
- names(years_df) <- c("year", "records")
- years_df$year <- as.integer(as.character(years_df$year))
- if(is.numeric(year_min)) years_df <- years_df[years_df$year >= year_min,]
- if(is.numeric(year_max)) years_df <- years_df[years_df$year <= year_max,]
- years_df$freq <- with(years_df, years_df$records / sum(years_df$records))
- years_df
-}
-
-
-# Helper functions:
-
-#' concatenates abstract list to a single string
-abstract_to_text <- function(article){
- paste(unlist(article$MedlineCitation$Article$Abstract), collapse = " ")
-}
-
-#' concatenates a list of MeSH headings to a single string
-mesh_to_text <- function(article){
- paste(unlist(get_mesh_headings(article)), collapse = " ")
-}
-
-
-#' predicate function for presence of a term in an article text
-term_in_text_p <- function(term, text, case_sensitive){
- ifelse(length(grep(pattern = term, x = text, ignore.case = !case_sensitive)), TRUE, FALSE)
-}
-
-#' predicate function for searching abstracts
-in_abstract_p <- function(article, terms, case_sensitive = FALSE){
- # are terms found in the abstract body?
- pattern <- paste(terms, collapse = "|")
- term_in_text_p(term = pattern,
- text = abstract_to_text(article), case_sensitive)
-}
-
-#' predicate function for searching MeSH headings
-in_mesh_headings_p <- function(article, terms, case_sensitive = FALSE){
- # Are terms found in the mesh headings?
- pattern <- paste(terms, collapse = "|")
- mesh <- mesh_to_text(article)
- term_in_text_p(term = pattern,
- text = mesh, case_sensitive)
-}
-
-#' predicate function for searching abstracts and MeSH headings
-in_mesh_abstract_p <- function(article, terms, case_sensitive = FALSE){
- # Are terms found in the mesh headings?
- pattern <- paste(terms, collapse = "|")
- mesh_article <- paste(mesh_to_text(article), abstract_to_text(article))
- term_in_text_p(term = pattern,
- text = mesh_article, case_sensitive)
-}
-
-
-
-
+#' Returns a list of articles matching the termlist
+#' items in the termlist can be strings or character vectors, concatenated to an "or" regex
+#' e.g. list(c("gprd", "diabetes")) returns all articles mentioning either gprd or diabetes.
+#' different items in the list recursively filter the list
+#' e.g. list("gprd", "diabetes") returns articles mentioning gprd and diabetes
+#'
+#' @export
+#' @param corpus list of downloaded Pubmed records, e.g. from fetch_in_chunks()
+#' @param term_list list of character vectors giving the search terms. list elements are searched for reductively (using &). Elements of internal character vectors are combined into 'or' terms
+#' @param where A predicate function referring to a search in an area of the record. Choose from in_abstract_p, in_mesh_headings_p or in_mesh_abstract_p
+#' @param case_sensitive boolean is the search case sensitive?
+#' @return list containing abstracts and metadata for each ID matching the search criteria
+#' @examples \dontrun{
+#' plasticity_records <- fetch_in_chunks(plasticity_ids)
+#'
+#' # Search for articles with "plant" and "fish" in the abstract
+#' get_articles_by_terms(plasticity_records, list("plant", "fish"), where = in_abstract_p)
+#'
+#' # Search for records with "plant" or "fish" in the abstract or MeSH headings:
+#' get_articles_by_terms(plasticity_records, list(c("plant", "fish")), where = in_mesh_abstract_p)
+#' }
+
+
+
+get_articles_by_terms <- function(corpus, term_list, where, case_sensitive = FALSE){
+ corpus <- lapply(corpus,
+ function(article){
+ if(where(article, term_list[[1]], case_sensitive)) article
+ })
+ corpus <- corpus[!sapply(corpus, is.null)]
+ if(length(term_list) <= 1){
+ return(corpus)
+ } else {
+ return(get_articles_by_terms(corpus, term_list[2:length(term_list)], where = where))
+ }
+}
+
+#' Gives a breakdown of records per year in a corpus of Pubmed Records
+#'
+#' @export
+#' @param corpus a list of Pubmed records e.g. as given by fetch_in_chunks()
+#' @param year_min integer representing the earliest year to be included in the counts
+#' @param year_max integer representing the latest year to be included in the counts. Frequencies are calculated after the dataset is truncated.
+#' @return dataframe with year, records and freq columns
+#'
+#'
+record_counts_by_year <- function(corpus, year_min = FALSE, year_max = FALSE){
+ years_table <- table(sapply(corpus,
+ function(x) as.numeric(x$PubmedData$History$PubMedPubDate$Year),
+ simplify = TRUE))
+ years_df <- data.frame(years_table)
+ names(years_df) <- c("year", "records")
+ years_df$year <- as.integer(as.character(years_df$year))
+ if(is.numeric(year_min)) years_df <- years_df[years_df$year >= year_min,]
+ if(is.numeric(year_max)) years_df <- years_df[years_df$year <= year_max,]
+ years_df$freq <- years_df$records / sum(years_df$records)
+ years_df
+}
+
+
+# Helper functions:
+
+#' concatenates abstract list to a single string
+abstract_to_text <- function(article){
+ paste(unlist(article$MedlineCitation$Article$Abstract), collapse = " ")
+}
+
+#' concatenates a list of MeSH headings to a single string
+mesh_to_text <- function(article){
+ paste(unlist(get_mesh_headings(article)), collapse = " ")
+}
+
+
+#' predicate function for presence of a term in an article text
+term_in_text_p <- function(term, text, case_sensitive){
+ ifelse(length(grep(pattern = term, x = text, ignore.case = !case_sensitive)), TRUE, FALSE)
+}
+
+#' predicate function for searching abstracts
+in_abstract_p <- function(article, terms, case_sensitive = FALSE){
+ # are terms found in the abstract body?
+ pattern <- paste(terms, collapse = "|")
+ term_in_text_p(term = pattern,
+ text = abstract_to_text(article), case_sensitive)
+}
+
+#' predicate function for searching MeSH headings
+in_mesh_headings_p <- function(article, terms, case_sensitive = FALSE){
+ # Are terms found in the mesh headings?
+ pattern <- paste(terms, collapse = "|")
+ mesh <- mesh_to_text(article)
+ term_in_text_p(term = pattern,
+ text = mesh, case_sensitive)
+}
+
+#' predicate function for searching abstracts and MeSH headings
+in_mesh_abstract_p <- function(article, terms, case_sensitive = FALSE){
+ # Are terms found in the MeSH headings or abstract?
+ pattern <- paste(terms, collapse = "|")
+ mesh_article <- paste(mesh_to_text(article), abstract_to_text(article))
+ term_in_text_p(term = pattern,
+ text = mesh_article, case_sensitive)
+}
+
+
+
+
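To make the term_list semantics concrete: outer list elements are ANDed together, while the strings inside a single character vector are ORed into one regex. A short sketch, again assuming plasticity_records holds fetched records:

# Keep records mentioning ("plant" OR "animal") AND "temperature",
# searching both abstracts and MeSH headings:
terms <- list(c("plant", "animal"), "temperature")
hits <- get_articles_by_terms(plasticity_records, terms, where = in_mesh_abstract_p)

# Yearly breakdown of the matching records, from 1990 onwards:
record_counts_by_year(hits, year_min = 1990)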
76 man/fetch_in_chunks.Rd
@@ -1,37 +1,39 @@
-\name{fetch_in_chunks}
-\alias{fetch_in_chunks}
-\title{Downloads abstracts and Metadata from Pubmed, storing as R objects
-Splits large id vectors into a list of smaller chunks, so as not to hammer the entrez server!
-If you are making large bulk downloads, consider setting a delay so the downloading starts at off-peak USA times.}
-\usage{
- fetch_in_chunks(ids, chunk_size = 500, delay = 0, ...)
-}
-\arguments{
- \item{ids}{integer Pubmed ID's to get abstracts and
- metadata from}
-
- \item{chunk_size}{Number of articles to be pulled with
- each call to pubmed_fetch (optional)}
-
- \item{delay}{Integer Number of hours to wait before
- downloading starts}
-
- \item{\dots}{character Additional terms to add to the
- request}
-}
-\value{
- list containing abstratcs and metadata for each ID
-}
-\description{
- Downloads abstracts and Metadata from Pubmed, storing as
- R objects Splits large id vectors into a list of smaller
- chunks, so as not to hammer the entrez server! If you are
- making large bulk downloads, consider setting a delay so
- the downloading starts at off-peak USA times.
-}
-\examples{
-# Get IDs via rentrez_search:
-plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
-plasticity_records <- fetch_in_chunks(plasticity_ids)
-}
-
+\name{fetch_in_chunks}
+\alias{fetch_in_chunks}
+\title{Downloads abstracts and Metadata from Pubmed, storing as R objects
+Splits large id vectors into a list of smaller chunks, so as not to hammer the entrez server!
+If you are making large bulk downloads, consider setting a delay so the downloading starts at off-peak USA times.}
+\usage{
+ fetch_in_chunks(ids, chunk_size = 500, delay = 0, ...)
+}
+\arguments{
+  \item{ids}{integer Pubmed IDs to get abstracts and
+ metadata from}
+
+ \item{chunk_size}{Number of articles to be pulled with
+ each call to pubmed_fetch (optional)}
+
+ \item{delay}{Integer Number of hours to wait before
+ downloading starts}
+
+ \item{\dots}{character Additional terms to add to the
+ request}
+}
+\value{
+  list containing abstracts and metadata for each ID
+}
+\description{
+ Downloads abstracts and Metadata from Pubmed, storing as
+ R objects Splits large id vectors into a list of smaller
+ chunks, so as not to hammer the entrez server! If you are
+ making large bulk downloads, consider setting a delay so
+ the downloading starts at off-peak USA times.
+}
+\examples{
+\dontrun{
+ # Get IDs via rentrez_search:
+ plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
+ plasticity_records <- fetch_in_chunks(plasticity_ids)
+}
+}
+
60 man/geocode_address.Rd
@@ -1,29 +1,31 @@
-\name{geocode_address}
-\alias{geocode_address}
-\title{Function to get coordinates from a supplied address
-If no match is found, it recursively calls itself on the address minus the first line of the address}
-\usage{
- geocode_address(address, depth = 3)
-}
-\arguments{
- \item{address}{string}
-
- \item{depth}{depth integer recursion depth for attempting
- to get coordinates. If the full address fails to get a
- hit, the function is called again with the first line of
- the address removed. The process is repeated depth times
- before returning NAs}
-}
-\value{
- vector of address, lat, long
-}
-\description{
- Function to get coordinates from a supplied address If no
- match is found, it recursively calls itself on the
- address minus the first line of the address
-}
-\examples{
-x <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
-geocode_address(x)
-}
-
+\name{geocode_address}
+\alias{geocode_address}
+\title{Function to get coordinates from a supplied address
+If no match is found, it recursively calls itself on the address minus the first line of the address}
+\usage{
+ geocode_address(address, depth = 3)
+}
+\arguments{
+ \item{address}{string}
+
+  \item{depth}{integer recursion depth for attempting
+ to get coordinates. If the full address fails to get a
+ hit, the function is called again with the first line of
+ the address removed. The process is repeated depth times
+ before returning NAs}
+}
+\value{
+ vector of address, lat, long
+}
+\description{
+ Function to get coordinates from a supplied address If no
+ match is found, it recursively calls itself on the
+ address minus the first line of the address
+}
+\examples{
+\dontrun{
+x <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
+geocode_address(x)
+}
+}
+
88 man/geocode_addresses.Rd
@@ -1,43 +1,45 @@
-\name{geocode_addresses}
-\alias{geocode_addresses}
-\title{Returns a data frame of geocoded addresses with longitude and latitudes
-Uses the Google Maps geocode API}
-\usage{
- geocode_addresses(addresses, sleeper = 0.33, depth = 3)
-}
-\arguments{
- \item{addresses}{A character vector of addresses for
- geocoding}
-
- \item{sleeper}{numeric Number of seconds between calls to
- the geocoding server}
-
- \item{depth}{integer recursion depth for attempting to
- get coordinates. If the full address fails to get a hit,
- the function is called again with the first line of the
- address removed. The process is repeated depth times
- before returning NAs}
-}
-\value{
- data frame of addresses, latitudes and longitudes
-}
-\description{
- Returns a data frame of geocoded addresses with longitude
- and latitudes Uses the Google Maps geocode API
-}
-\examples{
-# get a list of articles pulled from pubmed:
-abstracts <- fromJSON("Test/plasticity_abstracts.json")
-
-# Extract affiliated addresses from article metadata:
-affil_addresses <- get_article_location_data(abstracts)
-
-# Get coordinates:
-coords <- geocode_addresses(affil_addresses, depth = 4)
-
-# plot coordinates on a map:
-
-map("world", col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05)
-points(coords$long, coords$lat, col = "red", pch = 20)
-}
-
+\name{geocode_addresses}
+\alias{geocode_addresses}
+\title{Returns a data frame of geocoded addresses with longitudes and latitudes
+Uses the Google Maps geocode API}
+\usage{
+ geocode_addresses(addresses, sleeper = 0.33, depth = 3)
+}
+\arguments{
+ \item{addresses}{A character vector of addresses for
+ geocoding}
+
+ \item{sleeper}{numeric Number of seconds between calls to
+ the geocoding server}
+
+ \item{depth}{integer recursion depth for attempting to
+ get coordinates. If the full address fails to get a hit,
+ the function is called again with the first line of the
+ address removed. The process is repeated depth times
+ before returning NAs}
+}
+\value{
+ data frame of addresses, latitudes and longitudes
+}
+\description{
+ Returns a data frame of geocoded addresses with longitude
+ and latitudes Uses the Google Maps geocode API
+}
+\examples{
+\dontrun{
+# get a list of articles pulled from pubmed:
+abstracts <- fromJSON("Test/plasticity_abstracts.json")
+
+# Extract affiliated addresses from article metadata:
+affil_addresses <- get_article_location_data(abstracts)
+
+# Get coordinates:
+coords <- geocode_addresses(affil_addresses, depth = 4)
+
+# plot coordinates on a map:
+
+map("world", col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05)
+points(coords$long, coords$lat, col = "red", pch = 20)
+}
+}
+
48 man/get_article_location_data.Rd
@@ -1,23 +1,25 @@
-\name{get_article_location_data}
-\alias{get_article_location_data}
-\title{Extracts addresses of affiliated departments from Pubmed metadata
-email addresses are cleaned out.}
-\usage{
- get_article_location_data(abstracts)
-}
-\arguments{
- \item{abstracts}{A list of Pubmed records. e.g. from
- fetch_in_chunks()}
-}
-\value{
- character vector of addresses
-}
-\description{
- Extracts addresses of affiliated departments from Pubmed
- metadata email addresses are cleaned out.
-}
-\examples{
-# Extract affiliated addresses from article metadata:
-affil_addresses <- get_article_location_data(abstracts)
-}
-
+\name{get_article_location_data}
+\alias{get_article_location_data}
+\title{Extracts addresses of affiliated departments from Pubmed metadata
+email addresses are cleaned out.}
+\usage{
+ get_article_location_data(abstracts)
+}
+\arguments{
+ \item{abstracts}{A list of Pubmed records. e.g. from
+ fetch_in_chunks()}
+}
+\value{
+ character vector of addresses
+}
+\description{
+ Extracts addresses of affiliated departments from Pubmed
+ metadata email addresses are cleaned out.
+}
+\examples{
+\dontrun{
+# Extract affiliated addresses from article metadata:
+affil_addresses <- get_article_location_data(abstracts)
+}
+}
+
102 man/get_articles_by_terms.Rd
@@ -1,50 +1,52 @@
-\name{get_articles_by_terms}
-\alias{get_articles_by_terms}
-\title{Returns a list of articles matching the termlist
-items in the termlist can be strings or character vectors, concatenated to an "or" regex
-e.g list(c("gprd", "diabetes")) returns all articles mentioning either gprd or diabetes.
-different items in the list recursively filter the list
-e.g. list("gprd", "diabetes") returns articles mentioning gprd and diabtes}
-\usage{
- get_articles_by_terms(corpus, term_list, where,
- case_sensitive = FALSE)
-}
-\arguments{
- \item{corpus}{list of downloaded Pubmed records, e.g.
- from rpubmed_fetch_in_chunks}
-
- \item{term_list}{list of character vectors giving the
- search terms. list elements are searched for reductively
- (using &). Elements of internal charater vectors are
- combined into 'or' terms}
-
- \item{where}{A predicate function referring to a search
- in an area of the record. Choose from in_abstract_p,
- in_mesh_p or in_mesh_abstract_p}
-
- \item{case_sensitive}{boolean is the search case
- sensitive?}
-}
-\value{
- list containing abstracts and metadata for each ID
- matching the search criteria
-}
-\description{
- Returns a list of articles matching the termlist items in
- the termlist can be strings or character vectors,
- concatenated to an "or" regex e.g list(c("gprd",
- "diabetes")) returns all articles mentioning either gprd
- or diabetes. different items in the list recursively
- filter the list e.g. list("gprd", "diabetes") returns
- articles mentioning gprd and diabtes
-}
-\examples{
-plasticity_records <- fetch_in_chunks(plasticity_ids)
-
-# Search for articles with "plant" and "fish" in the abstract
-get_articles_by_terms(plasticity_records, list("plant", "fish"), where = in_abstract_p)
-
-# Search for records with "plant" or "fish" in the abstract or MeSH headings:
-get_articles_by_terms(plasticity_records, list(c("plant", "fish")), where = in_mesh_abstract_p)
-}
-
+\name{get_articles_by_terms}
+\alias{get_articles_by_terms}
+\title{Returns a list of articles matching the termlist
+items in the termlist can be strings or character vectors, concatenated to an "or" regex
+e.g. list(c("gprd", "diabetes")) returns all articles mentioning either gprd or diabetes.
+different items in the list recursively filter the list
+e.g. list("gprd", "diabetes") returns articles mentioning gprd and diabetes}
+\usage{
+ get_articles_by_terms(corpus, term_list, where,
+ case_sensitive = FALSE)
+}
+\arguments{
+ \item{corpus}{list of downloaded Pubmed records, e.g.
+  from fetch_in_chunks()}
+
+ \item{term_list}{list of character vectors giving the
+ search terms. list elements are searched for reductively
+  (using &). Elements of internal character vectors are
+ combined into 'or' terms}
+
+ \item{where}{A predicate function referring to a search
+ in an area of the record. Choose from in_abstract_p,
+  in_mesh_headings_p or in_mesh_abstract_p}
+
+ \item{case_sensitive}{boolean is the search case
+ sensitive?}
+}
+\value{
+ list containing abstracts and metadata for each ID
+ matching the search criteria
+}
+\description{
+ Returns a list of articles matching the termlist items in
+ the termlist can be strings or character vectors,
+  concatenated to an "or" regex e.g. list(c("gprd",
+ "diabetes")) returns all articles mentioning either gprd
+ or diabetes. different items in the list recursively
+ filter the list e.g. list("gprd", "diabetes") returns
+  articles mentioning gprd and diabetes
+}
+\examples{
+\dontrun{
+plasticity_records <- fetch_in_chunks(plasticity_ids)
+
+# Search for articles with "plant" and "fish" in the abstract
+get_articles_by_terms(plasticity_records, list("plant", "fish"), where = in_abstract_p)
+
+# Search for records with "plant" or "fish" in the abstract or MeSH headings:
+get_articles_by_terms(plasticity_records, list(c("plant", "fish")), where = in_mesh_abstract_p)
+}
+}
+
68 man/pubmed_fetch.Rd
@@ -1,33 +1,35 @@
-\name{pubmed_fetch}
-\alias{pubmed_fetch}
-\title{Download data from Pubmed}
-\usage{
- pubmed_fetch(ids, file_format = "xml",
- as_r_object = TRUE, ...)
-}
-\arguments{
- \item{ids}{integer Pubmed ID's to get abstracts and
- metadata from}
-
- \item{file_format}{character Format in which to get data
- (eg, fasta, xml...) default = "xml"}
-
- \item{as_r_object}{boolean if TRUE, parses returned xml
- to R objects (nested lists), else returns xml}
-
- \item{\dots}{character Additional terms to add to the
- request}
-}
-\value{
- list or character string containing abstratcs and
- metadata for each ID (see as_r_object)
-}
-\description{
- Download data from Pubmed
-}
-\examples{
-# Get IDs via rentrez_search:
-plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids[1:100]
-plasticity_records <- pubmed_fetch(plasticity_ids)
-}
-
+\name{pubmed_fetch}
+\alias{pubmed_fetch}
+\title{Download data from Pubmed}
+\usage{
+ pubmed_fetch(ids, file_format = "xml",
+ as_r_object = TRUE, ...)
+}
+\arguments{
+  \item{ids}{integer Pubmed IDs to get abstracts and
+ metadata from}
+
+ \item{file_format}{character Format in which to get data
+ (eg, fasta, xml...) default = "xml"}
+
+ \item{as_r_object}{boolean if TRUE, parses returned xml
+ to R objects (nested lists), else returns xml}
+
+ \item{\dots}{character Additional terms to add to the
+ request}
+}
+\value{
+  list or character string containing abstracts and
+ metadata for each ID (see as_r_object)
+}
+\description{
+ Download data from Pubmed
+}
+\examples{
+\dontrun{
+# Get IDs via rentrez_search:
+plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids[1:100]
+plasticity_records <- pubmed_fetch(plasticity_ids)
+}
+}
+