
Added \dontrun{} wrappers to examples
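
For context, \dontrun{} marks roxygen example code that is shown in the rendered help pages but is not executed by R CMD check or example(); that matters here because these examples make live calls to the NCBI Entrez and Google Maps services. A minimal sketch of the pattern (illustrative only, reusing names from the package's own examples):

#' @examples \dontrun{
#' # Makes a live web request, so it is skipped during package checks:
#' plasticity_records <- pubmed_fetch(plasticity_ids)
#' }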

1 parent d961fec commit 814ffa02976e541981364ae0c074724c6dd1d07d @DASpringate DASpringate committed Feb 7, 2013
Showing with 495 additions and 483 deletions.
  1. +1 −0 DESCRIPTION
  2. +1 −0 NAMESPACE
  3. +63 −66 R/rpubmed_fetch.R
  4. +96 −96 R/rpubmed_locations.R
  5. +107 −106 R/rpubmed_textsearch.R
  6. +39 −37 man/fetch_in_chunks.Rd
  7. +31 −29 man/geocode_address.Rd
  8. +45 −43 man/geocode_addresses.Rd
  9. +25 −23 man/get_article_location_data.Rd
  10. +52 −50 man/get_articles_by_terms.Rd
  11. +35 −33 man/pubmed_fetch.Rd
@@ -20,3 +20,4 @@ Collate:
'rpubmed_io.R'
'rpubmed_textsearch.R'
'rpubmed_locations.R'
+ 'rpubmed_mesh.R'
@@ -5,6 +5,7 @@ export(geocode_address)
export(geocode_addresses)
export(get_article_location_data)
export(get_articles_by_terms)
+export(get_mesh_headings)
export(pubmed_fetch)
export(record_counts_by_year)
export(write_JSON_file)
@@ -1,66 +1,63 @@
-#equire(XML)
-#require(RCurl)
-
-#' Downloads abstracts and Metadata from Pubmed, storing as R objects
-#' Splits large id vectors into a list of smaller chunks, so as not to hammer the entrez server!
-#' If you are making large bulk downloads, consider setting a delay so the downloading starts at off-peak USA times.
-#'
-#'
-#' @export
-#' @import XML RCurl
-#' @param ids integer Pubmed ID's to get abstracts and metadata from
-#' @param chunk_size Number of articles to be pulled with each call to pubmed_fetch (optional)
-#' @param delay Integer Number of hours to wait before downloading starts
-#' @param \dots character Additional terms to add to the request
-#' @return list containing abstratcs and metadata for each ID
-#' @examples
-#'
-#' # Get IDs via rentrez_search:
-#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
-#' plasticity_records <- fetch_in_chunks(plasticity_ids)
-#'
-
-
-fetch_in_chunks <- function(ids, chunk_size = 500, delay = 0, ...){
- Sys.sleep(delay * 3600) # Wait for appropriate time for the server.
- chunks <- chunker(ids, chunk_size)
- Reduce(append, lapply(chunks, function(x) pubmed_fetch(x, ...)))
-}
-
-#' Download data from Pubmed
-#'
-#'
-#'
-#' @export
-#' @param ids integer Pubmed ID's to get abstracts and metadata from
-#' @param file_format character Format in which to get data (eg, fasta, xml...) default = "xml"
-#' @param as_r_object boolean if TRUE, parses returned xml to R objects (nested lists), else returns xml
-#' @param \dots character Additional terms to add to the request
-#' @return list or character string containing abstratcs and metadata for each ID (see as_r_object)
-#' @examples
-#'
-#' # Get IDs via rentrez_search:
-#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids[1:100]
-#' plasticity_records <- pubmed_fetch(plasticity_ids)
-#'
-pubmed_fetch <- function(ids, file_format = "xml", as_r_object = TRUE, ...){
-
- args <- c(id = paste(ids, collapse = ","), db = "pubmed", rettype = file_format,
- email = entrez_email, tool = entrez_tool, ...)
-
- url_args <- paste(paste(names(args), args, sep="="), collapse = "&")
- base_url <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=full"
- url_string <- paste(base_url, url_args, sep = "&")
- records <- getURL(url_string)
- #NCBI limits requests to three per second
- Sys.sleep(0.33)
- if(as_r_object){
- return(xmlToList(xmlTreeParse(records, useInternalNodes = TRUE)))
- } else return(records)
-}
-
-#' Helper function to split a vector v into list of chunks of chunk_size
-chunker <- function(v, chunk_size){
- split(v, ceiling(seq_along(v)/chunk_size))
-}
-
+#' Downloads abstracts and metadata from Pubmed, storing them as R objects
+#' Splits large ID vectors into a list of smaller chunks, so as not to hammer the Entrez server!
+#' If you are making large bulk downloads, consider setting a delay so that downloading starts at off-peak US times.
+#'
+#'
+#' @export
+#' @import XML RCurl
+#' @param ids integer Pubmed IDs to get abstracts and metadata from
+#' @param chunk_size Number of articles to be pulled with each call to pubmed_fetch (optional)
+#' @param delay integer Number of hours to wait before downloading starts
+#' @param \dots character Additional terms to add to the request
+#' @return list containing abstracts and metadata for each ID
+#' @examples \dontrun{
+#' # Get IDs via rentrez_search:
+#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids
+#' plasticity_records <- fetch_in_chunks(plasticity_ids)
+#' }
+
+
+
+fetch_in_chunks <- function(ids, chunk_size = 500, delay = 0, ...){
+ Sys.sleep(delay * 3600) # Wait for appropriate time for the server.
+ chunks <- chunker(ids, chunk_size)
+ Reduce(append, lapply(chunks, function(x) pubmed_fetch(x, ...)))
+}
+
+#' Download data from Pubmed
+#'
+#'
+#'
+#' @export
+#' @param ids integer Pubmed IDs to get abstracts and metadata from
+#' @param file_format character Format in which to get data (e.g. fasta, xml...); default = "xml"
+#' @param as_r_object boolean If TRUE, parses the returned XML into R objects (nested lists); otherwise returns the raw XML
+#' @param \dots character Additional terms to add to the request
+#' @return list or character string containing abstracts and metadata for each ID (see as_r_object)
+#' @examples \dontrun{
+#' # Get IDs via rentrez_search:
+#' plasticity_ids <- entrez_search("pubmed", "phenotypic plasticity", retmax = 2600)$ids[1:100]
+#' plasticity_records <- pubmed_fetch(plasticity_ids)
+#' }
+
+pubmed_fetch <- function(ids, file_format = "xml", as_r_object = TRUE, ...){
+
+ args <- c(id = paste(ids, collapse = ","), db = "pubmed", rettype = file_format,
+ email = entrez_email, tool = entrez_tool, ...)
+
+ url_args <- paste(paste(names(args), args, sep="="), collapse = "&")
+ base_url <- "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?retmode=full"
+ url_string <- paste(base_url, url_args, sep = "&")
+ records <- getURL(url_string)
+ #NCBI limits requests to three per second
+ Sys.sleep(0.33)
+ if(as_r_object){
+ return(xmlToList(xmlTreeParse(records, useInternalNodes = TRUE)))
+ } else return(records)
+}
+
+#' Helper function to split a vector v into list of chunks of chunk_size
+chunker <- function(v, chunk_size){
+ split(v, ceiling(seq_along(v)/chunk_size))
+}
+
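
As a quick illustration of the chunker helper above (a sketch, not part of the commit): splitting, say, 1200 IDs with the default chunk_size of 500 yields three chunks, each of which fetch_in_chunks passes to pubmed_fetch in turn.

ids <- 1:1200                                   # stand-in for a vector of PubMed IDs
chunks <- split(ids, ceiling(seq_along(ids) / 500))
sapply(chunks, length)
#   1   2   3
# 500 500 200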
@@ -1,96 +1,96 @@
-# Tools for geocoding addresses affiliated with Pubmed Records
-# Geocoder still needs some work - Not a good enough hit rate...
-
-#' Returns a data frame of geocoded addresses with longitude and latitudes
-#' Uses the Google Maps geocode API
-#' @export
-#' @param addresses A character vector of addresses for geocoding
-#' @param sleeper numeric Number of seconds between calls to the geocoding server
-#' @param depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
-#' @return data frame of addresses, latitudes and longitudes
-#' @examples
-#'
-#' # get a list of articles pulled from pubmed:
-#' abstracts <- fromJSON("Test/plasticity_abstracts.json")
-#'
-#' # Extract affiliated addresses from article metadata:
-#' affil_addresses <- get_article_location_data(abstracts)
-#'
-#' # Get coordinates:
-#' coords <- geocode_addresses(affil_addresses, depth = 4)
-#'
-#' # plot coordinates on a map:
-#'
-#' map("world", col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05)
-#' points(coords$long, coords$lat, col = "red", pch = 20)
-#'
-
-geocode_addresses <- function(addresses, sleeper = 0.33, depth = 3){
- coords <- t(sapply(addresses,
- function(addr){
- as.numeric(geocode_address(addr, depth = depth))
- }))
- data.frame(address = row.name(coords), lat = coords[,1], long = coords[,2])
-}
-
-
-#' Extracts addresses of affiliated departments from Pubmed metadata
-#' email addresses are cleaned out.
-#' @export
-#' @param abstracts A list of Pubmed records. e.g. from fetch_in_chunks()
-#' @return character vector of addresses
-#' @examples
-#' # Extract affiliated addresses from article metadata:
-#' affil_addresses <- get_article_location_data(abstracts)
-#'
-#'
-get_article_location_data <- function(abstracts){
- raw_locations <- as.character(do.call(rbind,
- lapply(abstracts,
- function(x) x$MedlineCitation$Article$Affiliation)))
- locations <- gsub(pattern= "[[:alnum:][:punct:]]+@+[[:alnum:][:punct:]]+", "", raw_locations)
- locations
-}
-
-#' Function to get coordinates from a supplied address
-#' If no match is found, it recursively calls itself on the address minus the first line of the address
-#' @export
-#' @param address string
-#' @param depth depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
-#' @return vector of address, lat, long
-#'
-#' @examples
-#'
-#' x <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
-#' geocode_address(x)
-#'
-geocode_address <- function(address, depth = 3){
- coords <- geocode(address)
- if(!is.null(names(coords)) & is.na(coords[1]) & depth > 0){
- address <- sub(pattern="[[:alnum:][:punct:][:space:]][^,]*, ?", "", address)
- return(get_geocode(address, depth = depth -1))
- }
- coords
-}
-
-
-#' Helper function for geocode_address
-geocode <- function(address){
- gcStr <- gsub(' ','%20', address) #Encode URL Parameters
- #Open Connection
- connectStr <- paste('http://maps.google.com/maps/api/geocode/json?sensor=false&address=',gcStr, sep="")
- con <- url(connectStr)
- tryCatch({
- data.json <- fromJSON(paste(readLines(con), collapse=""))
- close(con)
- #Flatten the received JSON
- data.json <- unlist(data.json)
- lat <- data.json["results.geometry.location.lat"]
- lng <- data.json["results.geometry.location.lng"]
- gcodes <- c(lat, lng)
- names(gcodes) <- c("Lat", "Lng")
- #print(paste(address, gcodes$Lat, gcodes$Lng))
- return (gcodes)
- }, error = function(e) return(c(NA,NA)))
-}
-
+# Tools for geocoding addresses affiliated with Pubmed Records
+# Geocoder still needs some work - Not a good enough hit rate...
+
+#' Returns a data frame of geocoded addresses with latitudes and longitudes
+#' Uses the Google Maps geocoding API
+#' @export
+#' @param addresses A character vector of addresses for geocoding
+#' @param sleeper numeric Number of seconds between calls to the geocoding server
+#' @param depth integer recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
+#' @return data frame of addresses, latitudes and longitudes
+#' @examples \dontrun{
+#' # get a list of articles pulled from pubmed:
+#' abstracts <- fromJSON("Test/plasticity_abstracts.json")
+#'
+#' # Extract affiliated addresses from article metadata:
+#' affil_addresses <- get_article_location_data(abstracts)
+#'
+#' # Get coordinates:
+#' coords <- geocode_addresses(affil_addresses, depth = 4)
+#'
+#' # plot coordinates on a map:
+#'
+#' map("world", col="#f2f2f2", fill=TRUE, bg="white", lwd=0.05)
+#' points(coords$long, coords$lat, col = "red", pch = 20)
+#' }
+
+
+geocode_addresses <- function(addresses, sleeper = 0.33, depth = 3){
+ coords <- t(sapply(addresses,
+ function(addr){
+ Sys.sleep(sleeper) # pause between calls to the geocoding server
+ as.numeric(geocode_address(addr, depth = depth))
+ }))
+ data.frame(address = rownames(coords), lat = coords[,1], long = coords[,2])
+}
+
+
+#' Extracts addresses of affiliated departments from Pubmed metadata
+#' Email addresses are cleaned out.
+#' @export
+#' @param abstracts A list of Pubmed records. e.g. from fetch_in_chunks()
+#' @return character vector of addresses
+#' @examples \dontrun{
+#' # Extract affiliated addresses from article metadata:
+#' affil_addresses <- get_article_location_data(abstracts)
+#' }
+
+get_article_location_data <- function(abstracts){
+ raw_locations <- as.character(do.call(rbind,
+ lapply(abstracts,
+ function(x) x$MedlineCitation$Article$Affiliation)))
+ locations <- gsub(pattern= "[[:alnum:][:punct:]]+@+[[:alnum:][:punct:]]+", "", raw_locations)
+ locations
+}
+
+#' Function to get coordinates for a supplied address
+#' If no match is found, it recursively calls itself with the first line of the address removed
+#' @export
+#' @param address string
+#' @param depth integer Recursion depth for attempting to get coordinates. If the full address fails to get a hit, the function is called again with the first line of the address removed. The process is repeated depth times before returning NAs
+#' @return vector of address, lat, long
+#'
+#' @examples \dontrun{
+#' x <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
+#' geocode_address(x)
+#' }
+
+geocode_address <- function(address, depth = 3){
+ coords <- geocode(address)
+ if(!is.null(names(coords)) & is.na(coords[1]) & depth > 0){
+ # No hit: drop the first line of the address and retry
+ address <- sub(pattern = "[[:alnum:][:punct:][:space:]][^,]*, ?", "", address)
+ return(geocode_address(address, depth = depth - 1))
+ }
+ coords
+}
+
+
+#' Helper function for geocode_address
+geocode <- function(address){
+ gcStr <- gsub(' ','%20', address) #Encode URL Parameters
+ #Open Connection
+ connectStr <- paste('http://maps.google.com/maps/api/geocode/json?sensor=false&address=',gcStr, sep="")
+ con <- url(connectStr)
+ tryCatch({
+ data.json <- fromJSON(paste(readLines(con), collapse=""))
+ close(con)
+ #Flatten the received JSON
+ data.json <- unlist(data.json)
+ lat <- data.json["results.geometry.location.lat"]
+ lng <- data.json["results.geometry.location.lng"]
+ gcodes <- c(lat, lng)
+ names(gcodes) <- c("Lat", "Lng")
+ #print(paste(address, gcodes$Lat, gcodes$Lng))
+ return (gcodes)
+ }, error = function(e) return(c(NA,NA)))
+}
+
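
To illustrate the retry logic in geocode_address (a sketch, not part of the commit): each failed lookup strips the leading comma-separated component from the address before trying again, up to depth times.

addr <- "Rothamsted Research, Harpenden, Herts AL5 2JQ, UK."
sub("[[:alnum:][:punct:][:space:]][^,]*, ?", "", addr)
# [1] "Harpenden, Herts AL5 2JQ, UK."
sub("[[:alnum:][:punct:][:space:]][^,]*, ?", "", "Harpenden, Herts AL5 2JQ, UK.")
# [1] "Herts AL5 2JQ, UK."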