changes

ropensci-archive · Apr 12, 2014 · 2b4cc3c · 2b4cc3c
2 parents 6692694 + 4e6bc25
commit 2b4cc3c
Show file tree

Hide file tree

Showing 93 changed files with 4,977 additions and 425 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -11,4 +11,4 @@ Makefile
 inst/vign/
 vignettes/rbhl_vignette.md
 vignettes/margins.sty
-vignettes/cache/
+vignettes/cache/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -4,9 +4,9 @@ Description: Interface to Biodiversity Heritage Library (BHL) API methods. BHL
     is a repository of digitized literature on biodiversity studies,
     including floras, research papers, and more.
 Type: Package
-Version: 0.0.7
-License: CC0
-Date: 2014-01-13
+Version: 0.1.0
+License: MIT + file LICENSE
+Date: 2014-01-16
 Authors@R: c(person("Scott", "Chamberlain", role = c("aut", "cre"),
     email = "myrmecocystus@gmail.com"),
     person("Karthik", "Ram", role = "aut",

diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,2 @@
+YEAR: 2013
+COPYRIGHT HOLDER: Scott Chamberlain
diff --git a/Makefile b/Makefile
@@ -1,3 +1,7 @@
+all:
+	make move
+	make pandoc
+	make rmd2md
 all: move pandoc rmd2md cleanup
 
 vignettes: 
@@ -19,4 +23,4 @@ rmd2md:
 
 cleanup:
 		cd vignettes;\
-		rm -rf figure
+		rm -rf figure
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,28 +1,40 @@
+# Generated by roxygen2 (4.0.0): do not edit by hand
+
 export(bhl_authorsearch)
 export(bhl_bioherlib)
 export(bhl_booksearch)
+export(bhl_getauthorparts)
 export(bhl_getauthortitles)
 export(bhl_getcollections)
 export(bhl_getitembyidentifier)
 export(bhl_getitemmetadata)
 export(bhl_getitempages)
+export(bhl_getitemparts)
 export(bhl_getlanguages)
 export(bhl_getpagemetadata)
 export(bhl_getpagenames)
 export(bhl_getpageocrtext)
+export(bhl_getpartbibtex)
+export(bhl_getpartbyidentifier)
+export(bhl_getpartendnote)
+export(bhl_getpartmetadata)
+export(bhl_getpartnames)
+export(bhl_getsubjectparts)
 export(bhl_getsubjecttitles)
 export(bhl_gettitlebibTex)
 export(bhl_gettitlebyidentifier)
 export(bhl_gettitleendNote)
 export(bhl_gettitleitems)
 export(bhl_gettitlemetadata)
 export(bhl_getunpublisheditems)
+export(bhl_getunpublishedparts)
 export(bhl_getunpublishedtitles)
 export(bhl_namecount)
 export(bhl_namegetdetail)
 export(bhl_namelist)
 export(bhl_namesearch)
 export(bhl_openurl)
+export(bhl_partsearch)
 export(bhl_subjectsearch)
 export(bhl_titlesearchsimple)
 export(getkey)

diff --git a/NEWS b/NEWS
@@ -1,3 +1,17 @@
+rbhl 0.1.2
+===============
+
+NEW FEATURES 
+
+* Gains new functions bhl_getauthorparts(), bhl_getitemparts(), bhl_getpartbibtex(), bhl_getpartendnote(), bhl_getsubjectparts(), bhl_getunpublishedparts(), and bhl_partsearch(). 
+* New function bhl_getpages() to get multiple pages of ocr text in one call, and optionally combine all pages into a single text object for further processing.
+
+BUG FIXES
+
+* Removed arguments in bhl_getpageocrtext() for names and ocr that weren't actually available in the API method.
+* Some functions were missing parameters, and have now been added in where appropriate.
+
+
 rbhl 0.1.0
 ===============
 

diff --git a/R/bhl_authorsearch.R b/R/bhl_authorsearch.R
@@ -7,14 +7,9 @@
 #' @import httr 
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param name full or partial name of the author for which to search
 #'     (last name listed first, i.e. 'Darwin, Charles') (character)
-#' @param format either XML ('xml') or JSON ('json') format. If output='list',
-#' 		format is forced to equal 'json'
-#' @param key your BHL API key, either enter, or loads from .Rprofile
-#' @param callopts Call options passed on to httr::GET.
-#' @param output Return a list, raw json or xml, or parsed data (character). 
-#'    Default: 'list'
 #' @export
 #' @examples \dontrun{
 #' bhl_authorsearch(name='dimmock')

diff --git a/R/bhl_bioherlib.R b/R/bhl_bioherlib.R
@@ -3,11 +3,11 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param method The API method to use.
 #' @param pageid The identifier of an individual page in a scanned book.
 #' @param ocr return ocr text of the page (logical). Default: FALSE
 #' @param names return the names that appear on the page (logical). Default: FALSE
-#' @inheritParams bhl_authorsearch
 #' @export
 #' @examples \dontrun{
 #' bhl_bioherlib(method='GetPageMetadata', pageid=1328690, ocr=TRUE, names=TRUE)

diff --git a/R/bhl_booksearch.R b/R/bhl_booksearch.R
@@ -9,14 +9,14 @@
 #'
 #' @import httr
 #' @importFrom plyr compact 
+#' @template all
 #' @param title string to search for in the title (character)
 #' @param lname last name to search for (character)
 #' @param volume volume to search for (numeric)
 #' @param edition edition to search for (character)
 #' @param year year to search for, four characters, e.g, 1970 (numeric)
 #' @param collectionid collection identifier to search for (numeric)
 #' @param language language to search for (character)
-#' @inheritParams bhl_authorsearch
 #' @export
 #' @note Use \code{\link{bhl_getcollections}} or \code{\link{bhl_getlanguages}} to get 
 #' acceptable terms

diff --git a/R/bhl_getauthorparts.R b/R/bhl_getauthorparts.R
@@ -0,0 +1,32 @@
+#' Return a list of parts (articles, chapters, etc) for a given BHL author.
+#'
+#' Unless the identifier for a particular BHL author record is known in
+#' advance, use this method in combination with the AuthorSearch method.
+#' 
+#' Note: no working examples for this function have been found yet.
+#'
+#' @import httr
+#' @importFrom plyr compact
+#' @importFrom XML xmlTreeParse
+#' @template all
+#' @param creatorid BHL identifier for a particular author (numeric)
+#' @examples \dontrun{
+#' bhl_getauthorparts(147)
+#' bhl_getauthorparts(39120, output='raw')
+#' bhl_getauthorparts(39120, format='xml', output='raw')
+#' bhl_getauthorparts(39120, format='xml', output='parsed')
+#' }
+#' @export
+bhl_getauthorparts <- function(creatorid, format = "json", output='list', 
+  key = NULL, callopts=list()) 
+{
+  if(output=='list') format='json'  # output='list' always forces format to 'json'
+  key <- getkey(key)  # getkey() is defined elsewhere; presumably supplies a stored key when NULL - confirm
+  url = "http://www.biodiversitylibrary.org/api2/httpquery.ashx"
+  args <- compact(list(op = "GetAuthorParts", apikey = key, format = format,  # compact() drops NULL entries from the query
+                       creatorid=creatorid))
+  out <- GET(url, query = args, callopts)  # callopts is handed on to httr::GET
+  stop_for_status(out)  # turn an HTTP error status into an R error
+  tt <- content(out, as="text")  # response body as text
+  return_results(tt, output, format)  # helper defined elsewhere; receives the raw text plus output/format
+}
diff --git a/R/bhl_getauthortitles.R b/R/bhl_getauthortitles.R
@@ -6,8 +6,8 @@
 #'
 #' @import httr
 #' @importFrom plyr compact 
+#' @template all
 #' @param creatorid BHL identifier for a particular author (numeric)
-#' @inheritParams bhl_authorsearch
 #' @export
 #' @examples \dontrun{
 #' bhl_getauthortitles(1970)

diff --git a/R/bhl_getcollections.R b/R/bhl_getcollections.R
@@ -4,7 +4,7 @@
 #' @import httr
 #' @importFrom RJSONIO fromJSON
 #' @importFrom plyr compact ldply
-#' @inheritParams bhl_authorsearch
+#' @template all
 #' @examples \dontrun{
 #' bhl_getcollections()
 #' bhl_getcollections(output = 'raw')

diff --git a/R/bhl_getitembyidentifier.R b/R/bhl_getitembyidentifier.R
@@ -6,12 +6,11 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param type the type of identifier (barcode or ia) (character)
 #' @param value the identifier value (character)
-#' @inheritParams bhl_authorsearch
 #' @examples \dontrun{
 #' bhl_getitembyidentifier(type='ia', value='animalkingdomarr03cuvi')
-#' bhl_getitembyidentifier(type='ia', value='animalkingdomarr03cuvi', format='xml')
 #' bhl_getitembyidentifier(type='ia', value='animalkingdomarr03cuvi', format='xml', output='raw')
 #' bhl_getitembyidentifier(type='ia', value='animalkingdomarr03cuvi', format='xml', output='parsed')
 #' }

diff --git a/R/bhl_getitemmetadata.R b/R/bhl_getitemmetadata.R
@@ -5,28 +5,36 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
-#' @param itemid item id (character)
-#' @param pages return the items pages (logical)
-#' @param ocr return ocr text of the page (logical)
 #' @template all
-#' @export
+#' @param itemid item id (character)
+#' @param pages return the item's pages (TRUE/FALSE)
+#' @param ocr (logical) TRUE to return the ocr for the item's pages. Setting this
+#' to TRUE apparently doesn't return any actual ocr text, but leaving parameter 
+#' here for now.
+#' @param parts (logical) TRUE to return the item's parts. Setting this
+#' to TRUE apparently doesn't return any parts text, but leaving parameter 
+#' here for now.
 #' @examples \dontrun{
 #' bhl_getitemmetadata('16800', TRUE)
 #' bhl_getitemmetadata('16800', TRUE, format='xml', output='parsed')
 #' bhl_getitemmetadata('16800', TRUE, format='json', output='raw')
 #' bhl_getitemmetadata('16800', TRUE, format='xml', output='raw')
+#' 
+#' 
+#' bhl_getitemmetadata(20419, pages=FALSE, parts=TRUE)
 #' }
-
-bhl_getitemmetadata <- function(itemid = NULL, pages = TRUE, ocr = FALSE, 
+#' @export
+bhl_getitemmetadata <- function(itemid = NULL, pages = TRUE, ocr=FALSE, parts=FALSE,
   format='json', output = 'list', key = NULL, callopts = list()) 
 {
   if(output=='list') format='json'
   key <- getkey(key)
   url = "http://www.biodiversitylibrary.org/api2/httpquery.ashx"
-  args <- compact(list(op="GetItemMetadata", apikey=key, pages=pages, 
-                       itemid=itemid, ocr=ocr, format=format))
+  args <- compact(list(op="GetItemMetadata", apikey=key, pages=pages, itemid=itemid,  # NOTE(review): pages is sent as a raw logical, unlike ocr/parts below - confirm the API accepts it
+                       format=format, ocr=if(ocr) 't' else NULL,  # flag is sent as 't' only when TRUE; NULL entries are dropped by compact()
+                       parts=if(parts) 't' else NULL))  # same 't'-or-omitted encoding as ocr
   out <- GET(url, query = args, callopts)
   stop_for_status(out)
   tt <- content(out, as="text")
   return_results(tt, output, format)
-}
+}
diff --git a/R/bhl_getitempages.R b/R/bhl_getitempages.R
@@ -3,22 +3,26 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param itemid the item id (character)
-#' @inheritParams bhl_authorsearch
+#' @param ocr return ocr text of the page (TRUE/FALSE)
 #' @examples \dontrun{
 #' bhl_getitempages('16800')
 #' bhl_getitempages('16800', format='xml', output='raw')
 #' bhl_getitempages('16800', format='xml', output='parsed')
+#' 
+#' # Return ocr text
+#' bhl_getitempages('16800', TRUE) 
 #' }
 #' @export
-bhl_getitempages <- function(itemid = NA, format = "json", output='list',
+bhl_getitempages <- function(itemid, ocr=FALSE, format = "json", output='list',
   key = NULL, callopts = list()) 
 {
   if(output=='list') format='json'
   key <- getkey(key)
   url = "http://www.biodiversitylibrary.org/api2/httpquery.ashx"
   args <- compact(list(op = "GetItemPages", apikey = key, format = format, 
-                       itemid=itemid))
+                       itemid=itemid, ocr=if(ocr) 't' else NULL))
   out <- GET(url, query = args, callopts)
   stop_for_status(out)
   tt <- content(out, as="text")

diff --git a/R/bhl_getitemparts.R b/R/bhl_getitemparts.R
@@ -0,0 +1,26 @@
+#' Return a list of an item's parts.
+#'
+#' @import httr
+#' @importFrom plyr compact
+#' @importFrom XML xmlTreeParse
+#' @template all
+#' @param itemid the item id (numeric or character)
+#' @examples \dontrun{
+#' bhl_getitemparts(35600)
+#' bhl_getitemparts(35600, 'xml', 'raw')
+#' bhl_getitemparts(35600, 'xml', 'parsed')
+#' }
+#' @export
+bhl_getitemparts <- function(itemid, format = "json", output='list',
+  key = NULL, callopts = list()) 
+{
+  if(output=='list') format='json'  # output='list' always forces format to 'json'
+  key <- getkey(key)  # getkey() is defined elsewhere; presumably supplies a stored key when NULL - confirm
+  url = "http://www.biodiversitylibrary.org/api2/httpquery.ashx"
+  args <- compact(list(op = "GetItemParts", apikey = key, format = format,  # compact() drops NULL entries from the query
+                       itemid=itemid))
+  out <- GET(url, query = args, callopts)  # callopts is handed on to httr::GET
+  stop_for_status(out)  # turn an HTTP error status into an R error
+  tt <- content(out, as="text")  # response body as text
+  return_results(tt, output, format)  # helper defined elsewhere; receives the raw text plus output/format
+}
diff --git a/R/bhl_getlanguages.R b/R/bhl_getlanguages.R
@@ -4,7 +4,7 @@
 #' @importFrom RJSONIO fromJSON
 #' @importFrom plyr compact ldply
 #' @importFrom XML xmlTreeParse
-#' @inheritParams bhl_authorsearch
+#' @template all
 #' @examples \dontrun{
 #' bhl_getlanguages()
 #' bhl_getlanguages(output='parsed')

diff --git a/R/bhl_getpagemetadata.R b/R/bhl_getpagemetadata.R
@@ -5,10 +5,10 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param page page number to get
 #' @param ocr return ocr text of the page (TRUE/FALSE)
 #' @param names return the names that appear on the page (TRUE/FALSE)
-#' @inheritParams bhl_authorsearch
 #' @examples \dontrun{
 #' bhl_getpagemetadata(page=1328690, ocr=TRUE, format='json')
 #' bhl_getpagemetadata(page=1328690, ocr=TRUE, format='xml')

diff --git a/R/bhl_getpagenames.R b/R/bhl_getpagenames.R
@@ -3,8 +3,8 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param page page number to get
-#' @inheritParams bhl_authorsearch
 #' @examples \dontrun{
 #' bhl_getpagenames('1328690')
 #' bhl_getpagenames('1328690', 'xml', 'raw')

diff --git a/R/bhl_getpageocrtext.R b/R/bhl_getpageocrtext.R
@@ -3,24 +3,21 @@
 #' @import httr
 #' @importFrom plyr compact
 #' @importFrom XML xmlTreeParse
+#' @template all
 #' @param page page number to get
-#' @param ocr return ocr text of the page (TRUE/FALSE)
-#' @param names return the names that appear on the page (TRUE/FALSE)
-#' @inheritParams bhl_authorsearch
 #' @examples \dontrun{
-#' bhl_getpageocrtext(1328690, FALSE, FALSE, 'json')
-#' bhl_getpageocrtext(1328690, FALSE, FALSE, 'xml', 'raw')
-#' bhl_getpageocrtext(1328690, FALSE, FALSE, 'xml', 'parsed')
+#' bhl_getpageocrtext(1328690, 'json')
+#' bhl_getpageocrtext(1328690, 'xml', 'raw')
+#' bhl_getpageocrtext(1328690, 'xml', 'parsed')
 #' }
 #' @export
-bhl_getpageocrtext <- function(page = NULL, ocr = FALSE, names = FALSE, format = 'json', 
-  output = 'list', key = NULL, callopts = list()) 
+bhl_getpageocrtext <- function(page = NULL, format = 'json', output = 'list', 
+                               key = NULL, callopts = list()) 
 {
   if(output=='list') format='json'
   key <- getkey(key)
   url = "http://www.biodiversitylibrary.org/api2/httpquery.ashx"
-  args <- compact(list(op = "GetPageOcrText", apikey = key, format=format, pageid=page,
-                       ocr=if(ocr) 't' else NULL, names=if(names) 't' else NULL))
+  args <- compact(list(op = "GetPageOcrText", apikey = key, format=format, pageid=page))
   out <- GET(url, query = args, callopts)
   stop_for_status(out)
   tt <- content(out, as="text")