From a102daad47913f3ecad57070f4b3e7fd46da6c8c Mon Sep 17 00:00:00 2001 From: Carl Boettiger Date: Mon, 25 Jun 2018 17:23:34 -0700 Subject: [PATCH] support as_emld.raw closes #19 closes #20 closes #21 --- NAMESPACE | 1 + R/as_emld.R | 68 +++++++++++++++++++++++++++++------ man/as_emld.Rd | 8 ++++- tests/testthat/test-as_emld.R | 14 ++++++++ 4 files changed, 80 insertions(+), 11 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 8a61e96..51b63eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(as_emld,character) S3method(as_emld,json) S3method(as_emld,list) +S3method(as_emld,raw) S3method(as_emld,xml_document) S3method(as_json,emld) S3method(as_json,list) diff --git a/R/as_emld.R b/R/as_emld.R index ee0e82c..7dda1f5 100644 --- a/R/as_emld.R +++ b/R/as_emld.R @@ -2,18 +2,58 @@ #' #' Parse an EML file into an emld object. #' @param x path to an EML file +#' @param from explicit type for the input format. By default, will +#' attempt to guess the format, but it always safer to specify the +#' input format. This is essential for literal text strings or raw +#' vectors where the type cannot be guessed by the R object class +#' or file extension of the input. #' @importFrom xml2 read_xml xml_find_all xml_remove #' @importFrom methods is #' @importFrom jsonld jsonld_compact jsonld_frame #' #' @export -as_emld <- function(x) +as_emld <- function(x, from = c("guess", "xml", "json", "list")) { UseMethod("as_emld") } + +#' @export +as_emld.raw <- function(x, from = c("guess", "xml", "json", "list") ){ + + from <- match.arg(from) + if(from == "xml") { + x <- xml2::read_xml(x) + return(as_emld.xml_document(x) ) + } else if (from == "json"){ + return (as_emld.json(x) ) + } else { + warning("assuming raw vector is xml...") + x <- xml2::read_xml(x) + return(as_emld.xml_document(x) ) + } + +} + + #' @export -as_emld.character <- function(x){ +as_emld.character <- function(x, from = c("guess", "xml", "json", "list")){ + + # Character could be literal or filepath. Let's hope `from` is specified + from <- match.arg(from) + + ## Handle declared cases first + if(from == "xml"){ + x <- xml2::read_xml(x) + return(as_emld.xml_document(x) ) + } else if(from == "json"){ + x <- jsonlite::read_json(x) + return(as_emld.json(x) ) + } else if(from == "list"){ + return( as_emld.list(as.list(x), from = "list") ) + + } else { # Handle "guess" + ## Read json or xml files, based on extension if(file.exists(x)){ if(grepl("\\.xml$", x)){ @@ -30,15 +70,17 @@ as_emld.character <- function(x){ } else { ## what other kind of character string examples do we expect other than filenames? - as_emld.list(as.list(x)) + as_emld.list(as.list(x), from = "list") + } + } } -### FROM JSON FILES ### +### FROM json Class objects ### #' @export -as_emld.json <- function(x){ - ## Convert json or xml_document to the S3 emld object +as_emld.json <- function(x, from = "json"){ + ## Convert json to the S3 emld object ## FIXME technically this assumes only our context frame <- system.file(paste0("frame/", @@ -47,6 +89,7 @@ as_emld.json <- function(x){ context <- system.file(paste0("context/", getOption("emld_db", "eml-2.2.0"), "/eml-context.json"), package = "emld") + framed <- jsonld::jsonld_frame(x, frame) compacted <- jsonld::jsonld_compact(framed, context) emld <- jsonlite::fromJSON(compacted, simplifyVector = FALSE) @@ -55,9 +98,9 @@ as_emld.json <- function(x){ } -### FROM XML FILES ###### +### FROM xml_document Class objects ###### #' @export -as_emld.xml_document <- function(x){ +as_emld.xml_document <- function(x, from = "xml_document"){ ## Drop comment nodes xml2::xml_remove(xml2::xml_find_all(x, "//comment()")) @@ -81,10 +124,15 @@ as_emld.xml_document <- function(x){ ## CHECKME xml_document and json are also list! #' @export -as_emld.list <- function(x){ - # Note that xml_document is.list too +as_emld.list <- function(x, from = "list"){ + if(is(x, "xml_document")){ + return(as_emld.xml_document(x)) + } else if(is(x, "json")){ + return(as_emld.json(x)) + } else { class(x) <- c("emld", "list") return(x) + } } diff --git a/man/as_emld.Rd b/man/as_emld.Rd index a322a40..fd66a71 100644 --- a/man/as_emld.Rd +++ b/man/as_emld.Rd @@ -4,10 +4,16 @@ \alias{as_emld} \title{as_emld} \usage{ -as_emld(x) +as_emld(x, from = c("guess", "xml", "json", "list")) } \arguments{ \item{x}{path to an EML file} + +\item{from}{explicit type for the input format. By default, will +attempt to guess the format, but it always safer to specify the +input format. This is essential for literal text strings or raw +vectors where the type cannot be guessed by the R object class +or file extension of the input.} } \description{ Parse an EML file into an emld object. diff --git a/tests/testthat/test-as_emld.R b/tests/testthat/test-as_emld.R index 0429b6b..00dc0d0 100644 --- a/tests/testthat/test-as_emld.R +++ b/tests/testthat/test-as_emld.R @@ -62,3 +62,17 @@ test_that("we can parse repeated name elements", { expect_is(json, "json") }) + +test_that("raw files can be parsed", { + emld <- as_emld(ex) + + ex_char <- paste(readLines(ex), collapse = "\n") + # potentially test for char formats here. currently doesn't work (converts to list of 1 instead of 6) + # emld_char <- as_emld(ex_char) + # expect_equal(emld, emld_char) + + ex_raw <- charToRaw(ex_char) + emld_raw <- as_emld(ex_raw, "xml") + expect_equal(emld, emld_raw) + expect_warning(emld_raw <- as_emld(ex_raw), "assuming raw vector is xml") +})