From 94996e684a2a6dd8dc8ac25d3eb7ac0d2c423d47 Mon Sep 17 00:00:00 2001 From: Carl Boettiger Date: Wed, 16 Oct 2013 13:49:48 -0700 Subject: [PATCH] added a handful of metadata access methods Includes methods: get_citation, get_license, get_metadata (top-level meta elements only) summary (just uses phylo method for now, should display basic metadata too...). methods are defined for nexml class, and extended to nexmlTree class --- DESCRIPTION | 1 + NAMESPACE | 2 + R/classes.R | 5 +- R/extend_phylo.R | 28 ++++++++++- R/meta.R | 8 ++-- R/metadata_methods.R | 86 ++++++++++++++++++++++++++++++++++ R/nexml_read.R | 2 +- inst/tests/test_meta_extract.R | 54 +++++++++++++++++++++ man/nexml_read.Rd | 2 +- 9 files changed, 179 insertions(+), 9 deletions(-) create mode 100644 R/metadata_methods.R create mode 100644 inst/tests/test_meta_extract.R diff --git a/DESCRIPTION b/DESCRIPTION index 50a51c2..3eabb98 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -26,3 +26,4 @@ Collate: 'nexml_write.R' 'meta.R' 'extend_phylo.R' + 'metadata_methods.R' diff --git a/NAMESPACE b/NAMESPACE index 5ae8a92..c101bc2 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,8 @@ export(nexml_read) export(nexml_write) export(read.nexml) export(write.nexml) +exportMethods(get_citation) +exportMethods(get_license) import(XML) import(ape) import(plyr) diff --git a/R/classes.R b/R/classes.R index e4c3675..13a8a0d 100644 --- a/R/classes.R +++ b/R/classes.R @@ -560,6 +560,7 @@ nexml_namespaces <- "cdao" = "http://www.evolutionaryontology.org/cdao/1.0/cdao.owl#", "xsd" = "http://www.w3.org/2001/XMLSchema#", "dc" = "http://purl.org/dc/elements/1.1/", + "dcterms" = "http://purl.org/dc/terms/", "prism" = "http://prismstandard.org/namespaces/1.2/basic/", "cc" = "http://creativecommons.org/ns#", "http://www.nexml.org/2009") @@ -615,10 +616,10 @@ setMethod("toNeXML", parent }) setAs("nexml", "XMLInternalNode", - function(from) toNeXML(from, newXMLNode("nex:nexml", namespaceDefinitions = from@namespaces))) + function(from) suppressWarnings(toNeXML(from, newXMLNode("nex:nexml", namespaceDefinitions = from@namespaces)))) setAs("nexml", "XMLInternalElementNode", - function(from) toNeXML(from, newXMLNode("nex:nexml", namespaceDefinitions = from@namespaces))) + function(from) suppressWarnings(toNeXML(from, newXMLNode("nex:nexml", namespaceDefinitions = from@namespaces)))) setAs("XMLInternalElementNode", "nexml", function(from) fromNeXML(new("nexml"), from)) diff --git a/R/extend_phylo.R b/R/extend_phylo.R index cef2afd..188c17d 100644 --- a/R/extend_phylo.R +++ b/R/extend_phylo.R @@ -12,6 +12,33 @@ setClass("nexmlTree", representation(nexml = "nexml"), contains="phylo") setMethod("show", "nexmlTree", function(object) print.phylo(object)) # callNextMethod(object) ## callNextMethod might have been an option, but it looks for 'show' method, not print method?? +## constructor function +nexmlTree <- function(object){ + if(is(object, "nexml")){ + phylo <- as(object, "phylo") + } + new("nexmlTree", nexml = object, phylo) +} + +## Coercions between classes +setAs("XMLInternalElementNode", "nexmlTree", function(from) + nexmlTree(as(from, "nexml"))) +setAs("nemxmlTree", "XMLInternalElementNode", function(from) + as(from@nexml, "XMLInternalElementNode")) +setAs("XMLInternalNode", "nexmlTree", function(from) + nexmlTree(as(from, "nexml"))) +setAs("nemxmlTree", "XMLInternalNode", function(from) + as(from@nexml, "XMLInternalNode")) +setAs("phylo", "nexmlTree", function(from) + nexmlTree(as(from, "nexml"))) +setAs("nexmlTree", "phylo", function(from) + as(from@nexml, "phylo")) +setAs("nexml", "nexmlTree", function(from) + nexmlTree(from)) +setAs("nexmlTree", "nexml", function(from) + from@nexml) + + ### Testing # a <- new("phylo", bird.orders) @@ -43,4 +70,3 @@ setAs("phylo", "phyloS4", function(from) edge.length = from$edge.length)) - diff --git a/R/meta.R b/R/meta.R index 5b8d479..2d92e5d 100644 --- a/R/meta.R +++ b/R/meta.R @@ -87,10 +87,10 @@ nexml_citation <- function(obj){ property="prism:publicationDate"), meta(content=obj$title, datatype="xsd:string", - property="dc:title")#, -# meta(content=format(obj, "text"), ## Some invalid type errors here, probably need to cgi escape first? -# datatype="xsd:string", -# property="dcterms:bibliographicCitation") + property="dc:title"), + meta(content=format(obj, "text"), + datatype="xsd:string", + property="dcterms:bibliographicCitation") ), lapply(obj$author, function(x){ meta(content = format(x, c("given", "family")), diff --git a/R/metadata_methods.R b/R/metadata_methods.R new file mode 100644 index 0000000..c985d77 --- /dev/null +++ b/R/metadata_methods.R @@ -0,0 +1,86 @@ +#' @export +setGeneric("get_license", function(object) standardGeneric("get_license")) + +#' @export +setGeneric("get_citation", function(object) standardGeneric("get_citation")) + + +setMethod("summary", + signature("nexml"), + function(object, ...) + summary(as(object, "phylo")) + ) +setMethod("summary", + signature("nexmlTree"), + function(object, ...) + summary(as(object, "nexml")) + ) + + +## Ironically, it is easier to extract the license from the XML representation using XPath than to extract it from the R S4 representation. + + + +## Using newXMLDoc(object) leads invariably to segfaults.... +## safer to write out and parse. +setxpath <- function(object){ + suppressWarnings(saveXML(object, "tmp.xml")) + doc <- xmlParse("tmp.xml") + unlink("tmp.xml") + doc +} + +## FIXME handle namespaces correctly! +setMethod("get_citation", + signature("nexml"), + function(object){ + b <- setxpath(as(object, "XMLInternalElementNode")) + unname(xpathSApply(b, "/nex:nexml/nex:meta[@property='dcterms:bibliographicCitation']/@content")) + }) + +setMethod("get_license", + signature("nexml"), + function(object){ + b <- setxpath(as(object, "XMLInternalElementNode")) + dc_rights <- unname(xpathSApply(b, "/nex:nexml/nex:meta[@property='dc:rights']/@content")) + cc_license <- unname(xpathSApply(b, "/nex:nexml/nex:meta[@rel='cc:license']/@href")) + if(length(dc_rights) > 0) + dc_rights + else + cc_license + }) + +#' get all top-level metadata +setMethod("get_metadata", signature("nexml"), function(object){ + b <- setxpath(as(object, "XMLInternalElementNode")) + references <- getNodeSet(b, "/nex:nexml/nex:meta[@property]") + rel = sapply(references, + function(x) + xmlAttrs(x)['rel']) + href = sapply(references, + function(x) + xmlAttrs(x)['href']) + names(href) = rel + literals <- getNodeSet(b, "/nex:nexml/nex:meta[@rel]") + property = sapply(literals, + function(x) + xmlAttrs(x)['property']) + content = sapply(literals, + function(x) + xmlAttrs(x)['content']) + names(content) = property + c(content, href) + }) + + + + + +## Would be convenient to inherit these automatically... +setMethod("get_metadata", signature("nexmlTree"), function(object) + get_metadata(as(object, "nexml"))) +setMethod("get_citation", signature("nexmlTree"), function(object) + get_citation(as(object, "nexml"))) +setMethod("get_license", signature("nexmlTree"), function(object) + get_license(as(object, "nexml"))) + diff --git a/R/nexml_read.R b/R/nexml_read.R index 82ac7d8..ba2ddf0 100644 --- a/R/nexml_read.R +++ b/R/nexml_read.R @@ -11,7 +11,7 @@ #' @examples #' f <- system.file("examples", "trees.xml", package="RNeXML") #' nexml_read(f) -nexml_read <- function(x, type = c("phylo", "phylo4", "ouch", +nexml_read <- function(x, type = c("nexmlTree", "phylo", "phylo4", "ouch", "matrix", "nexml")){ type <- match.arg(type) doc <- xmlParse(x) diff --git a/inst/tests/test_meta_extract.R b/inst/tests/test_meta_extract.R new file mode 100644 index 0000000..0d7cd3d --- /dev/null +++ b/inst/tests/test_meta_extract.R @@ -0,0 +1,54 @@ +content("extract_metadata") + +library(ape) +library(RNeXML) +library(XML) +data(bird.orders) + + history <- new("meta", + content = "Mapped from the bird.orders data in the ape package using RNeXML", + datatype = "xsd:string", id = "meta5144", property = "skos:historyNote", + 'xsi:type' = "LiteralMeta") + modified <- new("meta", + content = "2013-10-04", datatype = "xsd:string", id = "meta5128", + property = "prism:modificationDate", 'xsi:type' = "LiteralMeta") + website <- new("meta", + href = "http://carlboettiger.info", + rel = "foaf:homepage", 'xsi:type' = "ResourceMeta") +nexml_write(bird.orders, file="example.xml", + title = "My test title", + description = "A description of my test", + creator = "Carl Boettiger ", + publisher = "unpublished data", + pubdate = "2012-04-01", + citation = citation("ape"), + additional_metadata = list(history, modified, website, rdfa), + additional_namespaces = c(skos = "http://www.w3.org/2004/02/skos/core#", + prism = "http://prismstandard.org/namespaces/1.2/basic/", + foaf = "http://xmlns.com/foaf/0.1/")) + + + + +test_that("we can extract various metadata", { + + ## FIXME add the appropriate expect_that checks here + nex <- read.nexml("example.xml") + get_citation(nex) + get_license(nex) + get_metadata(nex) + summary(nex) + + }) + + + +test_that("example parses", { + require(XML) + ## xmlParse and check with xpath + results <- xmlSchemaValidate("http://www.nexml.org/2009/nexml.xsd", "example.xml") + expect_equal(results$status, 0) + expect_equal(length(results$errors), 0) + + unlink("example.xml") # cleanup + }) diff --git a/man/nexml_read.Rd b/man/nexml_read.Rd index 4d249ed..a4294f7 100644 --- a/man/nexml_read.Rd +++ b/man/nexml_read.Rd @@ -4,7 +4,7 @@ \title{Read NeXML files into various R formats} \usage{ nexml_read(x, - type = c("phylo", "phylo4", "ouch", "matrix", "nexml")) + type = c("nexmlTree", "phylo", "phylo4", "ouch", "matrix", "nexml")) } \arguments{ \item{x}{Path to the file to be read in}