diff --git a/.travis.yml b/.travis.yml
index 1b671ed..88df570 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,3 +6,14 @@ r:
   - oldrel
   - release
   - devel
+
+# needed for eml2:
+addons:
+  apt:
+    sources:
+      - sourceline: 'ppa:opencpu/jq'
+    packages:
+      - librdf0-dev
+      - libv8-dev
+      - libjq-dev
+      - libudunits2-dev
diff --git a/R/dataspice.R b/R/dataspice.R
index bdd715e..f63c5cd 100644
--- a/R/dataspice.R
+++ b/R/dataspice.R
@@ -1 +1,3 @@
-utils::globalVariables("variableName")
+utils::globalVariables(c("variableName",
+                         "value", "name", #eml_to_spice
+                         "long", "lat", "region")) #edit_biblio
diff --git a/R/eml_to_spice.R b/R/eml_to_spice.R
new file mode 100644
index 0000000..b829799
--- /dev/null
+++ b/R/eml_to_spice.R
@@ -0,0 +1,314 @@
+# Collect the requested elements of an EML document into one flat list.
+#
+# Only the `entities` that occur in `names(eml$dataset)` are looked up.
+# `level_id` names the fields whose presence at the top of a lookup result
+# marks that result as a single element, which is wrapped in a list before
+# everything is flattened by one level.
+get_entities <- function(eml,
+                         entities = c("dataTable", "spatialRaster", "spatialVector", "storedProcedure", "view", "otherEntity"),
+                         level_id = "entityName"){
+  entities <- entities[entities %in% names(eml$dataset)]
+
+  #look for specific fields to determine if the entity needs to be listed ("boxed") or not
+  needs_boxing <- function(x) {
+    is.list(x) && any(vapply(level_id, function(id) !is.null(x[[id]]), logical(1)))
+  }
+
+  purrr::map(entities, ~eml2::eml_get(eml, .x)) %>%
+    # restructure so that all entities are at the same level
+    # use level id to determine if .x should be listed or not
+    purrr::map_if(needs_boxing, list) %>%
+    unlist(recursive = FALSE)
+}
+
+# Reshape one EML entity into the wide access.csv layout.
+#
+# The entity is flattened to name/value pairs; names matching objectName,
+# entityName, url or formatName are mapped to the access.csv column names
+# and all other pairs are dropped.
+get_access_spice <- function(x){
+  x %>%
+    unlist() %>%
+    tibble::enframe() %>%
+    dplyr::mutate(name = dplyr::case_when(
+      grepl("objectName", name) ~ "fileName",
+      grepl("entityName", name) ~ "name",
+      grepl("url", name) ~ "contentUrl",
+      grepl("formatName", name) ~ "fileFormat"
+    )) %>%
+    stats::na.omit() %>%
+    dplyr::filter(value != "download") %>% #often also included as url
+    tidyr::spread(name, value)
+}
+
+# Write `out` to `file_name` inside the folder `path`, then return `out`.
+#
+# Nothing is written when `path` is NULL. A missing folder is created,
+# including missing parent folders.
+es_write_table <- function(out, path, file_name){
+  if(!is.null(path)){
+    if(!dir.exists(path)){
+      dir.create(path, recursive = TRUE)
+    }
+    readr::write_csv(out, file.path(path, file_name))
+  }
+
+  out
+}
+
+#' Get access from EML
+#'
+#' Return EML access in the dataspice access.csv format.
+#'
+#' @param eml (emld) an EML object
+#' @param path (character) folder path for saving the table to disk
+#'
+#' @return a tibble holding whichever of the columns fileName, name,
+#'   contentUrl and fileFormat could be filled from the EML
+#'
+#' @export
+#' @import eml2
+#'
+#' @examples
+#' \dontrun{
+#' eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice")
+#' eml <- read_eml(eml_path)
+#' es_access(eml)
+#' }
+
+es_access <- function(eml, path = NULL) {
+  entities <- get_entities(eml)
+  access_entities <- lapply(entities, get_access_spice)
+
+  out <- dplyr::bind_rows(access_entities)
+
+  #reorder
+  fields <- c("fileName", "name", "contentUrl", "fileFormat")
+  out <- out[, fields[fields %in% colnames(out)]]
+
+  es_write_table(out, path, "access.csv")
+}
+
+# Reformat the attributes of one EML entity (a dataTable or otherEntity) to
+# the tabular format specified in dataspice.
+#
+# An entity whose attributeList lookup has length <= 1 yields a single row
+# with variableName set to NA.
+get_attributes_spice <- function(x) {
+  objName <- eml2::eml_get(x, "objectName")
+  # NOTE(review): presumably a length of 2 means "one objectName plus the
+  # context element added by eml_get" -- confirm against eml2
+  objName <- ifelse(length(objName) == 2, objName[[1]], NA)
+
+  attrList <- eml2::eml_get(x, "attributeList")
+
+  if(length(attrList) <= 1){
+    out <- dplyr::tibble(fileName = objName,
+                         variableName = NA,
+                         description = NA,
+                         unitText = NA)
+  } else {
+    attr <- eml2::get_attributes(attrList)
+
+    if(is.null(attr$attributes$unit)){
+      attr$attributes$unit <- NA
+    }
+
+    #set datetime format as unitText if available
+    if(!is.null(attr$attributes$formatString)){
+      na_units <- is.na(attr$attributes$unit)
+      attr$attributes$unit[na_units] <- attr$attributes$formatString[na_units]
+    }
+
+    #get missing value info in text form:
+    missing_val <- dplyr::tibble(missingValueCode = c(attr$attributes$missingValueCode, "NA"),
+                                 missingValueCodeExplanation = c(attr$attributes$missingValueCodeExplanation, "NA")) %>%
+      dplyr::distinct() %>%
+      stats::na.omit()
+
+    missing_val_text <- paste(missing_val$missingValueCode,
+                              missing_val$missingValueCodeExplanation,
+                              sep = " = ",
+                              collapse = "; ")
+
+    out <- dplyr::tibble(fileName = objName,
+                         variableName = attr$attributes$attributeName,
+                         description = paste0(attr$attributes$attributeDefinition,
+                                              "; missing values: ", missing_val_text),
+                         unitText = attr$attributes$unit)
+  }
+
+  out
+}
+
+#' Get attributes from EML
+#'
+#' Return EML attributes in the dataspice attributes.csv format.
+#'
+#' @param eml (emld) an EML object
+#' @param path (character) folder path for saving the table to disk
+#'
+#' @return a tibble with the columns fileName, variableName, description and
+#'   unitText; rows without a variableName are dropped
+#'
+#' @export
+#'
+#' @import dplyr
+#' @importFrom readr write_csv
+#'
+#' @examples
+#' \dontrun{
+#' eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice")
+#' eml <- read_eml(eml_path)
+#' es_attributes(eml)
+#' }
+
+es_attributes <- function(eml, path = NULL) {
+  entities <- get_entities(eml)
+  attrTables <- lapply(entities, get_attributes_spice)
+
+  out <- dplyr::bind_rows(attrTables) %>%
+    dplyr::filter(!is.na(variableName))
+
+  es_write_table(out, path, "attributes.csv")
+}
+
+#' Get biblio from EML
+#'
+#' Return EML biblio in the dataspice biblio.csv format.
+#'
+#' @param eml (emld) an EML object
+#' @param path (character) folder path for saving the table to disk
+#'
+#' @return a tibble with one column per biblio field found in the EML;
+#'   several values for the same field are joined with "; "
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice")
+#' eml <- read_eml(eml_path)
+#' es_biblio(eml)
+#' }
+
+es_biblio <- function(eml, path = NULL) {
+  biblio_eml <- eml %>%
+    unlist() %>%
+    tibble::enframe() %>%
+    dplyr::mutate(name = dplyr::case_when(
+      grepl("dataset.title", name) ~ "title",
+      grepl("abstract", name) ~ "description",
+      grepl("pubDate", name) ~ "datePublished",
+      grepl("packageId", name) ~ "identifier",
+      grepl("keyword", name) ~ "keywords",
+      grepl("intellectual", name) ~ "license",
+      grepl("fund", name) ~ "funder",
+      grepl("geographicDescription", name) ~ "geographicDescription",
+      grepl("northBoundingCoordinate", name) ~ "northBoundCoord",
+      grepl("eastBoundingCoordinate", name) ~ "eastBoundCoord",
+      grepl("southBoundingCoordinate", name) ~ "southBoundCoord",
+      grepl("westBoundingCoordinate", name) ~ "westBoundCoord",
+      #wktString?
+      grepl("beginDate|singleDateTime", name) ~ "startDate",
+      grepl("endDate", name) ~ "endDate"
+    )) %>%
+    stats::na.omit() %>%
+    dplyr::group_by(name) %>%
+    dplyr::summarize(value = paste(value, collapse = "; ")) %>%
+    tidyr::spread(name, value)
+
+  #reorder
+  fields <- c("title", "description", "datePublished", "citation", "keywords", "license", "funder", "geographicDescription", "northBoundCoord", "eastBoundCoord", "southBoundCoord", "westBoundCoord", "wktString", "startDate", "endDate")
+
+  out <- biblio_eml[, fields[fields %in% colnames(biblio_eml)]]
+
+  es_write_table(out, path, "biblio.csv")
+}
+
+#' Get creators from EML
+#'
+#' Return EML creators in the dataspice creators.csv format.
+#'
+#' @param eml (emld) an EML object
+#' @param path (character) folder path for saving the table to disk
+#'
+#' @return a tibble of the distinct parties listed as creator, contact,
+#'   associatedParty or metadataProvider, with whichever of the columns id,
+#'   givenName, familyName, affiliation and email could be filled
+#'
+#' @importFrom purrr discard
+#' @importFrom tibble enframe
+#' @importFrom tidyr spread
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice")
+#' eml <- read_eml(eml_path)
+#' es_creators(eml)
+#' }
+
+es_creators <- function(eml, path = NULL) {
+  people <- get_entities(eml,
+                         entities = c("creator", "contact", "associatedParty", "metadataProvider"),
+                         level_id = c("individualName", "organizationName"))
+  if(!is.null(names(people))){
+    people <- people[names(people) == ""]
+  }
+
+  people_parsed <- lapply(people, function(x){x %>%
+      unlist() %>%
+      tibble::enframe() %>%
+      dplyr::mutate(name = dplyr::case_when(
+        grepl("userId.userId", name) ~ "id",
+        grepl("givenName", name) ~ "givenName",
+        grepl("surName", name) ~ "familyName",
+        grepl("organizationName", name) ~ "affiliation",
+        grepl("electronicMailAddress", name) ~ "email"
+      )) %>%
+      stats::na.omit() %>%
+      # merge fields together if duplicated (ex: givenName1 & givenName2)
+      dplyr::group_by(name) %>%
+      dplyr::summarize(value = paste(value, collapse = " ")) %>%
+      tidyr::spread(name, value)
+  })
+
+  out <- dplyr::bind_rows(people_parsed) %>%
+    dplyr::distinct()
+
+  fields <- c("id", "givenName", "familyName", "affiliation", "email")
+  out <- out[, fields[fields %in% colnames(out)]]
+
+  es_write_table(out, path, "creators.csv")
+}
+
+#' Get dataspice tabular formats from EML
+#'
+#' Return EML in the dataspice dataframes.
+#'
+#' @param eml (emld) an EML object
+#' @param path (character) folder path for saving the table to disk
+#'
+#' @return (invisibly) a list with the tibbles attributes, access, biblio
+#'   and creators
+#'
+#' @export
+#'
+#' @examples
+#' \dontrun{
+#' eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice")
+#' eml <- read_eml(eml_path)
+#' my_spice <- eml_to_spice(eml, ".")
+#' }
+
+eml_to_spice <- function(eml, path = NULL) {
+  out <- list(attributes = es_attributes(eml, path),
+              access = es_access(eml, path),
+              biblio = es_biblio(eml, path),
+              creators = es_creators(eml, path))
+
+  invisible(out)
+}
+
diff --git a/R/prep_access.R b/R/prep_access.R
new file mode 100644
index 0000000..7c9da11
--- /dev/null
+++ b/R/prep_access.R
@@ -0,0 +1,53 @@
+#' Prepare access
+#'
+#' List the csv and tsv files in the data folder and add a row to access.csv
+#' for every file that is not listed there yet.
+#'
+#' @param data_path path to the data folder. Defaults to "data". Only files
+#'   with the extension csv or tsv are picked up.
+#' @param access_path path to the access.csv file. Defaults to "data/metadata/access.csv".
+#'
+#' @return the function writes out the updated access.csv file to access_path
+#'   and invisibly returns the updated table.
+#' @export
+prep_access <- function(data_path = here::here("data"),
+                        access_path = here::here("data", "metadata",
+                                                 "access.csv")
+                        ){
+
+  if(!file.exists(data_path)){stop("invalid path to data folder")}
+  if(!file.exists(access_path)){
+    stop("access file does not exist. Check path or run create_spice?")}
+
+  access <- readr::read_csv(access_path)
+
+  # read file info
+  fileNames <- tools::list_files_with_exts(data_path,
+                                           exts = c("csv", "tsv"),
+                                           full.names = TRUE)
+  if(length(fileNames) == 0){
+    stop("no csv or tsv files found in the data folder")
+  }
+
+  # only add files that are not listed yet so that no row gets duplicated
+  is_new <- !(basename(fileNames) %in% access$fileName)
+  if(!any(is_new)){
+    stop("Entries already exist in access.csv for fileNames: ",
+         paste(basename(fileNames), collapse = ", "))
+  }
+  newNames <- basename(fileNames[is_new])
+
+  # file_ext() is vectorised over its argument, so no apply call is needed
+  access <- tibble::add_row(access,
+                            fileName = newNames,
+                            name = newNames,
+                            contentUrl = NA,
+                            fileFormat = tools::file_ext(newNames))
+
+  readr::write_csv(access, access_path)
+  message("The following fileNames have been added to the access file: ",
+          paste(newNames, collapse = ", "))
+
+  invisible(access)
+}
+
diff --git a/README.Rmd b/README.Rmd index e17b56e..f49d61f 100644 --- a/README.Rmd +++ b/README.Rmd @@ -115,27 +115,28 @@ Completed metadata tables in this example will look like this: `access.csv` has one row for each file ```{r, echo=FALSE, message=FALSE} -readr::read_csv(system.file("metadata-tables/access.csv", package = "dataspice")) %>% head() %>% kable() +readr::read_csv(system.file("metadata-tables/access.csv", package = "dataspice")) %>% head() %>% kable(format = "markdown") ``` `attributes.csv` has one row for each variable in each file ```{r, echo=FALSE, message=FALSE} -readr::read_csv(system.file("metadata-tables/attributes.csv", package = "dataspice")) %>% head() %>% kable() +readr::read_csv(system.file("metadata-tables/attributes.csv", package = "dataspice")) %>% head() %>% kable(format = "markdown") ``` `biblio.csv` is one row containing descriptors including spatial and temporal coverage -```{r, echo=FALSE, message=FALSE} +```{r, echo=FALSE, message=FALSE, warning=FALSE} readr::read_csv(system.file("metadata-tables/biblio.csv", package = "dataspice")) %>% dplyr::mutate(description = str_trunc(description, 200, side = "right")) %>% - kable() + kable(format = "markdown") ``` `creators.csv` has one row for each of 
the dataset authors ```{r, echo=FALSE, message=FALSE} -readr::read_csv(system.file("metadata-tables/creators.csv", package = "dataspice")) %>% kable() +readr::read_csv(system.file("metadata-tables/creators.csv", package = "dataspice")) %>% + kable(format = "markdown") ``` diff --git a/README.md b/README.md index 3dd16a4..e60f10b 100644 --- a/README.md +++ b/README.md @@ -200,335 +200,120 @@ Completed metadata tables in this example will look like this: `access.csv` has one row for each file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-fileName - -name - -contentUrl - -fileFormat -
-StockInfo.csv - -StockInfo.csv - -NA - -CSV -
-BroodTables.csv - -BroodTables.csv - -NA - -CSV -
-SourceInfo.csv - -SourceInfo.csv - -NA - -CSV -
+| fileName | name | contentUrl | fileFormat | +|:----------------|:----------------|:-----------|:-----------| +| StockInfo.csv | StockInfo.csv | NA | CSV | +| BroodTables.csv | BroodTables.csv | NA | CSV | +| SourceInfo.csv | SourceInfo.csv | NA | CSV | + `attributes.csv` has one row for each variable in each file - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
-fileName - -variableName - -description - -unitText -
-BroodTables.csv - -Stock.ID - -Unique stock identifier - -NA -
-BroodTables.csv - -Species - -species of stock - -NA -
-BroodTables.csv - -Stock - -Stock name, generally river where stock is found - -NA -
-BroodTables.csv - -Ocean.Region - -Ocean region - -NA -
-BroodTables.csv - -Region - -Region of stock - -NA -
-BroodTables.csv - -Sub.Region - -Sub.Region of stock - -NA -
+| fileName | variableName | description | unitText | +|:----------------|:-------------|:-------------------------------------------------|:---------| +| BroodTables.csv | Stock.ID | Unique stock identifier | NA | +| BroodTables.csv | Species | species of stock | NA | +| BroodTables.csv | Stock | Stock name, generally river where stock is found | NA | +| BroodTables.csv | Ocean.Region | Ocean region | NA | +| BroodTables.csv | Region | Region of stock | NA | +| BroodTables.csv | Sub.Region | Sub.Region of stock | NA | + `biblio.csv` is one row containing descriptors including spatial and temporal coverage - +
+++++++++++++++++ - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + +
-title - -description - -datePublished - -citation - -keywords - -license - -funder - -geographicDescription - -northBoundCoord - -eastBoundCoord - -southBoundCoord - -westBoundCoord - -wktString - -startDate - -endDate -
titledescriptiondatePublishedcitationkeywordslicensefundergeographicDescriptionnorthBoundCoordeastBoundCoordsouthBoundCoordwestBoundCoordwktStringstartDateendDate
-Compiled annual statewide Alaskan salmon escapement counts, 1921-2017 - -The number of mature salmon migrating from the marine environment to freshwater streams is defined as escapement. Escapement data are the enumeration of these migrating fish as they pass upstream, ... - -2018-02-12 08:00:00 - -NA - -salmon, alaska, escapement - -NA - -NA - -NA - -78 - --131 - -47 - --171 - -NA - -1921-01-01 08:00:00 - -2017-01-01 08:00:00 -
Compiled annual statewide Alaskan salmon escapement counts, 1921-2017The number of mature salmon migrating from the marine environment to freshwater streams is defined as escapement. Escapement data are the enumeration of these migrating fish as they pass upstream, ...2018-02-12 08:00:00NAsalmon, alaska, escapementNANANA78-13147-171NA1921-01-01 08:00:002017-01-01 08:00:00
+ `creators.csv` has one row for each of the dataset authors +++++++ - - - - - - + + + + + + - - - - - - + + + + + + - - - - - - + + + + + +
-id - -givenName - -familyName - -affiliation - -email -
idgivenNamefamilyNameaffiliationemail
-NA - -Jeanette - -Clark - -National Center for Ecological Analysis and Synthesis - - -
NAJeanetteClarkNational Center for Ecological Analysis and Synthesisjclark@nceas.ucsb.edu
-NA - -Rich - -Brenner - -Alaska Department of Fish and Game - -richard.brenner.alaska.gov -
NARichBrennerAlaska Department of Fish and Gamerichard.brenner.alaska.gov
+ ### Save json-ld file - `write_spice()` generates a json-ld file ("linked data") to aid in [dataset discovery](https://developers.google.com/search/docs/data-types/dataset), creation of more extensive metadata (e.g. [EML](https://knb.ecoinformatics.org/#api)), and creating a website. diff --git a/dataspice.Rproj b/dataspice.Rproj index 497f8bf..270314b 100644 --- a/dataspice.Rproj +++ b/dataspice.Rproj @@ -18,3 +18,4 @@ StripTrailingWhitespace: Yes BuildType: Package PackageUseDevtools: Yes PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/man/eml_to_spice.Rd b/man/eml_to_spice.Rd new file mode 100644 index 0000000..4cedcd4 --- /dev/null +++ b/man/eml_to_spice.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml_to_spice.R +\name{eml_to_spice} +\alias{eml_to_spice} +\title{Get dataspice tabular formats from EML} +\usage{ +eml_to_spice(eml, path = NULL) +} +\arguments{ +\item{eml}{(emld) an EML object} + +\item{path}{(character) folder path for saving the table to disk} +} +\description{ +Return EML in the dataspice dataframes. +} +\examples{ +\dontrun{ +eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice") +eml <- read_eml(eml_path) +my_spice <- eml_to_spice(eml, ".") +} +} diff --git a/man/es_access.Rd b/man/es_access.Rd new file mode 100644 index 0000000..3b10851 --- /dev/null +++ b/man/es_access.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml_to_spice.R +\name{es_access} +\alias{es_access} +\title{Get access from EML} +\usage{ +es_access(eml, path = NULL) +} +\arguments{ +\item{eml}{(emld) an EML object} + +\item{path}{(character) folder path for saving the table to disk} +} +\description{ +Return EML access in the dataspice access.csv format. 
+} +\examples{ +\dontrun{ +eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice") +eml <- read_eml(eml_path) +es_access(eml) +} +} diff --git a/man/es_attributes.Rd b/man/es_attributes.Rd new file mode 100644 index 0000000..0af395c --- /dev/null +++ b/man/es_attributes.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml_to_spice.R +\name{es_attributes} +\alias{es_attributes} +\title{Get attributes from EML} +\usage{ +es_attributes(eml, path = NULL) +} +\arguments{ +\item{eml}{(emld) an EML object} + +\item{path}{(character) folder path for saving the table to disk} +} +\description{ +Return EML attributes in the dataspice attributes.csv format. +} +\examples{ +\dontrun{ +eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice") +eml <- read_eml(eml_path) +es_attributes(eml) +} +} diff --git a/man/es_biblio.Rd b/man/es_biblio.Rd new file mode 100644 index 0000000..6523a27 --- /dev/null +++ b/man/es_biblio.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml_to_spice.R +\name{es_biblio} +\alias{es_biblio} +\title{Get biblio from EML} +\usage{ +es_biblio(eml, path = NULL) +} +\arguments{ +\item{eml}{(emld) an EML object} + +\item{path}{(character) folder path for saving the table to disk} +} +\description{ +Return EML biblio in the dataspice biblio.csv format. 
+} +\examples{ +\dontrun{ +eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice") +eml <- read_eml(eml_path) +es_biblio(eml) +} +} diff --git a/man/es_creators.Rd b/man/es_creators.Rd new file mode 100644 index 0000000..dd532f8 --- /dev/null +++ b/man/es_creators.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/eml_to_spice.R +\name{es_creators} +\alias{es_creators} +\title{Get creators from EML} +\usage{ +es_creators(eml, path = NULL) +} +\arguments{ +\item{eml}{(emld) an EML object} + +\item{path}{(character) folder path for saving the table to disk} +} +\description{ +Return EML creators in the dataspice creators.csv format. +} +\examples{ +\dontrun{ +eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice") +eml <- read_eml(eml_path) +es_creators(eml) +} +} diff --git a/man/prep_access.Rd b/man/prep_access.Rd new file mode 100644 index 0000000..432e716 --- /dev/null +++ b/man/prep_access.Rd @@ -0,0 +1,20 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/prep_access.R +\name{prep_access} +\alias{prep_access} +\title{Prepare access} +\usage{ +prep_access(data_path = here::here("data"), access_path = here::here("data", + "metadata", "access.csv")) +} +\arguments{ +\item{data_path}{path to the data folder. Defaults to "data" and R 'data' file types} + +\item{access_path}{path to the access.csv file. Defaults to "data/metadata/access.csv".} +} +\value{ +the functions writes out the updated access.csv file to access_path. 
+}
+\description{
+Extract variableNames for a given data file and add them to the attributes.csv
+}
diff --git a/tests/testthat/test-eml_to_spice.R b/tests/testthat/test-eml_to_spice.R
new file mode 100644
index 0000000..3e4f33a
--- /dev/null
+++ b/tests/testthat/test-eml_to_spice.R
@@ -0,0 +1,137 @@
+context("test-eml_to_spice")
+library(dplyr)
+
+eml_path <- system.file("example-dataset/broodTable_metadata.xml", package = "dataspice")
+eml <- eml2::read_eml(eml_path)
+
+test_that("Access tabular format matches EML", {
+  eml_access <- es_access(eml)
+
+  objectNames <- eml2::eml_get(eml, "objectName") %>% paste(collapse = " ")
+  urls <- eml2::eml_get(eml, "url") %>% paste(collapse = " ")
+  formatNames <- eml2::eml_get(eml, "formatName") %>% paste(collapse = " ")
+
+  expect_true(all(stringr::str_detect(objectNames, eml_access$fileName),
+                  na.rm = TRUE))
+  expect_true(all(stringr::str_detect(urls, eml_access$contentUrl),
+                  na.rm = TRUE))
+  expect_true(all(stringr::str_detect(formatNames, eml_access$fileFormat),
+                  na.rm = TRUE))
+})
+
+
+test_that("Attributes tabular format matches EML", {
+  eml_attributes <- es_attributes(eml)
+
+  objectNames <- eml2::eml_get(eml, "objectName") %>% paste(collapse = " ")
+  attributeNames <- eml2::eml_get(eml, "attributeName") %>% paste(collapse = " ")
+
+  expect_true(all(stringr::str_detect(objectNames, eml_attributes$fileName),
+                  na.rm = TRUE))
+  expect_true(all(stringr::str_detect(attributeNames, eml_attributes$variableName),
+                  na.rm = TRUE))
+
+  standardUnits <- eml2::eml_get(eml, "standardUnit")
+  customUnits <- eml2::eml_get(eml, "customUnit")
+  formatStrings <- eml2::eml_get(eml, "formatString")
+  unitText <- paste(standardUnits, customUnits, formatStrings, collapse = " ")
+
+  expect_true(all(stringr::str_detect(unitText, eml_attributes$unitText),
+                  na.rm = TRUE))
+
+  #description = description + missing vals
+})
+
+#additional tests:
+#units match definitions/etc
+#entity names match attributes
+
+test_that("Biblio tabular format matches EML", {
+  eml_biblio <- es_biblio(eml)
+
+  #title
+  expect_equal(eml$dataset$title,
+               eml_biblio$title)
+
+  #date published
+  expect_equal(eml$dataset$pubDate,
+               eml_biblio$datePublished)
+
+  #license/intellectual rights
+  expect_equal(eml$dataset$intellectualRights[[1]],
+               eml_biblio$license)
+
+  #funding/funder
+  expect_equal(paste(unlist(eml$dataset$project$funding), collapse = "; ") %>% nchar(),
+               eml_biblio$funder %>% nchar())
+
+  #geographic coverage
+  expect_equal(eml$dataset$coverage$geographicCoverage$geographicDescription,
+               eml_biblio$geographicDescription)
+
+  #check coordinates via sum instead of one-by-one
+  expect_equal(eml$dataset$coverage$geographicCoverage$boundingCoordinates %>%
+                 unlist() %>%
+                 as.numeric() %>%
+                 sum(),
+               eml_biblio %>%
+                 select(contains("Coord")) %>%
+                 as.numeric() %>%
+                 sum())
+
+  #temporal coverage
+  expect_equal(eml$dataset$coverage$temporalCoverage$rangeOfDates$beginDate$calendarDate,
+               eml_biblio$startDate)
+  expect_equal(eml$dataset$coverage$temporalCoverage$rangeOfDates$endDate$calendarDate,
+               eml_biblio$endDate)
+})
+
+test_that("Creators tabular format matches EML", {
+  eml_creators <- es_creators(eml)
+
+  orcids <- eml2::eml_get(eml, "userId") %>% paste(collapse = " ")
+  givenNames <- eml2::eml_get(eml, "givenName") %>% paste(collapse = " ")
+  surNames <- eml2::eml_get(eml, "surName") %>% paste(collapse = " ")
+  affiliations <- eml2::eml_get(eml, "organizationName") %>% paste(collapse = " ")
+  emails <- eml2::eml_get(eml, "electronicMailAddress") %>% paste(collapse = " ")
+
+  # expect_true(all(stringr::str_detect(orcids, eml_creators$id),
+  #                 na.rm = TRUE))
+  # doesn't work if no orcids available
+
+  expect_true(all(stringr::str_detect(givenNames, eml_creators$givenName),
+                  na.rm = TRUE))
+
+  expect_true(all(stringr::str_detect(surNames, eml_creators$familyName),
+                  na.rm = TRUE))
+
+  expect_true(all(stringr::str_detect(affiliations, eml_creators$affiliation),
+                  na.rm = TRUE))
+
+  expect_true(all(stringr::str_detect(emails, eml_creators$email),
+                  na.rm = TRUE))
+})
+
+test_that("eml_to_spice files write to disk", {
+  # use a dedicated folder so that unrelated files in the session tempdir
+  # are neither matched nor removed
+  dir_path <- tempfile("eml_to_spice")
+  eml_to_spice(eml, dir_path)
+
+  expect_true(file.exists(file.path(dir_path, "access.csv")))
+  expect_true(file.exists(file.path(dir_path, "attributes.csv")))
+  expect_true(file.exists(file.path(dir_path, "biblio.csv")))
+  expect_true(file.exists(file.path(dir_path, "creators.csv")))
+
+  unlink(dir_path, recursive = TRUE)
+})
+
+test_that("eml_to_spice returns a list of tibbles", {
+  spice_ex <- eml_to_spice(eml)
+
+  expect_equal(length(spice_ex), 4)
+
+  tbl_lgl <- spice_ex %>% purrr::map(class) %>% purrr::map(~"tbl" %in% .)
+  expect_true(all(unlist(tbl_lgl)))
+})
+