Skip to content

Commit

Permalink
Merge pull request #14 from mrc-ide/mrc-5020
Browse files Browse the repository at this point in the history
Add orderly db metadata
  • Loading branch information
r-ash committed Feb 5, 2024
2 parents e8cb4cd + 6742db9 commit 653e5fa
Show file tree
Hide file tree
Showing 5 changed files with 191 additions and 19 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: outpack.orderly
Title: Orderly to outpack metadata migration
Version: 0.1.2
Version: 0.1.3
Authors@R: c(person("Rich", "FitzJohn", role = c("aut", "cre"),
email = "rich.fitzjohn@gmail.com"),
person("Imperial College of Science, Technology and Medicine",
Expand Down
98 changes: 81 additions & 17 deletions R/migrate.R
Original file line number Diff line number Diff line change
Expand Up @@ -106,28 +106,14 @@ orderly_metadata_to_outpack <- function(path, hash_algorithm) {
found <- dir(path, recursive = TRUE, all.files = TRUE, no.. = TRUE)
extra <- setdiff(found, c(files, ignore))

if (is.null(data$meta$depends)) {
depends <- NULL
} else {
## Seen in rtm_incoming_serology/20200603-204022-70b8bfa5
if (is.null(data$meta$depends$index)) {
data$meta$depends$index <- as.integer(factor(data$meta$depends$id))
}
depends <- unname(lapply(
split(data$meta$depends, data$meta$depends$index), function(x) {
list(packet = x$id[[1]],
query = x$id_requested[[1]],
files = data_frame(here = x$as,
there = x$filename))
}))
}

parameters <- data$meta$parameters
if (inherits(parameters, "data.frame")) {
## Seen in native-201910-201710-compare-impact/20200603-103158-9a8cb992
parameters <- as.list(parameters)
}

depends <- archive_migrate_depends(data$meta$depends, names(parameters))

script <- data$meta$file_info_inputs$filename[
data$meta$file_info_inputs$file_purpose == "script"]
session <- orderly2:::orderly_session_info(data$session_info)
Expand Down Expand Up @@ -160,14 +146,16 @@ orderly_metadata_to_outpack <- function(path, hash_algorithm) {
role$role[role$role == "global"] <- "shared"

custom <- data$meta$extra_fields
custom <- custom[!vlapply(custom, function(x) is.null(x) || is.na(x))]
if (!is.null(custom)) {
custom <- lapply(custom, scalar)
}

orderly_db <- orderly_db_metadata_to_outpack(path, data)

oo <- options(outpack.schema_validate = TRUE)
on.exit(options(oo))

## TODO: also get the orderly.db bits added here.
orderly <- list(
artefacts = artefacts,
shared = shared,
Expand All @@ -180,6 +168,12 @@ orderly_metadata_to_outpack <- function(path, hash_algorithm) {
orderly_json <- orderly2:::to_json(orderly, "orderly/orderly.json")

custom <- list(list(application = "orderly", data = orderly_json))
if (!is.null(orderly_db)) {
orderly_db_json <- orderly2:::to_json(orderly_db,
"orderly.db/orderly.db.json")
custom$orderly.db <- list(application = "orderly.db",
data = orderly_db_json)
}
json <- orderly2:::outpack_metadata_create(
path = path, name = name, id = id, time = time, files = files,
depends = depends, parameters = parameters, custom = custom,
Expand All @@ -202,3 +196,73 @@ check_complete_tree <- function(path) {
stop("orderly graph is incomplete")
}
}


orderly_db_metadata_to_outpack <- function(path, data) {
ret <- list()

view <- data$meta$view
if (!is.null(view)) {
ret$view <- lapply(seq_len(nrow(view)), function(i) {
database <- view$database[[i]]
list(database = scalar(database),
instance = scalar(data$meta$instance[[database]]),
as = scalar(view$name[[i]]),
query = scalar(view$query[[i]]))
})
}

query <- data$meta$data
if (!is.null(query)) {
path_data <- file.path(dirname(dirname(dirname(path))), "data/rds")
ret$query <- lapply(seq_len(nrow(query)), function(i) {
d <- readRDS(file.path(path_data, paste0(query$hash[[i]], ".rds")))
database <- query$database[[i]]
list(database = scalar(database),
instance = scalar(data$meta$instance[[database]]),
name = scalar(query$name[[i]]),
query = scalar(query$query[[i]]),
rows = scalar(nrow(d)),
cols = names(d))
})
}

connection <- data$meta$connection
if (isTRUE(connection)) {
## Turns out we never saved this information properly anyway:
yml <- orderly1:::yaml_read(file.path(path, "orderly.yml"))
config <- orderly1::orderly_config(file.path(path, "../../.."), FALSE)
con <- orderly1:::recipe_migrate(yml, config, filename)$connection
ret$connection <- lapply(unname(con), function(database) {
list(database = scalar(database),
instance = scalar(data$meta$instance[[database]]))
})
}

if (length(ret) == 0) NULL else ret
}


archive_migrate_depends <- function(depends, parameters) {
if (is.null(depends)) {
return(NULL)
}
## Seen in rtm_incoming_serology/20200603-204022-70b8bfa5
if (is.null(depends$index)) {
depends$index <- as.integer(factor(depends$id))
}
unname(lapply(
split(depends, depends$index), function(x) {
if (x$id_requested %in% c("latest", "latest()")) {
query <- sprintf('latest(name == "%s")', x$name)
} else if (grepl("latest\\(", x$id_requested)) {
str <- sub(")$", sprintf(' && name == "%s")', x$name), x$id_requested)
query <- src_migrate_query(str, parameters)
} else {
query <- x$id_requested[[1]]
}
list(packet = x$id[[1]],
query = query,
files = data_frame(here = x$as, there = x$filename))
}))
}
2 changes: 1 addition & 1 deletion R/src.R
Original file line number Diff line number Diff line change
Expand Up @@ -280,7 +280,7 @@ src_migrate_db_data <- function(cfg, dat) {
ret <- character(0)
for (i in names(dat$data)) {
x <- dat$data[[i]]
args <- c(query = x$query)
args <- c(query = x$query, name = i)
if (!is.null(x$database)) {
args[["database"]] <- x$database
}
Expand Down
49 changes: 49 additions & 0 deletions tests/testthat/test-migrate.R
Original file line number Diff line number Diff line change
Expand Up @@ -134,3 +134,52 @@ test_that("can update archive", {
root = dst)
expect_true(id %in% ids)
})


test_that("dependency migration", {
depends <- data.frame(
id = "a",
index = 1,
name = "foo",
id_requested = "latest",
as = "here.csv",
filename = "there.csv")
expect_equal(
archive_migrate_depends(depends, NULL),
list(list(packet = "a",
query = 'latest(name == "foo")',
files = data_frame(here = "here.csv", there = "there.csv"))))
})


test_that("dependency migration with fixed id", {
depends <- data.frame(
id = "a",
index = 1,
name = "foo",
id_requested = "20240202-111943-fa53e980",
as = "here.csv",
filename = "there.csv")
expect_equal(
archive_migrate_depends(depends, NULL),
list(list(packet = "a",
query = "20240202-111943-fa53e980",
files = data_frame(here = "here.csv", there = "there.csv"))))
})


test_that("dependency migration with fancy query", {
depends <- data.frame(
id = "a",
index = 1,
name = "foo",
id_requested = "latest(x == parameter:y)",
as = "here.csv",
filename = "there.csv")
parameters <- list(x = 1)
expect_equal(
archive_migrate_depends(depends, "x"),
list(list(packet = "a",
query = 'latest(this:x == parameter:y && name == "foo")',
files = data_frame(here = "here.csv", there = "there.csv"))))
})
59 changes: 59 additions & 0 deletions tests/testthat/test-zzz-integration.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
test_that("migrating source then archive is same as archive then source", {
src1 <- orderly_demo_archive()
suppressMessages(dst1 <- orderly2outpack(src1, tempfile()))

src2 <- orderly_demo_src()
suppressMessages(orderly2outpack_src(src2, delete_yml = TRUE, strict = TRUE))
dat <- orderly1:::read_demo_yml(src2)
for (i in seq_along(dat)) {
x <- dat[[i]]
if (!is.null(x$before)) {
withr::with_dir(src2, x$before())
}
env <- new.env(parent = .GlobalEnv)
expect_no_error(suppressMessages(
dat[[i]]$id <- orderly2::orderly_run(x$name, x$parameters, envir = env,
root = src2, echo = FALSE)))
}

## Need to copy over configuration to the archive migration, because
## otherwise we don't get custom deserialisers for orderly.db, which
## is a bit annoying.
file.copy(file.path(src2, "orderly_config.yml"),
file.path(dst1, "orderly_config.yml"), overwrite = TRUE)

id1 <- sort(dir(file.path(dst1, ".outpack", "metadata")))
id2 <- vcapply(dat, "[[", "id")
expect_length(id2, length(id1))

for (i in seq_along(id1)) {
meta1 <- orderly2::orderly_metadata(id1[[i]], root = dst1)
meta2 <- orderly2::orderly_metadata(id2[[i]], root = src2)
expect_setequal(names(meta1), names(meta2))
expect_equal(meta1$schema_version, meta2$schema_version)
expect_equal(meta1$name, meta2$name)
expect_equal(meta1$parameters, meta2$parameters)

## Can't check id, files, time as we don't expect these to be the
## same. The git cases are empty, and depends needs some work to
## remap ids.
depends2 <- meta2$depends
depends2$packet <- id1[match(depends2$packet, id2)]
expect_equal(meta1$depends, depends2)

expect_equal(meta1$git, meta2$git)

## custom metadata
expect_setequal(names(meta1$custom), names(meta2$custom))

orderly1 <- meta1$custom$orderly
orderly2 <- meta2$custom$orderly
expect_setequal(names(orderly1), names(orderly2))
expect_equal(orderly1$artefacts, orderly2$artefacts)
expect_equal(orderly1$shared, orderly2$shared)
expect_equal(orderly1$description, orderly2$description)
## Can't check role or session; these are expected to differ.

expect_equal(meta1$custom$orderly.db, meta2$custom$orderly.db)
}
})

0 comments on commit 653e5fa

Please sign in to comment.