1 change: 1 addition & 0 deletions NAMESPACE
@@ -3,6 +3,7 @@
export(add_list)
export(add_pkg)
export(add_repo)
export(add_tar_index)
export(build)
export(file_packager)
export(make_library)
8 changes: 7 additions & 1 deletion NEWS.md
@@ -1,6 +1,12 @@
# rwasm (development version)

* Support for a new `compression` argument in `build()`, `add_pkg()`, `make_vfs_library()`, and other related functions. When enabled, VFS images will be compressed using `gzip`. Note: Loading compressed VFS images requires at least version 0.4.1 of webR (#39).
## New features

* When building R packages with `compress` set to `TRUE`, use the binary R package `.tgz` file for the Emscripten filesystem image data and generate custom metadata rather than using Emscripten's `file_packager` tool.

* Support for a new `compress` argument in `file_packager()`, `make_vfs_library()`, and other related functions. When enabled, VFS images will be compressed using `gzip` (#39).

Note: Mounting processed `.tgz` archives or compressed VFS images requires at least version 0.4.2 of webR.

# rwasm 0.1.0

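As a hedged illustration of the second entry above: the directory and file names are hypothetical, and the argument order mirrors the internal call in `wasm_build()` shown later in this diff. Mounting the processed `.tgz` archives or compressed VFS images requires webR 0.4.2 or later.

```r
library(rwasm)

# Emits the usual .data and .js.metadata image files for the directory
# contents; with compress = TRUE an additional gzip-compressed .data.gz
# file is written alongside them
file_packager("./extracted/cli", "./repo/bin", "cli_3.6.3.tgz", compress = TRUE)
```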
27 changes: 16 additions & 11 deletions R/build.R
@@ -18,7 +18,7 @@ build <- function(packages,
out_dir = ".",
remotes = NULL,
dependencies = FALSE,
compress = FALSE) {
compress = TRUE) {
tmp_dir <- tempfile()
on.exit(unlink(tmp_dir, recursive = TRUE))
dir.create(tmp_dir)
@@ -215,16 +215,21 @@ wasm_build <- function(pkg, tarball_path, contrib_bin, compress) {
bin_dest <- fs::path(contrib_bin, paste0(pkg, "_", bin_ver, ".tgz"))
fs::file_copy(bin_path, bin_dest, overwrite = TRUE)

# Build an Emscripten filesystem image for the package
tmp_bin_dir <- fs::path(tempfile())
on.exit(unlink(tmp_bin_dir, recursive = TRUE), add = TRUE)
untar(bin_dest, exdir = tmp_bin_dir)
file_packager(
fs::dir_ls(tmp_bin_dir)[[1]],
contrib_bin,
fs::path_file(bin_dest),
compress
)
if (compress) {
# Use binary .tgz file to build Emscripten filesystem image metadata
add_tar_index(bin_dest, strip = 1)
} else {
# Build an uncompressed Emscripten filesystem image for the package
tmp_bin_dir <- fs::path(tempfile())
on.exit(unlink(tmp_bin_dir, recursive = TRUE), add = TRUE)
untar(bin_dest, exdir = tmp_bin_dir)
file_packager(
fs::dir_ls(tmp_bin_dir)[[1]],
contrib_bin,
fs::path_file(bin_dest),
compress = FALSE
)
}

invisible(NULL)
}
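A brief sketch of how the branch above plays out from the exported `build()` function; the package name is chosen purely for illustration.

```r
library(rwasm)

# Default: the built binary .tgz is post-processed with add_tar_index(),
# embedding virtual filesystem metadata directly in the archive
build("cli")

# Opt out: extract the binary and generate an uncompressed filesystem
# image with Emscripten's file_packager tool, as before
build("cli", compress = FALSE)
```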
14 changes: 9 additions & 5 deletions R/lib.R
@@ -48,13 +48,14 @@ make_library <- function(repo_dir = "./repo", lib_dir = "./lib", strip = NULL) {
#'
#' Each filesystem image is generated using Emscripten's [file_packager()] tool
#' and the output `.data` and `.js.metadata` filesystem image files are written
#' to the repository in the same directory as the package binary `.tar.gz`
#' files.
#' to the repository in the same directory as the package binary `.tgz` files.
#'
#' The resulting filesystem images may then be used by webR to download and
#' install R packages faster by mounting the `.data` images to the Emscripten
#' virtual filesystem, rather than decompressing and extracting the equivalent
#' `.tar.gz` files.
#' install R packages by mounting the `.data` images to the Emscripten virtual
#' filesystem.
#'
#' When `compress` is `TRUE`, an additional file with extension `".data.gz"` is
#' also output containing a compressed version of the filesystem data.
#'
#' @inheritParams add_pkg
#'
@@ -100,6 +101,9 @@ make_vfs_repo <- function(repo_dir = "./repo", compress = FALSE) {
#' tool and the output `.data` and `.js.metadata` filesystem image files are
#' written to the directory `out_dir`.
#'
#' When `compress` is `TRUE`, an additional file with extension `".data.gz"` is
#' also output containing a compressed version of the filesystem data.
#'
#' The resulting image can be downloaded by webR and mounted on the Emscripten
#' virtual filesystem as an efficient way to provide a pre-configured R library,
#' without installing each R package individually.
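A minimal sketch of the behaviour documented above, assuming `./repo` already contains wasm package binaries; the placeholder `<name>` stands in for each package's image file name.

```r
library(rwasm)

# For each package binary, write <name>.data and <name>.js.metadata image
# files next to the .tgz; with compress = TRUE a gzip-compressed
# <name>.data.gz image is written as well
make_vfs_repo(repo_dir = "./repo", compress = TRUE)
```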
18 changes: 10 additions & 8 deletions R/repo.R
@@ -76,20 +76,22 @@ add_list <- function(list_file, ...) {
#' source. Defaults to `NA`, meaning prefer a built-in list of references to
#' packages pre-modified for use with webR.
#' @param dependencies Dependency specification for packages to additionally
#' add to the repository. Defaults to `FALSE`, meaning no additional packages.
#' Use `NA` to install only hard dependencies whereas `TRUE` installs all
#' optional dependencies as well. See [pkgdepends::as_pkg_dependencies]
#' for details.
#' @inheritParams file_packager
#'
#' add to the repository. Defaults to `FALSE`, meaning no additional packages.
#' Use `NA` to install only hard dependencies whereas `TRUE` installs all
#' optional dependencies as well. See [pkgdepends::as_pkg_dependencies]
#' for details.
#' @param compress When `TRUE`, add and compress Emscripten virtual filesystem
#' metadata in the resulting R package binary `.tgz` files. Otherwise,
#' [file_packager()] is used to create uncompressed virtual filesystem images
#' included in the output binary package repository. Defaults to `TRUE`.
#' @importFrom dplyr rows_update select
#' @importFrom pkgdepends new_pkg_download_proposal
#' @export
add_pkg <- function(packages,
repo_dir = "./repo",
remotes = NA,
dependencies = FALSE,
compress = FALSE) {
compress = TRUE) {
# Set up pkgdepends configuration
config <- ppm_config
config$dependencies <- dependencies
@@ -185,7 +187,7 @@ prefer_remotes <- function(package_info, remotes = NA) {
update_repo <- function(package_info,
remotes = NA,
repo_dir = "./repo",
compress = FALSE) {
compress = TRUE) {
r_version <- R_system_version(getOption("rwasm.webr_version"))

writeLines(sprintf("Processing %d package(s).", nrow(package_info)))
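To make the new `compress` parameter documented above concrete, a sketch of the two modes of `add_pkg()`; the package name is illustrative.

```r
library(rwasm)

# Default: binary .tgz packages gain embedded VFS metadata via add_tar_index()
add_pkg("cli", repo_dir = "./repo")

# Legacy behaviour: plain .tgz binaries accompanied by uncompressed
# file_packager() virtual filesystem images
add_pkg("cli", repo_dir = "./repo", compress = FALSE)
```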
229 changes: 229 additions & 0 deletions R/tar.R
@@ -0,0 +1,229 @@
#' Add Emscripten virtual filesystem metadata to a given `tar` archive
#'
#' Calculates file offsets and other metadata for content stored in an
#' (optionally gzip compressed) `tar` archive. Once added, the `tar` archive
#' with metadata can be mounted as an Emscripten filesystem image, making the
#' contents of the archive available to the WebAssembly R process.
#'
#' The virtual filesystem metadata is appended to the end of the `tar` archive,
#' with the output replacing the original file. The resulting archive should be
#' hosted online so that its URL can be provided to webR for mounting on the
#' virtual filesystem.
#'
#' If `strip` is greater than `0`, the virtual filesystem metadata is generated
#' such that, when mounted by webR, the specified number of leading path
#' elements is removed. This is useful for R package binaries, where data files
#' are stored in the original `.tgz` file under a subdirectory. Files with
#' fewer path elements than the specified number are skipped.
#'
#' @param file Filename of the `tar` archive for which metadata is to be added.
#' @param strip Remove the specified number of leading path elements when
#' mounting with webR. Defaults to `0`.
#' @export
add_tar_index <- function(file, strip = 0) {
file <- fs::path_norm(file)
file_ext <- tolower(fs::path_ext(file))
file_base <- fs::path_ext_remove(file)

message(paste("Appending virtual filesystem metadata for:", file))

# Check if our tar is compatible
if (!any(file_ext == c("tgz", "gz", "tar"))) {
stop(paste0("Can't make index for \"", file,
"\". Only uncompressed or `gzip` compressed tar files can be indexed."))
}

# Handle two-component extensions
if (file_ext == "gz") {
file_base <- fs::path_ext_remove(file_base)
}

# Read archive contents, decompressing if necessary
gzip <- any(file_ext == c("tgz", "gz"))
data <- readBin(file, "raw", n = file.size(file))
if (gzip) {
data <- memDecompress(data)
}

# Build metadata from source .tar file
con <- rawConnection(data, open = "rb")
on.exit(close(con), add = TRUE)
entries <- read_tar_offsets(con, strip)
tar_end <- seek(con)

metadata <- list(
files = entries,
gzip = gzip,
remote_package_size = length(data)
)

# Add metadata as additional .tar entry
entry <- create_metadata_entry(metadata)
json_block <- as.integer(tar_end / 512) + 1L

# Append additional metadata hint for webR
magic <- charToRaw('webR')
reserved <- raw(4) # reserved for future use
block <- writeBin(json_block, raw(), size = 4, endian = "big")
len <- writeBin(entry$length, raw(), size = 4, endian = "big")
hint <- c(magic, reserved, block, len)

# Build new .tar archive data
data <- c(data[1:tar_end], entry$data, raw(1024), hint)

# Write output and move into place
out <- tempfile()
out_con <- if (gzip) {
gzfile(out, open = "wb", compression = 9)
} else {
file(out, open = "wb")
}
writeBin(data, out_con, size = 1L)
close(out_con)
fs::file_copy(out, file, overwrite = TRUE)
}
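As a standalone usage sketch of `add_tar_index()`: the path is hypothetical, and `strip = 1` mirrors the call in `wasm_build()`, dropping the leading `<pkg>/` directory inside the binary archive when it is mounted.

```r
library(rwasm)

# Append virtual filesystem metadata to an existing gzip-compressed package
# binary, replacing the file in place
add_tar_index("./repo/bin/cli_3.6.3.tgz", strip = 1)
```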

create_metadata_entry <- function(metadata) {
# metadata contents
json <- charToRaw(jsonlite::toJSON(metadata, auto_unbox = TRUE))
len <- length(json)
blocks <- ceiling(len/512)
length(json) <- 512 * blocks

# entry header
timestamp <- as.integer(Sys.time())
header <- raw(512)
header[1:15] <- charToRaw('.vfs-index.json') # filename
header[101:108] <- charToRaw('0000644 ') # mode
header[109:116] <- charToRaw('0000000 ') # uid
header[117:124] <- charToRaw('0000000 ') # gid
header[125:136] <- charToRaw(sprintf("%011o ", len)) # length
header[137:148] <- charToRaw(sprintf("%011o ", timestamp)) # timestamp
header[149:156] <- charToRaw(' ') # placeholder
header[157:157] <- charToRaw('0') # type
header[258:262] <- charToRaw('ustar') # ustar magic
header[264:265] <- charToRaw('00') # ustar version
header[266:269] <- charToRaw('root') # user
header[298:302] <- charToRaw('wheel') # group

# populate checksum field
checksum <- raw(8)
checksum[1:6] <- charToRaw(sprintf("%06o", sum(as.integer(header))))
checksum[8] <- charToRaw(' ')
header[149:156] <- checksum

list(data = c(header, json), length = len)
}
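For illustration only (not part of this PR), a sketch that reads the metadata back out of a processed archive, relying solely on the layout written above: a trailing 16-byte hint of `webR` magic, 4 reserved bytes, and two big-endian 32-bit integers giving the 512-byte block index of the JSON payload and its length.

```r
# A sketch (not part of the package) for reading the appended metadata back,
# assuming the layout produced by add_tar_index() above
read_tar_index <- function(file) {
  data <- readBin(file, "raw", n = file.size(file))
  # Decompress gzip archives (magic bytes 1f 8b) before inspecting the tail
  if (identical(data[1:2], as.raw(c(0x1f, 0x8b)))) {
    data <- memDecompress(data, type = "gzip")
  }
  hint <- data[(length(data) - 15):length(data)]
  stopifnot(identical(hint[1:4], charToRaw("webR")))
  block <- readBin(hint[9:12], "integer", size = 4, endian = "big")
  len <- readBin(hint[13:16], "integer", size = 4, endian = "big")
  # The stored block index points at the 512-byte block holding the JSON
  start <- block * 512 + 1
  jsonlite::fromJSON(rawToChar(data[start:(start + len - 1)]))
}
```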

read_tar_offsets <- function(con, strip) {
entries <- list()
next_filename <- NULL

while (TRUE) {
# Read tar entry header block
header <- readBin(con, "raw", n = 512)

# Basic tar filename
filename <- rawToChar(header[1:100])

# Empty header indicates end of archive, early exit for existing metadata
if (all(header == 0) || filename == ".vfs-index.json") {
# Return connection position to just before this header
seek(con, -512, origin = "current")
break
}

# Entry size and offset
offset <- seek(con)
size <- strtoi(sub("\\s.*", "", rawToChar(header[125:136])), 8)
file_blocks <- ceiling(size / 512)

# Skip directories, global, and vendor-specific extended headers
type <- rawToChar(header[157])
if (grepl("5|g|[A-Z]", type)) {
next
}

# Handle PAX extended header
if (type == "x") {
pax_data <- readBin(con, "raw", n = 512 * ceiling(size / 512))
pax_data <- pax_data[1:max(which(pax_data != as.raw(0x00)))]
lines <- raw_split(pax_data, "\n")
for (line in lines) {
payload <- raw_split(line, " ")[[2]]
kv <- raw_split(payload, "=")
if (rawToChar(kv[[1]]) == "path") {
next_filename <- rawToChar(kv[[2]])
break
}
}
next
}

# Apply ustar formatted extended filename
magic <- rawToChar(header[258:263])
if (magic == "ustar"){
prefix <- rawToChar(header[346:501])
filename <- paste(prefix, filename, sep = "/")
}

# Apply PAX formatted extended filename
if (!is.null(next_filename)) {
filename <- next_filename
next_filename <- NULL
}

# Strip path elements, ignoring leading slash, skip if no path remains
if (strip > 0) {
filename <- gsub("^/", "", filename)
parts <- fs::path_split(filename)[[1]]
parts <- parts[-strip:-1]
if (length(parts) == 0) {
seek(con, 512 * file_blocks, origin = "current")
next
}
filename <- fs::path_join(c("/", parts))
}

# Calculate file offsets
entry <- list(filename = filename, start = offset, end = offset + size)

# Deal with hard and symbolic links
if (grepl("1|2", type)) {
link_name <- rawToChar(header[158:257])
if (type == "2") {
link_name <- fs::path_norm(fs::path(fs::path_dir(filename), link_name))
}
link_entry <- Find(\(e) e$filename == link_name, entries)
entry$start = link_entry$start
entry$end = link_entry$end
file_blocks <- 0
}

entries <- append(entries, list(entry))

# Skip to next entry header
seek(con, 512 * file_blocks, origin = "current")
}
entries
}

# Split the elements of a raw vector x according to matches of element `split`
raw_split <- function(x, split) {
if (is.character(split)) {
split <- charToRaw(split)
}

start <- 1
out <- list()
for (end in which(x == split)) {
out <- c(out, list(x[start:(end - 1)]))
start <- end + 1
}

if (start <= length(x)) {
out <- c(out, list(x[start:length(x)]))
}

out
}
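A quick worked example of the helper above, useful when reasoning about the PAX extended header parsing in `read_tar_offsets()`; the record content is made up for illustration.

```r
# A PAX record has the form "<length> <key>=<value>", split here in two steps
line <- charToRaw("24 path=cli/DESCRIPTION")
payload <- raw_split(line, " ")[[2]]   # raw bytes of "path=cli/DESCRIPTION"
kv <- raw_split(payload, "=")          # list of "path" and "cli/DESCRIPTION"
rawToChar(kv[[2]])
#> [1] "cli/DESCRIPTION"
```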
3 changes: 2 additions & 1 deletion _pkgdown.yml
@@ -1,4 +1,5 @@
url: https://r-wasm.github.io/rwasm/
template:
bootstrap: 5

deploy:
install_metadata: true
13 changes: 13 additions & 0 deletions inst/pkgdown.yml
@@ -0,0 +1,13 @@
pandoc: '3.2'
pkgdown: 2.0.9.9000
pkgdown_sha: 34ee692e4ce10c8abfb863cc782da771838558f7
articles:
github-actions: github-actions.html
mount-fs-image: mount-fs-image.html
mount-host-dir: mount-host-dir.html
rwasm: rwasm.html
tar-metadata: tar-metadata.html
last_built: 2024-09-10T15:29Z
urls:
reference: https://r-wasm.github.io/rwasm/reference
article: https://r-wasm.github.io/rwasm/articles
6 changes: 4 additions & 2 deletions man/add_list.Rd

Some generated files are not rendered by default.