feat(R microkernel): Handle conversion of data frames and plots
nokome committed Dec 6, 2021
1 parent 9ce3329 commit 2af6ad0
Showing 2 changed files with 286 additions and 28 deletions.
191 changes: 166 additions & 25 deletions rust/kernel-r/src/lib.rs
@@ -16,25 +16,34 @@ pub fn new() -> MicroKernel {
#[cfg(test)]
mod tests {
use super::*;
use kernel::{eyre::Result, stencila_schema::Node, KernelTrait};
use test_utils::{assert_json_eq, serde_json::json, skip_ci_os};
use kernel::{
eyre::{bail, Result},
stencila_schema::Node,
KernelTrait,
};
use test_utils::{assert_json_eq, serde_json::json};

async fn skip_or_kernel() -> Result<MicroKernel> {
let mut kernel = new();
if !kernel.available().await {
eprintln!("R not available on this machine");
bail!("Skipping")
} else {
kernel.start().await?;
}

Ok(kernel)
}

/// Tests of basic functionality
/// This test is replicated in all the microkernels.
/// Other tests should be written for language-specific quirks and regressions.
#[tokio::test]
async fn basics() -> Result<()> {
if skip_ci_os("linux", "Failing on Linux CI for unknown reasons")
{
return Ok(());
}

let mut kernel = new();
if !kernel.available().await {
return Ok(());
} else {
kernel.start().await?;
}
let mut kernel = match skip_or_kernel().await {
Ok(kernel) => kernel,
Err(..) => return Ok(()),
};

// Assign a variable and output it
let (outputs, messages) = kernel.exec("a = 2\na").await?;
@@ -77,18 +86,10 @@ mod tests {
/// Test that an assignment on the last line does not generate an output
#[tokio::test]
async fn assignment_no_output() -> Result<()> {
if skip_ci_os("linux", "Failing on Linux CI for unknown reasons")
|| skip_ci_os("macos", "Hanging on Mac CI")
{
return Ok(());
}

let mut kernel = new();
if !kernel.available().await {
return Ok(());
} else {
kernel.start().await?;
}
let mut kernel = match skip_or_kernel().await {
Ok(kernel) => kernel,
Err(..) => return Ok(()),
};

let (outputs, messages) = kernel.exec("a <- 1").await?;
assert!(messages.is_empty());
@@ -104,4 +105,144 @@

Ok(())
}

#[tokio::test]
async fn encode_general() -> Result<()> {
let mut kernel = match skip_or_kernel().await {
Ok(kernel) => kernel,
Err(..) => return Ok(()),
};

// Null, booleans, integers, numbers, strings
let (outputs, messages) = kernel
.exec("list(NULL, TRUE, FALSE, 1, 1.23456789, 'str')")
.await?;
assert_json_eq!(messages, json!([]));
assert_json_eq!(
outputs,
json!([[null, [true], [false], [1], [1.23456789], ["str"]]])
);

// Arrays
let (outputs, messages) = kernel.exec("1:5").await?;
assert_json_eq!(messages, json!([]));
assert_json_eq!(outputs, [[1, 2, 3, 4, 5]]);

// Objects
let (outputs, messages) = kernel.exec("list(a=1, b=list(c=2))").await?;
assert_json_eq!(messages, json!([]));
assert_json_eq!(outputs, json!([{"type": "Entity"}]));
// TODO: correct output when deserialization of Node::Object is working
//assert_json_eq!(outputs, json!([{"a": [1], "b": {"c": [2]}}]));

// Matrix
let (outputs, messages) = kernel.exec("matrix(c(1:4), 2, 2)").await?;
assert_json_eq!(messages, json!([]));
assert_json_eq!(outputs, [[[1, 3], [2, 4]]]);

Ok(())
}

#[tokio::test]
async fn encode_dataframes() -> Result<()> {
let mut kernel = match skip_or_kernel().await {
Ok(kernel) => kernel,
Err(..) => return Ok(()),
};

let (outputs, messages) = kernel
.exec(
r#"data.frame(
a = 1:2,
b = c(TRUE, FALSE),
c = c("x", "y"),
d = factor(c("X", "Y"), levels = c("X", "Y", "Z")),
stringsAsFactors = FALSE
)"#,
)
.await?;
assert_json_eq!(messages, json!([]));
let dt = match &outputs[0] {
Node::Datatable(dt) => dt.clone(),
_ => bail!("unexpected type {:?}", outputs[0]),
};
assert_eq!(
dt.columns
.iter()
.map(|column| column.name.as_str())
.collect::<Vec<&str>>(),
vec!["a", "b", "c", "d"]
);
assert_json_eq!(
dt.columns[0].validator.as_ref().unwrap().items_validator,
json!({ "type": "NumberValidator"})
);
assert_json_eq!(
dt.columns[1].validator.as_ref().unwrap().items_validator,
json!({ "type": "BooleanValidator"})
);
assert_json_eq!(
dt.columns[2].validator.as_ref().unwrap().items_validator,
json!({ "type": "StringValidator"})
);
assert_json_eq!(
dt.columns[3].validator.as_ref().unwrap().items_validator,
json!({
"type": "EnumValidator",
"values": ["X", "Y", "Z"]
})
);

let (outputs, messages) = kernel.exec("mtcars").await?;
assert_json_eq!(messages, json!([]));
let dt = match &outputs[0] {
Node::Datatable(dt) => dt.clone(),
_ => bail!("unexpected type {:?}", outputs[0]),
};
assert_eq!(
dt.columns
.iter()
.map(|column| column.name.as_str())
.collect::<Vec<&str>>(),
vec![
"name", "mpg", "cyl", "disp", "hp", "drat", "wt", "qsec", "vs", "am", "gear",
"carb"
]
);
assert_json_eq!(
dt.columns[0].validator.as_ref().unwrap().items_validator,
json!({ "type": "StringValidator"})
);
assert_json_eq!(
dt.columns[1].validator.as_ref().unwrap().items_validator,
json!({ "type": "NumberValidator"})
);

let (outputs, messages) = kernel.exec("chickwts").await?;
assert_json_eq!(messages, json!([]));
let dt = match &outputs[0] {
Node::Datatable(dt) => dt.clone(),
_ => bail!("unexpected type {:?}", outputs[0]),
};
assert_eq!(
dt.columns
.iter()
.map(|column| column.name.as_str())
.collect::<Vec<&str>>(),
vec!["weight", "feed"]
);
assert_json_eq!(
dt.columns[0].validator.as_ref().unwrap().items_validator,
json!({ "type": "NumberValidator"})
);
assert_json_eq!(
dt.columns[1].validator.as_ref().unwrap().items_validator,
json!({
"type": "EnumValidator",
"values": ["casein", "horsebean", "linseed", "meatmeal", "soybean", "sunflower"]
})
);

Ok(())
}
}
123 changes: 120 additions & 3 deletions rust/kernel-r/src/r-codec.r
@@ -1,14 +1,131 @@
if (!suppressPackageStartupMessages(require("jsonlite", quietly=TRUE)))
install.packages("jsonlite")
install.packages("jsonlite")

# Decode JSON to a R value
decode_value <- function(json) {
jsonlite::fromJSON(json)
fromJSON(json)
}

# Encode an R value to JSON
encode_value <- function(value) {
jsonlite::toJSON(value)
converted <- convert_value(value)
toJSON(converted, null = "null", digits = NA, force = TRUE)
}

# Convert a value prior to encoding
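# For example, `convert_value(1:5)` returns the vector unchanged (so it encodes as
# [1, 2, 3, 4, 5]), while `convert_value(mtcars)` is converted to a `Datatable` list
# by `convert_data_frame()` below (see the tests in lib.rs above).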
convert_value <- function(value, options = list()) {
# The order of these if statements is important (since, e.g., a data.frame is also a list)
if (inherits(value, "Entity")) {
# A Stencila Schema entity so just return it
value
} else if (inherits(value, "recordedplot") || inherits(value, "ggplot")) {
convert_plot(value, options = options)
} else if (inherits(value, "table")) {
# The functions `summary` and `table` return class "table" results
# Currently, just "print" them. In the future, we may convert these to Datatables.
paste(utils::capture.output(print(value)), collapse = "\n")
} else if (is.data.frame(value)) {
# Decode to a Datatable
convert_data_frame(value)
} else if (
is.null(value) ||
is.logical(value) ||
is.numeric(value) ||
is.character(value) ||
is.matrix(value) ||
is.array(value) ||
is.vector(value) ||
is.list(value)
) {
# Return the value because, for these types, `toJSON()` will convert it
# to the appropriate JSON type, e.g. a matrix to an array of arrays
value
} else {
warning(paste("Default conversion for R type:", typeof(value), ", class:", class(value)))
paste(utils::capture.output(print(value)), collapse = "\n")
}
}

# Convert an R plot to an `ImageObject`
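# The plot is printed to a graphics device writing to a temporary file, and the file's
# contents are embedded in `contentUrl` as a base64 data URI (e.g. `data:image/png;base64,...`).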
convert_plot <- function(value, options = list(), format = "png") {
# Check that a graphics device exists for the requested format
if (!exists(format)) {
log$warn(paste("Unsupported format, defaulting to PNG:", format))
format <- "png"
}

# Create a new graphics device for the format, with
# a temporary path
filename <- tempfile(fileext = paste0(".", format))
width <- try(as.numeric(options$width))
height <- try(as.numeric(options$height))

func <- get(format)
func(
filename,
width = ifelse(is.numeric(width) && length(width) == 1, width, 10),
height = ifelse(is.numeric(height) && length(height) == 1, height, 10),
units = "cm",
res = 150
)
print(value)
grDevices::dev.off()

list(
type = unbox("ImageObject"),
contentUrl = unbox(paste0("data:image/", format, ";base64,", base64enc::base64encode(filename)))
)
}

# Convert an R `data.frame` to a `Datatable`
convert_data_frame <- function(df) {
row_names <- attr(df, "row.names")
if (!identical(row_names, seq_len(nrow(df)))) {
columns <- list(convert_data_frame_column("name", row_names))
} else {
columns <- NULL
}

columns <- c(columns, Filter(function(column) !is.null(column), lapply(colnames(df), function(colname) {
convert_data_frame_column(colname, df[[colname]])
})))

list(
type = unbox("Datatable"),
columns = columns
)
}

# Convert an R `vector` to a `DatatableColumn`
#
# Because a `factor`'s levels are always a character vector, factors are converted into a
# column whose validator's `itemsValidator` is an `EnumValidator` with `values` containing the levels.
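# For example, as checked in the `encode_dataframes` test, a column built with
# `factor(c("X", "Y"), levels = c("X", "Y", "Z"))` has values ["X", "Y"] and an
# `EnumValidator` whose `values` are ["X", "Y", "Z"].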
convert_data_frame_column <- function(name, object) {
if (is.factor(object)) {
validator <- list(type = unbox("EnumValidator"), values = levels(object))
values <- as.character.factor(object)
} else if (is.logical(object)) {
validator <- list(type = unbox("BooleanValidator"))
values <- object
} else if (is.numeric(object)) {
validator <- list(type = unbox("NumberValidator"))
values <- object
} else if (is.character(object)) {
validator <- list(type = unbox("StringValidator"))
values <- object
} else {
return(NULL)
}

list(
type = unbox("DatatableColumn"),
name = unbox(name),
values = values,
validator = list(type = unbox("ArrayValidator"), itemsValidator = validator)
)
}

# Encode a message to JSON
encode_message <- function(message, type) {
escaped <- gsub('\\"', '\\\\"', message)
escaped <- gsub('\\n', '\\\\n', escaped)
