Skip to content

Commit

Permalink
feat(R): Add compilation of CodeChunks
Browse files Browse the repository at this point in the history
  • Loading branch information
nokome authored and beneboy committed Sep 2, 2019
1 parent 3cee6d8 commit 68a183e
Show file tree
Hide file tree
Showing 7 changed files with 289 additions and 19 deletions.
11 changes: 7 additions & 4 deletions r/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,13 @@ Suggests:
Roxygen: list(markdown = TRUE)
RoxygenNote: 6.1.1.9000
Collate:
'read_funcs.R'
'nse_funcs.R'
'compile.R'
'datatable.R'
'entity.R'
'node.R'
'stencila.R'
'util.R'
'typing.R'
'types.R'
'node.R'
'entity.R'
'datatable.R'
'util.R'
23 changes: 21 additions & 2 deletions r/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,61 @@

S3method(as.data.frame,Datatable)
S3method(print,Entity)
export(ArraySchema)
export(Article)
export(AudioObject)
export(BlockContent)
export(BooleanSchema)
export(Brand)
export(Cite)
export(CiteGroup)
export(Code)
export(CodeBlock)
export(CodeChunk)
export(CodeExpr)
export(CodeExpression)
export(CodeFragment)
export(Collection)
export(ConstantSchema)
export(ContactPoint)
export(CreativeWork)
export(CreativeWorkTypes)
export(Datatable)
export(DatatableColumn)
export(DatatableColumnSchema)
export(Delete)
export(Emphasis)
export(Entity)
export(EnumSchema)
export(Environment)
export(Figure)
export(Heading)
export(ImageObject)
export(Include)
export(InlineContent)
export(IntegerSchema)
export(Link)
export(List)
export(ListItem)
export(Mark)
export(MediaObject)
export(Mount)
export(Node)
export(NumberSchema)
export(Organization)
export(Paragraph)
export(Parameter)
export(Periodical)
export(Person)
export(Product)
export(PublicationIssue)
export(PublicationVolume)
export(Quote)
export(QuoteBlock)
export(ResourceParameters)
export(SchemaTypes)
export(SoftwareApplication)
export(SoftwareSession)
export(SoftwareSourceCode)
export(StringSchema)
export(Strong)
export(Subscript)
export(Superscript)
Expand All @@ -49,10 +65,13 @@ export(TableCell)
export(TableRow)
export(ThematicBreak)
export(Thing)
export(TupleSchema)
export(VideoObject)
export(as.Datatable.data.frame)
export(datatable_from_dataframe)
export(datatable_to_dataframe)
export(entity_from_list)
export(node_from_json)
export(node_to_json)
export(nse_funcs)
export(read_funcs)
161 changes: 161 additions & 0 deletions r/R/compile.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#' @include nse_funcs.R
#' @include read_funcs.R

#' Names of function that assign
assign_func_names <- c("assign", "base::assign", "<<-", "<-", "=")

#' Names of functions that "import" packages
import_func_names <- c("library", "require", "::", ":::")

#' Names of function in the base R environment
base_func_names <- ls(baseenv())

# Temporary
Function <- function(...) list(type = "Function", ...)

compile_chunk <- function(chunk) {
language <- chunk$language
text <- chunk$text
imports <- NULL
declares <- NULL
assigns <- NULL
alters <- NULL
uses <- NULL
reads <- NULL

# Only handle R code
if (is.null(language) || !(language %in% c("r", "R"))) return(chunk)

# Parse the code into an AST
ast <- as.list(parse(text = text))

# Record assignments that are local
# to functions, they need to be considered
# for `uses`, but not for `assigns`
local_assigns <- NULL

ast_walker <- function(node, depth = 0) {
if (is.symbol(node)) {
name <- as.character(node)
if (!(name %in% assigns)) uses <<- unique(c(uses, name))
} else if (is.call(node)) {
# Resolve the function name
func <- node[[1]]
if (is.symbol(func)) {
# 'Normal' function call
# Add function to `uses` if it is is not in base environment
func_name <- as.character(func)
if (!(func_name %in% base_func_names)) {
uses <<- unique(c(uses, func_name))
}
} else if (is.call(func) && func[[1]] == "::") {
# Call of namespaced function e.g pkg::func
# Do not add these to `uses`
func_name <- paste0(func[[2]], "::", func[[3]])
} else {
# No func_name for other more complex calls
# that do not need to be detected below e.g. instance$method()
func_name <- ""
}

if (func_name == "$") {
# Only walk the left side, not the right since they are symbols to
# extract from an object so should not be included in `uses`
ast_walker(node[[2]], depth)
return()
} else if (func_name == "function") {
# Function definition
# Walk the body with incremented depth
ast_walker(node[[length(node)]], depth + 1)
return()
} else if (func_name %in% assign_func_names) {
left <- node[[2]]
right <- node[[3]]
if (is.call(right) && right[[1]] == "function") {
# Assignment of a function
# Treat as a declaration
func_decl <- Function(
name = as.character(left)
)
if (!is.null(right[[2]])) {
parameters <- NULL
params <- as.list(right[[2]])
print(names(params))
for (name in names(params)) {
param <- params[[name]]
parameters <- c(parameters, list(
Parameter(
name = name
)
))
}
func_decl$parameters <- parameters
}
declares <<- c(declares, list(func_decl))

# Only walk the function so that left is not made a `uses`
ast_walker(right, depth)
return()
} else if (func == "assign") {
# Assignment using `assign` function
# TODO: Check the `pos` arg relative to current depth
assigns <<- unique(c(assigns, left))
} else if (is.symbol(left) && depth == 0) {
# Assignment to a name
assigns <<- unique(c(assigns, as.character(left)))
} else if (is.call(left)) {
# Assignment to an existing object e.g. a$b[1] <- NULL
# Recurse until we find the variable that is target of alteration
walk <- function(node) {
target <- node[[2]]
if (is.symbol(target)) {
if (is.null(alters) || !(target %in% alters)) {
alters <<- c(alters, as.character(target))
}
} else {
walk(target)
}
}
walk(target)
}
} else if (func_name %in% import_func_names) {
# Package import
# Get the names of the package
if (length(node) > 1) {
imports <<- unique(c(imports, as.character(node[[2]])))
}
} else if (func_name %in% read_funcs_names) {
# File read
# Collect relevant argument(s) from function call
args <- as.list(node[2:length(node)])
read_func_index <- floor((match(func_name, read_funcs_names) - 1) / 2) + 1
read_func <- read_funcs[[read_func_index]]
if (any(read_func$names %in% names(args))) {
files <- unlist(args[read_func$names])
} else {
files <- unlist(args[read_func$positions])
}
# Only use character arguments i.e. not symbols (variable names)
files <- files[is.character(files)]
if (length(files) > 0) reads <<- unique(c(reads, files))
}
}

# If there are child nodes, walk over them too
if (length(node) > 1) {
lapply(node[2:length(node)], ast_walker, depth)
}
}
lapply(ast, ast_walker)

list(
language = language,
text = text,
imports = imports,
declares = declares,
assigns = assigns,
alters = alters,
uses = uses,
reads = reads
)
}
30 changes: 30 additions & 0 deletions r/R/nse_funcs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
#' Module for defining a list of non-standard evaluation functions,
#' functions that use `substitute()` (or related) on one or more arguments.
#' See http://adv-r.had.co.nz/Computing-on-the-language.html.
#' This list is used when R code is compiled to ignore some
#' variable names when determining the `uses` property
#' of the chunk.

#' Create a entry for a function that uses NSE
#'
#' @param func Name of the function
#' @param package Name of the package that the function is in
#' @param names Names of parameters that should be ignored
#' @param positions Positions of parameters that should be ignored
nse_func <- function(func, package, names=NULL, positions=NULL) {
list(
func = func,
package = package,
names = names,
positions = positions
)
}

#' List of functions that read from files
#' @export
nse_funcs <- list(
# base package
nse_func("base", "subset", c("subset", "select"), c(2, 4)),
# dplyr package
nse_func("dplyr", "filter")
)
54 changes: 54 additions & 0 deletions r/R/read_funcs.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#' Module for defining a list of "read" functions, functions
#' that read files from the filesystem. This list is used
#' when R code is compiled to help determine the `reads` propery
#' of the chunk

#' Create a entry for a function that reads a file
#'
#' Most file reading functions have the file path as their first
#' parameter named `file`. If this is not the case, or if there
#' is more than one parameter that relates to a file that is read
#' by the function, use the `names` and `positions` parameters
#'
#' @param package Name of the package that the function is in
#' @param func Name of the function
#' @param names Names of parameters that are file paths that are read
#' @param positions Positions of parameters that are file paths that are read
read_func <- function(package, func, names="file", positions=1) {
list(
package = package,
func = func,
names = names,
positions = positions
)
}

#' List of functions that read from files
#' @export
read_funcs <- list(
# utils package
read_func("utils", "read.table"),
read_func("utils", "read.csv"),
read_func("utils", "read.csv2"),
read_func("utils", "read.delim"),
read_func("utils", "read.delim2"),
read_func("utils", "read.fwf"),
# foreign package
read_func("foreign", "read.arff"),
read_func("foreign", "read.dbf"),
read_func("foreign", "read.dta"),
read_func("foreign", "read.epiinfo"),
read_func("foreign", "read.mtp"),
read_func("foreign", "read.octave"),
read_func("foreign", "read.spss"),
read_func("foreign", "read.ssd"),
read_func("foreign", "read.systat"),
read_func("foreign", "read.xport")
)

#' List of possible function call names to match
read_funcs_names <- Reduce(
function(prev, curr) c(prev, curr$func, paste0(curr$package, "::", curr$func)),
read_funcs,
character()
)
2 changes: 2 additions & 0 deletions r/R/types.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
# Do not modify it by hand. Instead, modify the source `.schema.yaml` files
# in the `schema` directory and run `npm run build:r` to regenerate it.

#' @include typing.R

#' The most basic item, defining the minimum properties required.
#'
#' @name Entity
Expand Down
27 changes: 14 additions & 13 deletions r/R/util.R
Original file line number Diff line number Diff line change
@@ -1,25 +1,26 @@
# Utility functions used internally in this package
# and not intended to be exported.

#' Map a function across entries in a node
#' Map a function across entries in an object
#'
#' This is analagous to `Object.entries(node).map(...)`
#' This is analagous to `Object.entries(object).map(...)`
#' in Javascript. It handles bother scalar and vector
#' node types.
#' object types.
#'
#' @param node The node to map over
#' @param func The function to apply to each of the node's entries
#' @param object The object to map over
#' @param func The function to apply to each of the object's entries
#' @param ... Additional arguments to pass through to the function
map <- function(node, func, ...) {
if (is.list(node)) lapply(node, func, ...)
else func(node, ...)
map <- function(object, func, ...) {
if (is.list(object)) lapply(object, func, ...)
else func(object, ...)
}

#' Transform a node by recursively applying a function to it.
#' Create a transformattion of a object by recursively
#' applying a function to it. Could be called `deepMap`.
#'
#' @param node The node to map over
#' @param func The function to apply to each node
#' @param object The object to map over
#' @param func The function to apply to each object
#' @param ... Additional arguments to pass through to the function
transform <- function(node, func, ...) {
map(node, function(child) map(child, func, ...))
transform <- function(object, func, ...) {
map(object, function(child) map(child, func, ...))
}

0 comments on commit 68a183e

Please sign in to comment.