Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

initial work on knitr python engine #107

Merged
merged 22 commits into from Oct 20, 2017
Merged
Changes from all commits
Commits
File filter...
Filter file types
Jump to…
Jump to file or symbol
Failed to load files and symbols.

Always

Just for now

@@ -52,13 +52,15 @@ export(conda_list)
export(conda_remove)
export(conda_version)
export(dict)
export(eng_python)
export(import)
export(import_builtins)
export(import_from_path)
export(import_main)
export(iter_next)
export(iterate)
export(np_array)
export(py)
export(py_available)
export(py_call)
export(py_capture_output)
@@ -0,0 +1,228 @@
#' A reticulate Engine for Knitr
#'
#' This provides a `reticulate` engine for `knitr`, suitable for usage when
#' attempting to render Python chunks. Using this engine allows for shared state
#' between Python chunks in a document -- that is, variables defined by one
#' Python chunk can be used by later Python chunks.
#'
#' The engine can be activated by setting (for example)
#'
#' ```
#' knitr::knit_engines$set(python = reticulate::eng_python)
#' ```
#'
#' Typically, this will be set within a document's setup chunk, or by the
#' environment requesting that Python chunks be processed by this engine.
#'
#' @param options
#'   Chunk options, as provided by `knitr` during chunk execution.
#'
#' @return
#'   The value produced by handing the collected source / output pieces to
#'   `knitr`'s internal `wrap()` function, or an empty list when the chunk
#'   contains no code.
#'
#' @export
eng_python <- function(options) {

  # 'engine.path' may be a single path, or a list of paths keyed by engine
  engine_path <- options[["engine.path"]]
  if (is.list(engine_path))
    engine_path <- engine_path[["python"]]

  # if the user has requested a custom Python, attempt
  # to honor that request (warn if Python already initialized
  # to a different version)
  if (is.character(engine_path)) {

    # if Python has not yet been loaded, then try
    # to load it with the requested version of Python
    if (!py_available())
      use_python(engine_path, required = TRUE)

    # double-check that we've loaded the requested Python
    conf <- py_config()
    requested_python <- normalizePath(engine_path)
    actual_python <- normalizePath(conf$python)
    if (requested_python != actual_python) {
      fmt <- "cannot honor request to use Python %s [%s already loaded]"
      msg <- sprintf(fmt, requested_python, actual_python)
      warning(msg, immediate. = TRUE, call. = FALSE)
    }
  }

  # per-chunk state shared with the hooks installed during initialization;
  # 'envir' is this frame so that deferred cleanup runs when we return
  context <- new.env(parent = emptyenv())
  eng_python_initialize(
    options,
    context = context,
    envir = environment()
  )

  ast <- import("ast", convert = TRUE)

  # helper function for extracting range of code, dropping blank lines
  extract <- function(code, range) {
    snippet <- code[range[1]:range[2]]
    paste(snippet[nzchar(snippet)], collapse = "\n")
  }

  # extract the code to be run -- we'll attempt to run the code line by line
  # and detect changes so that we can interleave code and output (similar to
  # what one sees when executing an R chunk in knitr). to wit, we'll do our
  # best to emulate the return format of 'evaluate::evaluate()'
  code <- options$code
  n <- length(code)
  if (n == 0)
    return(list())

  # use 'ast.parse()' to parse Python code and collect line numbers, so we
  # can split source code into statements
  pasted <- paste(code, collapse = "\n")
  parsed <- ast$parse(pasted, "<string>")

  # iterate over top-level nodes and extract line numbers
  lines <- vapply(parsed$body, function(node) {
    node$lineno
  }, integer(1))

  # convert from lines to ranges: each statement runs up to the line before
  # the next statement starts, and the last one runs to the end of the chunk.
  # built by index so that a chunk with no statements (e.g. only comments)
  # yields an empty list rather than an error from mismatched lengths
  starts <- lines
  ends <- c(lines[-1] - 1, n)
  ranges <- lapply(seq_along(starts), function(i) {
    c(starts[[i]], ends[[i]])
  })

  # line index from which source should be emitted
  pending_source_index <- 1

  # actual outputs to be returned to knitr
  outputs <- list()

  # synchronize state R -> Python
  eng_python_synchronize_before()

  for (range in ranges) {

    # evaluate current chunk of code
    snippet <- extract(code, range)
    captured <- py_capture_output(
      py_run_string(snippet, convert = FALSE)
    )

    if (nzchar(captured) || length(context$pending_plots)) {

      # append pending source to outputs
      outputs[[length(outputs) + 1]] <- structure(
        list(src = extract(code, c(pending_source_index, range[2]))),
        class = "source"
      )

      # append captured outputs
      if (nzchar(captured))
        outputs[[length(outputs) + 1]] <- captured

      # append captured images / figures
      if (length(context$pending_plots)) {
        for (plot in context$pending_plots)
          outputs[[length(outputs) + 1]] <- plot
        context$pending_plots <- list()
      }

      # update pending source range
      pending_source_index <- range[2] + 1
    }
  }

  # if we have leftover input, add that now. the comparison is inclusive so
  # that a single trailing line which produced no output is still emitted
  if (pending_source_index <= n) {
    leftover <- extract(code, c(pending_source_index, n))
    outputs[[length(outputs) + 1]] <- structure(
      list(src = leftover),
      class = "source"
    )
  }

  eng_python_synchronize_after()

  # TODO: development version of knitr supplies new 'engine_output()'
  # interface -- use that when it's on CRAN
  # https://github.com/yihui/knitr/commit/71bfd8796d485ed7bb9db0920acdf02464b3df9a
  wrap <- yoink("knitr", "wrap")
  wrap(outputs, options)

}

# Prepare the Python session for a chunk: bind the requested interpreter
# (when 'engine.path' is a character vector) and install matplotlib hooks.
eng_python_initialize <- function(options, context, envir) {

  requested <- options$engine.path
  if (is.character(requested)) {
    use_python(requested[[1]])
  }

  eng_python_initialize_matplotlib(options, context, envir)
}

# Install matplotlib hooks for the current chunk so that figures shown from
# Python are written to disk and queued for knitr.
#
# options: chunk options; 'dev', 'dpi', 'fig.width' and 'fig.height' are read
# context: object whose 'pending_plots' field collects the queued figures
# envir:   frame in which the deferred cleanup handlers are registered
#
# Returns early (via a bare 'return()') when the 'matplotlib' module is not
# available; in that case nothing is installed.
eng_python_initialize_matplotlib <- function(options,
context,
envir)
{
if (!py_module_available("matplotlib"))
return()

# initialize pending_plots list
context$pending_plots <- list()

# import without conversion so 'plt' stays a Python object
matplotlib <- import("matplotlib", convert = FALSE)
plt <- matplotlib$pyplot

# rudely steal 'plot_counter' (used below), and reset
# it when we're done
plot_counter <- yoink("knitr", "plot_counter")

This comment has been minimized.

Copy link
@yihui

yihui Oct 12, 2017

Member

I don't quite understand the defer() magic below, but the counter should be reset in eng_python() after wrap() is done (e.g. in on.exit()), otherwise I guess you will always get the same figure file path chunk-label-1.png.

This comment has been minimized.

Copy link
@kevinushey

kevinushey Oct 14, 2017

Author Collaborator

defer() is basically a way of calling on.exit() in an arbitrary frame; ie, I use the fact that this function will only be called within eng_python(), and attach the on.exit() handler there.

This comment has been minimized.

Copy link
@yihui

yihui Oct 14, 2017

Member

Okay, that sounds clever!

This comment has been minimized.

Copy link
@jimhester

jimhester Jan 8, 2018

Member

FWIW defer has been in the CRAN version of withr for a while now as withr::defer() if you don't want to duplicate the code here (but I guess presumably you do?).

This comment has been minimized.

Copy link
@shrektan

shrektan Jan 18, 2018

It's so great to have withr::defer(). Solved my headache elegantly. Thanks.

# reset the counter when the frame given by 'envir' exits
defer(plot_counter(reset = TRUE), envir = envir)

# save + restore old show hook
show <- plt$show
# the restore is registered on 'envir' but refers to 'plt' and 'show'
# from this function's frame
defer(plt$show <- show, envir = envir)
# replacement for pyplot's 'show': save the figure to a file instead
plt$show <- function(...) {

# write plot to file
# the file path is numbered using the current value of knitr's plot counter
path <- knitr::fig_path(options$dev, number = plot_counter())
dir.create(dirname(path), recursive = TRUE, showWarnings = FALSE)
plt$savefig(path, dpi = options$dpi)

# return as a knitr image path
# NOTE(review): '<<-' resolves 'context' in the enclosing function frame;
# this presumably relies on 'context' being an environment so the caller
# sees the appended plot -- confirm
context$pending_plots[[length(context$pending_plots) + 1]] <<-
knitr::include_graphics(path)
}

# set up figure dimensions
plt$rc("figure", figsize = tuple(options$fig.width, options$fig.height))

}

# synchronize objects R -> Python
#
# Defines a Python class 'R' in the main module, wires its attribute and item
# accessors to the active knit environment, and binds an instance to 'r'.
eng_python_synchronize_before <- function() {

  # create the (initially empty) 'R' class on the Python side
  py_run_string("class R(object): pass")

  # fetch the class object from the main module, unconverted
  r_class <- import_main(convert = FALSE)$R

  # environment that knitr is currently evaluating chunks in
  envir <- yoink("knitr", ".knitEnv")$knit_global

  # reads: parse the requested text as R code and evaluate it in 'envir'
  read_from_r <- function(self, code) {
    expr <- parse(text = as_r_value(code))
    r_to_py(eval(expr, envir = envir))
  }

  # writes: store the converted value under the given name; '<<-' is kept so
  # that the update is applied to the 'envir' binding of the enclosing frame
  write_to_r <- function(self, name, value) {
    envir[[as_r_value(name)]] <<- as_r_value(value)
  }

  # attribute access and item access share the same handlers
  handlers <- list(
    "__getattr__" = read_from_r,
    "__setattr__" = write_to_r,
    "__getitem__" = read_from_r,
    "__setitem__" = write_to_r
  )
  for (method in names(handlers)) {
    py_set_attr(r_class, method, handlers[[method]])
  }

  # finally, instantiate the class as 'r'
  py_run_string("r = R()")
}

# synchronize objects Python -> R
#
# Placeholder: currently performs no work and returns NULL.
eng_python_synchronize_after <- function() {
  NULL
}
@@ -26,12 +26,6 @@ NULL



# Package unload hook: shut the embedded Python interpreter down, but only
# when one was actually initialized.
.onUnload <- function(libpath) {
  if (!is_python_initialized()) {
    return(invisible(NULL))
  }
  py_finalize()
}


# Report whether Python has been initialized, judged by the presence of a
# stored configuration in '.globals$py_config'.
is_python_initialized <- function() {
  config <- .globals$py_config
  !is.null(config)
}
@@ -45,4 +45,14 @@ as_r_value <- function(x) {
x
}

# Fetch an object (exported or internal) from a package namespace by name.
#
# package: name of the package, as a string
# symbol:  name of the object inside that package's namespace, as a string
yoink <- function(package, symbol) {
  namespace <- asNamespace(package)
  get(symbol, envir = namespace, inherits = FALSE)
}

# Schedule 'expr' to run when another frame exits.
#
# expr:  expression to run later; captured unevaluated
# envir: frame on which the exit handler is registered (defaults to the
#        caller's frame)
#
# The handler is added with 'add = TRUE', so handlers already registered on
# 'envir' are kept.
defer <- function(expr, envir = parent.frame()) {
# build 'evalq(<expr>, envir = <frame that called defer>)' so the deferred
# code is evaluated in the frame where it was written, not in 'envir'
call <- substitute(
evalq(expr, envir = envir),
list(expr = substitute(expr), envir = parent.frame())
)
# 'substitute(call)' yields the call object built above (a local variable);
# 'do.call(..., envir = envir)' is what targets the handler at 'envir'
do.call(base::on.exit, list(substitute(call), add = TRUE), envir = envir)
}
33 R/zzz.R
@@ -0,0 +1,33 @@
#' Interact with the Python Main Module
#'
#' The `py` object provides a means for interacting
#' with the Python main session directly from \R.
#'
#' @format An \R object acting as an interface to the
#' Python main module.
#'
#' @export
"py"

This comment has been minimized.

Copy link
@jjallaire

jjallaire Oct 20, 2017

Member

I think it would be preferable if we made this available only during execution of R chunks in knitr. I have often used py <- import_builtins() in example code so exporting py might conflict with this. I also don't think that library(reticulate) should be required in order to access Python chunks. Do you think this is reasonable or do you feel strongly about making it globally available?


# Package load hook: expose 'py' in the package namespace as an active
# binding that resolves to the Python main module.
.onLoad <- function(libname, pkgname) {

  # cache for the main module; stays NULL until Python has been initialized
  cached_main <- NULL

  resolve_main <- function() {

    # import the main module once, and only when Python is already running
    if (is.null(cached_main) && is_python_initialized()) {
      cached_main <<- import_main()
    }

    # either the cached module, or NULL when Python is not yet initialized
    cached_main
  }

  makeActiveBinding("py", resolve_main, asNamespace(pkgname))
}

# Package unload hook: finalize the embedded Python interpreter, skipping the
# call entirely when Python was never initialized.
.onUnload <- function(libpath) {
  if (!is_python_initialized()) {
    return(invisible(NULL))
  }
  py_finalize()
}

Some generated files are not rendered by default. Learn more.

Some generated files are not rendered by default. Learn more.

@@ -0,0 +1,6 @@
*.md
*.html
*.pdf

figure/
example_files/
ProTip! Use n and p to navigate between commits in a pull request.
You can’t perform that action at this time.