Join GitHub today
GitHub is home to over 50 million developers working together to host and review code, manage projects, and build software together.
Sign up| #' Syntax highlight and link a md document | |
| #' | |
| #' @description | |
| #' `downlit_md_*` works by traversing the markdown AST generated by Pandoc. | |
| #' It applies [highlight()] to `CodeBlock`s and [autolink()] to inline `Code`. | |
| #' | |
| #' Use `downlit_md_path()` to transform a file on disk; use | |
| #' `downlit_md_string()` to transform a string containing markdown as part | |
| #' of a larger pipeline. | |
| #' | |
| #' Needs pandoc 1.19 or later. | |
| #' | |
| #' @export | |
| #' @param in_path,out_path Input and output paths for markdown file. | |
| #' @param x A string containing markdown. | |
| #' @param format Pandoc format; defaults to "gfm" if you have pandoc 2.0.0 or | |
| #' greater, otherwise "markdown_github". | |
| #' @return `downlit_md_path()` invisibly returns `output_path`; | |
| #' `downlit_md_string()` returns a string containing markdown. | |
| #' @examples | |
| #' if (rmarkdown::pandoc_available("1.19")) { | |
| #' downlit_md_string("`base::t()`") | |
| #' downlit_md_string("`base::t`") | |
| #' downlit_md_string("* `base::t`") | |
| #' | |
| #' # But don't highlight in headings | |
| #' downlit_md_string("## `base::t`") | |
| #' } | |
| downlit_md_path <- function(in_path, out_path, format = NULL) { | |
| check_packages() | |
| ast_path <- tempfile() | |
| on.exit(unlink(ast_path)) | |
| md2ast(in_path, ast_path, format = format) | |
| ast <- jsonlite::read_json(ast_path) | |
| ast$blocks <- transform_code(ast$blocks, ast_version(ast)) | |
| jsonlite::write_json(ast, ast_path, auto_unbox = TRUE, null = "null") | |
| ast2md(ast_path, out_path, format = format) | |
| } | |
| #' @export | |
| #' @rdname downlit_md_path | |
| downlit_md_string <- function(x, format = NULL) { | |
| check_packages() | |
| path <- tempfile() | |
| on.exit(unlink(path)) | |
| brio::write_lines(x, path) | |
| downlit_md_path(path, path, format = format) | |
| brio::read_file(path) | |
| } | |
| # Markdown <-> pandoc AST ------------------------------------------------- | |
| md2ast <- function(path, out_path, format = NULL) { | |
| format <- format %||% md_format() | |
| rmarkdown::pandoc_convert( | |
| input = normalizePath(path, mustWork = FALSE), | |
| output = normalizePath(out_path, mustWork = FALSE), | |
| from = format, | |
| to = "json" | |
| ) | |
| invisible(out_path) | |
| } | |
| ast2md <- function(path, out_path, format = NULL) { | |
| format <- format %||% md_format() | |
| options <- c( | |
| if (rmarkdown::pandoc_available("2.0")) "--eol=lf", | |
| "--atx-headers", # 1.19 | |
| "--wrap=none" # 1.16 | |
| ) | |
| rmarkdown::pandoc_convert( | |
| input = normalizePath(path, mustWork = FALSE), | |
| output = normalizePath(out_path, mustWork = FALSE), | |
| from = "json", | |
| to = format, | |
| options = options | |
| ) | |
| invisible(out_path) | |
| } | |
| ast_version <- function(ast) { | |
| string <- paste(unlist(ast$`pandoc-api-version`), collapse = ".") | |
| package_version(string) | |
| } | |
| md_format <- function() { | |
| if (rmarkdown::pandoc_available("2.0.0")) { | |
| "gfm" | |
| } else { | |
| "markdown_github" | |
| } | |
| } | |
| # Code transformation ----------------------------------------------------- | |
| # Data types at | |
| # https://hackage.haskell.org/package/pandoc-types-1.20/docs/Text-Pandoc-Definition.html | |
| transform_code <- function(x, version) { | |
| stopifnot(is.list(x)) | |
| # Blocks that are a list of blocks | |
| block_list <- c( | |
| # Block | |
| "Plain", "Para", "LineBlock", "BlockQuote", "BulletList", | |
| # Inline | |
| "Emph", "Strong", "Strikeout", "Superscript", "Subscript", | |
| "SmallCaps", "Note", "Underline" | |
| ) | |
| # Blocks that have a list of blocks as second child | |
| block_list2 <- c( | |
| "OrderedList", "Quoted", | |
| "Div", "Span", | |
| "Caption", "TableHead", "TableFoot", "Row" | |
| ) | |
| skip <- c( | |
| "Header", "CodeBlock", "RawBlock", "HorizontalRule", "Null", | |
| "Math", "RawInline", "Link", "Image", "Cite", | |
| "Str", "Space", "SoftBreak", "LineBreak" | |
| ) | |
| if (!is_named(x)) { | |
| lapply(x, transform_code, version = version) | |
| } else { | |
| if (x$t == "Code") { | |
| package_name <- extract_curly_package(x$c[[2]]) | |
| # packages à la {pkgname} | |
| if(!is.na(package_name)) { | |
| href <- href_package(package_name) | |
| if (!is.na(href)) { | |
| x <- list(t = "Str", c = package_name) | |
| x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href)) | |
| } # otherwise we do not touch x | |
| } else { | |
| # other cases | |
| href <- autolink_url(x$c[[2]]) | |
| if (!is.na(href)) { | |
| x <- pandoc_link(pandoc_attr(), list(x), pandoc_target(href)) | |
| } | |
| } | |
| } else if (x$t == "CodeBlock") { | |
| out <- highlight(x$c[[2]], pre_class = "chroma") | |
| if (!is.na(out)) { | |
| x <- pandoc_raw_block("html", out) | |
| } | |
| } else if (x$t %in% block_list) { | |
| # Plain [Inline] | |
| # Para [Inline] | |
| # LineBlock [[Inline]] | |
| # BlockQuote [Block] | |
| # BulletList [[Block]] | |
| # Emph [Inline] | |
| # Strong [Inline] | |
| # Strikeout [Inline] | |
| # Superscript [Inline] | |
| # Subscript [Inline] | |
| # SmallCaps [Inline] | |
| # Note [Block] | |
| # Underline [Inline] <v1.21> | |
| x$c <- lapply(x$c, transform_code, version = version) | |
| } else if (x$t %in% block_list2) { | |
| # OrderedList ListAttributes [[Block]] | |
| # Quoted QuoteType [Inline] | |
| # Div Attr [Block] | |
| # Span Attr [Inline] | |
| # TableHead Attr [Row] <v1.21> | |
| # TableFoot Attr [Row] <v1.21> | |
| # Caption (Maybe ShortCaption) [Block] <v1.21> | |
| x$c[[2]] <- lapply(x$c[[2]], transform_code, version = version) | |
| } else if (x$t %in% "Table") { | |
| if (version >= "1.21") { | |
| # Attr Caption [ColSpec] TableHead [TableBody] TableFoot | |
| x$c[c(2, 4, 5, 6)] <- lapply(x$c[c(2, 4, 5, 6)], transform_code, version = version) | |
| } else { | |
| # [Inline] [Alignment] [Double] [TableCell] [[TableCell]] | |
| x$c[c(1, 4, 5)] <- lapply(x$c[c(1, 4, 5)], transform_code, version = version) | |
| } | |
| } else if (x$t %in% "TableBody") { | |
| # Attr RowHeadColumns [Row] [Row] <v1.21> | |
| x$c[c(3, 4)] <- lapply(x$c[c(3, 4)], transform_code, version = version) | |
| } else if (x$t %in% "Cell") { | |
| # Attr Alignment RowSpan ColSpan [Block] | |
| x$c[[5]] <- lapply(x$c[[5]], transform_code, version = version) | |
| } else if (x$t %in% "DefinitionList") { | |
| # DefinitionList [([Inline], [[Block]])] | |
| x$c <- lapply(x$c, | |
| function(x) list( | |
| transform_code(x[[1]], version = version), | |
| transform_code(x[[2]], version = version) | |
| ) | |
| ) | |
| } else if (x$t %in% skip) { | |
| } else { | |
| inform(paste0("Unknown type: ", x$t)) | |
| } | |
| x | |
| } | |
| } | |
| # Pandoc AST constructors ------------------------------------------------- | |
| pandoc_node <- function(type, ...) { | |
| list(t = type, c = list(...)) | |
| } | |
| pandoc_raw_block <- function(format, text) { | |
| # Format Text | |
| pandoc_node("RawBlock", format, text) | |
| } | |
| pandoc_link <- function(attr, contents, target) { | |
| # Attr [Inline] Target | |
| pandoc_node("Link", attr, contents, target) | |
| } | |
| pandoc_attr <- function(id = "", classes = list(), keyval = list()) { | |
| list(id, classes, keyval) | |
| } | |
| pandoc_target <- function(url, title = "") { | |
| list(url, title) | |
| } | |
| # Helpers ----------------------------------------------------------------- | |
| check_packages <- function() { | |
| if (!is_installed("rmarkdown") || !is_installed("jsonlite")) { | |
| abort("rmarkdown and jsonlite required for .md transformation") | |
| } | |
| } |