fix(R Markdown): Implement encoding

stencila · Aug 31, 2021 · b7cb681 · b7cb681
1 parent 9975b42
commit b7cb681
Show file tree

Hide file tree

Showing 6 changed files with 173 additions and 44 deletions.
diff --git a/fixtures/articles/simple.Rmd b/fixtures/articles/simple.Rmd
@@ -3,28 +3,56 @@ title: A simple R Markdown article
 keywords: text, fixture, Markdown
 ---
 
-# Introduction
+This is a relatively simple test article written in R Markdown. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
 
-A simple Markdown article for testing. When making changes please note that test snapshots based on this fixture may need to be updated.
+## Code chunks
 
-# Methods
+A basic code chunk has the language code surrounded by curly braces e.g.
 
-This is the methods section.
+```{r}
+summary(cars)
+```
+
+Chunks can also have a label and options e.g.
+
+```{r pressure, echo=FALSE}
+plot(pressure)
+```
+
+### Using fig.cap option
+
+The `fig.cap` option can be used to set the code chunk caption,
+
+```{r fig1, fig.width=8, fig.cap='A plot'}
+plot(data)
+```
 
-# Results
+```{r table2, fig.cap='A table'}
+head(data)
+```
+
+Bookdown-style `fig.cap` "text references" are also supported,
+
+```{r figure3, fig.cap='(ref:fig3)'}
+# R code here
+```
 
-The results include a table (Table 1).
+(ref:fig3) A caption for figure 3.
 
-| Group | Value |
-| ----- | ----- |
-| A     | 1.1   |
-| B     | 2.2   |
+Some intermediary content
 
-# Discussion
+```{r figure4, fig.cap='(ref:figure-4)'}
+# R code here but non-existent figure reference
+```
 
-This is the discussion section.
+And follow up with a duplicate figure reference
 
-```r
-# Some R code
-a <- 1
+```{r fig5, fig.cap='(ref:fig3)'}
+# R code here
 ```
+
+(ref:fig3) **Distinct and dorsoventrally organized properties of layer 2 stellate cells.**(**A**) Representative action potential after hyperpolarization waveforms from a SC (left), a pyramidal cell (middle) and an unidentified cell (right). The pyramidal and unidentified cells were...
+
+## Code expressions
+
+In R Markdown code expressions are represented using back ticks prefixed with the language e.g. `r 1+2`.
diff --git a/rust/src/methods/decode/rmd.rs b/rust/src/methods/decode/rmd.rs
@@ -5,6 +5,8 @@ use stencila_schema::{
     InlineContent, Node, NontextualAnnotation, Paragraph, Strong, Subscript, Superscript,
 };
 
+const LANGUAGES: &[&str] = &["r", "py", "python", "js", "javascript"];
+
 /// Decode an R Markdown document to a `Node`
 pub fn decode(input: &str) -> Result<Node> {
     let mut node = md::decode(input)?;
@@ -19,23 +21,26 @@ pub fn decode(input: &str) -> Result<Node> {
 fn transform_blocks(blocks: &mut Vec<BlockContent>) {
     for block in blocks {
         match block {
+            // Code blocks with curly braced language are transformed to code chunks
             BlockContent::CodeBlock(CodeBlock {
                 programming_language,
                 text,
                 ..
             }) => {
-                let programming_language = programming_language
+                let lang = programming_language
                     .clone()
                     .map(|boxed| *boxed)
-                    .unwrap_or("".to_string());
-                if programming_language.starts_with("{r") && programming_language.ends_with("}") {
+                    .unwrap_or_else(|| "".to_string());
+                if lang.starts_with('{') && lang.ends_with('}') {
+                    let lang = lang[1..(lang.len() - 1)].to_string();
                     *block = BlockContent::CodeChunk(CodeChunk {
-                        programming_language: "r".to_string(),
+                        programming_language: lang,
                         text: text.to_string(),
                         ..Default::default()
                     })
                 }
             }
+            // Transform the inline content of other block types
             BlockContent::Paragraph(Paragraph { content, .. }) => transform_inlines(content),
             _ => (),
         }
@@ -45,14 +50,17 @@ fn transform_blocks(blocks: &mut Vec<BlockContent>) {
 fn transform_inlines(inlines: &mut Vec<InlineContent>) {
     for inline in inlines {
         match inline {
-            // Code fragments prefixed with `r` get transformed to a CodeExpression
+            // Code fragments prefixed with a language code get transformed to a code expression
             InlineContent::CodeFragment(CodeFragment { text, .. }) => {
-                if let Some(text) = text.strip_prefix("r ") {
-                    *inline = InlineContent::CodeExpression(CodeExpression {
-                        programming_language: "r".to_string(),
-                        text: text.to_string(),
-                        ..Default::default()
-                    })
+                for lang in LANGUAGES {
+                    if let Some(text) = text.strip_prefix(&[lang, " "].concat()) {
+                        *inline = InlineContent::CodeExpression(CodeExpression {
+                            programming_language: lang.to_string(),
+                            text: text.to_string(),
+                            ..Default::default()
+                        });
+                        break;
+                    }
                 }
             }
             // Recursively transform other inlines

diff --git a/rust/src/methods/encode/rmd.rs b/rust/src/methods/encode/rmd.rs
@@ -1,9 +1,67 @@
 use super::md;
 use eyre::Result;
-use stencila_schema::Node;
+use stencila_schema::{
+    BlockContent, CodeBlock, CodeChunk, CodeExpression, CodeFragment, Delete, Emphasis,
+    InlineContent, Node, NontextualAnnotation, Paragraph, Strong, Subscript, Superscript,
+};
 
 /// Encode a `Node` to R Markdown
 pub fn encode(node: &Node) -> Result<String> {
-    // TODO: Any necessary translations of Markdown to RMarkdown
-    md::encode(node)
+    let mut node = node.clone();
+    if let Node::Article(article) = &mut node {
+        if let Some(content) = &mut article.content {
+            transform_blocks(content)
+        }
+    }
+    md::encode(&node)
+}
+
+fn transform_blocks(blocks: &mut Vec<BlockContent>) {
+    for block in blocks {
+        match block {
+            // Code chunks are transformed to code blocks with curly braced language
+            BlockContent::CodeChunk(CodeChunk {
+                programming_language,
+                text,
+                ..
+            }) => {
+                *block = BlockContent::CodeBlock(CodeBlock {
+                    programming_language: Some(Box::new(["{", programming_language, "}"].concat())),
+                    text: text.to_string(),
+                    ..Default::default()
+                })
+            }
+            // Transform the inline content of other block types
+            BlockContent::Paragraph(Paragraph { content, .. }) => transform_inlines(content),
+            _ => (),
+        }
+    }
+}
+
+fn transform_inlines(inlines: &mut Vec<InlineContent>) {
+    for inline in inlines {
+        match inline {
+            // Code expressions are transformed to code fragments prefixed with the language
+            InlineContent::CodeExpression(CodeExpression {
+                programming_language,
+                text,
+                ..
+            }) => {
+                *inline = InlineContent::CodeFragment(CodeFragment {
+                    text: [programming_language, " ", text].concat(),
+                    ..Default::default()
+                })
+            }
+            // Recursively transform other inlines
+            InlineContent::Delete(Delete { content, .. })
+            | InlineContent::Emphasis(Emphasis { content, .. })
+            | InlineContent::Subscript(Subscript { content, .. })
+            | InlineContent::Superscript(Superscript { content, .. })
+            | InlineContent::Strong(Strong { content, .. })
+            | InlineContent::NontextualAnnotation(NontextualAnnotation { content, .. }) => {
+                transform_inlines(content)
+            }
+            _ => (),
+        }
+    }
 }
diff --git a/rust/src/projects.rs b/rust/src/projects.rs
@@ -781,7 +781,7 @@ impl ProjectHandler {
             }
 
             // Should the event trigger a recompilation of the project's graph?
-            let should_compile_graph = |event_path: &Path| {
+            let should_compile_graph = |_event_path: &Path| {
                 // TODO: Filter based on whether the path is in the graph's nodes
                 true
             };

diff --git a/rust/tests/ende.rs b/rust/tests/ende.rs
@@ -101,6 +101,17 @@ proptest! {
         )
     }
 
+    #[cfg(all(feature="encode-rmd", feature="decode-rmd"))]
+    #[test]
+    fn rmd(input in article(Freedom::Min)) {
+        let content = encode::rmd::encode(&input).unwrap();
+        let output = decode::rmd::decode(&content).unwrap();
+        assert_eq!(
+            serde_json::to_value(&input).unwrap(),
+            serde_json::to_value(&output).unwrap()
+        )
+    }
+
     #[cfg(all(feature="encode-pandoc", feature="decode-pandoc"))]
     #[test]
     fn pandoc(input in article(Freedom::Min)) {

diff --git a/rust/tests/strategies/mod.rs b/rust/tests/strategies/mod.rs
@@ -46,6 +46,22 @@ prop_compose! {
     }
 }
 
+prop_compose! {
+    /// Generate inline content for use inside other inline content
+    pub fn inline_inner_content(freedom: Freedom)(
+        string in (match freedom {
+            Freedom::Min => r"string",
+            Freedom::Low => r"[A-Za-z0-9]+", // Note: no whitespace or "special" characters
+            _ => any::<String>(),
+        }).prop_filter(
+            "Inline strings should not be empty",
+            |string| !string.is_empty()
+        )
+    ) -> InlineContent {
+        InlineContent::String(string)
+    }
+}
+
 prop_compose! {
     /// Generate an arbitrary audio object
     /// Use audio file extensions because Markdown decoding uses that to determine
@@ -102,14 +118,18 @@ prop_compose! {
 
 prop_compose! {
     /// Generate a code expression node with arbitrary text and programming language
+    ///
+    /// With `Freedom::Low` only allow language codes that are recognized when decoding
+    /// formats such as R Markdown.
     pub fn code_expression(freedom: Freedom)(
         programming_language in match freedom {
-            Freedom::Min => "lang",
-            Freedom::Low => r"[A-Za-z0-9-]+",
+            Freedom::Min => "py",
+            Freedom::Low => r"js|py|r",
+            Freedom::High => r"[A-Za-z0-9-]+",
             _ => any::<String>()
         },
         text in match freedom {
-            Freedom::Min => r"text",
+            Freedom::Min => "text",
             Freedom::Low => r"[A-Za-z0-9-_ ]+",
             _ => any::<String>()
         },
@@ -152,7 +172,7 @@ prop_compose! {
 prop_compose! {
     /// Generate a delete node with arbitrary content
     pub fn delete(freedom: Freedom)(
-        content in string_no_whitespace(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Delete(Delete{
             content:vec![content],
@@ -164,7 +184,7 @@ prop_compose! {
 prop_compose! {
     /// Generate an emphasis node with arbitrary content
     pub fn emphasis(freedom: Freedom)(
-        content in string_no_whitespace(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Emphasis(Emphasis{
             content:vec![content],
@@ -181,7 +201,7 @@ prop_compose! {
             Freedom::Low => r"[A-Za-z0-9-]*",
             _ => any::<String>()
         },
-        content in string(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Link(Link{
             target,
@@ -194,7 +214,7 @@ prop_compose! {
 prop_compose! {
     /// Generate a nontextual annotation node with arbitrary content
     pub fn nontextual_annotation(freedom: Freedom)(
-        content in string(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::NontextualAnnotation(NontextualAnnotation{
             content:vec![content],
@@ -206,7 +226,7 @@ prop_compose! {
 prop_compose! {
     /// Generate a quote node with arbitrary content
     pub fn quote(freedom: Freedom)(
-        content in string(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Quote(Quote{
             content:vec![content],
@@ -218,7 +238,7 @@ prop_compose! {
 prop_compose! {
     /// Generate a strong node with arbitrary content
     pub fn strong(freedom: Freedom)(
-        content in string_no_whitespace(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Strong(Strong{
             content:vec![content],
@@ -230,7 +250,7 @@ prop_compose! {
 prop_compose! {
     /// Generate a subscript node with arbitrary content
     pub fn subscript(freedom: Freedom)(
-        content in string_no_whitespace(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Subscript(Subscript{
             content:vec![content],
@@ -242,7 +262,7 @@ prop_compose! {
 prop_compose! {
     /// Generate a superscript node with arbitrary content
     pub fn superscript(freedom: Freedom)(
-        content in string_no_whitespace(freedom)
+        content in inline_inner_content(freedom)
     ) -> InlineContent {
         InlineContent::Superscript(Superscript{
             content:vec![content],
@@ -493,14 +513,18 @@ prop_compose! {
 
 prop_compose! {
     /// Generate a code chunk
+    ///
+    /// With `Freedom::Low` only allow language codes that are recognized when decoding
+    /// formats such as R Markdown.
     pub fn code_chunk(freedom: Freedom)(
         programming_language in match freedom {
-            Freedom::Min => "lang",
-            Freedom::Low => r"[A-Za-z0-9-]+",
+            Freedom::Min => "py",
+            Freedom::Low => r"js|py|r",
+            Freedom::High => r"[A-Za-z0-9-]+",
             _ => any::<String>()
         },
         text in match freedom {
-            Freedom::Min => r"text",
+            Freedom::Min => "text",
             Freedom::Low => r"[A-Za-z0-9-_ ]+",
             _ => any::<String>()
         }