Skip to content

Commit

Permalink
fix(R markdown): Handling of specific format for code chunks and expr…
Browse files Browse the repository at this point in the history
…essions
  • Loading branch information
nokome committed Aug 31, 2021
1 parent eae08fc commit 9975b42
Show file tree
Hide file tree
Showing 6 changed files with 153 additions and 4 deletions.
3 changes: 3 additions & 0 deletions fixtures/fragments/rmd/code-chunk.rmd
@@ -0,0 +1,3 @@
```{r}
# No label, no options
```
3 changes: 3 additions & 0 deletions fixtures/fragments/rmd/code-expression.rmd
@@ -0,0 +1,3 @@
A R Markdown code expression (inline code chunk): `r 1+1`.

Plain old code fragments: `1+1`, `r2d2`, `r`.
2 changes: 1 addition & 1 deletion rust/Cargo.toml
Expand Up @@ -50,7 +50,7 @@ decode-docx = ["decode-pandoc"]
decode-html = ["kuchiki", "markup5ever"]
decode-json = []
decode-latex = ["decode-pandoc"]
decode-md = ["pulldown-cmark", "nom", "decode-html"]
decode-md = ["pulldown-cmark", "nom", "coerce", "decode-html", "encode-txt"]
decode-pandoc = ["binaries", "pandoc_types"]
decode-person = ["human_name"]
decode-rmd = ["decode-md"]
Expand Down
89 changes: 86 additions & 3 deletions rust/src/methods/decode/rmd.rs
@@ -1,9 +1,92 @@
use super::md;
use eyre::Result;
use stencila_schema::Node;
use stencila_schema::{
BlockContent, CodeBlock, CodeChunk, CodeExpression, CodeFragment, Delete, Emphasis,
InlineContent, Node, NontextualAnnotation, Paragraph, Strong, Subscript, Superscript,
};

/// Decode an R Markdown document to a `Node`
///
/// The input is first decoded as plain Markdown by `md::decode`. If the result
/// is an `Article` with content, its blocks are then passed through
/// `transform_blocks` to apply the R Markdown specific transformations.
/// Any other kind of node is returned exactly as `md::decode` produced it.
///
/// # Errors
///
/// Returns any error produced by `md::decode`.
pub fn decode(input: &str) -> Result<Node> {
    let mut node = md::decode(input)?;
    if let Node::Article(article) = &mut node {
        if let Some(content) = &mut article.content {
            transform_blocks(content)
        }
    }
    Ok(node)
}

/// Transform Markdown-decoded blocks into their R Markdown equivalents, in place.
///
/// - A `CodeBlock` whose programming language is an R chunk header (e.g. `{r}`,
///   `{r label}`, `{r, echo=FALSE}`) is replaced by a `CodeChunk` with
///   `programming_language` set to `"r"` and the same `text`. All other fields
///   of the new chunk take their default values, so anything else written in
///   the header is not carried over.
/// - The inline content of each `Paragraph` is passed to `transform_inlines`.
/// - Every other kind of block is left untouched; only the blocks in the given
///   vector are visited.
fn transform_blocks(blocks: &mut Vec<BlockContent>) {
    for block in blocks {
        match block {
            BlockContent::CodeBlock(CodeBlock {
                programming_language,
                text,
                ..
            }) => {
                // Borrow the language rather than cloning it: it is only inspected.
                let is_r_chunk = programming_language
                    .as_deref()
                    .map_or(false, |lang| is_r_chunk_header(lang));
                if is_r_chunk {
                    *block = BlockContent::CodeChunk(CodeChunk {
                        programming_language: "r".to_string(),
                        text: text.to_string(),
                        ..Default::default()
                    })
                }
            }
            BlockContent::Paragraph(Paragraph { content, .. }) => transform_inlines(content),
            _ => (),
        }
    }
}

/// Whether a code block's language string is a chunk header for the `r` engine.
///
/// The string must be wrapped in braces and begin with `r`. The `r` must be the
/// whole engine name: it has to be followed by nothing (`{r}`) or by a character
/// that cannot continue an identifier (e.g. a space or comma). This keeps
/// headers for other engines whose names merely start with `r`, such as
/// `{ruby}` or `{rcpp}`, from being treated as R.
fn is_r_chunk_header(lang: &str) -> bool {
    lang.strip_prefix("{r")
        .and_then(|rest| rest.strip_suffix('}'))
        .map_or(false, |rest| {
            rest.chars()
                .next()
                .map_or(true, |first| !(first.is_alphanumeric() || first == '_'))
        })
}

/// Transform Markdown-decoded inline content into its R Markdown equivalent, in place.
///
/// A `CodeFragment` whose text starts with `"r "` becomes a `CodeExpression`
/// with `programming_language` set to `"r"` and the remainder of the text as
/// its `text`. Fragments without that prefix (e.g. `r2d2` or a bare `r`) are
/// kept as they are. The contents of `Delete`, `Emphasis`, `Subscript`,
/// `Superscript`, `Strong` and `NontextualAnnotation` nodes are transformed
/// recursively; every other inline is left alone.
fn transform_inlines(inlines: &mut Vec<InlineContent>) {
    inlines.iter_mut().for_each(|inline| match inline {
        InlineContent::CodeFragment(CodeFragment { text, .. }) => {
            // Only fragments carrying the `r ` marker are code expressions
            let code = match text.strip_prefix("r ") {
                Some(code) => code.to_string(),
                None => return,
            };
            *inline = InlineContent::CodeExpression(CodeExpression {
                programming_language: "r".to_string(),
                text: code,
                ..Default::default()
            });
        }
        // Inlines that wrap other inlines: descend into their content
        InlineContent::Delete(Delete { content, .. })
        | InlineContent::Emphasis(Emphasis { content, .. })
        | InlineContent::Subscript(Subscript { content, .. })
        | InlineContent::Superscript(Superscript { content, .. })
        | InlineContent::Strong(Strong { content, .. })
        | InlineContent::NontextualAnnotation(NontextualAnnotation { content, .. }) => {
            transform_inlines(content)
        }
        _ => {}
    });
}

#[cfg(test)]
mod tests {
use super::*;
use crate::utils::tests::snapshot_content;
use insta::assert_json_snapshot;

#[ignore]
#[test]
fn rmd_articles() {
snapshot_content("articles/*.Rmd", |_path, content| {
assert_json_snapshot!(decode(&content).unwrap());
});
}

#[test]
fn rmd_fragments() {
snapshot_content("fragments/rmd/*.Rmd", |_path, content| {
assert_json_snapshot!(decode(&content).unwrap());
});
}
}
@@ -0,0 +1,16 @@
---
source: rust/src/methods/decode/rmd.rs
expression: decode(&content).unwrap()
input_file: fixtures/fragments/rmd/code-chunk.rmd

---
{
"type": "Article",
"content": [
{
"type": "CodeChunk",
"programmingLanguage": "r",
"text": "# No label, no options"
}
]
}
@@ -0,0 +1,44 @@
---
source: rust/src/methods/decode/rmd.rs
expression: decode(&content).unwrap()
input_file: fixtures/fragments/rmd/code-expression.rmd

---
{
"type": "Article",
"content": [
{
"type": "Paragraph",
"content": [
"A R Markdown code expression (inline code chunk): ",
{
"type": "CodeExpression",
"programmingLanguage": "r",
"text": "1+1"
},
"."
]
},
{
"type": "Paragraph",
"content": [
"Plain old code fragments: ",
{
"type": "CodeFragment",
"text": "1+1"
},
", ",
{
"type": "CodeFragment",
"text": "r2d2"
},
", ",
{
"type": "CodeFragment",
"text": "r"
},
"."
]
}
]
}

0 comments on commit 9975b42

Please sign in to comment.