Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
fix(R markdown): Handling of specific format for code chunks and expressions
- Loading branch information
Showing
6 changed files
with
153 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
```{r} | ||
# No label, no options | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
A R Markdown code expression (inline code chunk): `r 1+1`. | ||
|
||
Plain old code fragments: `1+1`, `r2d2`, `r`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,92 @@ | ||
use super::md; | ||
use eyre::Result; | ||
use stencila_schema::Node; | ||
use stencila_schema::{ | ||
BlockContent, CodeBlock, CodeChunk, CodeExpression, CodeFragment, Delete, Emphasis, | ||
InlineContent, Node, NontextualAnnotation, Paragraph, Strong, Subscript, Superscript, | ||
}; | ||
|
||
/// Decode a R Markdown document to a `Node` | ||
pub fn decode(input: &str) -> Result<Node> { | ||
// TODO: Any necessary translations before parsing as Markdown | ||
md::decode(input) | ||
let mut node = md::decode(input)?; | ||
if let Node::Article(article) = &mut node { | ||
if let Some(content) = &mut article.content { | ||
transform_blocks(content) | ||
} | ||
} | ||
Ok(node) | ||
} | ||
|
||
fn transform_blocks(blocks: &mut Vec<BlockContent>) { | ||
for block in blocks { | ||
match block { | ||
BlockContent::CodeBlock(CodeBlock { | ||
programming_language, | ||
text, | ||
.. | ||
}) => { | ||
let programming_language = programming_language | ||
.clone() | ||
.map(|boxed| *boxed) | ||
.unwrap_or("".to_string()); | ||
if programming_language.starts_with("{r") && programming_language.ends_with("}") { | ||
*block = BlockContent::CodeChunk(CodeChunk { | ||
programming_language: "r".to_string(), | ||
text: text.to_string(), | ||
..Default::default() | ||
}) | ||
} | ||
} | ||
BlockContent::Paragraph(Paragraph { content, .. }) => transform_inlines(content), | ||
_ => (), | ||
} | ||
} | ||
} | ||
|
||
fn transform_inlines(inlines: &mut Vec<InlineContent>) { | ||
for inline in inlines { | ||
match inline { | ||
// Code fragments prefixed with `r` get transformed to a CodeExpression | ||
InlineContent::CodeFragment(CodeFragment { text, .. }) => { | ||
if let Some(text) = text.strip_prefix("r ") { | ||
*inline = InlineContent::CodeExpression(CodeExpression { | ||
programming_language: "r".to_string(), | ||
text: text.to_string(), | ||
..Default::default() | ||
}) | ||
} | ||
} | ||
// Recursively transform other inlines | ||
InlineContent::Delete(Delete { content, .. }) | ||
| InlineContent::Emphasis(Emphasis { content, .. }) | ||
| InlineContent::Subscript(Subscript { content, .. }) | ||
| InlineContent::Superscript(Superscript { content, .. }) | ||
| InlineContent::Strong(Strong { content, .. }) | ||
| InlineContent::NontextualAnnotation(NontextualAnnotation { content, .. }) => { | ||
transform_inlines(content) | ||
} | ||
_ => (), | ||
} | ||
} | ||
} | ||
|
||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::utils::tests::snapshot_content;
    use insta::assert_json_snapshot;

    /// Snapshot the decoded form of each R Markdown fragment fixture.
    // NOTE(review): the test function names and the asserted expression are
    // kept verbatim because the stored snapshots appear to be keyed on them.
    #[test]
    fn rmd_fragments() {
        snapshot_content("fragments/rmd/*.Rmd", |_fixture_path, content| {
            assert_json_snapshot!(decode(&content).unwrap());
        });
    }

    /// Snapshot the decoded form of each R Markdown article fixture.
    ///
    /// Marked `#[ignore]`, so it only runs when ignored tests are requested.
    #[ignore]
    #[test]
    fn rmd_articles() {
        snapshot_content("articles/*.Rmd", |_fixture_path, content| {
            assert_json_snapshot!(decode(&content).unwrap());
        });
    }
}
16 changes: 16 additions & 0 deletions
16
rust/src/methods/decode/snapshots/rmd_fragments@code-chunk.rmd.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
--- | ||
source: rust/src/methods/decode/rmd.rs | ||
expression: decode(&content).unwrap() | ||
input_file: fixtures/fragments/rmd/code-chunk.rmd | ||
|
||
--- | ||
{ | ||
"type": "Article", | ||
"content": [ | ||
{ | ||
"type": "CodeChunk", | ||
"programmingLanguage": "r", | ||
"text": "# No label, no options" | ||
} | ||
] | ||
} |
44 changes: 44 additions & 0 deletions
44
rust/src/methods/decode/snapshots/rmd_fragments@code-expression.rmd.snap
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
--- | ||
source: rust/src/methods/decode/rmd.rs | ||
expression: decode(&content).unwrap() | ||
input_file: fixtures/fragments/rmd/code-expression.rmd | ||
|
||
--- | ||
{ | ||
"type": "Article", | ||
"content": [ | ||
{ | ||
"type": "Paragraph", | ||
"content": [ | ||
"A R Markdown code expression (inline code chunk): ", | ||
{ | ||
"type": "CodeExpression", | ||
"programmingLanguage": "r", | ||
"text": "1+1" | ||
}, | ||
"." | ||
] | ||
}, | ||
{ | ||
"type": "Paragraph", | ||
"content": [ | ||
"Plain old code fragments: ", | ||
{ | ||
"type": "CodeFragment", | ||
"text": "1+1" | ||
}, | ||
", ", | ||
{ | ||
"type": "CodeFragment", | ||
"text": "r2d2" | ||
}, | ||
", ", | ||
{ | ||
"type": "CodeFragment", | ||
"text": "r" | ||
}, | ||
"." | ||
] | ||
} | ||
] | ||
} |