Skip to content

Commit

Permalink
fix(R Markdown): Implement encoding
Browse files Browse the repository at this point in the history
  • Loading branch information
nokome committed Aug 31, 2021
1 parent 9975b42 commit b7cb681
Show file tree
Hide file tree
Showing 6 changed files with 173 additions and 44 deletions.
58 changes: 43 additions & 15 deletions fixtures/articles/simple.Rmd
Expand Up @@ -3,28 +3,56 @@ title: A simple R Markdown article
keywords: text, fixture, Markdown
---

# Introduction
This is a relatively simple test article written in R Markdown. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

A simple Markdown article for testing. When making changes please note that test snapshots based on this fixture may need to be updated.
## Code chunks

# Methods
A basic code chunk has the language code surrounded by curly braces e.g.

This is the methods section.
```{r}
summary(cars)
```

Chunks can also have a label and options e.g.

```{r pressure, echo=FALSE}
plot(pressure)
```

### Using fig.cap option

The `fig.cap` option can be used to set the code chunk caption:

```{r fig1, fig.width=8, fig.cap='A plot'}
plot(data)
```

# Results
```{r table2, fig.cap='A table'}
head(data)
```

Bookdown-style `fig.cap` "text references" are also supported,

```{r figure3, fig.cap='(ref:fig3)'}
# R code here
```

The results include a table (Table 1).
(ref:fig3) A caption for figure 3.

| Group | Value |
| ----- | ----- |
| A | 1.1 |
| B | 2.2 |
Some intermediary content

# Discussion
```{r figure4, fig.cap='(ref:figure-4)'}
# R code here but non-existent figure reference
```

This is the discussion section.
And follow up with a duplicate figure reference

```r
# Some R code
a <- 1
```{r fig5, fig.cap='(ref:fig3)'}
# R code here
```

(ref:fig3) **Distinct and dorsoventrally organized properties of layer 2 stellate cells.**(**A**) Representative action potential after hyperpolarization waveforms from a SC (left), a pyramidal cell (middle) and an unidentified cell (right). The pyramidal and unidentified cells were...

## Code expressions

In R Markdown code expressions are represented using back ticks prefixed with the language e.g. `r 1+2`.
30 changes: 19 additions & 11 deletions rust/src/methods/decode/rmd.rs
Expand Up @@ -5,6 +5,8 @@ use stencila_schema::{
InlineContent, Node, NontextualAnnotation, Paragraph, Strong, Subscript, Superscript,
};

const LANGUAGES: &[&str] = &["r", "py", "python", "js", "javascript"];

/// Decode an R Markdown document to a `Node`
pub fn decode(input: &str) -> Result<Node> {
let mut node = md::decode(input)?;
Expand All @@ -19,23 +21,26 @@ pub fn decode(input: &str) -> Result<Node> {
fn transform_blocks(blocks: &mut Vec<BlockContent>) {
for block in blocks {
match block {
// Code blocks with curly braced language are transformed to code chunks
BlockContent::CodeBlock(CodeBlock {
programming_language,
text,
..
}) => {
let programming_language = programming_language
let lang = programming_language
.clone()
.map(|boxed| *boxed)
.unwrap_or("".to_string());
if programming_language.starts_with("{r") && programming_language.ends_with("}") {
.unwrap_or_else(|| "".to_string());
if lang.starts_with('{') && lang.ends_with('}') {
let lang = lang[1..(lang.len() - 1)].to_string();
*block = BlockContent::CodeChunk(CodeChunk {
programming_language: "r".to_string(),
programming_language: lang,
text: text.to_string(),
..Default::default()
})
}
}
// Transform the inline content of other block types
BlockContent::Paragraph(Paragraph { content, .. }) => transform_inlines(content),
_ => (),
}
Expand All @@ -45,14 +50,17 @@ fn transform_blocks(blocks: &mut Vec<BlockContent>) {
fn transform_inlines(inlines: &mut Vec<InlineContent>) {
for inline in inlines {
match inline {
// Code fragments prefixed with `r` get transformed to a CodeExpression
// Code fragments prefixed with a language code get transformed to a code expression
InlineContent::CodeFragment(CodeFragment { text, .. }) => {
if let Some(text) = text.strip_prefix("r ") {
*inline = InlineContent::CodeExpression(CodeExpression {
programming_language: "r".to_string(),
text: text.to_string(),
..Default::default()
})
for lang in LANGUAGES {
if let Some(text) = text.strip_prefix(&[lang, " "].concat()) {
*inline = InlineContent::CodeExpression(CodeExpression {
programming_language: lang.to_string(),
text: text.to_string(),
..Default::default()
});
break;
}
}
}
// Recursively transform other inlines
Expand Down
64 changes: 61 additions & 3 deletions rust/src/methods/encode/rmd.rs
@@ -1,9 +1,67 @@
use super::md;
use eyre::Result;
use stencila_schema::Node;
use stencila_schema::{
BlockContent, CodeBlock, CodeChunk, CodeExpression, CodeFragment, Delete, Emphasis,
InlineContent, Node, NontextualAnnotation, Paragraph, Strong, Subscript, Superscript,
};

/// Encode a `Node` to R Markdown
pub fn encode(node: &Node) -> Result<String> {
// TODO: Any necessary translations of Markdown to RMarkdown
md::encode(node)
let mut node = node.clone();
if let Node::Article(article) = &mut node {
if let Some(content) = &mut article.content {
transform_blocks(content)
}
}
md::encode(&node)
}

/// Rewrite blocks in place so that they encode as R Markdown.
///
/// Each `CodeChunk` is replaced by a `CodeBlock` whose language is the chunk's
/// language wrapped in curly braces (e.g. `r` becomes `{r}`) and whose text is
/// the chunk's text. The inline content of each `Paragraph` is handed to
/// `transform_inlines`. All other block types are left unchanged.
fn transform_blocks(blocks: &mut Vec<BlockContent>) {
    for block in blocks.iter_mut() {
        match block {
            BlockContent::CodeChunk(chunk) => {
                // Build the replacement's fields first so the borrow of the
                // chunk has ended before `block` is overwritten.
                let braced_language = format!("{{{}}}", chunk.programming_language);
                let code = chunk.text.to_string();
                *block = BlockContent::CodeBlock(CodeBlock {
                    programming_language: Some(Box::new(braced_language)),
                    text: code,
                    ..Default::default()
                });
            }
            BlockContent::Paragraph(paragraph) => transform_inlines(&mut paragraph.content),
            _ => (),
        }
    }
}

/// Rewrite inlines in place so that they encode as R Markdown.
///
/// Each `CodeExpression` is replaced by a `CodeFragment` whose text is the
/// expression's language, a single space, and then the expression's text.
/// The content of `Delete`, `Emphasis`, `Subscript`, `Superscript`, `Strong`
/// and `NontextualAnnotation` inlines is transformed recursively. All other
/// inline types are left unchanged.
fn transform_inlines(inlines: &mut Vec<InlineContent>) {
    for inline in inlines.iter_mut() {
        match inline {
            InlineContent::CodeExpression(expression) => {
                // Language prefix and code are separated by exactly one space.
                let prefixed = format!("{} {}", expression.programming_language, expression.text);
                *inline = InlineContent::CodeFragment(CodeFragment {
                    text: prefixed,
                    ..Default::default()
                });
            }
            // Inlines that wrap other inlines: descend into their content
            InlineContent::Delete(Delete { content, .. })
            | InlineContent::Emphasis(Emphasis { content, .. })
            | InlineContent::Subscript(Subscript { content, .. })
            | InlineContent::Superscript(Superscript { content, .. })
            | InlineContent::Strong(Strong { content, .. })
            | InlineContent::NontextualAnnotation(NontextualAnnotation { content, .. }) => {
                transform_inlines(content)
            }
            _ => (),
        }
    }
}
2 changes: 1 addition & 1 deletion rust/src/projects.rs
Expand Up @@ -781,7 +781,7 @@ impl ProjectHandler {
}

// Should the event trigger a recompilation of the project's graph?
let should_compile_graph = |event_path: &Path| {
let should_compile_graph = |_event_path: &Path| {
// TODO: Filter based on whether the path is in the graph's nodes
true
};
Expand Down
11 changes: 11 additions & 0 deletions rust/tests/ende.rs
Expand Up @@ -101,6 +101,17 @@ proptest! {
)
}

// Round trip an article through R Markdown: encode the generated input,
// decode the resulting text, and require that the JSON serialization of the
// decoded node equals that of the input. Only compiled when both the
// `encode-rmd` and `decode-rmd` features are enabled.
#[cfg(all(feature="encode-rmd", feature="decode-rmd"))]
#[test]
fn rmd(input in article(Freedom::Min)) {
let content = encode::rmd::encode(&input).unwrap();
let output = decode::rmd::decode(&content).unwrap();
// Compare via JSON values rather than comparing the nodes directly
assert_eq!(
serde_json::to_value(&input).unwrap(),
serde_json::to_value(&output).unwrap()
)
}

#[cfg(all(feature="encode-pandoc", feature="decode-pandoc"))]
#[test]
fn pandoc(input in article(Freedom::Min)) {
Expand Down
52 changes: 38 additions & 14 deletions rust/tests/strategies/mod.rs
Expand Up @@ -46,6 +46,22 @@ prop_compose! {
}
}

prop_compose! {
/// Generate inline content for inside other inline content
///
/// Always yields an `InlineContent::String`. The string is drawn from
/// `r"string"` for `Freedom::Min`, from `r"[A-Za-z0-9]+"` for `Freedom::Low`,
/// and from `any::<String>()` for every other freedom level. Empty strings
/// are rejected by the filter.
pub fn inline_inner_content(freedom: Freedom)(
string in (match freedom {
Freedom::Min => r"string",
Freedom::Low => r"[A-Za-z0-9]+", // Note: no whitespace or "special" characters
_ => any::<String>(),
}).prop_filter(
"Inline strings should not be empty",
|string| !string.is_empty()
)
) -> InlineContent {
InlineContent::String(string)
}
}

prop_compose! {
/// Generate an arbitrary audio object
/// Use audio file extensions because Markdown decoding uses that to determine
Expand Down Expand Up @@ -102,14 +118,18 @@ prop_compose! {

prop_compose! {
/// Generate a code expression node with arbitrary text and programming language
///
/// With `Freedom::Low` only allow language codes that are recognized when decoding
/// formats such as R Markdown.
pub fn code_expression(freedom: Freedom)(
programming_language in match freedom {
Freedom::Min => "lang",
Freedom::Low => r"[A-Za-z0-9-]+",
Freedom::Min => "py",
Freedom::Low => r"js|py|r",
Freedom::High => r"[A-Za-z0-9-]+",
_ => any::<String>()
},
text in match freedom {
Freedom::Min => r"text",
Freedom::Min => "text",
Freedom::Low => r"[A-Za-z0-9-_ ]+",
_ => any::<String>()
},
Expand Down Expand Up @@ -152,7 +172,7 @@ prop_compose! {
prop_compose! {
/// Generate a delete node with arbitrary content
pub fn delete(freedom: Freedom)(
content in string_no_whitespace(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Delete(Delete{
content:vec![content],
Expand All @@ -164,7 +184,7 @@ prop_compose! {
prop_compose! {
/// Generate an emphasis node with arbitrary content
pub fn emphasis(freedom: Freedom)(
content in string_no_whitespace(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Emphasis(Emphasis{
content:vec![content],
Expand All @@ -181,7 +201,7 @@ prop_compose! {
Freedom::Low => r"[A-Za-z0-9-]*",
_ => any::<String>()
},
content in string(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Link(Link{
target,
Expand All @@ -194,7 +214,7 @@ prop_compose! {
prop_compose! {
/// Generate a nontextual annotation node with arbitrary content
pub fn nontextual_annotation(freedom: Freedom)(
content in string(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::NontextualAnnotation(NontextualAnnotation{
content:vec![content],
Expand All @@ -206,7 +226,7 @@ prop_compose! {
prop_compose! {
/// Generate a quote node with arbitrary content
pub fn quote(freedom: Freedom)(
content in string(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Quote(Quote{
content:vec![content],
Expand All @@ -218,7 +238,7 @@ prop_compose! {
prop_compose! {
/// Generate a strong node with arbitrary content
pub fn strong(freedom: Freedom)(
content in string_no_whitespace(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Strong(Strong{
content:vec![content],
Expand All @@ -230,7 +250,7 @@ prop_compose! {
prop_compose! {
/// Generate a subscript node with arbitrary content
pub fn subscript(freedom: Freedom)(
content in string_no_whitespace(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Subscript(Subscript{
content:vec![content],
Expand All @@ -242,7 +262,7 @@ prop_compose! {
prop_compose! {
/// Generate a superscript node with arbitrary content
pub fn superscript(freedom: Freedom)(
content in string_no_whitespace(freedom)
content in inline_inner_content(freedom)
) -> InlineContent {
InlineContent::Superscript(Superscript{
content:vec![content],
Expand Down Expand Up @@ -493,14 +513,18 @@ prop_compose! {

prop_compose! {
/// Generate a code chunk
///
/// With `Freedom::Low` only allow language codes that are recognized when decoding
/// formats such as R Markdown.
pub fn code_chunk(freedom: Freedom)(
programming_language in match freedom {
Freedom::Min => "lang",
Freedom::Low => r"[A-Za-z0-9-]+",
Freedom::Min => "py",
Freedom::Low => r"js|py|r",
Freedom::High => r"[A-Za-z0-9-]+",
_ => any::<String>()
},
text in match freedom {
Freedom::Min => r"text",
Freedom::Min => "text",
Freedom::Low => r"[A-Za-z0-9-_ ]+",
_ => any::<String>()
}
Expand Down

0 comments on commit b7cb681

Please sign in to comment.