Skip to content

Commit

Permalink
feat(Pandoc): End-to-end encoding and decoding for code
Browse files Browse the repository at this point in the history
  • Loading branch information
nokome committed Aug 31, 2021
1 parent f9fb2ec commit 7d0d12d
Show file tree
Hide file tree
Showing 2 changed files with 111 additions and 14 deletions.
107 changes: 100 additions & 7 deletions rust/src/methods/decode/pandoc.rs
Expand Up @@ -4,7 +4,7 @@ use crate::{
methods::{coerce::coerce, encode::txt::ToTxt},
};
use eyre::{bail, Result};
use pandoc_types::definition as pandoc;
use pandoc_types::definition::{self as pandoc};
use std::{collections::HashMap, io::Write, path::PathBuf, process::Stdio};
use stencila_schema::{
Article, AudioObjectSimple, BlockContent, Cite, CiteCitationMode, CiteGroup, CodeBlock,
Expand All @@ -14,6 +14,8 @@ use stencila_schema::{
TableCellContent, TableRow, TableRowRowType, TableSimple, ThematicBreak, VideoObjectSimple,
};

use super::{json, rpng};

/// The semver requirement for Pandoc
/// Used on the `decode::pandoc` module as well.
pub const PANDOC_SEMVER: &str = "2.11";
Expand Down Expand Up @@ -177,10 +179,18 @@ fn translate_block(element: &pandoc::Block, context: &Context) -> Vec<BlockConte
})]
}

pandoc::Block::Para(inlines) => vec![BlockContent::Paragraph(Paragraph {
content: translate_inlines(inlines, context),
..Default::default()
})],
pandoc::Block::Para(inlines) => {
let content = translate_inlines(inlines, context);
if content.len() == 1 {
if let Some(translated) = try_code_chunk(&content[0]) {
return vec![translated];
}
}
vec![BlockContent::Paragraph(Paragraph {
content,
..Default::default()
})]
}

pandoc::Block::BlockQuote(blocks) => {
vec![BlockContent::QuoteBlock(QuoteBlock {
Expand Down Expand Up @@ -413,7 +423,7 @@ fn translate_inlines(elements: &[pandoc::Inline], context: &Context) -> Vec<Inli

/// Translate a Pandoc `Inline` element into a zero or more `InlineContent` nodes
fn translate_inline(element: &pandoc::Inline, context: &Context) -> Vec<InlineContent> {
match element {
let inlines = match element {
pandoc::Inline::Str(string) => vec![InlineContent::String(string.clone())],
pandoc::Inline::Space => vec![InlineContent::String(" ".to_string())],
pandoc::Inline::SoftBreak => vec![InlineContent::String("\u{2029}".to_string())],
Expand Down Expand Up @@ -556,7 +566,19 @@ fn translate_inline(element: &pandoc::Inline, context: &Context) -> Vec<InlineCo

// Element types not supported by Stencila
pandoc::Inline::LineBreak => vec![],
}
};

// Try to transform inline nodes as needed
inlines
.into_iter()
.map(|inline| {
if let Some(code_expression) = try_code_expression(&inline) {
code_expression
} else {
inline
}
})
.collect()
}

/// Get an attribute from a Pandoc `Attr` tuple struct
Expand Down Expand Up @@ -594,6 +616,77 @@ fn get_id(attrs: &pandoc::Attr) -> Option<Box<String>> {
get_attr(attrs, "id").and_then(|value| get_string_prop(&value))
}

/// Try to extract a `CodeExpression` from an RPNG representation
///
/// Two shapes of inline content are inspected:
///
/// - A `Link` with exactly one child: if the link's title is `"CodeExpression"` and
///   the child is itself a `CodeExpression`, a clone of that expression is returned;
///   otherwise the child is searched recursively.
/// - An `ImageObject`: the caption is decoded as JSON first and, if that does not
///   yield a `CodeExpression`, the image at `content_url` is decoded as an RPNG.
///
/// Returns `None` for any other inline content, or when no `CodeExpression`
/// could be recovered.
fn try_code_expression(inline: &InlineContent) -> Option<InlineContent> {
    match inline {
        InlineContent::Link(link) => {
            // Only links wrapping a single node are candidates
            if let [inner] = link.content.as_slice() {
                // If this is a link around a code expression and it has a
                // matching title then return that code expression.
                // The title is compared by reference so no `String` is allocated.
                let has_matching_title = link
                    .title
                    .as_deref()
                    .map_or(false, |title| title == "CodeExpression");
                if has_matching_title {
                    if let InlineContent::CodeExpression(expr) = inner {
                        return Some(InlineContent::CodeExpression(expr.clone()));
                    }
                }
                // Try to get a code expression from the inner content
                if let Some(expr) = try_code_expression(inner) {
                    return Some(expr);
                }
            }
            // Fallback to fetching code expression from the link's URL
            // TODO
        }
        InlineContent::ImageObject(image) => {
            // Try to get the code expression from the caption
            if let Some(caption) = image.caption.as_deref() {
                if let Ok(Node::CodeExpression(expr)) = json::decode(caption) {
                    return Some(InlineContent::CodeExpression(expr));
                }
            }
            // Fallback to getting from the image
            if let Ok(Node::CodeExpression(expr)) = rpng::decode(&image.content_url) {
                return Some(InlineContent::CodeExpression(expr));
            }
        }
        _ => (),
    };
    None
}

/// Try to extract a `CodeChunk` from an RPNG representation
///
/// A `Link` wrapping exactly one node is unwrapped and its child searched
/// recursively. An `ImageObject` is checked first via its caption (decoded as
/// JSON) and then via the image at `content_url` (decoded as an RPNG).
/// Anything else, or a failed decode, yields `None`.
fn try_code_chunk(inline: &InlineContent) -> Option<BlockContent> {
    if let InlineContent::Link(link) = inline {
        // Try to get a code chunk from the inner content, but only when the
        // link wraps a single node.
        // Fallback to fetching code chunk from the link's URL
        // TODO
        return match link.content.as_slice() {
            [only] => try_code_chunk(only),
            _ => None,
        };
    }

    if let InlineContent::ImageObject(image) = inline {
        // Try the caption first
        let from_caption = image
            .caption
            .as_deref()
            .and_then(|caption| match json::decode(caption) {
                Ok(Node::CodeChunk(chunk)) => Some(chunk),
                _ => None,
            });

        // Only decode the image itself when the caption gave nothing
        let chunk = from_caption.or_else(|| match rpng::decode(&image.content_url) {
            Ok(Node::CodeChunk(chunk)) => Some(chunk),
            _ => None,
        });

        return chunk.map(BlockContent::CodeChunk);
    }

    None
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
18 changes: 11 additions & 7 deletions rust/src/methods/encode/pandoc.rs
Expand Up @@ -97,15 +97,15 @@ impl Context {
}

/// Push a node to be encoded as an RPNG
fn push_rpng(&mut self, title: &str, node: Node) -> pandoc::Inline {
fn push_rpng(&mut self, type_name: &str, node: Node) -> pandoc::Inline {
let id = generate_chars(22);

let path = self
.temp_dir
.path()
.join([&id, ".png"].concat())
.to_slash_lossy();
let url = ["https://hub.stenci.la/", &id].concat();
let url = ["https://hub.stenci.la/api/nodes/", &id].concat();

let json = json::encode(
&node,
Expand All @@ -123,9 +123,9 @@ impl Context {
vec![pandoc::Inline::Image(
attrs_empty(),
vec![pandoc::Inline::Str(json)],
pandoc::Target(path, title.into()),
pandoc::Target(path, type_name.into()),
)],
pandoc::Target(url, title.into()),
pandoc::Target(url, type_name.into()),
)
}

Expand Down Expand Up @@ -238,7 +238,11 @@ unimplemented_to_pandoc!(Cite);

unimplemented_to_pandoc!(CiteGroup);

unimplemented_to_pandoc!(CodeExpression);
impl ToPandoc for CodeExpression {
    /// Encode a `CodeExpression` to a Pandoc inline element
    ///
    /// The expression is cloned into a `Node` and pushed onto the context to be
    /// encoded as an RPNG under the type name "CodeExpression".
    fn to_pandoc_inline(&self, context: &mut Context) -> pandoc::Inline {
        let node = Node::CodeExpression(self.clone());
        context.push_rpng("CodeExpression", node)
    }
}

impl ToPandoc for CodeFragment {
fn to_pandoc_inline(&self, _context: &mut Context) -> pandoc::Inline {
Expand Down Expand Up @@ -332,7 +336,7 @@ impl ToPandoc for CodeBlock {
}

impl ToPandoc for CodeChunk {
/// Encode a `CodeChunk` to a Pandoc bloc element
/// Encode a `CodeChunk` to a Pandoc block element
///
/// Encodes the code chunk as an RPNG.
/// Places any label and caption after the code chunk as normal text, rather than as screenshotted content.
Expand All @@ -344,7 +348,7 @@ impl ToPandoc for CodeChunk {
stripped.label = None;
stripped.caption = None;

let image = context.push_rpng("Code chunk", Node::CodeChunk(stripped));
let image = context.push_rpng("CodeChunk", Node::CodeChunk(stripped));
let image_para = pandoc::Block::Para(vec![image]);

let blocks = if label.is_some() || caption.is_some() {
Expand Down

0 comments on commit 7d0d12d

Please sign in to comment.