Skip to content

Commit

Permalink
Merge pull request from GHSA-5r3x-p7xx-x6q5
Browse files — browse the repository at this point in the history
store/expose String in AST, not Vec<u8>
  • Loading branch information
kivikakk committed Mar 28, 2023
2 parents 70f97f3 + 22aeda8 commit 9ff5f8d
Show file tree
Hide file tree
Showing 20 changed files with 417 additions and 395 deletions.
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
src/scanners.rs: src/scanners.re
re2rust -W -Werror -i --no-generation-date -o $@ $<
cargo fmt

bench:
cargo build --release
Expand Down
10 changes: 5 additions & 5 deletions examples/headers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,23 @@ fn get_document_title(document: &str) -> String {
continue;
}

let mut text = Vec::new();
let mut text = String::new();
collect_text(node, &mut text);

// The input was already known good UTF-8 (document: &str) so comrak
// guarantees the output will be too.
return String::from_utf8(text).unwrap();
return text;
}

"Untitled Document".to_string()
}

fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut Vec<u8>) {
fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut String) {
match node.data.borrow().value {
NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) => {
output.extend_from_slice(literal)
output.push_str(literal)
}
NodeValue::LineBreak | NodeValue::SoftBreak => output.push(b' '),
NodeValue::LineBreak | NodeValue::SoftBreak => output.push(' '),
_ => {
for n in node.children() {
collect_text(n, output);
Expand Down
15 changes: 3 additions & 12 deletions examples/s-expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,20 @@ fn iter_nodes<'a, W: Write>(
macro_rules! try_node_inline {
($node:expr, $name:ident) => {{
if let $name(t) = $node {
return write!(
writer,
concat!(stringify!($name), "({:?})"),
String::from_utf8_lossy(&t)
);
return write!(writer, concat!(stringify!($name), "({:?})"), t,);
}
}};
}

match &node.data.borrow().value {
Text(t) => write!(writer, "{:?}", String::from_utf8_lossy(&t))?,
Text(t) => write!(writer, "{:?}", t)?,
value => {
try_node_inline!(value, FootnoteDefinition);
try_node_inline!(value, FootnoteReference);
try_node_inline!(value, HtmlInline);

if let Code(code) = value {
return write!(
writer,
"Code({:?}, {})",
String::from_utf8_lossy(&code.literal),
code.num_backticks
);
return write!(writer, "Code({:?}, {})", code.literal, code.num_backticks);
}

let has_blocks = node.children().any(|c| c.data.borrow().value.block());
Expand Down
8 changes: 2 additions & 6 deletions examples/sample.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,8 @@ fn large() {

iter_nodes(root, &|node| {
if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value {
let orig = std::mem::replace(text, vec![]);
*text = String::from_utf8(orig)
.unwrap()
.replace("my", "your")
.as_bytes()
.to_vec();
let orig = std::mem::take(text);
*text = orig.replace("my", "your");
}
});

Expand Down
20 changes: 12 additions & 8 deletions examples/update-readme.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
// Update the "comrak --help" text in Comrak's own README.

use std::fmt::Write;
use std::str;

use comrak::nodes::{AstNode, NodeValue};
use comrak::{format_commonmark, parse_document, Arena, ComrakOptions};

Expand All @@ -25,22 +28,23 @@ fn main() -> Result<(), Box<dyn std::error::Error + 'static>> {
iter_nodes(doc, &|node| {
if let NodeValue::CodeBlock(ref mut ncb) = node.data.borrow_mut().value {
// Look for the Cargo.toml example block.
if ncb.info == "toml".as_bytes() && ncb.literal.starts_with(&DEPENDENCIES.as_bytes()) {
let mut content = DEPENDENCIES.as_bytes().to_vec();
if ncb.info == "toml" && ncb.literal.starts_with(DEPENDENCIES) {
let mut content = DEPENDENCIES.to_string();
let mut version_parts = comrak::version().split('.').collect::<Vec<&str>>();
version_parts.pop();
content.extend("\"".bytes());
content.extend(version_parts.join(".").bytes());
content.extend("\"".bytes());
write!(content, "\"{}\"", version_parts.join(".")).unwrap();
ncb.literal = content;
}

// Look for a console code block whose contents starts with the HELP string.
// Replace its contents with the same string and the actual command output.
if ncb.info == "console".as_bytes() && ncb.literal.starts_with(&HELP.as_bytes()) {
let mut content = HELP.as_bytes().to_vec();
if ncb.info == "console" && ncb.literal.starts_with(HELP) {
let mut content = HELP.to_string();
let mut cmd = std::process::Command::new("cargo");
content.extend(cmd.args(&["run", "--", "--help"]).output().unwrap().stdout);
content.push_str(
str::from_utf8(&cmd.args(["run", "--", "--help"]).output().unwrap().stdout)
.unwrap(),
);
ncb.literal = content;
}
}
Expand Down
7 changes: 7 additions & 0 deletions proptest-regressions/tests.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Seeds for failure cases proptest has generated in the past. It is
# automatically read and these particular cases re-run before any
# novel cases are generated.
#
# It is recommended to check this file in to source control so that
# everyone who runs the test benefits from these saved cases.
cc d71fb284045e89bb3bd8a1bbe634d42f3e7a5dd8074ff995fb482f0554f59eb1 # shrinks to ("A-",)
98 changes: 48 additions & 50 deletions src/cm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::parser::shortcodes::NodeShortCode;
use crate::parser::ComrakOptions;
use crate::scanners;
use crate::{nodes, ComrakPlugins};
use std;

use std::cmp::max;
use std::io::{self, Write};

Expand Down Expand Up @@ -58,7 +58,7 @@ struct CommonMarkFormatter<'a, 'o> {
enum Escaping {
Literal,
Normal,
URL,
Url,
Title,
}

Expand Down Expand Up @@ -209,7 +209,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
&& (c == b'.' || c == b')')
&& follows_digit
&& (nextc == 0 || isspace(nextc)))))
|| (escaping == Escaping::URL
|| (escaping == Escaping::Url
&& (c == b'`'
|| c == b'<'
|| c == b'>'
Expand All @@ -221,7 +221,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
&& (c == b'`' || c == b'<' || c == b'>' || c == b'"' || c == b'\\')));

if needs_escaping {
if escaping == Escaping::URL && isspace(c) {
if escaping == Escaping::Url && isspace(c) {
write!(self.v, "%{:2X}", c).unwrap();
self.column += 3;
} else if ispunct(c) {
Expand Down Expand Up @@ -310,7 +310,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {

match node.data.borrow().value {
NodeValue::Document => (),
NodeValue::FrontMatter(ref fm) => self.format_front_matter(fm, entering),
NodeValue::FrontMatter(ref fm) => self.format_front_matter(fm.as_bytes(), entering),
NodeValue::BlockQuote => self.format_block_quote(entering),
NodeValue::List(..) => self.format_list(node, entering),
NodeValue::Item(..) => self.format_item(node, entering),
Expand All @@ -323,16 +323,20 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
NodeValue::HtmlBlock(ref nhb) => self.format_html_block(nhb, entering),
NodeValue::ThematicBreak => self.format_thematic_break(entering),
NodeValue::Paragraph => self.format_paragraph(entering),
NodeValue::Text(ref literal) => self.format_text(literal, allow_wrap, entering),
NodeValue::Text(ref literal) => {
self.format_text(literal.as_bytes(), allow_wrap, entering)
}
NodeValue::LineBreak => self.format_line_break(entering),
NodeValue::SoftBreak => self.format_soft_break(allow_wrap, entering),
NodeValue::Code(ref code) => self.format_code(&code.literal, allow_wrap, entering),
NodeValue::HtmlInline(ref literal) => self.format_html_inline(literal, entering),
NodeValue::Code(ref code) => {
self.format_code(code.literal.as_bytes(), allow_wrap, entering)
}
NodeValue::HtmlInline(ref literal) => {
self.format_html_inline(literal.as_bytes(), entering)
}
NodeValue::Strong => self.format_strong(),
NodeValue::Emph => self.format_emph(node),
NodeValue::TaskItem { checked, symbol } => {
self.format_task_item(checked, symbol, entering)
}
NodeValue::TaskItem { symbol } => self.format_task_item(symbol, entering),
NodeValue::Strikethrough => self.format_strikethrough(),
NodeValue::Superscript => self.format_superscript(),
NodeValue::Link(ref nl) => return self.format_link(node, nl, entering),
Expand All @@ -343,12 +347,14 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
NodeValue::TableRow(..) => self.format_table_row(entering),
NodeValue::TableCell => self.format_table_cell(node, entering),
NodeValue::FootnoteDefinition(_) => self.format_footnote_definition(entering),
NodeValue::FootnoteReference(ref r) => self.format_footnote_reference(r, entering),
NodeValue::FootnoteReference(ref r) => {
self.format_footnote_reference(r.as_bytes(), entering)
}
};
true
}

fn format_front_matter(&mut self, front_matter: &Vec<u8>, entering: bool) {
fn format_front_matter(&mut self, front_matter: &[u8], entering: bool) {
if entering {
self.output(front_matter, false, Escaping::Literal);
}
Expand Down Expand Up @@ -467,30 +473,33 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
self.blankline();
}

if ncb.info.is_empty()
&& (ncb.literal.len() > 2
&& !isspace(ncb.literal[0])
&& !(isspace(ncb.literal[ncb.literal.len() - 1])
&& isspace(ncb.literal[ncb.literal.len() - 2])))
let info = ncb.info.as_bytes();
let literal = ncb.literal.as_bytes();

if info.is_empty()
&& (literal.len() > 2
&& !isspace(literal[0])
&& !(isspace(literal[literal.len() - 1])
&& isspace(literal[literal.len() - 2])))
&& !first_in_list_item
{
write!(self, " ").unwrap();
write!(self.prefix, " ").unwrap();
self.write_all(&ncb.literal).unwrap();
self.write_all(literal).unwrap();
let new_len = self.prefix.len() - 4;
self.prefix.truncate(new_len);
} else {
let fence_char = if ncb.info.contains(&b'`') { b'~' } else { b'`' };
let numticks = max(3, longest_char_sequence(&ncb.literal, fence_char) + 1);
let fence_char = if info.contains(&b'`') { b'~' } else { b'`' };
let numticks = max(3, longest_char_sequence(literal, fence_char) + 1);
for _ in 0..numticks {
write!(self, "{}", fence_char as char).unwrap();
}
if !ncb.info.is_empty() {
if !info.is_empty() {
write!(self, " ").unwrap();
self.write_all(&ncb.info).unwrap();
self.write_all(info).unwrap();
}
self.cr();
self.write_all(&ncb.literal).unwrap();
self.write_all(literal).unwrap();
self.cr();
for _ in 0..numticks {
write!(self, "{}", fence_char as char).unwrap();
Expand All @@ -503,7 +512,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
fn format_html_block(&mut self, nhb: &NodeHtmlBlock, entering: bool) {
if entering {
self.blankline();
self.write_all(&nhb.literal).unwrap();
self.write_all(nhb.literal.as_bytes()).unwrap();
self.blankline();
}
}
Expand All @@ -522,7 +531,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
}

fn format_text(&mut self, literal: &Vec<u8>, allow_wrap: bool, entering: bool) {
fn format_text(&mut self, literal: &[u8], allow_wrap: bool, entering: bool) {
if entering {
self.output(literal, allow_wrap, Escaping::Normal);
}
Expand Down Expand Up @@ -550,7 +559,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
}

fn format_code(&mut self, literal: &Vec<u8>, allow_wrap: bool, entering: bool) {
fn format_code(&mut self, literal: &[u8], allow_wrap: bool, entering: bool) {
if entering {
let numticks = shortest_unused_sequence(literal, b'`');
for _ in 0..numticks {
Expand All @@ -577,7 +586,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
}

fn format_html_inline(&mut self, literal: &Vec<u8>, entering: bool) {
fn format_html_inline(&mut self, literal: &[u8], entering: bool) {
if entering {
self.write_all(literal).unwrap();
}
Expand All @@ -602,9 +611,9 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
self.write_all(&[emph_delim]).unwrap();
}

fn format_task_item(&mut self, _checked: bool, symbol: u8, entering: bool) {
fn format_task_item(&mut self, symbol: Option<char>, entering: bool) {
if entering {
write!(self, "[{}] ", symbol as char).unwrap();
write!(self, "[{}] ", symbol.unwrap_or(' ')).unwrap();
}
}

Expand All @@ -619,40 +628,34 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
fn format_link(&mut self, node: &'a AstNode<'a>, nl: &NodeLink, entering: bool) -> bool {
if is_autolink(node, nl) {
if entering {
write!(self, "<").unwrap();
if nl.url.len() >= 7 && &nl.url[..7] == b"mailto:" {
self.write_all(&nl.url[7..]).unwrap();
} else {
self.write_all(&nl.url).unwrap();
}
write!(self, ">").unwrap();
write!(self, "<{}>", nl.url.trim_start_matches("mailto:")).unwrap();
return false;
}
} else if entering {
write!(self, "[").unwrap();
} else {
write!(self, "](").unwrap();
self.output(&nl.url, false, Escaping::URL);
self.output(nl.url.as_bytes(), false, Escaping::Url);
if !nl.title.is_empty() {
write!(self, " \"").unwrap();
self.output(&nl.title, false, Escaping::Title);
self.output(nl.title.as_bytes(), false, Escaping::Title);
write!(self, "\"").unwrap();
}
write!(self, ")").unwrap();
}

return true;
true
}

fn format_image(&mut self, nl: &NodeLink, allow_wrap: bool, entering: bool) {
if entering {
write!(self, "![").unwrap();
} else {
write!(self, "](").unwrap();
self.output(&nl.url, false, Escaping::URL);
self.output(nl.url.as_bytes(), false, Escaping::Url);
if !nl.title.is_empty() {
self.output(&[b' ', b'"'], allow_wrap, Escaping::Literal);
self.output(&nl.title, false, Escaping::Title);
self.output(nl.title.as_bytes(), false, Escaping::Title);
write!(self, "\"").unwrap();
}
write!(self, ")").unwrap();
Expand Down Expand Up @@ -737,7 +740,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> {
}
}

fn format_footnote_reference(&mut self, r: &Vec<u8>, entering: bool) {
fn format_footnote_reference(&mut self, r: &[u8], entering: bool) {
if entering {
self.write_all(b"[^").unwrap();
self.write_all(r).unwrap();
Expand Down Expand Up @@ -792,7 +795,7 @@ fn shortest_unused_sequence(literal: &[u8], f: u8) -> usize {
}

fn is_autolink<'a>(node: &'a AstNode<'a>, nl: &NodeLink) -> bool {
if nl.url.is_empty() || scanners::scheme(&nl.url).is_none() {
if nl.url.is_empty() || scanners::scheme(nl.url.as_bytes()).is_none() {
return false;
}

Expand All @@ -808,12 +811,7 @@ fn is_autolink<'a>(node: &'a AstNode<'a>, nl: &NodeLink) -> bool {
},
};

let mut real_url: &[u8] = &nl.url;
if real_url.len() >= 7 && &real_url[..7] == b"mailto:" {
real_url = &real_url[7..];
}

real_url == &*link_text
nl.url.trim_start_matches("mailto:") == link_text
}

fn table_escape<'a>(node: &'a AstNode<'a>, c: u8) -> bool {
Expand Down
Loading

0 comments on commit 9ff5f8d

Please sign in to comment.