diff --git a/Makefile b/Makefile index bb7dff0d..9a4e2527 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ src/scanners.rs: src/scanners.re re2rust -W -Werror -i --no-generation-date -o $@ $< + cargo fmt bench: cargo build --release diff --git a/examples/headers.rs b/examples/headers.rs index 09c8eb84..69b8df3d 100644 --- a/examples/headers.rs +++ b/examples/headers.rs @@ -25,23 +25,23 @@ fn get_document_title(document: &str) -> String { continue; } - let mut text = Vec::new(); + let mut text = String::new(); collect_text(node, &mut text); // The input was already known good UTF-8 (document: &str) so comrak // guarantees the output will be too. - return String::from_utf8(text).unwrap(); + return text; } "Untitled Document".to_string() } -fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut Vec) { +fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut String) { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) => { - output.extend_from_slice(literal) + output.push_str(literal) } - NodeValue::LineBreak | NodeValue::SoftBreak => output.push(b' '), + NodeValue::LineBreak | NodeValue::SoftBreak => output.push(' '), _ => { for n in node.children() { collect_text(n, output); diff --git a/examples/s-expr.rs b/examples/s-expr.rs index 591ac1fe..fa7af0cf 100644 --- a/examples/s-expr.rs +++ b/examples/s-expr.rs @@ -30,29 +30,20 @@ fn iter_nodes<'a, W: Write>( macro_rules! try_node_inline { ($node:expr, $name:ident) => {{ if let $name(t) = $node { - return write!( - writer, - concat!(stringify!($name), "({:?})"), - String::from_utf8_lossy(&t) - ); + return write!(writer, concat!(stringify!($name), "({:?})"), t,); } }}; } match &node.data.borrow().value { - Text(t) => write!(writer, "{:?}", String::from_utf8_lossy(&t))?, + Text(t) => write!(writer, "{:?}", t)?, value => { try_node_inline!(value, FootnoteDefinition); try_node_inline!(value, FootnoteReference); try_node_inline!(value, HtmlInline); if let Code(code) = value { - return write!( - writer, - "Code({:?}, {})", - String::from_utf8_lossy(&code.literal), - code.num_backticks - ); + return write!(writer, "Code({:?}, {})", code.literal, code.num_backticks); } let has_blocks = node.children().any(|c| c.data.borrow().value.block()); diff --git a/examples/sample.rs b/examples/sample.rs index 74bf8925..552a5781 100644 --- a/examples/sample.rs +++ b/examples/sample.rs @@ -34,12 +34,8 @@ fn large() { iter_nodes(root, &|node| { if let NodeValue::Text(ref mut text) = node.data.borrow_mut().value { - let orig = std::mem::replace(text, vec![]); - *text = String::from_utf8(orig) - .unwrap() - .replace("my", "your") - .as_bytes() - .to_vec(); + let orig = std::mem::take(text); + *text = orig.replace("my", "your"); } }); diff --git a/examples/update-readme.rs b/examples/update-readme.rs index 944f1c63..567bc720 100644 --- a/examples/update-readme.rs +++ b/examples/update-readme.rs @@ -1,5 +1,8 @@ // Update the "comrak --help" text in Comrak's own README. +use std::fmt::Write; +use std::str; + use comrak::nodes::{AstNode, NodeValue}; use comrak::{format_commonmark, parse_document, Arena, ComrakOptions}; @@ -25,22 +28,23 @@ fn main() -> Result<(), Box> { iter_nodes(doc, &|node| { if let NodeValue::CodeBlock(ref mut ncb) = node.data.borrow_mut().value { // Look for the Cargo.toml example block. - if ncb.info == "toml".as_bytes() && ncb.literal.starts_with(&DEPENDENCIES.as_bytes()) { - let mut content = DEPENDENCIES.as_bytes().to_vec(); + if ncb.info == "toml" && ncb.literal.starts_with(DEPENDENCIES) { + let mut content = DEPENDENCIES.to_string(); let mut version_parts = comrak::version().split('.').collect::>(); version_parts.pop(); - content.extend("\"".bytes()); - content.extend(version_parts.join(".").bytes()); - content.extend("\"".bytes()); + write!(content, "\"{}\"", version_parts.join(".")).unwrap(); ncb.literal = content; } // Look for a console code block whose contents starts with the HELP string. // Replace its contents with the same string and the actual command output. - if ncb.info == "console".as_bytes() && ncb.literal.starts_with(&HELP.as_bytes()) { - let mut content = HELP.as_bytes().to_vec(); + if ncb.info == "console" && ncb.literal.starts_with(HELP) { + let mut content = HELP.to_string(); let mut cmd = std::process::Command::new("cargo"); - content.extend(cmd.args(&["run", "--", "--help"]).output().unwrap().stdout); + content.push_str( + str::from_utf8(&cmd.args(["run", "--", "--help"]).output().unwrap().stdout) + .unwrap(), + ); ncb.literal = content; } } diff --git a/proptest-regressions/tests.txt b/proptest-regressions/tests.txt new file mode 100644 index 00000000..83a44c54 --- /dev/null +++ b/proptest-regressions/tests.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc d71fb284045e89bb3bd8a1bbe634d42f3e7a5dd8074ff995fb482f0554f59eb1 # shrinks to ("A-",) diff --git a/src/cm.rs b/src/cm.rs index c096e462..fe104e90 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -9,7 +9,7 @@ use crate::parser::shortcodes::NodeShortCode; use crate::parser::ComrakOptions; use crate::scanners; use crate::{nodes, ComrakPlugins}; -use std; + use std::cmp::max; use std::io::{self, Write}; @@ -58,7 +58,7 @@ struct CommonMarkFormatter<'a, 'o> { enum Escaping { Literal, Normal, - URL, + Url, Title, } @@ -209,7 +209,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { && (c == b'.' || c == b')') && follows_digit && (nextc == 0 || isspace(nextc))))) - || (escaping == Escaping::URL + || (escaping == Escaping::Url && (c == b'`' || c == b'<' || c == b'>' @@ -221,7 +221,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { && (c == b'`' || c == b'<' || c == b'>' || c == b'"' || c == b'\\'))); if needs_escaping { - if escaping == Escaping::URL && isspace(c) { + if escaping == Escaping::Url && isspace(c) { write!(self.v, "%{:2X}", c).unwrap(); self.column += 3; } else if ispunct(c) { @@ -310,7 +310,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { match node.data.borrow().value { NodeValue::Document => (), - NodeValue::FrontMatter(ref fm) => self.format_front_matter(fm, entering), + NodeValue::FrontMatter(ref fm) => self.format_front_matter(fm.as_bytes(), entering), NodeValue::BlockQuote => self.format_block_quote(entering), NodeValue::List(..) => self.format_list(node, entering), NodeValue::Item(..) => self.format_item(node, entering), @@ -323,16 +323,20 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { NodeValue::HtmlBlock(ref nhb) => self.format_html_block(nhb, entering), NodeValue::ThematicBreak => self.format_thematic_break(entering), NodeValue::Paragraph => self.format_paragraph(entering), - NodeValue::Text(ref literal) => self.format_text(literal, allow_wrap, entering), + NodeValue::Text(ref literal) => { + self.format_text(literal.as_bytes(), allow_wrap, entering) + } NodeValue::LineBreak => self.format_line_break(entering), NodeValue::SoftBreak => self.format_soft_break(allow_wrap, entering), - NodeValue::Code(ref code) => self.format_code(&code.literal, allow_wrap, entering), - NodeValue::HtmlInline(ref literal) => self.format_html_inline(literal, entering), + NodeValue::Code(ref code) => { + self.format_code(code.literal.as_bytes(), allow_wrap, entering) + } + NodeValue::HtmlInline(ref literal) => { + self.format_html_inline(literal.as_bytes(), entering) + } NodeValue::Strong => self.format_strong(), NodeValue::Emph => self.format_emph(node), - NodeValue::TaskItem { checked, symbol } => { - self.format_task_item(checked, symbol, entering) - } + NodeValue::TaskItem { symbol } => self.format_task_item(symbol, entering), NodeValue::Strikethrough => self.format_strikethrough(), NodeValue::Superscript => self.format_superscript(), NodeValue::Link(ref nl) => return self.format_link(node, nl, entering), @@ -343,12 +347,14 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { NodeValue::TableRow(..) => self.format_table_row(entering), NodeValue::TableCell => self.format_table_cell(node, entering), NodeValue::FootnoteDefinition(_) => self.format_footnote_definition(entering), - NodeValue::FootnoteReference(ref r) => self.format_footnote_reference(r, entering), + NodeValue::FootnoteReference(ref r) => { + self.format_footnote_reference(r.as_bytes(), entering) + } }; true } - fn format_front_matter(&mut self, front_matter: &Vec, entering: bool) { + fn format_front_matter(&mut self, front_matter: &[u8], entering: bool) { if entering { self.output(front_matter, false, Escaping::Literal); } @@ -467,30 +473,33 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { self.blankline(); } - if ncb.info.is_empty() - && (ncb.literal.len() > 2 - && !isspace(ncb.literal[0]) - && !(isspace(ncb.literal[ncb.literal.len() - 1]) - && isspace(ncb.literal[ncb.literal.len() - 2]))) + let info = ncb.info.as_bytes(); + let literal = ncb.literal.as_bytes(); + + if info.is_empty() + && (literal.len() > 2 + && !isspace(literal[0]) + && !(isspace(literal[literal.len() - 1]) + && isspace(literal[literal.len() - 2]))) && !first_in_list_item { write!(self, " ").unwrap(); write!(self.prefix, " ").unwrap(); - self.write_all(&ncb.literal).unwrap(); + self.write_all(literal).unwrap(); let new_len = self.prefix.len() - 4; self.prefix.truncate(new_len); } else { - let fence_char = if ncb.info.contains(&b'`') { b'~' } else { b'`' }; - let numticks = max(3, longest_char_sequence(&ncb.literal, fence_char) + 1); + let fence_char = if info.contains(&b'`') { b'~' } else { b'`' }; + let numticks = max(3, longest_char_sequence(literal, fence_char) + 1); for _ in 0..numticks { write!(self, "{}", fence_char as char).unwrap(); } - if !ncb.info.is_empty() { + if !info.is_empty() { write!(self, " ").unwrap(); - self.write_all(&ncb.info).unwrap(); + self.write_all(info).unwrap(); } self.cr(); - self.write_all(&ncb.literal).unwrap(); + self.write_all(literal).unwrap(); self.cr(); for _ in 0..numticks { write!(self, "{}", fence_char as char).unwrap(); @@ -503,7 +512,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { fn format_html_block(&mut self, nhb: &NodeHtmlBlock, entering: bool) { if entering { self.blankline(); - self.write_all(&nhb.literal).unwrap(); + self.write_all(nhb.literal.as_bytes()).unwrap(); self.blankline(); } } @@ -522,7 +531,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { } } - fn format_text(&mut self, literal: &Vec, allow_wrap: bool, entering: bool) { + fn format_text(&mut self, literal: &[u8], allow_wrap: bool, entering: bool) { if entering { self.output(literal, allow_wrap, Escaping::Normal); } @@ -550,7 +559,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { } } - fn format_code(&mut self, literal: &Vec, allow_wrap: bool, entering: bool) { + fn format_code(&mut self, literal: &[u8], allow_wrap: bool, entering: bool) { if entering { let numticks = shortest_unused_sequence(literal, b'`'); for _ in 0..numticks { @@ -577,7 +586,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { } } - fn format_html_inline(&mut self, literal: &Vec, entering: bool) { + fn format_html_inline(&mut self, literal: &[u8], entering: bool) { if entering { self.write_all(literal).unwrap(); } @@ -602,9 +611,9 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { self.write_all(&[emph_delim]).unwrap(); } - fn format_task_item(&mut self, _checked: bool, symbol: u8, entering: bool) { + fn format_task_item(&mut self, symbol: Option, entering: bool) { if entering { - write!(self, "[{}] ", symbol as char).unwrap(); + write!(self, "[{}] ", symbol.unwrap_or(' ')).unwrap(); } } @@ -619,29 +628,23 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { fn format_link(&mut self, node: &'a AstNode<'a>, nl: &NodeLink, entering: bool) -> bool { if is_autolink(node, nl) { if entering { - write!(self, "<").unwrap(); - if nl.url.len() >= 7 && &nl.url[..7] == b"mailto:" { - self.write_all(&nl.url[7..]).unwrap(); - } else { - self.write_all(&nl.url).unwrap(); - } - write!(self, ">").unwrap(); + write!(self, "<{}>", nl.url.trim_start_matches("mailto:")).unwrap(); return false; } } else if entering { write!(self, "[").unwrap(); } else { write!(self, "](").unwrap(); - self.output(&nl.url, false, Escaping::URL); + self.output(nl.url.as_bytes(), false, Escaping::Url); if !nl.title.is_empty() { write!(self, " \"").unwrap(); - self.output(&nl.title, false, Escaping::Title); + self.output(nl.title.as_bytes(), false, Escaping::Title); write!(self, "\"").unwrap(); } write!(self, ")").unwrap(); } - return true; + true } fn format_image(&mut self, nl: &NodeLink, allow_wrap: bool, entering: bool) { @@ -649,10 +652,10 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { write!(self, "![").unwrap(); } else { write!(self, "](").unwrap(); - self.output(&nl.url, false, Escaping::URL); + self.output(nl.url.as_bytes(), false, Escaping::Url); if !nl.title.is_empty() { self.output(&[b' ', b'"'], allow_wrap, Escaping::Literal); - self.output(&nl.title, false, Escaping::Title); + self.output(nl.title.as_bytes(), false, Escaping::Title); write!(self, "\"").unwrap(); } write!(self, ")").unwrap(); @@ -737,7 +740,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { } } - fn format_footnote_reference(&mut self, r: &Vec, entering: bool) { + fn format_footnote_reference(&mut self, r: &[u8], entering: bool) { if entering { self.write_all(b"[^").unwrap(); self.write_all(r).unwrap(); @@ -792,7 +795,7 @@ fn shortest_unused_sequence(literal: &[u8], f: u8) -> usize { } fn is_autolink<'a>(node: &'a AstNode<'a>, nl: &NodeLink) -> bool { - if nl.url.is_empty() || scanners::scheme(&nl.url).is_none() { + if nl.url.is_empty() || scanners::scheme(nl.url.as_bytes()).is_none() { return false; } @@ -808,12 +811,7 @@ fn is_autolink<'a>(node: &'a AstNode<'a>, nl: &NodeLink) -> bool { }, }; - let mut real_url: &[u8] = &nl.url; - if real_url.len() >= 7 && &real_url[..7] == b"mailto:" { - real_url = &real_url[7..]; - } - - real_url == &*link_text + nl.url.trim_start_matches("mailto:") == link_text } fn table_escape<'a>(node: &'a AstNode<'a>, c: u8) -> bool { diff --git a/src/entity.rs b/src/entity.rs index 367fe72c..7b7a5e02 100644 --- a/src/entity.rs +++ b/src/entity.rs @@ -36,11 +36,8 @@ pub fn unescape(text: &[u8]) -> Option<(Vec, usize)> { 0 }; - if num_digits >= 1 && num_digits <= 8 && i < text.len() && text[i] == b';' { - if codepoint == 0 - || (codepoint >= 0xD800 && codepoint <= 0xE000) - || codepoint >= 0x110000 - { + if (1..=8).contains(&num_digits) && i < text.len() && text[i] == b';' { + if codepoint == 0 || (0xD800..=0xE000).contains(&codepoint) || codepoint >= 0x110000 { codepoint = 0xFFFD; } return Some(( diff --git a/src/html.rs b/src/html.rs index 8aadbde6..ba91b15f 100644 --- a/src/html.rs +++ b/src/html.rs @@ -18,7 +18,7 @@ pub fn format_document<'a>( options: &ComrakOptions, output: &mut dyn Write, ) -> io::Result<()> { - format_document_with_plugins(root, &options, output, &ComrakPlugins::default()) + format_document_with_plugins(root, options, output, &ComrakPlugins::default()) } /// Formats an AST as HTML, modified by the given options. Accepts custom plugins. @@ -174,7 +174,7 @@ const NEEDS_ESCAPED : [bool; 256] = [ ]; fn tagfilter(literal: &[u8]) -> bool { - static TAGFILTER_BLACKLIST: [&'static str; 9] = [ + static TAGFILTER_BLACKLIST: [&str; 9] = [ "title", "textarea", "style", @@ -364,24 +364,23 @@ impl<'o> HtmlFormatter<'o> { while let Some((node, plain, phase)) = stack.pop() { match phase { Phase::Pre => { - let new_plain; - if plain { + let new_plain = if plain { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) | NodeValue::HtmlInline(ref literal) => { - self.escape(literal)?; + self.escape(literal.as_bytes())?; } NodeValue::LineBreak | NodeValue::SoftBreak => { self.output.write_all(b" ")?; } _ => (), } - new_plain = plain; + plain } else { stack.push((node, false, Phase::Post)); - new_plain = self.format_node(node, true)?; - } + self.format_node(node, true)? + }; for ch in node.reverse_children() { stack.push((ch, new_plain, Phase::Pre)); @@ -397,15 +396,15 @@ impl<'o> HtmlFormatter<'o> { Ok(()) } - fn collect_text<'a>(&self, node: &'a AstNode<'a>, output: &mut Vec) { + fn collect_text<'a>(node: &'a AstNode<'a>, output: &mut Vec) { match node.data.borrow().value { NodeValue::Text(ref literal) | NodeValue::Code(NodeCode { ref literal, .. }) => { - output.extend_from_slice(literal) + output.extend_from_slice(literal.as_bytes()) } NodeValue::LineBreak | NodeValue::SoftBreak => output.push(b' '), _ => { for n in node.children() { - self.collect_text(n, output); + Self::collect_text(n, output); } } } @@ -479,7 +478,7 @@ impl<'o> HtmlFormatter<'o> { if let Some(ref prefix) = self.options.extension.header_ids { let mut text_content = Vec::with_capacity(20); - self.collect_text(node, &mut text_content); + Self::collect_text(node, &mut text_content); let mut id = String::from_utf8(text_content).unwrap(); id = self.anchorizer.anchorize(id); @@ -497,7 +496,7 @@ impl<'o> HtmlFormatter<'o> { } Some(adapter) => { let mut text_content = Vec::with_capacity(20); - self.collect_text(node, &mut text_content); + Self::collect_text(node, &mut text_content); let content = String::from_utf8(text_content).unwrap(); let heading = HeadingMeta { level: nch.level, @@ -521,13 +520,16 @@ impl<'o> HtmlFormatter<'o> { let mut code_attributes: HashMap = HashMap::new(); let code_attr: String; - if !ncb.info.is_empty() { - while first_tag < ncb.info.len() && !isspace(ncb.info[first_tag]) { + let literal = &ncb.literal.as_bytes(); + let info = &ncb.info.as_bytes(); + + if !info.is_empty() { + while first_tag < info.len() && !isspace(info[first_tag]) { first_tag += 1; } - let lang_str = str::from_utf8(&ncb.info[..first_tag]).unwrap(); - let info_str = str::from_utf8(&ncb.info[first_tag..]).unwrap().trim(); + let lang_str = str::from_utf8(&info[..first_tag]).unwrap(); + let info_str = str::from_utf8(&info[first_tag..]).unwrap().trim(); if self.options.render.github_pre_lang { pre_attributes.insert(String::from("lang"), lang_str.to_string()); @@ -555,7 +557,7 @@ impl<'o> HtmlFormatter<'o> { build_opening_tag("code", &code_attributes).as_bytes(), )?; - self.escape(&ncb.literal)?; + self.escape(literal)?; self.output.write_all(b"\n")? } @@ -569,11 +571,11 @@ impl<'o> HtmlFormatter<'o> { self.output.write_all( highlighter .highlight( - match str::from_utf8(&ncb.info[..first_tag]) { + match str::from_utf8(&info[..first_tag]) { Ok(lang) => Some(lang), Err(_) => None, }, - str::from_utf8(ncb.literal.as_slice()).unwrap(), + &ncb.literal, ) .as_bytes(), )?; @@ -586,14 +588,15 @@ impl<'o> HtmlFormatter<'o> { NodeValue::HtmlBlock(ref nhb) => { if entering { self.cr()?; + let literal = nhb.literal.as_bytes(); if self.options.render.escape { - self.escape(&nhb.literal)?; + self.escape(literal)?; } else if !self.options.render.unsafe_ { self.output.write_all(b"")?; } else if self.options.extension.tagfilter { - tagfilter_block(&nhb.literal, &mut self.output)?; + tagfilter_block(literal, &mut self.output)?; } else { - self.output.write_all(&nhb.literal)?; + self.output.write_all(literal)?; } self.cr()?; } @@ -639,7 +642,7 @@ impl<'o> HtmlFormatter<'o> { } NodeValue::Text(ref literal) => { if entering { - self.escape(literal)?; + self.escape(literal.as_bytes())?; } } NodeValue::LineBreak => { @@ -659,14 +662,15 @@ impl<'o> HtmlFormatter<'o> { NodeValue::Code(NodeCode { ref literal, .. }) => { if entering { self.output.write_all(b"")?; - self.escape(literal)?; + self.escape(literal.as_bytes())?; self.output.write_all(b"")?; } } NodeValue::HtmlInline(ref literal) => { if entering { + let literal = literal.as_bytes(); if self.options.render.escape { - self.escape(&literal)?; + self.escape(literal)?; } else if !self.options.render.unsafe_ { self.output.write_all(b"")?; } else if self.options.extension.tagfilter && tagfilter(literal) { @@ -708,12 +712,13 @@ impl<'o> HtmlFormatter<'o> { NodeValue::Link(ref nl) => { if entering { self.output.write_all(b"")?; } else { @@ -723,15 +728,16 @@ impl<'o> HtmlFormatter<'o> { NodeValue::Image(ref nl) => { if entering { self.output.write_all(b"\"")?;")?; } @@ -848,7 +854,6 @@ impl<'o> HtmlFormatter<'o> { } NodeValue::FootnoteReference(ref r) => { if entering { - let r = str::from_utf8(r).unwrap(); write!( self.output, "{}", @@ -856,16 +861,17 @@ impl<'o> HtmlFormatter<'o> { )?; } } - NodeValue::TaskItem { checked, .. } => { + NodeValue::TaskItem { symbol } => { if entering { - if checked { - self.output.write_all( - b" ", - )?; - } else { - self.output - .write_all(b" ")?; - } + write!( + self.output, + " ", + if symbol.is_some() { + "checked=\"\" " + } else { + "" + } + )?; } } } diff --git a/src/lib.rs b/src/lib.rs index c5308813..bae509ff 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,8 +39,8 @@ //! iter_nodes(root, &|node| { //! match &mut node.data.borrow_mut().value { //! &mut NodeValue::Text(ref mut text) => { -//! let orig = std::mem::replace(text, vec![]); -//! *text = String::from_utf8(orig).unwrap().replace("my", "your").as_bytes().to_vec(); +//! let orig = std::mem::replace(text, String::new()); +//! *text = orig.replace("my", "your"); //! } //! _ => (), //! } diff --git a/src/nodes.rs b/src/nodes.rs index b3711f24..940c018b 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -12,8 +12,8 @@ pub enum NodeValue { /// The root of every CommonMark document. Contains **blocks**. Document, - /// Non-Markdown front matter. Treated as an opaque blob. - FrontMatter(Vec), + /// Non-Markdown front matter. Treated as an opaque blob. + FrontMatter(String), /// **Block**. A [block quote](https://github.github.com/gfm/#block-quotes). Contains other /// **blocks**. @@ -85,9 +85,9 @@ pub enum NodeValue { /// children. ThematicBreak, - /// **Block**. A footnote definition. The `Vec` is the footnote's name. + /// **Block**. A footnote definition. The `String` is the footnote's name. /// Contains other **blocks**. - FootnoteDefinition(Vec), + FootnoteDefinition(String), /// **Block**. A [table](https://github.github.com/gfm/#tables-extension-) per the GFM spec. /// Contains table rows. @@ -102,14 +102,13 @@ pub enum NodeValue { /// **Inline**. [Textual content](https://github.github.com/gfm/#textual-content). All text /// in a document will be contained in a `Text` node. - Text(Vec), + Text(String), /// **Inline**. [Task list item](https://github.github.com/gfm/#task-list-items-extension-). TaskItem { - /// The `bool` `checked` indicates whether it is checked or not. - checked: bool, - /// The `symbol` that was used in the brackets to mark a task as `checked`. - symbol: u8, + /// The symbol that was used in the brackets to mark a task item + /// as checked, or None if the item is unchecked. + symbol: Option, }, /// **Inline**. A [soft line break](https://github.github.com/gfm/#soft-line-breaks). If @@ -124,7 +123,7 @@ pub enum NodeValue { Code(NodeCode), /// **Inline**. [Raw HTML](https://github.github.com/gfm/#raw-html) contained inline. - HtmlInline(Vec), + HtmlInline(String), /// **Inline**. [Emphasised](https://github.github.com/gfm/#emphasis-and-strong-emphasis) /// text. @@ -147,8 +146,8 @@ pub enum NodeValue { /// **Inline**. An [image](https://github.github.com/gfm/#images). Image(NodeLink), - /// **Inline**. A footnote reference; the `Vec` is the referent footnote's name. - FootnoteReference(Vec), + /// **Inline**. A footnote reference; the `String` is the referent footnote's name. + FootnoteReference(String), #[cfg(feature = "shortcodes")] /// **Inline**. An Emoji character generated from a shortcode. Enable with feature "emoji" @@ -181,20 +180,20 @@ pub struct NodeCode { /// As the contents are not interpreted as Markdown at all, /// they are contained within this structure, /// rather than inserted into a child inline of any kind. - pub literal: Vec, + pub literal: String, } /// The details of a link's destination, or an image's source. #[derive(Debug, Clone)] pub struct NodeLink { /// The URL for the link destination or image source. - pub url: Vec, + pub url: String, /// The title for the link or image. /// /// Note this field is used for the `title` attribute by the HTML formatter even for images; /// `alt` text is supplied in the image inline text. - pub title: Vec, + pub title: String, } /// The metadata of a list; the kind of list, the delimiter used and so on. @@ -234,37 +233,27 @@ pub struct NodeDescriptionItem { } /// The type of list. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Default)] pub enum ListType { /// A bullet list, i.e. an unordered list. + #[default] Bullet, /// An ordered list. Ordered, } -impl Default for ListType { - fn default() -> ListType { - ListType::Bullet - } -} - /// The delimiter for ordered lists, i.e. the character which appears after each number. -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Default)] pub enum ListDelimType { /// A period character `.`. + #[default] Period, /// A paren character `)`. Paren, } -impl Default for ListDelimType { - fn default() -> ListDelimType { - ListDelimType::Period - } -} - /// The metadata and data of a code block (fenced or indented). #[derive(Default, Debug, Clone)] pub struct NodeCodeBlock { @@ -282,12 +271,12 @@ pub struct NodeCodeBlock { /// For fenced code blocks, the [info string](https://github.github.com/gfm/#info-string) after /// the opening fence, if any. - pub info: Vec, + pub info: String, /// The literal contents of the code block. As the contents are not interpreted as Markdown at /// all, they are contained within this structure, rather than inserted into a child inline of /// any kind. - pub literal: Vec, + pub literal: String, } /// The metadata of a heading. @@ -308,7 +297,7 @@ pub struct NodeHtmlBlock { /// The literal contents of the HTML block. Per NodeCodeBlock, the content is included here /// rather than in any inline. - pub literal: Vec, + pub literal: String, } impl NodeValue { @@ -347,7 +336,7 @@ impl NodeValue { /// Return a reference to the text of a `Text` inline, if this node is one. /// /// Convenience method. - pub fn text(&self) -> Option<&Vec> { + pub fn text(&self) -> Option<&String> { match *self { NodeValue::Text(ref t) => Some(t), _ => None, @@ -357,7 +346,7 @@ impl NodeValue { /// Return a mutable reference to the text of a `Text` inline, if this node is one. /// /// Convenience method. - pub fn text_mut(&mut self) -> Option<&mut Vec> { + pub fn text_mut(&mut self) -> Option<&mut String> { match *self { NodeValue::Text(ref mut t) => Some(t), _ => None, @@ -384,7 +373,7 @@ pub struct Ast { /// The line in the input document the node starts at. pub start_line: u32, - pub(crate) content: Vec, + pub(crate) content: String, pub(crate) open: bool, pub(crate) last_line_blank: bool, pub(crate) table_visited: bool, @@ -395,7 +384,7 @@ impl Ast { pub fn new(value: NodeValue) -> Self { Ast { value, - content: vec![], + content: String::new(), start_line: 0, open: true, last_line_blank: false, diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 67de349d..ea03b464 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -9,8 +9,9 @@ use unicode_categories::UnicodeCategories; pub fn process_autolinks<'a>( arena: &'a Arena>, node: &'a AstNode<'a>, - contents: &mut Vec, + contents_str: &mut String, ) { + let contents = contents_str.as_bytes(); let len = contents.len(); let mut i = 0; @@ -46,11 +47,11 @@ pub fn process_autolinks<'a>( i -= reverse; node.insert_after(post); if i + skip < len { - let remain = contents[i + skip..].to_vec(); + let remain = str::from_utf8(&contents[i + skip..]).unwrap(); assert!(!remain.is_empty()); - post.insert_after(make_inline(arena, NodeValue::Text(remain))); + post.insert_after(make_inline(arena, NodeValue::Text(remain.to_string()))); } - contents.truncate(i); + contents_str.truncate(i); return; } } @@ -88,14 +89,24 @@ fn www_match<'a>( link_end = autolink_delim(&contents[i..], link_end); - let mut url = b"http://".to_vec(); - url.extend_from_slice(&contents[i..link_end + i]); + let mut url = "http://".to_string(); + url.push_str(str::from_utf8(&contents[i..link_end + i]).unwrap()); - let inl = make_inline(arena, NodeValue::Link(NodeLink { url, title: vec![] })); + let inl = make_inline( + arena, + NodeValue::Link(NodeLink { + url, + title: String::new(), + }), + ); inl.append(make_inline( arena, - NodeValue::Text(contents[i..link_end + i].to_vec()), + NodeValue::Text( + str::from_utf8(&contents[i..link_end + i]) + .unwrap() + .to_string(), + ), )); Some((inl, 0, link_end)) } @@ -197,7 +208,7 @@ fn url_match<'a>( contents: &[u8], i: usize, ) -> Option<(&'a AstNode<'a>, usize, usize)> { - const SCHEMES: [&'static [u8]; 3] = [b"http", b"https", b"ftp"]; + const SCHEMES: [&[u8]; 3] = [b"http", b"https", b"ftp"]; let size = contents.len(); @@ -226,12 +237,14 @@ fn url_match<'a>( link_end = autolink_delim(&contents[i..], link_end); - let url = contents[i - rewind..i + link_end].to_vec(); + let url = str::from_utf8(&contents[i - rewind..i + link_end]) + .unwrap() + .to_string(); let inl = make_inline( arena, NodeValue::Link(NodeLink { url: url.clone(), - title: vec![], + title: String::new(), }), ); @@ -302,14 +315,18 @@ fn email_match<'a>( return None; } - let mut url = b"mailto:".to_vec(); - url.extend_from_slice(&contents[i - rewind..link_end + i]); - - let inl = make_inline(arena, NodeValue::Link(NodeLink { url, title: vec![] })); + let mut url = "mailto:".to_string(); + let text = str::from_utf8(&contents[i - rewind..link_end + i]).unwrap(); + url.push_str(text); - inl.append(make_inline( + let inl = make_inline( arena, - NodeValue::Text(contents[i - rewind..link_end + i].to_vec()), - )); + NodeValue::Link(NodeLink { + url, + title: String::new(), + }), + ); + + inl.append(make_inline(arena, NodeValue::Text(text.to_string()))); Some((inl, rewind, rewind + link_end)) } diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index f9b23bf9..e7285274 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -51,7 +51,7 @@ struct Flags { } pub struct RefMap { - pub map: HashMap, Reference>, + pub map: HashMap, pub(crate) max_ref_size: usize, ref_size: usize, } @@ -65,7 +65,7 @@ impl RefMap { } } - fn lookup(&mut self, lab: &[u8]) -> Option { + fn lookup(&mut self, lab: &str) -> Option { match self.map.get(lab) { Some(entry) => { let size = entry.url.len() + entry.title.len(); @@ -172,7 +172,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { '.' => Some(self.handle_period()), '[' => { self.pos += 1; - let inl = make_inline(self.arena, NodeValue::Text(b"[".to_vec())); + let inl = make_inline(self.arena, NodeValue::Text("[".to_string())); self.push_bracket(false, inl); self.within_brackets = true; Some(inl) @@ -185,11 +185,11 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos += 1; if self.peek_char() == Some(&(b'[')) && self.peek_char_n(1) != Some(&(b'^')) { self.pos += 1; - let inl = make_inline(self.arena, NodeValue::Text(b"![".to_vec())); + let inl = make_inline(self.arena, NodeValue::Text("![".to_string())); self.push_bracket(true, inl); Some(inl) } else { - Some(make_inline(self.arena, NodeValue::Text(b"!".to_vec()))) + Some(make_inline(self.arena, NodeValue::Text("!".to_string()))) } } '~' if self.options.extension.strikethrough => Some(self.handle_delim(b'~')), @@ -216,7 +216,10 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { strings::ltrim(&mut contents); } - Some(make_inline(self.arena, NodeValue::Text(contents))) + Some(make_inline( + self.arena, + NodeValue::Text(String::from_utf8(contents).unwrap()), + )) } }; @@ -390,9 +393,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } } else if c.delim_char == b'\'' || c.delim_char == b'"' { *c.inl.data.borrow_mut().value.text_mut().unwrap() = - if c.delim_char == b'\'' { "’" } else { "”" } - .to_string() - .into_bytes(); + if c.delim_char == b'\'' { "’" } else { "”" }.to_string(); closer = c.next.get(); if opener_found { @@ -408,8 +409,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } else { "“" } - .to_string() - .into_bytes(); + .to_string(); self.remove_delimiter(opener.unwrap()); self.remove_delimiter(old_c); } @@ -559,14 +559,14 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { match endpos { None => { self.pos = startpos; - make_inline(self.arena, NodeValue::Text(vec![b'`'; openticks])) + make_inline(self.arena, NodeValue::Text("`".repeat(openticks))) } Some(endpos) => { let buf = &self.input[startpos..endpos - openticks]; let buf = strings::normalize_code(buf); let code = NodeCode { num_backticks: openticks, - literal: buf, + literal: String::from_utf8(buf).unwrap(), }; make_inline(self.arena, NodeValue::Code(code)) } @@ -586,15 +586,17 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { let (numdelims, can_open, can_close) = self.scan_delims(c); let contents = if c == b'\'' && self.options.parse.smart { - b"\xE2\x80\x99".to_vec() + "’".to_string() } else if c == b'"' && self.options.parse.smart { if can_close { - b"\xE2\x90\x9D".to_vec() + "”".to_string() } else { - b"\xE2\x80\x9C".to_vec() + "“".to_string() } } else { - self.input[self.pos - numdelims..self.pos].to_vec() + str::from_utf8(&self.input[self.pos - numdelims..self.pos]) + .unwrap() + .to_string() }; let inl = make_inline(self.arena, NodeValue::Text(contents)); @@ -610,7 +612,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos += 1; if !self.options.parse.smart || self.peek_char().map_or(false, |&c| c != b'-') { - return make_inline(self.arena, NodeValue::Text(vec![b'-'])); + return make_inline(self.arena, NodeValue::Text("-".to_string())); } while self.options.parse.smart && self.peek_char().map_or(false, |&c| c == b'-') { @@ -629,14 +631,12 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { (2, (numhyphens - 4) / 3) }; - let mut buf = vec![]; - for _ in 0..ems { - buf.extend_from_slice(b"\xE2\x80\x94"); - } - for _ in 0..ens { - buf.extend_from_slice(b"\xE2\x80\x93"); - } + let ens = if ens > 0 { ens as usize } else { 0 }; + let ems = if ems > 0 { ems as usize } else { 0 }; + let mut buf = String::with_capacity(3 * (ems + ens)); + buf.push_str(&"—".repeat(ems)); + buf.push_str(&"–".repeat(ens)); make_inline(self.arena, NodeValue::Text(buf)) } @@ -646,12 +646,12 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos += 1; if self.peek_char().map_or(false, |&c| c == b'.') { self.pos += 1; - make_inline(self.arena, NodeValue::Text(b"\xE2\x80\xA6".to_vec())) + make_inline(self.arena, NodeValue::Text("…".to_string())) } else { - make_inline(self.arena, NodeValue::Text(b"..".to_vec())) + make_inline(self.arena, NodeValue::Text("..".to_string())) } } else { - make_inline(self.arena, NodeValue::Text(b".".to_vec())) + make_inline(self.arena, NodeValue::Text(".".to_string())) } } @@ -774,7 +774,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { opener: &'d Delimiter<'a, 'd>, closer: &'d Delimiter<'a, 'd>, ) -> Option<&'d Delimiter<'a, 'd>> { - let opener_char = opener.inl.data.borrow().value.text().unwrap()[0]; + let opener_char = opener.inl.data.borrow().value.text().unwrap().as_bytes()[0]; let mut opener_num_chars = opener.inl.data.borrow().value.text().unwrap().len(); let mut closer_num_chars = closer.inl.data.borrow().value.text().unwrap().len(); let use_delims = if closer_num_chars >= 2 && opener_num_chars >= 2 { @@ -866,11 +866,14 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { if self.peek_char().map_or(false, |&c| ispunct(c)) { self.pos += 1; // TODO - make_inline(self.arena, NodeValue::Text(vec![self.input[self.pos - 1]])) + make_inline( + self.arena, + NodeValue::Text(String::from_utf8(vec![self.input[self.pos - 1]]).unwrap()), + ) } else if !self.eof() && self.skip_line_end() { make_inline(self.arena, NodeValue::LineBreak) } else { - make_inline(self.arena, NodeValue::Text(b"\\".to_vec())) + make_inline(self.arena, NodeValue::Text("\\".to_string())) } } @@ -889,10 +892,13 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos += 1; match entity::unescape(&self.input[self.pos..]) { - None => make_inline(self.arena, NodeValue::Text(b"&".to_vec())), + None => make_inline(self.arena, NodeValue::Text("&".to_string())), Some((entity, len)) => { self.pos += len; - make_inline(self.arena, NodeValue::Text(entity)) + make_inline( + self.arena, + NodeValue::Text(String::from_utf8(entity).unwrap()), + ) } } } @@ -911,7 +917,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } } self.pos += 1; - make_inline(self.arena, NodeValue::Text(b":".to_vec())) + make_inline(self.arena, NodeValue::Text(":".to_string())) } pub fn handle_pointy_brace(&mut self) -> &'a AstNode<'a> { @@ -921,7 +927,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { let inl = make_autolink( self.arena, &self.input[self.pos..self.pos + matchlen - 1], - AutolinkType::URI, + AutolinkType::Uri, ); self.pos += matchlen; return inl; @@ -949,12 +955,10 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { matchlen = Some(4); } else if self.input[self.pos + 3] == b'-' && self.input[self.pos + 4] == b'>' { matchlen = Some(5); + } else if let Some(m) = scanners::html_comment(&self.input[self.pos + 1..]) { + matchlen = Some(m + 1); } else { - if let Some(m) = scanners::html_comment(&self.input[self.pos + 1..]) { - matchlen = Some(m + 1); - } else { - self.flags.skip_html_comment = true; - } + self.flags.skip_html_comment = true; } } else if c == b'[' { if !self.flags.skip_html_cdata { @@ -1000,12 +1004,15 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { if let Some(matchlen) = matchlen { let contents = &self.input[self.pos - 1..self.pos + matchlen]; - let inl = make_inline(self.arena, NodeValue::HtmlInline(contents.to_vec())); + let inl = make_inline( + self.arena, + NodeValue::HtmlInline(str::from_utf8(contents).unwrap().to_string()), + ); self.pos += matchlen; return inl; } - make_inline(self.arena, NodeValue::Text(b"<".to_vec())) + make_inline(self.arena, NodeValue::Text("<".to_string())) } pub fn push_bracket(&mut self, image: bool, inl_text: &'a AstNode<'a>) { @@ -1030,14 +1037,14 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { let brackets_len = self.brackets.len(); if brackets_len == 0 { - return Some(make_inline(self.arena, NodeValue::Text(b"]".to_vec()))); + return Some(make_inline(self.arena, NodeValue::Text("]".to_string()))); } let is_image = self.brackets[brackets_len - 1].image; if !is_image && self.no_link_openers { self.brackets.pop(); - return Some(make_inline(self.arena, NodeValue::Text(b"]".to_vec()))); + return Some(make_inline(self.arena, NodeValue::Text("]".to_string()))); } let after_link_text_pos = self.pos; @@ -1071,7 +1078,11 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos = endall + 1; let url = strings::clean_url(url); let title = strings::clean_title(&self.input[starttitle..endtitle]); - self.close_bracket_match(is_image, url, title); + self.close_bracket_match( + is_image, + String::from_utf8(url).unwrap(), + String::from_utf8(title).unwrap(), + ); return None; } else { self.pos = after_link_text_pos; @@ -1081,8 +1092,8 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { // Try to see if this is a reference link let (mut lab, mut found_label) = match self.link_label() { - Some(lab) => (lab.to_vec(), true), - None => (vec![], false), + Some(lab) => (lab.to_string(), true), + None => ("".to_string(), false), }; if !found_label { @@ -1090,12 +1101,16 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } if (!found_label || lab.is_empty()) && !self.brackets[brackets_len - 1].bracket_after { - lab = self.input[self.brackets[brackets_len - 1].position..initial_pos - 1].to_vec(); + lab = str::from_utf8( + &self.input[self.brackets[brackets_len - 1].position..initial_pos - 1], + ) + .unwrap() + .to_string(); found_label = true; } // Need to normalize both to lookup in refmap and to call callback - lab = strings::normalize_label(&lab); + let lab = strings::normalize_label(&lab); let mut reff = if found_label { self.refmap.lookup(&lab) } else { @@ -1114,7 +1129,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { return None; } - let mut text: Option> = None; + let mut text: Option = None; if self.options.extension.footnotes && match self.brackets[brackets_len - 1].inl_text.next_sibling() { Some(n) => { @@ -1125,8 +1140,11 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } { let text = text.unwrap(); - if text.len() > 1 && text[0] == b'^' { - let inl = make_inline(self.arena, NodeValue::FootnoteReference(text[1..].to_vec())); + if text.len() > 1 && text.as_bytes()[0] == b'^' { + let inl = make_inline( + self.arena, + NodeValue::FootnoteReference(text[1..].to_string()), + ); self.brackets[brackets_len - 1].inl_text.insert_before(inl); self.brackets[brackets_len - 1] .inl_text @@ -1142,10 +1160,10 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.brackets.pop(); self.pos = initial_pos; - Some(make_inline(self.arena, NodeValue::Text(b"]".to_vec()))) + Some(make_inline(self.arena, NodeValue::Text("]".to_string()))) } - pub fn close_bracket_match(&mut self, is_image: bool, url: Vec, title: Vec) { + pub fn close_bracket_match(&mut self, is_image: bool, url: String, title: String) { let nl = NodeLink { url, title }; let inl = make_inline( self.arena, @@ -1172,7 +1190,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } } - pub fn link_label(&mut self) -> Option<&[u8]> { + pub fn link_label(&mut self) -> Option<&str> { let startpos = self.pos; if self.peek_char() != Some(&(b'[')) { @@ -1204,7 +1222,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { if c == b']' { let raw_label = strings::trim_slice(&self.input[startpos + 1..self.pos]); self.pos += 1; - Some(raw_label) + Some(str::from_utf8(raw_label).unwrap()) } else { self.pos = startpos; None @@ -1289,7 +1307,7 @@ pub fn manual_scan_link_url_2(input: &[u8]) -> Option<(&[u8], usize)> { pub fn make_inline<'a>(arena: &'a Arena>, value: NodeValue) -> &'a AstNode<'a> { let ast = Ast { value, - content: vec![], + content: String::new(), start_line: 0, open: false, last_line_blank: false, @@ -1306,13 +1324,13 @@ fn make_autolink<'a>( let inl = make_inline( arena, NodeValue::Link(NodeLink { - url: strings::clean_autolink(url, kind), - title: vec![], + url: String::from_utf8(strings::clean_autolink(url, kind)).unwrap(), + title: String::new(), }), ); inl.append(make_inline( arena, - NodeValue::Text(entity::unescape_html(url)), + NodeValue::Text(String::from_utf8(entity::unescape_html(url)).unwrap()), )); inl } diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 7f291017..72bbbcb1 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -16,7 +16,7 @@ use crate::nodes::{ use crate::scanners; use crate::strings; use once_cell::sync::OnceCell; -use regex::bytes::{Regex, RegexBuilder}; +use regex::{Regex, RegexBuilder}; use std::cell::RefCell; use std::cmp::min; use std::collections::HashMap; @@ -74,10 +74,10 @@ pub fn parse_document<'a>( /// &arena, /// "# Cool input!\nWow look at this cool [link][foo]. A [broken link] renders as text.", /// &ComrakOptions::default(), -/// Some(&mut |link_ref: &[u8]| match link_ref { -/// b"foo" => Some(( -/// b"https://www.rust-lang.org/".to_vec(), -/// b"The Rust Language".to_vec(), +/// Some(&mut |link_ref: &str| match link_ref { +/// "foo" => Some(( +/// "https://www.rust-lang.org/".to_string(), +/// "The Rust Language".to_string(), /// )), /// _ => None, /// }), @@ -100,7 +100,7 @@ pub fn parse_document_with_broken_link_callback<'a, 'c>( ) -> &'a AstNode<'a> { let root: &'a AstNode<'a> = arena.alloc(Node::new(RefCell::new(Ast { value: NodeValue::Document, - content: vec![], + content: String::new(), start_line: 0, open: true, last_line_blank: false, @@ -112,7 +112,7 @@ pub fn parse_document_with_broken_link_callback<'a, 'c>( parser.finish(linebuf) } -type Callback<'c> = &'c mut dyn FnMut(&[u8]) -> Option<(Vec, Vec)>; +type Callback<'c> = &'c mut dyn FnMut(&str) -> Option<(String, String)>; pub struct Parser<'a, 'o, 'c> { arena: &'a Arena>, @@ -571,8 +571,8 @@ impl Debug for ComrakRenderPlugins<'_> { #[derive(Clone)] pub struct Reference { - pub url: Vec, - pub title: Vec, + pub url: String, + pub title: String, } struct FootnoteDefinition<'a> { @@ -619,12 +619,13 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { self.total_size += s.len(); } - if self.last_buffer_ended_with_cr && s.len() > 0 && s[0] == b'\n' { + if self.last_buffer_ended_with_cr && !s.is_empty() && s[0] == b'\n' { buffer += 1; } self.last_buffer_ended_with_cr = false; if let Some(ref delimiter) = self.options.extension.front_matter_delimiter { + // TODO: re2c let front_matter_pattern = RegexBuilder::new(&format!( "\\A(?:\u{feff})?{delim}\\r?\\n.*^{delim}\\r?\\n(?:\\r?\\n)?", delim = regex::escape(delimiter) @@ -633,10 +634,13 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { .dot_matches_new_line(true) .build() .unwrap(); - if let Some(front_matter_size) = front_matter_pattern.shortest_match(&s[buffer..]) { + + // We've only advanced `s` past a \n; we are valid UTF-8. + let buffer_to_check = unsafe { str::from_utf8_unchecked(&s[buffer..]) }; + if let Some(front_matter_size) = front_matter_pattern.shortest_match(buffer_to_check) { let node = self.add_child( self.root, - NodeValue::FrontMatter(s[buffer..buffer + front_matter_size].to_vec()), + NodeValue::FrontMatter(buffer_to_check[..front_matter_size].to_string()), ); buffer += front_matter_size; self.finalize(node).unwrap(); @@ -666,18 +670,16 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { if process { if !linebuf.is_empty() { linebuf.extend_from_slice(&s[buffer..eol]); - self.process_line(&linebuf); + self.process_line(linebuf); linebuf.truncate(0); } else { self.process_line(&s[buffer..eol]); } + } else if eol < end && s[eol] == b'\0' { + linebuf.extend_from_slice(&s[buffer..eol]); + linebuf.extend_from_slice(&"\u{fffd}".to_string().into_bytes()); } else { - if eol < end && s[eol] == b'\0' { - linebuf.extend_from_slice(&s[buffer..eol]); - linebuf.extend_from_slice(&"\u{fffd}".to_string().into_bytes()); - } else { - linebuf.extend_from_slice(&s[buffer..eol]); - } + linebuf.extend_from_slice(&s[buffer..eol]); } buffer = eol; @@ -800,7 +802,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { if self.line_number == 0 && line.len() >= 3 - && unsafe { str::from_utf8_unchecked(line) }.starts_with("\u{feff}") + && unsafe { str::from_utf8_unchecked(line) }.starts_with('\u{feff}') { self.offset += 3; } @@ -931,7 +933,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { if strings::is_space_or_tab(line[self.offset]) { self.advance_offset(line, 1, true); } - *container = self.add_child(*container, NodeValue::BlockQuote); + *container = self.add_child(container, NodeValue::BlockQuote); } else if !indented && unwrap_into( scanners::atx_heading_start(&line[self.first_nonspace..]), @@ -941,7 +943,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { let heading_startpos = self.first_nonspace; let offset = self.offset; self.advance_offset(line, heading_startpos + matched - offset, false); - *container = self.add_child(*container, NodeValue::Heading(NodeHeading::default())); + *container = self.add_child(container, NodeValue::Heading(NodeHeading::default())); let mut hashpos = line[self.first_nonspace..] .iter() @@ -971,10 +973,10 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { fence_char: line[first_nonspace], fence_length: matched, fence_offset: first_nonspace - offset, - info: Vec::with_capacity(10), - literal: Vec::new(), + info: String::with_capacity(10), + literal: String::new(), }; - *container = self.add_child(*container, NodeValue::CodeBlock(ncb)); + *container = self.add_child(container, NodeValue::CodeBlock(ncb)); self.advance_offset(line, first_nonspace + matched - offset, false); } else if !indented && (unwrap_into( @@ -988,10 +990,10 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { { let nhb = NodeHtmlBlock { block_type: matched as u8, - literal: Vec::new(), + literal: String::new(), }; - *container = self.add_child(*container, NodeValue::HtmlBlock(nhb)); + *container = self.add_child(container, NodeValue::HtmlBlock(nhb)); } else if !indented && node_matches!(container, NodeValue::Paragraph) && unwrap_into( @@ -1022,7 +1024,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { && self.thematic_break_kill_pos <= self.first_nonspace && unwrap_into(self.scan_thematic_break(line), &mut matched) { - *container = self.add_child(*container, NodeValue::ThematicBreak); + *container = self.add_child(container, NodeValue::ThematicBreak); let adv = line.len() - 1 - self.offset; self.advance_offset(line, adv, false); } else if !indented @@ -1036,7 +1038,10 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { c = c.split(|&e| e == b']').next().unwrap(); let offset = self.first_nonspace + matched - self.offset; self.advance_offset(line, offset, false); - *container = self.add_child(*container, NodeValue::FootnoteDefinition(c.to_vec())); + *container = self.add_child( + container, + NodeValue::FootnoteDefinition(str::from_utf8(c).unwrap().to_string()), + ); } else if !indented && self.options.extension.description_lists && line[self.first_nonspace] == b':' @@ -1070,7 +1075,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { } let i = self.column - save_column; - if i >= 5 || i < 1 || strings::is_line_end_char(line[self.offset]) { + if !(1..5).contains(&i) || strings::is_line_end_char(line[self.offset]) { nl.padding = matched + 1; self.offset = save_offset; self.column = save_column; @@ -1088,10 +1093,10 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { NodeValue::List(ref mnl) => !lists_match(&nl, mnl), _ => true, } { - *container = self.add_child(*container, NodeValue::List(nl)); + *container = self.add_child(container, NodeValue::List(nl)); } - *container = self.add_child(*container, NodeValue::Item(nl)); + *container = self.add_child(container, NodeValue::Item(nl)); } else if indented && !maybe_lazy && !self.blank { self.advance_offset(line, CODE_INDENT, true); let ncb = NodeCodeBlock { @@ -1099,13 +1104,13 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { fence_char: 0, fence_length: 0, fence_offset: 0, - info: vec![], - literal: Vec::new(), + info: String::new(), + literal: String::new(), }; - *container = self.add_child(*container, NodeValue::CodeBlock(ncb)); + *container = self.add_child(container, NodeValue::CodeBlock(ncb)); } else { let new_container = if !indented && self.options.extension.table { - table::try_opening_block(self, *container, line) + table::try_opening_block(self, container, line) } else { None }; @@ -1454,11 +1459,12 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { self.offset += 1; let chars_to_tab = TAB_STOP - (self.column % TAB_STOP); for _ in 0..chars_to_tab { - ast.content.push(b' '); + ast.content.push(' '); } } if self.offset < line.len() { - ast.content.extend_from_slice(&line[self.offset..]); + ast.content + .push_str(str::from_utf8(&line[self.offset..]).unwrap()); // TODO: try propagating &[u8] to &str up from here } } @@ -1492,14 +1498,14 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { } fn finalize(&mut self, node: &'a AstNode<'a>) -> Option<&'a AstNode<'a>> { - self.finalize_borrowed(node, &mut *node.data.borrow_mut()) + self.finalize_borrowed(node, &mut node.data.borrow_mut()) } - fn resolve_reference_link_definitions(&mut self, content: &mut Vec) -> bool { + fn resolve_reference_link_definitions(&mut self, content: &mut String) -> bool { let mut seeked = 0; { let mut pos = 0; - let mut seek: &[u8] = &*content; + let mut seek: &[u8] = content.as_bytes(); while !seek.is_empty() && seek[0] == b'[' && unwrap_into(self.parse_reference_inline(seek), &mut pos) @@ -1510,10 +1516,10 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { } if seeked != 0 { - *content = content[seeked..].to_vec(); + *content = content[seeked..].to_string(); } - !strings::is_blank(content) + !strings::is_blank(content.as_bytes()) } fn finalize_borrowed( @@ -1537,18 +1543,18 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { NodeValue::CodeBlock(ref mut ncb) => { if !ncb.fenced { strings::remove_trailing_blank_lines(content); - content.push(b'\n'); + content.push('\n'); } else { let mut pos = 0; while pos < content.len() { - if strings::is_line_end_char(content[pos]) { + if strings::is_line_end_char(content.as_bytes()[pos]) { break; } pos += 1; } assert!(pos < content.len()); - let mut tmp = entity::unescape_html(&content[..pos]); + let mut tmp = entity::unescape_html(&content.as_bytes()[..pos]); strings::trim(&mut tmp); strings::unescape(&mut tmp); if tmp.is_empty() { @@ -1557,19 +1563,19 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { .parse .default_info_string .as_ref() - .map_or(vec![], |s| s.as_bytes().to_vec()); + .map_or(String::new(), |s| s.clone()); } else { - ncb.info = tmp; + ncb.info = String::from_utf8(tmp).unwrap(); } - if content[pos] == b'\r' { + if content.as_bytes()[pos] == b'\r' { pos += 1; } - if content[pos] == b'\n' { + if content.as_bytes()[pos] == b'\n' { pos += 1; } - *content = content[pos..].to_vec(); + content.drain(..pos); } mem::swap(&mut ncb.literal, content); } @@ -1625,7 +1631,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { fn parse_inlines(&mut self, node: &'a AstNode<'a>) { let delimiter_arena = Arena::new(); let node_data = node.data.borrow(); - let content = strings::rtrim_slice(&node_data.content); + let content = strings::rtrim_slice(node_data.content.as_bytes()); let mut subj = inlines::Subject::new( self.arena, self.options, @@ -1650,13 +1656,13 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { Self::find_footnote_references(self.root, &mut map, &mut ix); if ix > 0 { - let mut v = map.into_iter().map(|(_, v)| v).collect::>(); + let mut v = map.into_values().collect::>(); v.sort_unstable_by(|a, b| a.ix.cmp(&b.ix)); for f in v { - if f.ix.is_some() { + if let Some(ix) = f.ix { match f.node.data.borrow_mut().value { NodeValue::FootnoteDefinition(ref mut name) => { - *name = format!("{}", f.ix.unwrap()).into_bytes(); + *name = format!("{}", ix); } _ => unreachable!(), } @@ -1668,7 +1674,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { fn find_footnote_definitions( node: &'a AstNode<'a>, - map: &mut HashMap, FootnoteDefinition<'a>>, + map: &mut HashMap>, ) { match node.data.borrow().value { NodeValue::FootnoteDefinition(ref name) => { @@ -1688,35 +1694,37 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { fn find_footnote_references( node: &'a AstNode<'a>, - map: &mut HashMap, FootnoteDefinition>, - ix: &mut u32, + map: &mut HashMap, + ixp: &mut u32, ) { let mut ast = node.data.borrow_mut(); let mut replace = None; match ast.value { NodeValue::FootnoteReference(ref mut name) => { if let Some(ref mut footnote) = map.get_mut(name) { - if footnote.ix.is_none() { - *ix += 1; - footnote.ix = Some(*ix); - } - *name = format!("{}", footnote.ix.unwrap()).into_bytes(); + let ix = match footnote.ix { + Some(ix) => ix, + None => { + *ixp += 1; + footnote.ix = Some(*ixp); + *ixp + } + }; + *name = format!("{}", ix); } else { replace = Some(name.clone()); } } _ => { for n in node.children() { - Self::find_footnote_references(n, map, ix); + Self::find_footnote_references(n, map, ixp); } } } if let Some(mut label) = replace { - label.insert(0, b'['); - label.insert(1, b'^'); - let len = label.len(); - label.insert(len, b']'); + label.insert_str(0, "[^"); + label.push(']'); ast.value = NodeValue::Text(label); } } @@ -1745,7 +1753,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { match ns.data.borrow().value { NodeValue::Text(ref adj) => { - root.extend_from_slice(adj); + root.push_str(adj); ns.detach(); } _ => { @@ -1776,7 +1784,7 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { } } - fn postprocess_text_node(&mut self, node: &'a AstNode<'a>, text: &mut Vec) { + fn postprocess_text_node(&mut self, node: &'a AstNode<'a>, text: &mut String) { if self.options.extension.tasklist { self.process_tasklist(node, text); } @@ -1786,16 +1794,20 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { } } - fn process_tasklist(&mut self, node: &'a AstNode<'a>, text: &mut Vec) { + fn process_tasklist(&mut self, node: &'a AstNode<'a>, text: &mut String) { + // TODO: re2c static TASKLIST: OnceCell = OnceCell::new(); let r = TASKLIST.get_or_init(|| Regex::new(r"\A(\s*\[(.)\])(?:\z|\s)").unwrap()); let (symbol, end) = match r.captures(text) { None => return, - Some(c) => (c.get(2).unwrap().as_bytes()[0], c.get(0).unwrap().end()), + Some(c) => (c.get(2).unwrap().as_str(), c.get(0).unwrap().end()), }; - if !self.options.parse.relaxed_tasklist_matching && !matches!(symbol, b' ' | b'x' | b'X') { + assert!(symbol.len() == 1); + let symbol = symbol.chars().next().unwrap(); + + if !self.options.parse.relaxed_tasklist_matching && !matches!(symbol, ' ' | 'x' | 'X') { return; } @@ -1812,12 +1824,12 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { return; } - *text = text[end..].to_vec(); + text.drain(..end); + let checkbox = inlines::make_inline( self.arena, NodeValue::TaskItem { - checked: symbol != b' ', - symbol: symbol, + symbol: if symbol == ' ' { None } else { Some(symbol) }, }, ); node.insert_before(checkbox); @@ -1836,17 +1848,10 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { self.callback.as_mut(), ); - let mut lab: Vec = match subj.link_label() { - Some(lab) => { - if lab.is_empty() { - return None; - } else { - lab - } - } - None => return None, - } - .to_vec(); + let mut lab: String = match subj.link_label() { + Some(lab) if !lab.is_empty() => lab.to_string(), + _ => return None, + }; if subj.peek_char() != Some(&(b':')) { return None; @@ -1894,9 +1899,9 @@ impl<'a, 'o, 'c> Parser<'a, 'o, 'c> { lab = strings::normalize_label(&lab); if !lab.is_empty() { - subj.refmap.map.entry(lab.to_vec()).or_insert(Reference { - url: strings::clean_url(&url), - title: strings::clean_title(&title), + subj.refmap.map.entry(lab).or_insert(Reference { + url: String::from_utf8(strings::clean_url(url)).unwrap(), + title: String::from_utf8(strings::clean_title(&title)).unwrap(), }); } Some(subj.pos) @@ -2054,23 +2059,19 @@ fn reopen_ast_nodes<'a>(mut ast: &'a AstNode<'a>) { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AutolinkType { - URI, + Uri, Email, } #[derive(Debug, Clone, Copy)] /// Options for bulleted list redering in markdown. See `link_style` in [ComrakRenderOptions] for more details. +#[derive(Default)] pub enum ListStyleType { /// The `-` character + #[default] Dash = 45, /// The `+` character Plus = 43, /// The `*` character Star = 42, } - -impl Default for ListStyleType { - fn default() -> Self { - ListStyleType::Dash - } -} diff --git a/src/parser/table.rs b/src/parser/table.rs index 8f28b575..2700aafb 100644 --- a/src/parser/table.rs +++ b/src/parser/table.rs @@ -7,8 +7,8 @@ use crate::strings::trim; use std::cell::RefCell; use std::cmp::min; -pub fn try_opening_block<'a, 'o, 'c>( - parser: &mut Parser<'a, 'o, 'c>, +pub fn try_opening_block<'a>( + parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, line: &[u8], ) -> Option<(&'a AstNode<'a>, bool, bool)> { @@ -24,8 +24,8 @@ pub fn try_opening_block<'a, 'o, 'c>( } } -fn try_opening_header<'a, 'o, 'c>( - parser: &mut Parser<'a, 'o, 'c>, +fn try_opening_header<'a>( + parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, line: &[u8], ) -> Option<(&'a AstNode<'a>, bool, bool)> { @@ -39,7 +39,7 @@ fn try_opening_header<'a, 'o, 'c>( let marker_row = row(&line[parser.first_nonspace..]).unwrap(); - let header_row = match row(&container.data.borrow().content) { + let header_row = match row(container.data.borrow().content.as_bytes()) { Some(header_row) => header_row, None => return Some((container, false, true)), }; @@ -52,13 +52,14 @@ fn try_opening_header<'a, 'o, 'c>( try_inserting_table_header_paragraph( parser, container, - &container.data.borrow().content, + container.data.borrow().content.as_bytes(), header_row.paragraph_offset, ); } let mut alignments = vec![]; for cell in marker_row.cells { + let cell = cell.as_bytes(); let left = !cell.is_empty() && cell[0] == b':'; let right = !cell.is_empty() && cell[cell.len() - 1] == b':'; alignments.push(if left && right { @@ -89,8 +90,8 @@ fn try_opening_header<'a, 'o, 'c>( Some((table, true, false)) } -fn try_opening_row<'a, 'o, 'c>( - parser: &mut Parser<'a, 'o, 'c>, +fn try_opening_row<'a>( + parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, alignments: &[TableAlignment], line: &[u8], @@ -121,7 +122,7 @@ fn try_opening_row<'a, 'o, 'c>( struct Row { paragraph_offset: usize, - cells: Vec>, + cells: Vec, } fn row(string: &[u8]) -> Option { @@ -140,7 +141,7 @@ fn row(string: &[u8]) -> Option { if cell_matched > 0 || pipe_matched > 0 { let mut cell = unescape_pipes(&string[offset..offset + cell_matched]); trim(&mut cell); - cells.push(cell); + cells.push(String::from_utf8(cell).unwrap()); } offset += cell_matched + pipe_matched; @@ -172,8 +173,8 @@ fn row(string: &[u8]) -> Option { } } -fn try_inserting_table_header_paragraph<'a, 'o, 'c>( - parser: &mut Parser<'a, 'o, 'c>, +fn try_inserting_table_header_paragraph<'a>( + parser: &mut Parser<'a, '_, '_>, container: &'a AstNode<'a>, parent_string: &[u8], paragraph_offset: usize, @@ -181,14 +182,14 @@ fn try_inserting_table_header_paragraph<'a, 'o, 'c>( let mut paragraph_content = unescape_pipes(&parent_string[..paragraph_offset]); trim(&mut paragraph_content); - if !container.parent().is_some() + if container.parent().is_none() || !nodes::can_contain_type(container.parent().unwrap(), &NodeValue::Paragraph) { return; } let mut paragraph = Ast::new(NodeValue::Paragraph); - paragraph.content = paragraph_content; + paragraph.content = String::from_utf8(paragraph_content).unwrap(); let node = parser.arena.alloc(Node::new(RefCell::new(paragraph))); container.insert_before(node); } diff --git a/src/plugins/syntect.rs b/src/plugins/syntect.rs index 4854433f..7f297054 100644 --- a/src/plugins/syntect.rs +++ b/src/plugins/syntect.rs @@ -25,7 +25,7 @@ impl<'a> SyntectAdapter<'a> { /// Construct a new `SyntectAdapter` object and set the syntax highlighting theme. pub fn new(theme: &'a str) -> Self { SyntectAdapter { - theme: &theme, + theme, syntax_set: SyntaxSet::load_defaults_newlines(), theme_set: ThemeSet::load_defaults(), } diff --git a/src/scanners.re b/src/scanners.re index 7cd21a34..79846c29 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -1,6 +1,3 @@ -// TODO: consider dropping all the #[inline(always)], we probably don't know -// better than rustc. - /*!re2c re2c:case-insensitive = 1; re2c:encoding:utf8 = 1; diff --git a/src/scanners.rs b/src/scanners.rs index cd8f510e..50b3b9c8 100644 --- a/src/scanners.rs +++ b/src/scanners.rs @@ -1,6 +1,4 @@ /* Generated by re2c 3.0 */ -// TODO: consider dropping all the #[inline(always)], we probably don't know -// better than rustc. pub fn atx_heading_start(s: &[u8]) -> Option { let mut cursor = 0; diff --git a/src/strings.rs b/src/strings.rs index f11b1eb9..41a0371d 100644 --- a/src/strings.rs +++ b/src/strings.rs @@ -81,10 +81,11 @@ pub fn normalize_code(v: &[u8]) -> Vec { r } -pub fn remove_trailing_blank_lines(line: &mut Vec) { +pub fn remove_trailing_blank_lines(line: &mut String) { + let line_bytes = line.as_bytes(); let mut i = line.len() - 1; loop { - let c = line[i]; + let c = line_bytes[i]; if c != b' ' && c != b'\t' && !is_line_end_char(c) { break; @@ -98,10 +99,8 @@ pub fn remove_trailing_blank_lines(line: &mut Vec) { i -= 1; } - for i in i..line.len() { - let c = line[i]; - - if !is_line_end_char(c) { + for (i, c) in line_bytes.iter().enumerate().take(line.len()).skip(i) { + if !is_line_end_char(*c) { continue; } @@ -240,11 +239,14 @@ pub fn is_blank(s: &[u8]) -> bool { true } -pub fn normalize_label(i: &[u8]) -> Vec { - let i = trim_slice(i); +pub fn normalize_label(i: &str) -> String { + // trim_slice only removes bytes from start and end that match isspace(); + // result is UTF-8. + let i = unsafe { str::from_utf8_unchecked(trim_slice(i.as_bytes())) }; + let mut v = String::with_capacity(i.len()); let mut last_was_whitespace = false; - for c in unsafe { str::from_utf8_unchecked(i) }.chars() { + for c in i.chars() { for e in c.to_lowercase() { if e.is_whitespace() { if !last_was_whitespace { @@ -257,7 +259,7 @@ pub fn normalize_label(i: &[u8]) -> Vec { } } } - v.into_bytes() + v } #[cfg(feature = "syntect")] diff --git a/src/tests.rs b/src/tests.rs index 03c3475a..d725e70b 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -80,7 +80,7 @@ fn commonmark(input: &str, expected: &str, opts: Option<&ComrakOptions>) { let root = parse_document(&arena, input, options); let mut output = vec![]; - cm::format_document(root, &options, &mut output).unwrap(); + cm::format_document(root, options, &mut output).unwrap(); compare_strs(&String::from_utf8(output).unwrap(), expected, "regular"); } @@ -132,14 +132,14 @@ fn html_plugins(input: &str, expected: &str, plugins: &ComrakPlugins) { let root = parse_document(&arena, input, &options); let mut output = vec![]; - html::format_document_with_plugins(root, &options, &mut output, &plugins).unwrap(); + html::format_document_with_plugins(root, &options, &mut output, plugins).unwrap(); compare_strs(&String::from_utf8(output).unwrap(), expected, "regular"); let mut md = vec![]; cm::format_document(root, &options, &mut md).unwrap(); let root = parse_document(&arena, &String::from_utf8(md).unwrap(), &options); let mut output_from_rt = vec![]; - html::format_document_with_plugins(root, &options, &mut output_from_rt, &plugins).unwrap(); + html::format_document_with_plugins(root, &options, &mut output_from_rt, plugins).unwrap(); compare_strs( &String::from_utf8(output_from_rt).unwrap(), expected, @@ -564,13 +564,13 @@ fn backticks_num() { let code1 = NodeValue::Code(NodeCode { num_backticks: 1, - literal: b"code1".to_vec(), + literal: "code1".to_string(), }); asssert_node_eq(root, &[0, 1], &code1); let code2 = NodeValue::Code(NodeCode { num_backticks: 3, - literal: b"code2".to_vec(), + literal: "code2".to_string(), }); asssert_node_eq(root, &[0, 3], &code2); } @@ -1247,7 +1247,7 @@ fn nested_tables_3() { #[test] fn no_stack_smash_html() { - let s: String = ::std::iter::repeat('>').take(150_000).collect(); + let s: String = ">".repeat(150_000); let arena = Arena::new(); let root = parse_document(&arena, &s, &ComrakOptions::default()); let mut output = vec![]; @@ -1256,7 +1256,7 @@ fn no_stack_smash_html() { #[test] fn no_stack_smash_cm() { - let s: String = ::std::iter::repeat('>').take(150_000).collect(); + let s: String = ">".repeat(150_000); let arena = Arena::new(); let root = parse_document(&arena, &s, &ComrakOptions::default()); let mut output = vec![]; @@ -1420,7 +1420,7 @@ fn exercise_full_api<'a>() { &arena, "document", &default_options, - Some(&mut |_: &[u8]| Some((b"abc".to_vec(), b"xyz".to_vec()))), + Some(&mut |_: &str| Some(("abc".to_string(), "xyz".to_string()))), ); let _ = ComrakOptions { @@ -1457,15 +1457,15 @@ fn exercise_full_api<'a>() { pub struct MockAdapter {} impl SyntaxHighlighterAdapter for MockAdapter { fn highlight(&self, lang: Option<&str>, code: &str) -> String { - String::from(format!("{}{}", lang.unwrap(), code)) + format!("{}{}", lang.unwrap(), code) } fn build_pre_tag(&self, attributes: &HashMap) -> String { - build_opening_tag("pre", &attributes) + build_opening_tag("pre", attributes) } fn build_code_tag(&self, attributes: &HashMap) -> String { - build_opening_tag("code", &attributes) + build_opening_tag("code", attributes) } } @@ -1519,11 +1519,11 @@ fn exercise_full_api<'a>() { let _: bool = ncb.fenced; let _: u8 = ncb.fence_char; let _: usize = ncb.fence_length; - let _: Vec = ncb.info; - let _: Vec = ncb.literal; + let _: String = ncb.info; + let _: String = ncb.literal; } nodes::NodeValue::HtmlBlock(nhb) => { - let _: Vec = nhb.literal; + let _: String = nhb.literal; } nodes::NodeValue::Paragraph => {} nodes::NodeValue::Heading(nh) => { @@ -1532,7 +1532,7 @@ fn exercise_full_api<'a>() { } nodes::NodeValue::ThematicBreak => {} nodes::NodeValue::FootnoteDefinition(name) => { - let _: &Vec = name; + let _: &String = name; } nodes::NodeValue::Table(aligns) => { let _: &Vec = aligns; @@ -1548,35 +1548,34 @@ fn exercise_full_api<'a>() { } nodes::NodeValue::TableCell => {} nodes::NodeValue::Text(text) => { - let _: &Vec = text; + let _: &String = text; } - nodes::NodeValue::TaskItem { checked, symbol } => { - let _: &bool = checked; - let _: &u8 = symbol; + nodes::NodeValue::TaskItem { symbol } => { + let _: &Option = symbol; } nodes::NodeValue::SoftBreak => {} nodes::NodeValue::LineBreak => {} nodes::NodeValue::Code(code) => { let _: usize = code.num_backticks; - let _: Vec = code.literal; + let _: String = code.literal; } nodes::NodeValue::HtmlInline(html) => { - let _: &Vec = html; + let _: &String = html; } nodes::NodeValue::Emph => {} nodes::NodeValue::Strong => {} nodes::NodeValue::Strikethrough => {} nodes::NodeValue::Superscript => {} nodes::NodeValue::Link(nl) | nodes::NodeValue::Image(nl) => { - let _: Vec = nl.url; - let _: Vec = nl.title; + let _: String = nl.url; + let _: String = nl.title; } #[cfg(feature = "shortcodes")] nodes::NodeValue::ShortCode(ne) => { let _: Option = ne.shortcode(); } nodes::NodeValue::FootnoteReference(name) => { - let _: &Vec = name; + let _: &String = name; } } }