Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions examples/parser-map-event-print.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
use pulldown_cmark::{Event, Parser, html};
use pulldown_cmark::{html, Event, Parser};

fn main() {
let markdown_input = "# Example Heading\nExample paragraph with **lorem** _ipsum_ text.";
println!("\nParsing the following markdown string:\n{}\n", markdown_input);
println!(
"\nParsing the following markdown string:\n{}\n",
markdown_input
);

// Set up the parser. We can treat it as any other iterator.
// Set up the parser. We can treat it as any other iterator.
// For each event, we print its details, such as the tag or string.
// This filter simply returns the same event without any changes;
// you can compare the `event-filter` example which alters the output.
let parser = Parser::new(markdown_input)
.map(|event| {
let parser = Parser::new(markdown_input).map(|event| {
match &event {
Event::Start(tag) => println!("Start: {:?}", tag),
Event::End(tag) => println!("End: {:?}", tag),
Event::Html(s) => println!("Html: {:?}", s),
Event::InlineHtml(s) => println!("InlineHtml: {:?}", s),
Event::Text(s) => println!("Text: {:?}", s),
Event::Code(s) => println!("Code: {:?}", s),
Event::FootnoteReference(s) => println!("FootnoteReference: {:?}", s),
Expand Down
1 change: 1 addition & 0 deletions examples/parser-map-tag-print.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ fn main() {
let parser = Parser::new_ext(markdown_input, Options::all()).map(|event| {
match &event {
Event::Start(tag) => match tag {
Tag::HtmlBlock => println!("HtmlBlock"),
Tag::Heading {
level,
id,
Expand Down
23 changes: 23 additions & 0 deletions src/firstpass.rs
Original file line number Diff line number Diff line change
Expand Up @@ -801,8 +801,16 @@ impl<'a, 'b> FirstPass<'a, 'b> {
html_end_tag: &str,
mut remaining_space: usize,
) -> usize {
self.tree.append(Item {
start: start_ix,
end: 0, // set later
body: ItemBody::HtmlBlock,
});
self.tree.push();

let bytes = self.text.as_bytes();
let mut ix = start_ix;
let end_ix;
loop {
let line_start_ix = ix;
ix += scan_nextline(&bytes[ix..]);
Expand All @@ -815,20 +823,24 @@ impl<'a, 'b> FirstPass<'a, 'b> {
self.options.has_gfm_footnotes(),
);
if n_containers < self.tree.spine_len() {
end_ix = ix;
break;
}

if (&self.text[line_start_ix..ix]).contains(html_end_tag) {
end_ix = ix;
break;
}

let next_line_ix = ix + line_start.bytes_scanned();
if next_line_ix == self.text.len() {
end_ix = next_line_ix;
break;
}
ix = next_line_ix;
remaining_space = line_start.remaining_space();
}
self.pop(end_ix);
ix
}

Expand All @@ -840,8 +852,16 @@ impl<'a, 'b> FirstPass<'a, 'b> {
start_ix: usize,
mut remaining_space: usize,
) -> usize {
self.tree.append(Item {
start: start_ix,
end: 0, // set later
body: ItemBody::HtmlBlock,
});
self.tree.push();

let bytes = self.text.as_bytes();
let mut ix = start_ix;
let end_ix;
loop {
let line_start_ix = ix;
ix += scan_nextline(&bytes[ix..]);
Expand All @@ -854,17 +874,20 @@ impl<'a, 'b> FirstPass<'a, 'b> {
self.options.has_gfm_footnotes(),
);
if n_containers < self.tree.spine_len() || line_start.is_at_eol() {
end_ix = ix;
break;
}

let next_line_ix = ix + line_start.bytes_scanned();
if next_line_ix == self.text.len() || scan_blank_line(&bytes[next_line_ix..]).is_some()
{
end_ix = next_line_ix;
break;
}
ix = next_line_ix;
remaining_space = line_start.remaining_space();
}
self.pop(end_ix);
ix
}

Expand Down
7 changes: 5 additions & 2 deletions src/html.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ where
escape_html(&mut self.writer, &text)?;
self.write("</code>")?;
}
Html(html) => {
Html(html) | InlineHtml(html) => {
self.write(&html)?;
}
SoftBreak => {
Expand Down Expand Up @@ -146,6 +146,7 @@ where
/// Writes the start of an HTML tag.
fn start_tag(&mut self, tag: Tag<'a>) -> io::Result<()> {
match tag {
Tag::HtmlBlock => Ok(()),
Tag::Paragraph => {
if self.end_newline {
self.write("<p>")
Expand Down Expand Up @@ -347,6 +348,7 @@ where

fn end_tag(&mut self, tag: TagEnd) -> io::Result<()> {
match tag {
TagEnd::HtmlBlock => {}
TagEnd::Paragraph => {
self.write("</p>\n")?;
}
Expand Down Expand Up @@ -426,7 +428,8 @@ where
}
nest -= 1;
}
Html(text) | Code(text) | Text(text) => {
Html(_) => {}
InlineHtml(text) | Code(text) | Text(text) => {
escape_html(&mut self.writer, &text)?;
self.end_newline = text.ends_with('\n');
}
Expand Down
12 changes: 10 additions & 2 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,9 @@ pub enum Tag<'a> {
/// A code block.
CodeBlock(CodeBlockKind<'a>),

/// An HTML block.
HtmlBlock,

/// A list. If the list is ordered the field indicates the number of the first item.
/// Contains only list items.
List(Option<u64>), // TODO: add delim and tight for ast (not needed for html)
Expand Down Expand Up @@ -206,7 +209,10 @@ pub enum TagEnd {
BlockQuote,
CodeBlock,

List(bool), // true for ordered lists
HtmlBlock,

/// A list, `true` for ordered lists.
List(bool),
Item,
FootnoteDefinition,

Expand All @@ -222,7 +228,6 @@ pub enum TagEnd {
Link,
Image,

/// A metadata block.
MetadataBlock(MetadataBlockKind),
}

Expand Down Expand Up @@ -328,6 +333,9 @@ pub enum Event<'a> {
/// An HTML node.
#[cfg_attr(feature = "serde", serde(borrow))]
Html(CowStr<'a>),
/// An inline HTML node.
#[cfg_attr(feature = "serde", serde(borrow))]
InlineHtml(CowStr<'a>),
/// A reference to a footnote with given label, which may or may not be defined
/// by an event with a `Tag::FootnoteDefinition` tag. Definitions and references to them may
/// occur in any order.
Expand Down
31 changes: 25 additions & 6 deletions src/parse.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ use std::ops::{Index, Range};
use unicase::UniCase;

use crate::firstpass::run_first_pass;
use crate::linklabel::{scan_link_label_rest, LinkLabel, ReferenceLabel, FootnoteLabel};
use crate::linklabel::{scan_link_label_rest, FootnoteLabel, LinkLabel, ReferenceLabel};
use crate::strings::CowStr;
use crate::tree::{Tree, TreeIndex};
use crate::{scanners::*, MetadataBlockKind};
Expand Down Expand Up @@ -83,6 +83,8 @@ pub(crate) enum ItemBody {
Heading(HeadingLevel, Option<HeadingIndex>), // heading level
FencedCodeBlock(CowIndex),
IndentCodeBlock,
HtmlBlock,
InlineHtml,
Html,
OwnedHtml(CowIndex),
BlockQuote,
Expand Down Expand Up @@ -273,7 +275,7 @@ impl<'input, 'callback> Parser<'input, 'callback> {
self.allocs.allocate_cow(converted_string.into()),
)
} else {
ItemBody::Html
ItemBody::InlineHtml
};
self.tree[cur_ix].item.end = ix;
self.tree[cur_ix].next = node;
Expand Down Expand Up @@ -465,10 +467,16 @@ impl<'input, 'callback> Parser<'input, 'callback> {
// see if it's a footnote reference
if let Some((ReferenceLabel::Footnote(l), end)) = label {
let footref = self.allocs.allocate_cow(l);
if let Some(def) = self.allocs.footdefs.get_mut(self.allocs.cows[footref.0].to_owned().into()) {
if let Some(def) = self
.allocs
.footdefs
.get_mut(self.allocs.cows[footref.0].to_owned().into())
{
def.use_count += 1;
}
if !self.options.has_gfm_footnotes() || self.allocs.footdefs.contains(&self.allocs.cows[footref.0]) {
if !self.options.has_gfm_footnotes()
|| self.allocs.footdefs.contains(&self.allocs.cows[footref.0])
{
self.tree[tos.node].next = node_after_link;
self.tree[tos.node].child = None;
self.tree[tos.node].item.body =
Expand Down Expand Up @@ -889,7 +897,11 @@ impl<'input, 'callback> Parser<'input, 'callback> {
&bytes[(ix - 1)..],
Some(&|bytes| {
let mut line_start = LineStart::new(bytes);
let _ = scan_containers(&self.tree, &mut line_start, self.options.has_gfm_footnotes());
let _ = scan_containers(
&self.tree,
&mut line_start,
self.options.has_gfm_footnotes(),
);
line_start.bytes_scanned()
}),
)?;
Expand All @@ -906,7 +918,11 @@ impl<'input, 'callback> Parser<'input, 'callback> {
}

/// Returns number of containers scanned.
pub(crate) fn scan_containers(tree: &Tree<Item>, line_start: &mut LineStart, gfm_footnotes: bool) -> usize {
pub(crate) fn scan_containers(
tree: &Tree<Item>,
line_start: &mut LineStart,
gfm_footnotes: bool,
) -> usize {
let mut i = 0;
for &node_ix in tree.walk_spine() {
match tree[node_ix].item.body {
Expand Down Expand Up @@ -1497,6 +1513,7 @@ fn body_to_tag_end(body: &ItemBody) -> TagEnd {
ItemBody::Heading(level, _) => TagEnd::Heading(level),
ItemBody::IndentCodeBlock | ItemBody::FencedCodeBlock(..) => TagEnd::CodeBlock,
ItemBody::BlockQuote => TagEnd::BlockQuote,
ItemBody::HtmlBlock => TagEnd::HtmlBlock,
ItemBody::List(_, c, _) => {
let is_ordered = c == b'.' || c == b')';
TagEnd::List(is_ordered)
Expand All @@ -1518,7 +1535,9 @@ fn item_to_event<'a>(item: Item, text: &'a str, allocs: &mut Allocations<'a>) ->
ItemBody::Code(cow_ix) => return Event::Code(allocs.take_cow(cow_ix)),
ItemBody::SynthesizeText(cow_ix) => return Event::Text(allocs.take_cow(cow_ix)),
ItemBody::SynthesizeChar(c) => return Event::Text(c.into()),
ItemBody::HtmlBlock => Tag::HtmlBlock,
ItemBody::Html => return Event::Html(text[item.start..item.end].into()),
ItemBody::InlineHtml => return Event::InlineHtml(text[item.start..item.end].into()),
ItemBody::OwnedHtml(cow_ix) => return Event::Html(allocs.take_cow(cow_ix)),
ItemBody::SoftBreak => return Event::SoftBreak,
ItemBody::HardBreak => return Event::HardBreak,
Expand Down