Skip to content

Commit

Permalink
perf(html/parser): Improve memory allocation (#4884)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Jun 6, 2022
1 parent c46045f commit ce4d577
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 94 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@ pub struct ActiveFormattingElementStack {

impl ActiveFormattingElementStack {
pub fn new() -> Self {
ActiveFormattingElementStack { items: vec![] }
ActiveFormattingElementStack {
items: Vec::with_capacity(8),
}
}

pub fn push(&mut self, value: ActiveFormattingElement) {
Expand Down
156 changes: 64 additions & 92 deletions crates/swc_html_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ where
context_element: None,
insertion_mode: Default::default(),
original_insertion_mode: Default::default(),
template_insertion_mode_stack: vec![],
template_insertion_mode_stack: Vec::with_capacity(16),
document_mode: DocumentMode::NoQuirks,
document: None,
html_additional_attributes: vec![],
Expand All @@ -126,7 +126,7 @@ where
form_element_pointer: None,
open_elements_stack: OpenElementsStack::new(),
active_formatting_elements: ActiveFormattingElementStack::new(),
pending_character_tokens: vec![],
pending_character_tokens: Vec::with_capacity(16),
frameset_ok: true,
foster_parenting_enabled: false,
errors: Default::default(),
Expand All @@ -148,10 +148,11 @@ where

self.run()?;

let original_document = &mut self.document.take().unwrap();
let mut children = vec![];
let document = &mut self.document.take().unwrap();
let nodes = document.children.take();
let mut children = Vec::with_capacity(nodes.len());

for node in original_document.children.take() {
for node in nodes {
children.push(self.node_to_child(node));
}

Expand Down Expand Up @@ -337,9 +338,10 @@ where

self.run()?;

let mut children = vec![];
let nodes = root.children.take();
let mut children = Vec::with_capacity(nodes.len());

for node in root.children.take() {
for node in nodes {
children.push(self.node_to_child(node));
}

Expand All @@ -355,7 +357,8 @@ where
Node::new(Data::Document(Document {
span: Default::default(),
mode: DocumentMode::NoQuirks,
children: vec![],
// `DocumentType` and HTML `Element`
children: Vec::with_capacity(2),
}))
}

Expand All @@ -366,9 +369,10 @@ where
Child::DocumentType(DocumentType { ..document_type })
}
Data::Element(element) => {
let mut new_children = vec![];
let nodes = node.children.take();
let mut new_children = Vec::with_capacity(nodes.len());

for node in node.children.take() {
for node in nodes {
new_children.push(self.node_to_child(node));
}

Expand Down Expand Up @@ -1463,7 +1467,7 @@ where
value: attribute.value.clone(),
})
.collect(),
children: vec![],
children: Vec::with_capacity(2),
content: None,
}));

Expand Down Expand Up @@ -1513,16 +1517,9 @@ where
InsertionMode::BeforeHead => {
let anything_else =
|parser: &mut Parser<I>, token_and_info: &mut TokenAndInfo| -> PResult<()> {
let element = parser.insert_html_element(&mut TokenAndInfo {
span: Default::default(),
acknowledged: false,
token: Token::StartTag {
tag_name: "head".into(),
raw_tag_name: None,
self_closing: false,
attributes: vec![],
},
})?;
let element = parser.insert_html_element(
&mut parser.create_fake_token_and_info("head", None),
)?;

parser.head_element_pointer = Some(element);
parser.insertion_mode = InsertionMode::InHead;
Expand Down Expand Up @@ -2049,20 +2046,15 @@ where
let anything_else = |parser: &mut Parser<I>,
token_and_info: &mut TokenAndInfo|
-> PResult<()> {
parser.insert_html_element(&mut TokenAndInfo {
span: if matches!(&token_and_info.token, Token::EndTag { tag_name, .. } if &*tag_name == "body") {
token_and_info.span
} else {
Default::default()
},
acknowledged: false,
token: Token::StartTag {
tag_name: "body".into(),
raw_tag_name: None,
self_closing: false,
attributes: vec![],
},
})?;
let span = if matches!(&token_and_info.token, Token::EndTag { tag_name, .. } if &*tag_name == "body")
{
Some(token_and_info.span)
} else {
None
};
let mut body_token = parser.create_fake_token_and_info("body", span);

parser.insert_html_element(&mut body_token)?;
parser.insertion_mode = InsertionMode::InBody;
parser.process_token(token_and_info, None)?;

Expand Down Expand Up @@ -3175,16 +3167,10 @@ where
ErrorKind::NoElementToCloseButEndTagSeen(tag_name.clone()),
));

self.insert_html_element(&mut TokenAndInfo {
span: token_and_info.span,
acknowledged: false,
token: Token::StartTag {
tag_name: "p".into(),
raw_tag_name: None,
self_closing: false,
attributes: vec![],
},
})?;
self.insert_html_element(
&mut self
.create_fake_token_and_info("p", Some(token_and_info.span)),
)?;
}

self.close_p_element(token_and_info, true);
Expand Down Expand Up @@ -3564,7 +3550,6 @@ where
// attributes, rather than the end tag token that it actually is.
Token::EndTag {
tag_name,
raw_tag_name,
self_closing,
..
} if tag_name == "br" => {
Expand All @@ -3574,16 +3559,9 @@ where
.push(Error::new(token_and_info.span, ErrorKind::EndTagBr));

self.reconstruct_active_formatting_elements()?;
self.insert_html_element(&mut TokenAndInfo {
span: token_and_info.span,
acknowledged: false,
token: Token::StartTag {
tag_name: tag_name.clone(),
raw_tag_name: raw_tag_name.clone(),
self_closing: *self_closing,
attributes: vec![],
},
})?;
self.insert_html_element(
&mut self.create_fake_token_and_info("br", Some(token_and_info.span)),
)?;
self.open_elements_stack.pop();

if is_self_closing {
Expand Down Expand Up @@ -4344,7 +4322,7 @@ where
_ => false,
} =>
{
self.pending_character_tokens = vec![];
self.pending_character_tokens.clear();
self.original_insertion_mode = self.insertion_mode.clone();
self.insertion_mode = InsertionMode::InTableText;
self.process_token(token_and_info, None)?;
Expand Down Expand Up @@ -4397,16 +4375,9 @@ where
// Reprocess the current token.
Token::StartTag { tag_name, .. } if tag_name == "col" => {
self.open_elements_stack.clear_back_to_table_context();
self.insert_html_element(&mut TokenAndInfo {
span: Default::default(),
acknowledged: true,
token: Token::StartTag {
tag_name: "colgroup".into(),
raw_tag_name: None,
self_closing: false,
attributes: vec![],
},
})?;
self.insert_html_element(
&mut self.create_fake_token_and_info("colgroup", None),
)?;
self.insertion_mode = InsertionMode::InColumnGroup;
self.process_token(token_and_info, None)?;
}
Expand Down Expand Up @@ -4435,16 +4406,9 @@ where
if matches!(tag_name.as_ref(), "td" | "th" | "tr") =>
{
self.open_elements_stack.clear_back_to_table_context();
self.insert_html_element(&mut TokenAndInfo {
span: Default::default(),
acknowledged: false,
token: Token::StartTag {
tag_name: "tbody".into(),
raw_tag_name: None,
self_closing: false,
attributes: vec![],
},
})?;
self.insert_html_element(
&mut self.create_fake_token_and_info("tbody", None),
)?;
self.insertion_mode = InsertionMode::InTableBody;
self.process_token(token_and_info, None)?;
}
Expand Down Expand Up @@ -5051,16 +5015,7 @@ where
ErrorKind::StartTagInTableBody(tag_name.clone()),
));
self.open_elements_stack.clear_back_to_table_body_context();
self.insert_html_element(&mut TokenAndInfo {
span: Default::default(),
acknowledged: false,
token: Token::StartTag {
tag_name: "tr".into(),
raw_tag_name: None,
self_closing: false,
attributes: vec![],
},
})?;
self.insert_html_element(&mut self.create_fake_token_and_info("tr", None))?;
self.insertion_mode = InsertionMode::InRow;
self.process_token(token_and_info, None)?;
}
Expand Down Expand Up @@ -6885,7 +6840,7 @@ where
tag_name,
namespace: namespace.unwrap(),
attributes,
children: vec![],
children: Vec::with_capacity(16),
content: None,
}
}
Expand Down Expand Up @@ -7448,11 +7403,28 @@ where
tag_name: "html".into(),
namespace: Namespace::HTML,
attributes: vec![],
children: vec![],
// body and head `Element`s
children: Vec::with_capacity(2),
content: None,
}))
}

/// Builds a synthetic start-tag token for an element the parser inserts on its
/// own (the callers in this module use it for tags such as `head`, `body`, `p`,
/// `br`, `colgroup`, `tbody` and `tr`).
///
/// * `tag_name` - name of the tag to put into the generated `Token::StartTag`.
/// * `span` - span to attach to the token; when `None`, the default span is
///   used.
///
/// The returned token is never acknowledged, never self-closing, carries no raw
/// tag name and has an empty attribute list.
fn create_fake_token_and_info(&self, tag_name: &str, span: Option<Span>) -> TokenAndInfo {
    TokenAndInfo {
        // Fall back to the default span when the caller has no source
        // location to associate with the synthetic token.
        span: span.unwrap_or_default(),
        acknowledged: false,
        token: Token::StartTag {
            tag_name: tag_name.into(),
            raw_tag_name: None,
            self_closing: false,
            attributes: vec![],
        },
    }
}

// Parsing elements that contain only text
// The generic raw text element parsing algorithm and the generic RCDATA element
// parsing algorithm consist of the following steps. These algorithms are always
Expand Down Expand Up @@ -7562,7 +7534,7 @@ where
}
};

let mut additional_attributes = vec![];
let mut additional_attributes = Vec::with_capacity(token_attributes.len());

for token_attribute in &token_attributes {
let mut found = false;
Expand Down Expand Up @@ -8171,7 +8143,7 @@ where

if let Some(last) = children.last() {
if let Data::Text(text) = &last.data {
let mut new_value = String::new();
let mut new_value = String::with_capacity(text.value.len() + 1);

new_value.push_str(&*text.value);

Expand Down Expand Up @@ -8204,7 +8176,7 @@ where

if let Some(previous) = children.get(i - 1) {
if let Data::Text(text) = &previous.data {
let mut new_value = String::new();
let mut new_value = String::with_capacity(text.value.len() + 1);

new_value.push_str(&*text.value);

Expand Down
2 changes: 1 addition & 1 deletion crates/swc_html_parser/src/parser/open_elements_stack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ pub struct OpenElementsStack {
impl OpenElementsStack {
pub fn new() -> Self {
OpenElementsStack {
items: vec![],
items: Vec::with_capacity(16),
template_element_count: 0,
}
}
Expand Down

1 comment on commit ce4d577

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: ce4d577 Previous: b14719c Ratio
es/full/minify/libraries/antd 2215831937 ns/iter (± 50934466) 1953139533 ns/iter (± 47485371) 1.13
es/full/minify/libraries/d3 572786588 ns/iter (± 8678242) 453793496 ns/iter (± 2596253) 1.26
es/full/minify/libraries/echarts 2580244269 ns/iter (± 14160685) 2310501968 ns/iter (± 9550848) 1.12
es/full/minify/libraries/jquery 111478353 ns/iter (± 2175030) 102626942 ns/iter (± 275453) 1.09
es/full/minify/libraries/lodash 162983212 ns/iter (± 2461315) 150483116 ns/iter (± 322719) 1.08
es/full/minify/libraries/moment 63214409 ns/iter (± 688379) 61346181 ns/iter (± 135373) 1.03
es/full/minify/libraries/react 20414331 ns/iter (± 67146) 20015833 ns/iter (± 55857) 1.02
es/full/minify/libraries/terser 578942638 ns/iter (± 2815594) 488715986 ns/iter (± 3960590) 1.18
es/full/minify/libraries/three 768532534 ns/iter (± 5801731) 624730927 ns/iter (± 5469241) 1.23
es/full/minify/libraries/typescript 5032935441 ns/iter (± 15187227) 4501383218 ns/iter (± 20850819) 1.12
es/full/minify/libraries/victory 980789304 ns/iter (± 6942482) 807786882 ns/iter (± 7210634) 1.21
es/full/minify/libraries/vue 180588400 ns/iter (± 4739345) 155405022 ns/iter (± 470493) 1.16
es/full/codegen/es3 34420 ns/iter (± 234) 34528 ns/iter (± 123) 1.00
es/full/codegen/es5 34433 ns/iter (± 135) 34528 ns/iter (± 125) 1.00
es/full/codegen/es2015 34424 ns/iter (± 134) 34591 ns/iter (± 156) 1.00
es/full/codegen/es2016 34408 ns/iter (± 216) 34544 ns/iter (± 180) 1.00
es/full/codegen/es2017 34392 ns/iter (± 187) 34511 ns/iter (± 162) 1.00
es/full/codegen/es2018 34402 ns/iter (± 198) 34523 ns/iter (± 139) 1.00
es/full/codegen/es2019 34387 ns/iter (± 161) 34531 ns/iter (± 144) 1.00
es/full/codegen/es2020 34390 ns/iter (± 150) 34510 ns/iter (± 160) 1.00
es/full/all/es3 194375519 ns/iter (± 767631) 192597362 ns/iter (± 1105246) 1.01
es/full/all/es5 183775703 ns/iter (± 665458) 181265576 ns/iter (± 1017407) 1.01
es/full/all/es2015 146030395 ns/iter (± 699075) 145033794 ns/iter (± 1616339) 1.01
es/full/all/es2016 144970398 ns/iter (± 937933) 145432453 ns/iter (± 1261293) 1.00
es/full/all/es2017 144121706 ns/iter (± 694986) 144450563 ns/iter (± 1626308) 1.00
es/full/all/es2018 142764575 ns/iter (± 840041) 142890499 ns/iter (± 1361305) 1.00
es/full/all/es2019 141962979 ns/iter (± 676428) 141562878 ns/iter (± 1561253) 1.00
es/full/all/es2020 136872964 ns/iter (± 567059) 137654334 ns/iter (± 1713614) 0.99
es/full/parser 590136 ns/iter (± 61290) 582895 ns/iter (± 42743) 1.01
es/full/base/fixer 28361 ns/iter (± 218) 27705 ns/iter (± 150) 1.02
es/full/base/resolver_and_hygiene 140578 ns/iter (± 1694) 140230 ns/iter (± 2071) 1.00
serialization of ast node 182 ns/iter (± 1) 181 ns/iter (± 0) 1.01
serialization of serde 182 ns/iter (± 0) 183 ns/iter (± 0) 0.99

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.