Skip to content

Commit

Permalink
feat(html/minifier): Add normalize_attributes for disabling normalization (#5045)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Jun 28, 2022
1 parent 70770e0 commit 0836d29
Show file tree
Hide file tree
Showing 5 changed files with 126 additions and 95 deletions.
176 changes: 93 additions & 83 deletions crates/swc_html_minifier/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,7 @@ struct Minifier {
remove_empty_attributes: bool,
remove_redundant_attributes: bool,
collapse_boolean_attributes: bool,
normalize_attributes: bool,
minify_json: bool,
minify_js: bool,
minify_css: bool,
Expand Down Expand Up @@ -1374,6 +1375,7 @@ impl Minifier {
remove_empty_attributes: self.remove_empty_attributes,
remove_redundant_attributes: self.remove_empty_attributes,
collapse_boolean_attributes: self.collapse_boolean_attributes,
normalize_attributes: self.normalize_attributes,
minify_js: self.minify_js,
minify_json: self.minify_json,
minify_css: self.minify_css,
Expand Down Expand Up @@ -1547,6 +1549,10 @@ impl VisitMut for Minifier {
}
};

if value.is_empty() {
return;
}

let current_element = self.current_element.as_ref().unwrap();

if self.collapse_boolean_attributes
Expand All @@ -1556,109 +1562,112 @@ impl VisitMut for Minifier {
n.value = None;

return;
} else if self.is_space_separated_attribute(current_element, &n.name) {
let mut values = value.split_whitespace().collect::<Vec<_>>();
} else if self.normalize_attributes {
if self.is_space_separated_attribute(current_element, &n.name) {
value = value.split_whitespace().collect::<Vec<_>>().join(" ");
} else if self.is_comma_separated_attribute(current_element, &n.name) {
let is_sizes = matches!(&*n.name, "sizes" | "imagesizes");

if &*n.name == "class" {
values.sort_unstable();
}
let mut new_values = vec![];

value = values.join(" ");
} else if self.is_comma_separated_attribute(current_element, &n.name) {
let is_sizes = matches!(&*n.name, "sizes" | "imagesizes");
for value in value.trim().split(',') {
if is_sizes {
let trimmed = value.trim();

match self.minify_sizes(trimmed) {
Some(minified) => {
new_values.push(minified);
}
_ => {
new_values.push(trimmed.to_string());
}
};
} else {
new_values.push(value.trim().to_string());
}
}

let mut new_values = vec![];
value = new_values.join(",");
} else if self.is_trimable_separated_attribute(current_element, &n.name) {
value = value.trim().to_string();
} else if current_element.namespace == Namespace::HTML
&& &n.name == "contenteditable"
&& value == "true"
{
n.value = Some(js_word!(""));

return;
} else if &n.name == "content"
&& self.element_has_attribute_with_value(
current_element,
"http-equiv",
&["content-security-policy"],
)
{
let values = value.trim().split(';');

for value in value.trim().split(',') {
if is_sizes {
let trimmed = value.trim();
let mut new_values = vec![];

match self.minify_sizes(trimmed) {
Some(minified) => {
new_values.push(minified);
}
_ => {
new_values.push(trimmed.to_string());
}
};
} else {
new_values.push(value.trim().to_string());
for value in values {
new_values.push(
value
.trim()
.split(' ')
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(" "),
);
}
}

value = new_values.join(",");
value = new_values.join(";");

if self.minify_css && &*n.name == "media" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::MediaQueryList)
{
value = minified;
if value.ends_with(';') {
value.pop();
}
}
} else if self.is_trimable_separated_attribute(current_element, &n.name) {
value = value.trim().to_string();
} else if self.is_event_handler_attribute(&n.name) {
value = value.trim().into();

if self.minify_css && &*n.name == "style" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::ListOfDeclarations)
{
value = minified;
if value.trim().to_lowercase().starts_with("javascript:") {
value = value.chars().skip(11).collect();
}
} else if current_element.namespace == Namespace::HTML
&& &*current_element.tag_name == "iframe"
&& &n.name == "srcdoc"
{
value = match self
.minify_html(value.clone(), HtmlMinificationMode::DocumentIframeSrcdoc)
{
Some(minified) => minified,
_ => value,
};
}
} else if current_element.namespace == Namespace::HTML
&& &n.name == "contenteditable"
&& value == "true"
{
n.value = Some(js_word!(""));

return;
} else if &n.name == "content"
&& self.element_has_attribute_with_value(
current_element,
"http-equiv",
&["content-security-policy"],
)
{
let values = value.trim().split(';');

let mut new_values = vec![];

for value in values {
new_values.push(
value
.trim()
.split(' ')
.filter(|s| !s.is_empty())
.collect::<Vec<_>>()
.join(" "),
);
}

value = new_values.join(";");
}

if value.ends_with(';') {
value.pop();
}
} else if self.is_event_handler_attribute(&n.name) {
value = value.trim().into();
if &*n.name == "class" {
let mut values = value.split_whitespace().collect::<Vec<_>>();

if value.trim().to_lowercase().starts_with("javascript:") {
value = value.chars().skip(11).collect();
if &*n.name == "class" {
values.sort_unstable();
}

value = values.join(" ");
} else if self.minify_js && self.is_event_handler_attribute(&n.name) {
value = match self.minify_js(value.clone(), false) {
Some(minified) => minified,
_ => value,
};
} else if current_element.namespace == Namespace::HTML
&& &*current_element.tag_name == "iframe"
&& &n.name == "srcdoc"
{
value =
match self.minify_html(value.clone(), HtmlMinificationMode::DocumentIframeSrcdoc) {
Some(minified) => minified,
_ => value,
};
} else if self.minify_css && &*n.name == "media" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::MediaQueryList)
{
value = minified;
}
} else if self.minify_css && &*n.name == "style" && !value.is_empty() {
if let Some(minified) =
self.minify_css(value.clone(), CssMinificationMode::ListOfDeclarations)
{
value = minified;
}
}

if self.minify_additional_attributes.is_some() {
Expand Down Expand Up @@ -1893,6 +1902,7 @@ fn create_minifier(context_element: Option<&Element>, options: &MinifyOptions) -
remove_empty_attributes: options.remove_empty_attributes,
remove_redundant_attributes: options.remove_redundant_attributes,
collapse_boolean_attributes: options.collapse_boolean_attributes,
normalize_attributes: options.normalize_attributes,

minify_js: options.minify_js,
minify_json: options.minify_json,
Expand Down
29 changes: 17 additions & 12 deletions crates/swc_html_minifier/src/option.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,36 +19,41 @@ pub struct MinifyOptions {
pub force_set_html5_doctype: bool,
#[serde(default)]
pub collapse_whitespaces: Option<CollapseWhitespaces>,
/// Prevent to remove empty attributes, by default we only remove attributes
/// that are safe to remove (for example - empty a `style` attribute),
/// but in edge cases it can be unsafe because some libraries can
/// interact with DOM like with strings (i.e. don't use DOM API) and in this
/// case strings will be different, which can break the work of
/// libraries
#[serde(default = "true_by_default")]
pub remove_comments: bool,
#[serde(default = "default_preserve_comments")]
pub preserve_comments: Option<Vec<CachedRegex>>,
#[serde(default = "true_by_default")]
pub minify_conditional_comments: bool,
/// Controls whether empty attributes are removed. By default we only remove
/// attributes that are safe to remove (for example, an empty `style`
/// attribute), but in edge cases this can be unsafe: some libraries interact
/// with the DOM as strings (i.e. they don't use the DOM API), and in that
/// case the strings will differ, which can break those libraries. Set this
/// to `false` to keep empty attributes.
#[serde(default = "true_by_default")]
pub remove_empty_attributes: bool,
#[serde(default = "true_by_default")]
pub remove_redundant_attributes: bool,
#[serde(default = "true_by_default")]
pub collapse_boolean_attributes: bool,
/// Remove extra whitespace in space and comma separated attribute values
/// (where it is safe) and remove `javascript:` prefix for event handler
/// attributes
#[serde(default = "true_by_default")]
pub normalize_attributes: bool,
#[serde(default = "true_by_default")]
pub minify_js: bool,
#[serde(default = "true_by_default")]
pub minify_json: bool,
#[serde(default = "true_by_default")]
pub minify_css: bool,
// Allow to compress value of custom attributes,
// i.e. `<div data-js="myFunction(100 * 2, 'foo' + 'bar')"></div>`
//
// The first item is tag_name
// The second is attribute name
// The third is type of minifier
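/// Allows compressing the values of custom attributes,
/// e.g. `<div data-js="myFunction(100 * 2, 'foo' + 'bar')"></div>`
///
/// The first item is the tag name
/// The second is the attribute name
/// The third is the type of minifier
///
/// NOTE(review): the field is declared as a list of 2-tuples
/// `(CachedRegex, MinifierType)`, so the three-item description above looks
/// stale — confirm what the regex is matched against.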
#[serde(default)]
pub minify_additional_attributes: Option<Vec<(CachedRegex, MinifierType)>>,
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
{
"normalizeAttributes": false
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!doctype html>
<html lang="en">
<head>
<title>Document</title>
<link rel="stylesheet" href="test.css" media="screen and (min-width: 1024px)">
</head>
<body>
<a rel="foo bar baz"></a>
<div onclick="javascript:alert( 'test' ) ;"></div>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
<!doctype html><html lang=en><title>Document</title><link rel=stylesheet href=test.css media="screen and (min-width:1024px)"><a rel="foo bar baz"></a>
<div onclick='javascript:alert("test")'></div>

1 comment on commit 0836d29

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 0836d29 Previous: 55e0ea3 Ratio
es/full/minify/libraries/antd 1620959921 ns/iter (± 13653757) 1629261898 ns/iter (± 9160935) 0.99
es/full/minify/libraries/d3 408098235 ns/iter (± 16249612) 400016458 ns/iter (± 4699793) 1.02
es/full/minify/libraries/echarts 1610858523 ns/iter (± 18365633) 1610667988 ns/iter (± 28928933) 1.00
es/full/minify/libraries/jquery 88265703 ns/iter (± 3492952) 87064706 ns/iter (± 1254987) 1.01
es/full/minify/libraries/lodash 116885471 ns/iter (± 2379516) 116421667 ns/iter (± 2095223) 1.00
es/full/minify/libraries/moment 50990523 ns/iter (± 1869261) 52014933 ns/iter (± 1241650) 0.98
es/full/minify/libraries/react 17129573 ns/iter (± 893723) 18367196 ns/iter (± 858627) 0.93
es/full/minify/libraries/terser 598977226 ns/iter (± 17022992) 592997037 ns/iter (± 13296702) 1.01
es/full/minify/libraries/three 537373478 ns/iter (± 14503456) 542737979 ns/iter (± 3026572) 0.99
es/full/minify/libraries/typescript 3439170822 ns/iter (± 73736135) 3445681172 ns/iter (± 37589007) 1.00
es/full/minify/libraries/victory 712985392 ns/iter (± 11816656) 718145074 ns/iter (± 9571676) 0.99
es/full/minify/libraries/vue 138363576 ns/iter (± 6260433) 131031034 ns/iter (± 2736413) 1.06
es/full/codegen/es3 31553 ns/iter (± 1231) 32671 ns/iter (± 1363) 0.97
es/full/codegen/es5 31696 ns/iter (± 1222) 32726 ns/iter (± 670) 0.97
es/full/codegen/es2015 31580 ns/iter (± 831) 32654 ns/iter (± 1377) 0.97
es/full/codegen/es2016 31518 ns/iter (± 592) 32686 ns/iter (± 1073) 0.96
es/full/codegen/es2017 31757 ns/iter (± 635) 32526 ns/iter (± 1362) 0.98
es/full/codegen/es2018 32612 ns/iter (± 703) 32616 ns/iter (± 1054) 1.00
es/full/codegen/es2019 32718 ns/iter (± 1302) 32605 ns/iter (± 1096) 1.00
es/full/codegen/es2020 32754 ns/iter (± 780) 32783 ns/iter (± 1490) 1.00
es/full/all/es3 181984003 ns/iter (± 4675686) 179259661 ns/iter (± 9822465) 1.02
es/full/all/es5 170537809 ns/iter (± 6794378) 167434061 ns/iter (± 2999797) 1.02
es/full/all/es2015 139173680 ns/iter (± 3808562) 136365286 ns/iter (± 3483988) 1.02
es/full/all/es2016 137519205 ns/iter (± 5573861) 135498835 ns/iter (± 3521091) 1.01
es/full/all/es2017 137146227 ns/iter (± 5050149) 135041208 ns/iter (± 3040354) 1.02
es/full/all/es2018 135670420 ns/iter (± 6485647) 133606034 ns/iter (± 2862796) 1.02
es/full/all/es2019 135689467 ns/iter (± 5082285) 132820617 ns/iter (± 3235566) 1.02
es/full/all/es2020 137722510 ns/iter (± 14048126) 128089391 ns/iter (± 2553936) 1.08
es/full/parser 721573 ns/iter (± 26257) 683380 ns/iter (± 22549) 1.06
es/full/base/fixer 29555 ns/iter (± 1107) 28809 ns/iter (± 1087) 1.03
es/full/base/resolver_and_hygiene 88655 ns/iter (± 3452) 85282 ns/iter (± 2382) 1.04
serialization of ast node 202 ns/iter (± 11) 206 ns/iter (± 6) 0.98
serialization of serde 223 ns/iter (± 7) 215 ns/iter (± 10) 1.04

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.