Skip to content

Commit

Permalink
feat(html/minifier): Allow compressing additional attributes (#5036)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Jun 27, 2022
1 parent 8b04274 commit 55e0ea3
Show file tree
Hide file tree
Showing 5 changed files with 208 additions and 53 deletions.
211 changes: 158 additions & 53 deletions crates/swc_html_minifier/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,10 @@ use swc_atoms::{js_word, JsWord};
use swc_cached::regex::CachedRegex;
use swc_common::{collections::AHashSet, sync::Lrc, FileName, FilePathMapping, Mark, SourceMap};
use swc_html_ast::*;
use swc_html_parser::parser::ParserConfig;
use swc_html_visit::{VisitMut, VisitMutWith};

use crate::option::{CollapseWhitespaces, MinifyOptions};
use crate::option::{CollapseWhitespaces, MinifierType, MinifyOptions};
pub mod option;

static HTML_BOOLEAN_ATTRIBUTES: &[&str] = &[
Expand Down Expand Up @@ -259,6 +260,16 @@ enum CssMinificationMode {
MediaQueryList,
}

/// Selects how `minify_html` parses the HTML string it is given.
enum HtmlMinificationMode {
/// Content of a conditional comment: parsed as a document fragment with a
/// synthetic `template` element as the context element.
ConditionalComments,
/// Parsed as a whole document with the parser's `iframe_srcdoc` option
/// enabled (used for `iframe[srcdoc]` values).
DocumentIframeSrcdoc,
}

/// Result of parsing inside `minify_html`: either a whole document or a
/// document fragment, so both can be minified and emitted through one code path.
enum HtmlRoot {
/// Root produced by `parse_file_as_document`.
Document(Document),
/// Root produced by `parse_file_as_document_fragment`.
DocumentFragment(DocumentFragment),
}

#[inline(always)]
fn is_whitespace(c: char) -> bool {
matches!(c, '\x09' | '\x0a' | '\x0c' | '\x0d' | '\x20')
Expand Down Expand Up @@ -322,6 +333,7 @@ struct Minifier {
minify_json: bool,
minify_js: bool,
minify_css: bool,
minify_additional_attributes: Option<Vec<(CachedRegex, MinifierType)>>,
}

fn get_white_space(namespace: Namespace, tag_name: &str) -> WhiteSpace {
Expand Down Expand Up @@ -409,6 +421,18 @@ impl Minifier {
}
}

/// Looks up which minifier should be applied to the attribute called `name`.
///
/// Returns the minifier type paired with the first configured pattern that
/// matches `name`, or `None` when no additional attributes are configured or
/// no pattern matches.
fn is_additional_minifier_attribute(&self, name: &str) -> Option<MinifierType> {
    self.minify_additional_attributes
        .as_ref()?
        .iter()
        .find(|(pattern, _)| pattern.is_match(name))
        .map(|(_, minifier_type)| minifier_type.clone())
}

fn element_has_attribute_with_value(
&self,
element: &Element,
Expand Down Expand Up @@ -1285,58 +1309,87 @@ impl Minifier {
Some(minified)
}

fn minify_html(&self, data: String) -> Option<String> {
fn minify_html(&self, data: String, mode: HtmlMinificationMode) -> Option<String> {
let mut errors: Vec<_> = vec![];

let cm = Lrc::new(SourceMap::new(FilePathMapping::empty()));
let fm = cm.new_source_file(FileName::Anon, data);

// Emulate content inside conditional comments like content inside the
// `template` element
let context_element = Element {
span: Default::default(),
tag_name: "template".into(),
namespace: Namespace::HTML,
attributes: vec![],
children: vec![],
content: None,
is_self_closing: false,
};
let mut document_fragment = match swc_html_parser::parse_file_as_document_fragment(
&fm,
&context_element,
DocumentMode::NoQuirks,
None,
Default::default(),
&mut errors,
) {
Ok(document_fragment) => document_fragment,
_ => return None,
let mut context_element = None;

let mut document_or_document_fragment = match mode {
HtmlMinificationMode::ConditionalComments => {
// Emulate content inside conditional comments like content inside the
// `template` element
context_element = Some(Element {
span: Default::default(),
tag_name: "template".into(),
namespace: Namespace::HTML,
attributes: vec![],
children: vec![],
content: None,
is_self_closing: false,
});

match swc_html_parser::parse_file_as_document_fragment(
&fm,
context_element.as_ref().unwrap(),
DocumentMode::NoQuirks,
None,
Default::default(),
&mut errors,
) {
Ok(document_fragment) => HtmlRoot::DocumentFragment(document_fragment),
_ => return None,
}
}
HtmlMinificationMode::DocumentIframeSrcdoc => {
match swc_html_parser::parse_file_as_document(
&fm,
ParserConfig {
iframe_srcdoc: true,
..Default::default()
},
&mut errors,
) {
Ok(document) => HtmlRoot::Document(document),
_ => return None,
}
}
};

// Avoid compressing potentially invalid HTML
if !errors.is_empty() {
return None;
}

let mut minifier = create_minifier(
Some(&context_element),
&MinifyOptions {
force_set_html5_doctype: self.force_set_html5_doctype,
remove_comments: self.remove_comments,
preserve_comments: self.preserve_comments.clone(),
minify_conditional_comments: self.minify_conditional_comments,
collapse_whitespaces: self.collapse_whitespaces.clone(),
remove_empty_attributes: self.remove_empty_attributes,
remove_redundant_attributes: self.remove_empty_attributes,
collapse_boolean_attributes: self.collapse_boolean_attributes,
minify_js: self.minify_js,
minify_json: self.minify_json,
minify_css: self.minify_css,
},
);
let minify_options = MinifyOptions {
force_set_html5_doctype: self.force_set_html5_doctype,
collapse_whitespaces: self.collapse_whitespaces.clone(),
remove_comments: self.remove_comments,
preserve_comments: self.preserve_comments.clone(),
minify_conditional_comments: self.minify_conditional_comments,
remove_empty_attributes: self.remove_empty_attributes,
remove_redundant_attributes: self.remove_empty_attributes,
collapse_boolean_attributes: self.collapse_boolean_attributes,
minify_js: self.minify_js,
minify_json: self.minify_json,
minify_css: self.minify_css,
minify_additional_attributes: self.minify_additional_attributes.clone(),
};

document_fragment.visit_mut_with(&mut minifier);
match document_or_document_fragment {
HtmlRoot::Document(ref mut document) => {
minify_document(document, &minify_options);
}
HtmlRoot::DocumentFragment(ref mut document_fragment) => minify_document_fragment(
document_fragment,
context_element.as_ref().unwrap(),
&minify_options,
),
}

let mut minified = String::new();
let wr = swc_html_codegen::writer::basic::BasicHtmlWriter::new(
Expand All @@ -1349,13 +1402,20 @@ impl Minifier {
swc_html_codegen::CodegenConfig {
minify: true,
scripting_enabled: false,
context_element: Some(&context_element),
context_element: context_element.as_ref(),
tag_omission: None,
self_closing_void_elements: None,
},
);

swc_html_codegen::Emit::emit(&mut gen, &document_fragment).unwrap();
match document_or_document_fragment {
HtmlRoot::Document(document) => {
swc_html_codegen::Emit::emit(&mut gen, &document).unwrap();
}
HtmlRoot::DocumentFragment(document_fragment) => {
swc_html_codegen::Emit::emit(&mut gen, &document_fragment).unwrap();
}
}

Some(minified)
}
Expand Down Expand Up @@ -1487,27 +1547,24 @@ impl VisitMut for Minifier {
}
};

let is_element_html_namespace =
self.current_element.as_ref().unwrap().namespace == Namespace::HTML;
let current_element = self.current_element.as_ref().unwrap();

if self.collapse_boolean_attributes
&& is_element_html_namespace
&& current_element.namespace == Namespace::HTML
&& self.is_boolean_attribute(&n.name)
{
n.value = None;

return;
} else if self.is_space_separated_attribute(self.current_element.as_ref().unwrap(), &n.name)
{
} else if self.is_space_separated_attribute(current_element, &n.name) {
let mut values = value.split_whitespace().collect::<Vec<_>>();

if &*n.name == "class" {
values.sort_unstable();
}

value = values.join(" ");
} else if self.is_comma_separated_attribute(self.current_element.as_ref().unwrap(), &n.name)
{
} else if self.is_comma_separated_attribute(current_element, &n.name) {
let is_sizes = matches!(&*n.name, "sizes" | "imagesizes");

let mut new_values = vec![];
Expand Down Expand Up @@ -1538,9 +1595,7 @@ impl VisitMut for Minifier {
value = minified;
}
}
} else if self
.is_trimable_separated_attribute(self.current_element.as_ref().unwrap(), &n.name)
{
} else if self.is_trimable_separated_attribute(current_element, &n.name) {
value = value.trim().to_string();

if self.minify_css && &*n.name == "style" && !value.is_empty() {
Expand All @@ -1550,13 +1605,16 @@ impl VisitMut for Minifier {
value = minified;
}
}
} else if is_element_html_namespace && &n.name == "contenteditable" && value == "true" {
} else if current_element.namespace == Namespace::HTML
&& &n.name == "contenteditable"
&& value == "true"
{
n.value = Some(js_word!(""));

return;
} else if &n.name == "content"
&& self.element_has_attribute_with_value(
self.current_element.as_ref().unwrap(),
current_element,
"http-equiv",
&["content-security-policy"],
)
Expand Down Expand Up @@ -1592,6 +1650,51 @@ impl VisitMut for Minifier {
Some(minified) => minified,
_ => value,
};
} else if current_element.namespace == Namespace::HTML
&& &*current_element.tag_name == "iframe"
&& &n.name == "srcdoc"
{
value =
match self.minify_html(value.clone(), HtmlMinificationMode::DocumentIframeSrcdoc) {
Some(minified) => minified,
_ => value,
};
}

if self.minify_additional_attributes.is_some() {
let minifier_type = self.is_additional_minifier_attribute(&n.name);

match minifier_type {
Some(MinifierType::Js) if self.minify_js => {
value = match self.minify_js(value.clone(), false) {
Some(minified) => minified,
_ => value,
};
}
Some(MinifierType::Json) if self.minify_json => {
value = match self.minify_json(value.clone()) {
Some(minified) => minified,
_ => value,
};
}
Some(MinifierType::Css) if self.minify_css => {
value = match self
.minify_css(value.clone(), CssMinificationMode::ListOfDeclarations)
{
Some(minified) => minified,
_ => value,
};
}
Some(MinifierType::Html) => {
value = match self
.minify_html(value.clone(), HtmlMinificationMode::DocumentIframeSrcdoc)
{
Some(minified) => minified,
_ => value,
};
}
_ => {}
}
}

n.value = Some(value.into());
Expand Down Expand Up @@ -1747,7 +1850,7 @@ impl VisitMut for Minifier {
.take(end_pos - start_pos)
.collect();

let minified = match self.minify_html(html) {
let minified = match self.minify_html(html, HtmlMinificationMode::ConditionalComments) {
Some(minified) => minified,
_ => return,
};
Expand Down Expand Up @@ -1794,6 +1897,8 @@ fn create_minifier(context_element: Option<&Element>, options: &MinifyOptions) -
minify_js: options.minify_js,
minify_json: options.minify_json,
minify_css: options.minify_css,

minify_additional_attributes: options.minify_additional_attributes.clone(),
}
}

Expand Down
18 changes: 18 additions & 0 deletions crates/swc_html_minifier/src/option.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,16 @@
use serde::{Deserialize, Serialize};
use swc_cached::regex::CachedRegex;

/// Kind of minifier applied to the value of an attribute matched by
/// `minify_additional_attributes`.
///
/// Variant names are (de)serialized in lowercase, e.g. `"js"` or `"html"`.
#[derive(Debug, Serialize, Deserialize, Clone)]
#[serde(rename_all = "lowercase")]
#[serde(deny_unknown_fields)]
pub enum MinifierType {
/// Minify the value as JavaScript (only applied when `minify_js` is enabled).
Js,
/// Minify the value as JSON (only applied when `minify_json` is enabled).
Json,
/// Minify the value as a list of CSS declarations (only applied when
/// `minify_css` is enabled).
Css,
/// Minify the value as an HTML document, parsed the same way as an
/// `iframe` `srcdoc` value.
Html,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
#[serde(deny_unknown_fields)]
Expand Down Expand Up @@ -33,6 +43,14 @@ pub struct MinifyOptions {
pub minify_json: bool,
#[serde(default = "true_by_default")]
pub minify_css: bool,
// Allows compressing the values of custom attributes,
// e.g. `<div data-js="myFunction(100 * 2, 'foo' + 'bar')"></div>`
//
// Each entry is a pair:
// the first item is a regex matched against the attribute name,
// the second item is the type of minifier applied to the value
#[serde(default)]
pub minify_additional_attributes: Option<Vec<(CachedRegex, MinifierType)>>,
}

/// Implement default using serde.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"minifyAdditionalAttributes": [
["^data-click", "js"],
["^ng-", "js"],
["^data-json", "json"],
["^data-style", "css"],
["^data-html", "html"]
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport"
content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
<title>Document</title>
</head>
<body>
<button type="button" onclick="a(1 + 2)" ng-click="a(1 + 2)" data-click="a(1 + 2)"></button>
<button type="button" onclick="a(1 + 2)" ng-click="a(1 + 2)" data-click="a(1 + 2)"></button>
<div data-json='{ "foo": "bar" }'></div>
<div data-style="color: red; background-color: red"></div>
<iframe srcdoc="<html> <body> <p>test.</p>" src="nosrcdoc.html"></iframe>
<div data-html="<html> <body> <p>test.</p>" src="nosrcdoc.html"></div>
</body>
</html>

1 comment on commit 55e0ea3

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 55e0ea3 Previous: fa68cbd Ratio
es/full/minify/libraries/antd 1629261898 ns/iter (± 9160935) 1677065970 ns/iter (± 31913732) 0.97
es/full/minify/libraries/d3 400016458 ns/iter (± 4699793) 440498270 ns/iter (± 8713870) 0.91
es/full/minify/libraries/echarts 1610667988 ns/iter (± 28928933) 1670768194 ns/iter (± 26603713) 0.96
es/full/minify/libraries/jquery 87064706 ns/iter (± 1254987) 101867890 ns/iter (± 3824022) 0.85
es/full/minify/libraries/lodash 116421667 ns/iter (± 2095223) 134704731 ns/iter (± 2994713) 0.86
es/full/minify/libraries/moment 52014933 ns/iter (± 1241650) 58748746 ns/iter (± 584903) 0.89
es/full/minify/libraries/react 18367196 ns/iter (± 858627) 18886460 ns/iter (± 472592) 0.97
es/full/minify/libraries/terser 592997037 ns/iter (± 13296702) 622043344 ns/iter (± 12607014) 0.95
es/full/minify/libraries/three 542737979 ns/iter (± 3026572) 564374191 ns/iter (± 8815940) 0.96
es/full/minify/libraries/typescript 3445681172 ns/iter (± 37589007) 3589112323 ns/iter (± 55669666) 0.96
es/full/minify/libraries/victory 718145074 ns/iter (± 9571676) 752027563 ns/iter (± 4591208) 0.95
es/full/minify/libraries/vue 131031034 ns/iter (± 2736413) 159632105 ns/iter (± 2242216) 0.82
es/full/codegen/es3 32671 ns/iter (± 1363) 32746 ns/iter (± 1566) 1.00
es/full/codegen/es5 32726 ns/iter (± 670) 32687 ns/iter (± 327) 1.00
es/full/codegen/es2015 32654 ns/iter (± 1377) 32800 ns/iter (± 430) 1.00
es/full/codegen/es2016 32686 ns/iter (± 1073) 32803 ns/iter (± 457) 1.00
es/full/codegen/es2017 32526 ns/iter (± 1362) 31435 ns/iter (± 1128) 1.03
es/full/codegen/es2018 32616 ns/iter (± 1054) 32509 ns/iter (± 633) 1.00
es/full/codegen/es2019 32605 ns/iter (± 1096) 32588 ns/iter (± 1008) 1.00
es/full/codegen/es2020 32783 ns/iter (± 1490) 32636 ns/iter (± 2918) 1.00
es/full/all/es3 179259661 ns/iter (± 9822465) 190928774 ns/iter (± 8604319) 0.94
es/full/all/es5 167434061 ns/iter (± 2999797) 183280570 ns/iter (± 10559769) 0.91
es/full/all/es2015 136365286 ns/iter (± 3483988) 138355011 ns/iter (± 2970068) 0.99
es/full/all/es2016 135498835 ns/iter (± 3521091) 138789554 ns/iter (± 2610463) 0.98
es/full/all/es2017 135041208 ns/iter (± 3040354) 143210809 ns/iter (± 4899288) 0.94
es/full/all/es2018 133606034 ns/iter (± 2862796) 140842112 ns/iter (± 4017436) 0.95
es/full/all/es2019 132820617 ns/iter (± 3235566) 141347434 ns/iter (± 6586499) 0.94
es/full/all/es2020 128089391 ns/iter (± 2553936) 130775269 ns/iter (± 3371631) 0.98
es/full/parser 683380 ns/iter (± 22549) 715282 ns/iter (± 14870) 0.96
es/full/base/fixer 28809 ns/iter (± 1087) 29083 ns/iter (± 383) 0.99
es/full/base/resolver_and_hygiene 85282 ns/iter (± 2382) 87824 ns/iter (± 2768) 0.97
serialization of ast node 206 ns/iter (± 6) 207 ns/iter (± 7) 1.00
serialization of serde 215 ns/iter (± 10) 217 ns/iter (± 1) 0.99

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.