Skip to content

Commit

Permalink
feat(html/codegen): Improve entity compression (#4889)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Jun 7, 2022
1 parent 44e606a commit da09c1c
Show file tree
Hide file tree
Showing 20 changed files with 841 additions and 79 deletions.
13 changes: 12 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions crates/swc_html_codegen/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ swc_atoms = {version = "0.2.7", path = "../swc_atoms"}
swc_common = { version = "0.18.0", path = "../swc_common"}
swc_html_ast = {version = "0.8.0", path = "../swc_html_ast"}
swc_html_codegen_macros = {version = "0.1.0", path = "../swc_html_codegen_macros"}
swc_html_utils = { version = "0.1.0", path = "../swc_html_utils" }

[dev-dependencies]
swc_common = { version = "0.18.0", path = "../swc_common", features = [
Expand Down
101 changes: 97 additions & 4 deletions crates/swc_html_codegen/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@
#![allow(clippy::needless_update)]

pub use std::fmt::Result;
use std::{iter::Peekable, str::Chars};

use swc_common::Spanned;
use swc_html_ast::*;
use swc_html_codegen_macros::emitter;
use swc_html_utils::HTML_ENTITIES;
use writer::HtmlWriter;

pub use self::emit::*;
Expand Down Expand Up @@ -820,10 +822,12 @@ fn minify_attribute_value(value: &str) -> String {
let mut dq = 0;
let mut sq = 0;

for c in value.chars() {
let mut chars = value.chars().peekable();

while let Some(c) = chars.next() {
match c {
'&' => {
minified.push_str("&");
minified.push_str(&minify_amp(&mut chars));

continue;
}
Expand Down Expand Up @@ -875,11 +879,12 @@ fn normalize_attribute_value(value: &str) -> String {

fn minify_text(value: &str) -> String {
let mut result = String::with_capacity(value.len());
let mut chars = value.chars().peekable();

for c in value.chars() {
while let Some(c) = chars.next() {
match c {
'&' => {
result.push_str("&");
result.push_str(&minify_amp(&mut chars));
}
'<' => {
result.push_str("&lt;");
Expand All @@ -891,6 +896,94 @@ fn minify_text(value: &str) -> String {
result
}

/// Decides how to emit an `&` that the caller has just taken from `chars`.
///
/// The returned string replaces that `&` in the output. It starts with
/// `&amp;` when leaving the ampersand bare would let the following characters
/// be read as a character reference (decimal `&#1`, hexadecimal `&#x1`, or a
/// name present in `HTML_ENTITIES`), and with a plain `&` otherwise.
///
/// Only characters that can be part of a character reference (`#`, ASCII
/// digits, `x`/`X`, ASCII alphanumerics and `;`) are consumed from `chars` and
/// copied into the result. Any other character is left in the iterator so the
/// caller can apply its own handling to it (for example escaping `<`, or
/// analysing a second `&`).
fn minify_amp(chars: &mut Peekable<Chars>) -> String {
    let mut result = String::with_capacity(7);

    if let Some(hash) = chars.next_if(|c| *c == '#') {
        if let Some(number) = chars.next_if(|c| c.is_ascii_digit()) {
            // Decimal reference.
            // Prevent `&amp;#38;` -> `&#38`
            result.push_str("&amp;");
            result.push(hash);
            result.push(number);
        } else if let Some(x) = chars.next_if(|c| matches!(*c, 'x' | 'X')) {
            // Hexadecimal reference, but only when a hex digit really follows.
            // Prevent `&amp;#x38;` -> `&#x38`
            let starts_hex_code = matches!(chars.peek(), Some(c) if c.is_ascii_hexdigit());

            result.push_str(if starts_hex_code { "&amp;" } else { "&" });
            result.push(hash);
            result.push(x);
        } else {
            // `&#` followed by something that cannot start a numeric
            // reference. That character is deliberately not consumed here.
            result.push('&');
            result.push(hash);
        }
    } else if let Some(first) = chars.next_if(|c| c.is_ascii_alphabetic()) {
        // Named entity
        // Prevent `&amp;current` -> `&current`
        //
        // The candidate keeps its leading `&` because that is the form used
        // for the `HTML_ENTITIES` lookup below.
        let mut candidate = String::with_capacity(33);

        candidate.push('&');
        candidate.push(first);

        let mut found_entity = false;

        // Extend the candidate one character at a time and stop at the first
        // (shortest) match. Characters that cannot belong to an entity name
        // end the scan without being consumed.
        while let Some(c) = chars.next_if(|c| c.is_ascii_alphanumeric() || *c == ';') {
            candidate.push(c);

            if HTML_ENTITIES.get(&candidate).is_some() {
                found_entity = true;

                break;
            }

            // We stop when:
            //
            // - `;` terminated a name that is not a known entity
            // - we consumed more characters than the longest entity
            if c == ';' || candidate.len() > 32 {
                break;
            }
        }

        result.push_str(if found_entity { "&amp;" } else { "&" });
        // Skip the one-byte `&` prefix that was only needed for the lookup.
        result.push_str(&candidate[1..]);
    } else {
        // End of input, or a character that cannot start a reference: a bare
        // `&` is unambiguous and the next character stays with the caller.
        result.push('&');
    }

    result
}

// Escaping a string (for the purposes of the algorithm above) consists of
// running the following steps:
//
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<div>Test &amp;</div>
<div>Test &amp;</div>
<div>Test &</div>
<div>Test &</div>
<style>
a::before {
content: "&";
Expand Down
24 changes: 12 additions & 12 deletions crates/swc_html_codegen/tests/fixture/attribute/output.min.html
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,8 @@
<div data-test=\\foo class=bar>test</div>

<span title='test "with" &amp;quot;'>test</span>
<span title='test "with" &amp; quot'>test</span>
<span title='test "with" &amp;test'>test</span>
<span title='test "with" & quot'>test</span>
<span title='test "with" &test'>test</span>
<span title='test "with" &amp;amptest'>test</span>
<span title='test "with" <'>test</span>
<span title='test "with" >'>test</span>
Expand All @@ -116,9 +116,9 @@
</style>

<div>
foo &amp; bar
foo&amp;<i>bar</i>
foo&amp;&amp;&amp; bar
foo & bar
foo&<i>bar</i>
foo&&& bar
</div>

<pre><code>Label current;
Expand All @@ -128,20 +128,20 @@
</code></pre>

<div>
&amp;xxx; &amp;xxx &amp;thorn; &amp;thorn &amp;curren;t &amp;current &amp;current; &amp;&amp;
&xxx; &xxx &amp;thorn; &amp;thorn &amp;curren;t &amp;current &amp;current; &&
&amp;gt
&amp;unknown;
&unknown;
&amp;current
&amp;current;
&amp;current
&amp;current;

ø &amp;osLash Ø
&amp;ø &amp;&amp;osLash; &amp;Ø
&amp;ø &amp;&amp;osLash; &amp;Ø
ø &osLash Ø
&ø &&osLash; &Ø
&ø &&osLash; &Ø

&amp;oslash; &amp;osLash; &amp;Oslash;
&amp;oslash; &amp;osLash; &amp;Oslash;
&amp;oslash; &osLash; &amp;Oslash;
&amp;oslash; &osLash; &amp;Oslash;
</div>


Expand Down
32 changes: 32 additions & 0 deletions crates/swc_html_codegen/tests/fixture/html-entity/input.html
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,38 @@ <h1>HTML Entity Example</h1>
kablammo!
</a>

<div>&amp;#38;</div>
<div>&amp#38;</div>
<div>&amp#38</div>
<div>&amp#x26;</div>
<div>&amp#x26</div>
<div>&#38;amp</div>
<div>&amp#</div>
<div>&amp#1</div>
<div>&#8</div>
<div>&8</div>
<div>&#38;#a</div>
<div>&#38;#xb</div>
<div>&#38;#xj</div>

<div data-test="&amp;amp;"></div>
<div data-test="&amp;amp"></div>
<div data-test="&amp;#36"></div>
<div data-test="&amp;#x36"></div>
<div data-test="&unknown">&unknown</div>
<div data-test="&unknown;">&unknown;</div>
<div data-test="&u;">&u;</div>
<div data-test="&ampCounterClockwiseContourIntegral">&ampCounterClockwiseContourIntegral</div>
<div data-test="&amp;CounterClockwiseContourIntegral">&amp;CounterClockwiseContourIntegral</div>
<div data-test="&ampCounterClockwiseContourIntegral;">&ampCounterClockwiseContourIntegral;</div>
<div data-test="&amp;CounterClockwiseContourIntegral;">&amp;CounterClockwiseContourIntegral;</div>
<div data-test="&amp;amp">&amp;amp</div>
<div data-test="&amp;am">&amp;am</div>
<div data-test="&amp;;">&amp;;</div>
<div data-test="&amp;">&amp;</div>
<div data-test="&amp;#x">&amp;#x</div>
<div data-test="&amp;#x1">&amp;#x1</div>
<div data-test="&amp;#1">&amp;#1</div>
</body>
</html>

32 changes: 32 additions & 0 deletions crates/swc_html_codegen/tests/fixture/html-entity/output.html
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,38 @@ <h1>HTML Entity Example</h1>
kablammo!
</a>

<div>&amp;#38;</div>
<div>&amp;#38;</div>
<div>&amp;#38</div>
<div>&amp;#x26;</div>
<div>&amp;#x26</div>
<div>&amp;amp</div>
<div>&amp;#</div>
<div>&amp;#1</div>
<div></div>
<div>&amp;8</div>
<div>&amp;#a</div>
<div>&amp;#xb</div>
<div>&amp;#xj</div>

<div data-test="&amp;amp;"></div>
<div data-test="&amp;amp"></div>
<div data-test="&amp;#36"></div>
<div data-test="&amp;#x36"></div>
<div data-test="&amp;unknown">&amp;unknown</div>
<div data-test="&amp;unknown;">&amp;unknown;</div>
<div data-test="&amp;u;">&amp;u;</div>
<div data-test="&amp;ampCounterClockwiseContourIntegral">&amp;CounterClockwiseContourIntegral</div>
<div data-test="&amp;CounterClockwiseContourIntegral">&amp;CounterClockwiseContourIntegral</div>
<div data-test="&amp;ampCounterClockwiseContourIntegral;">&amp;CounterClockwiseContourIntegral;</div>
<div data-test="&amp;CounterClockwiseContourIntegral;">&amp;CounterClockwiseContourIntegral;</div>
<div data-test="&amp;amp">&amp;amp</div>
<div data-test="&amp;am">&amp;am</div>
<div data-test="&amp;;">&amp;;</div>
<div data-test="&amp;">&amp;</div>
<div data-test="&amp;#x">&amp;#x</div>
<div data-test="&amp;#x1">&amp;#x1</div>
<div data-test="&amp;#1">&amp;#1</div>



Expand Down
42 changes: 37 additions & 5 deletions crates/swc_html_codegen/tests/fixture/html-entity/output.min.html
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

<h1>HTML Entity Example</h1>

<div>A space character: &amp;</div>
<div>A space character: &amp;</div>
<div>A space character: &</div>
<div>A space character: &</div>

<div>A space character:  </div>
<div>The less-than sign: &lt;</div>
Expand All @@ -25,14 +25,46 @@ <h1>HTML Entity Example</h1>
<div>An o with a circumflex accent: ô</div>
<div>An o with a tilde: õ</div>

<div>A space character: &amp;&amp;</div>
<div>A space character: &&</div>
<div>I'm ∉ I tell you</div>

<a href="http://lmgtfy.com/?l=1&amp;q=rick+roll">tired meme</a>
<a href=# onclick="window.location='?l=1&amp;q=rick+roll';return false">
<a href="http://lmgtfy.com/?l=1&q=rick+roll">tired meme</a>
<a href=# onclick="window.location='?l=1&q=rick+roll';return false">
kablammo!
</a>

<div>&amp;#38;</div>
<div>&amp;#38;</div>
<div>&amp;#38</div>
<div>&amp;#x26;</div>
<div>&amp;#x26</div>
<div>&amp;amp</div>
<div>&#</div>
<div>&amp;#1</div>
<div></div>
<div>&8</div>
<div>&#a</div>
<div>&amp;#xb</div>
<div>&#xj</div>

<div data-test=&amp;amp;></div>
<div data-test=&amp;amp></div>
<div data-test=&amp;#36></div>
<div data-test=&amp;#x36></div>
<div data-test=&unknown>&unknown</div>
<div data-test=&unknown;>&unknown;</div>
<div data-test=&u;>&u;</div>
<div data-test=&amp;ampCounterClockwiseContourIntegral>&CounterClockwiseContourIntegral</div>
<div data-test=&CounterClockwiseContourIntegral>&CounterClockwiseContourIntegral</div>
<div data-test=&amp;ampCounterClockwiseContourIntegral;>&amp;CounterClockwiseContourIntegral;</div>
<div data-test=&amp;CounterClockwiseContourIntegral;>&amp;CounterClockwiseContourIntegral;</div>
<div data-test=&amp;amp>&amp;amp</div>
<div data-test=&am>&am</div>
<div data-test=&;>&;</div>
<div data-test=&>&</div>
<div data-test=&#x>&#x</div>
<div data-test=&amp;#x1>&amp;#x1</div>
<div data-test=&amp;#1>&amp;#1</div>



Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ <h2 class="font-bold leading-normal mb-2 text-2xl text-black">What We Do</h2>
<line x1=55.1 y1=30.4 x2=100 y2=30.4 class=stroke-primary style="stroke-width: 2;stroke-miterlimit: 10;"></line>
<line x1=45.1 y1=30.4 x2=0 y2=30.4 class=stroke-primary style="stroke-width: 2;stroke-miterlimit: 10;"></line>
</svg>
<p class="font-light leading-relaxed mx-auto pb-2 text-gray-500 text-xl">Save time managing advertising &amp; Content for your business.</p>
<p class="font-light leading-relaxed mx-auto pb-2 text-gray-500 text-xl">Save time managing advertising & Content for your business.</p>
</header>


Expand Down Expand Up @@ -96,7 +96,7 @@ <h3 class="font-semibold leading-normal mb-2 text-black text-lg">Brand Identity<
<path d="M12.136.326A1.5 1.5 0 0 1 14 1.78V3h.5A1.5 1.5 0 0 1 16 4.5v9a1.5 1.5 0 0 1-1.5 1.5h-13A1.5 1.5 0 0 1 0 13.5v-9a1.5 1.5 0 0 1 1.432-1.499L12.136.326zM5.562 3H13V1.78a.5.5 0 0 0-.621-.484L5.562 3zM1.5 4a.5.5 0 0 0-.5.5v9a.5.5 0 0 0 .5.5h13a.5.5 0 0 0 .5-.5v-9a.5.5 0 0 0-.5-.5h-13z"></path>
</svg>
</div>
<h3 class="font-semibold leading-normal mb-2 text-black text-lg">Budget &amp; Marketing</h3>
<h3 class="font-semibold leading-normal mb-2 text-black text-lg">Budget & Marketing</h3>
<p class=text-gray-500>This is a wider card with supporting text below as a natural content.</p>
</div>

Expand Down

1 comment on commit da09c1c

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: da09c1c Previous: ce4d577 Ratio
es/full/minify/libraries/antd 1971494540 ns/iter (± 72407620) 2215831937 ns/iter (± 50934466) 0.89
es/full/minify/libraries/d3 492714049 ns/iter (± 19054818) 572786588 ns/iter (± 8678242) 0.86
es/full/minify/libraries/echarts 2409592864 ns/iter (± 9079780) 2580244269 ns/iter (± 14160685) 0.93
es/full/minify/libraries/jquery 107431678 ns/iter (± 1240776) 111478353 ns/iter (± 2175030) 0.96
es/full/minify/libraries/lodash 156651500 ns/iter (± 1267917) 162983212 ns/iter (± 2461315) 0.96
es/full/minify/libraries/moment 62989406 ns/iter (± 368904) 63214409 ns/iter (± 688379) 1.00
es/full/minify/libraries/react 20230288 ns/iter (± 102584) 20414331 ns/iter (± 67146) 0.99
es/full/minify/libraries/terser 527938164 ns/iter (± 6421600) 578942638 ns/iter (± 2815594) 0.91
es/full/minify/libraries/three 652769111 ns/iter (± 14693819) 768532534 ns/iter (± 5801731) 0.85
es/full/minify/libraries/typescript 4594097404 ns/iter (± 25075839) 5032935441 ns/iter (± 15187227) 0.91
es/full/minify/libraries/victory 876134477 ns/iter (± 7580526) 980789304 ns/iter (± 6942482) 0.89
es/full/minify/libraries/vue 161616824 ns/iter (± 1762288) 180588400 ns/iter (± 4739345) 0.89
es/full/codegen/es3 34400 ns/iter (± 170) 34420 ns/iter (± 234) 1.00
es/full/codegen/es5 34347 ns/iter (± 162) 34433 ns/iter (± 135) 1.00
es/full/codegen/es2015 34392 ns/iter (± 166) 34424 ns/iter (± 134) 1.00
es/full/codegen/es2016 34372 ns/iter (± 148) 34408 ns/iter (± 216) 1.00
es/full/codegen/es2017 34320 ns/iter (± 155) 34392 ns/iter (± 187) 1.00
es/full/codegen/es2018 34330 ns/iter (± 134) 34402 ns/iter (± 198) 1.00
es/full/codegen/es2019 34343 ns/iter (± 153) 34387 ns/iter (± 161) 1.00
es/full/codegen/es2020 34339 ns/iter (± 154) 34390 ns/iter (± 150) 1.00
es/full/all/es3 192743328 ns/iter (± 756187) 194375519 ns/iter (± 767631) 0.99
es/full/all/es5 182783437 ns/iter (± 669555) 183775703 ns/iter (± 665458) 0.99
es/full/all/es2015 145214169 ns/iter (± 709279) 146030395 ns/iter (± 699075) 0.99
es/full/all/es2016 144216544 ns/iter (± 796442) 144970398 ns/iter (± 937933) 0.99
es/full/all/es2017 144480003 ns/iter (± 683550) 144121706 ns/iter (± 694986) 1.00
es/full/all/es2018 142220423 ns/iter (± 771651) 142764575 ns/iter (± 840041) 1.00
es/full/all/es2019 141403895 ns/iter (± 725082) 141962979 ns/iter (± 676428) 1.00
es/full/all/es2020 136592703 ns/iter (± 665244) 136872964 ns/iter (± 567059) 1.00
es/full/parser 591439 ns/iter (± 59727) 590136 ns/iter (± 61290) 1.00
es/full/base/fixer 28741 ns/iter (± 202) 28361 ns/iter (± 218) 1.01
es/full/base/resolver_and_hygiene 139171 ns/iter (± 1678) 140578 ns/iter (± 1694) 0.99
serialization of ast node 182 ns/iter (± 15) 182 ns/iter (± 1) 1
serialization of serde 182 ns/iter (± 0) 182 ns/iter (± 0) 1

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.