Skip to content

Commit bade75d

Browse files
committed
fix: trim space characters but not Unicode whitespace
Fix #14 by trimming only the characters that HTML defines as space characters (https://www.w3.org/TR/html52/infrastructure.html#space-characters), rather than everything JavaScript's Unicode-based definition of whitespace covers.
1 parent e7246f0 commit bade75d

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

__tests__/index.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,18 @@ test('should trim text nodes', () => {
6868
);
6969
});
7070

71+
// Regression test: the input wraps its text in Unicode spaces (U+2009,
// U+2005, U+200A) with a plain SPACE at each end. The expected output keeps
// the Unicode spaces intact, so they must not be treated as trimmable.
test('should not trim Unicode whitespace', () => {
72+
const html = `<span> \u2009 surrounded \u2005\u200a </span>`;
73+
74+
expect(format(html)).toEqual(
75+
`
76+
<span>
77+
\u2009 surrounded \u2005\u200a
78+
</span>
79+
`
80+
);
81+
});
82+
7183
test('should not introduce line break if text node is empty', () => {
7284
const html = `<span> </span>`;
7385

index.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,12 @@ const format = function(html) {
151151
return voidElements.indexOf(name) !== -1;
152152
};
153153

154+
// https://www.w3.org/TR/html52/infrastructure.html#space-characters
// defines "space characters" to include SPACE, TAB, LF, FF, and CR.
//
// Strips only those five characters from both ends of `text` and returns the
// result. Any other whitespace (e.g. U+2009 THIN SPACE, U+00A0 NO-BREAK
// SPACE, VT) is left in place, which is why String.prototype.trim() is not
// used here.
//
// Implemented as a scan from each end instead of a `^…+|…+$` regex: the
// `+$` alternative retries from every offset inside an interior run of
// space characters, which is quadratic in the length of that run.
const trimText = text => {
  const isSpaceCharacter = ch =>
    ch === ' ' || ch === '\t' || ch === '\n' || ch === '\f' || ch === '\r';

  let start = 0;
  let end = text.length;

  // Advance past leading space characters.
  while (start < end && isSpaceCharacter(text[start])) {
    start += 1;
  }

  // Back up over trailing space characters; never crosses `start`, so an
  // all-space input yields the empty string.
  while (end > start && isSpaceCharacter(text[end - 1])) {
    end -= 1;
  }

  return text.slice(start, end);
};
159+
154160
const extractAttributesFromString = content => {
155161
const attributes = {};
156162

@@ -205,7 +211,7 @@ const format = function(html) {
205211
appendClosingTag(attributes, '>');
206212
},
207213
ontext: function(text) {
208-
const trimmed = text.trim();
214+
const trimmed = trimText(text);
209215
if (trimmed.length === 0) {
210216
return;
211217
}

0 commit comments

Comments
 (0)