-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
309dea5
commit 7bc3851
Showing
4 changed files
with
109 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
// The five entities predefined by XML, keyed by name (without '&' and ';').
const PREDEFINED_ENTITIES = {
    quot: '"',
    amp: '&',
    apos: "'",
    lt: '<',
    gt: '>',
};

// Matches only well-formed, terminated references: one of the predefined
// names, a hexadecimal character reference (&#x…;) or a decimal one (&#…;).
// Anything else (unknown names, unterminated or malformed references, a lone
// '&') does not match and is therefore left untouched in the output.
const REFERENCE_PATTERN = /&(quot|amp|apos|lt|gt|#x[0-9a-fA-F]+|#[0-9]+);/g;

// Highest code point that String.fromCodePoint accepts without throwing.
const MAX_CODE_POINT = 0x10FFFF;

/**
 * Expand XML entity and character references in a string.
 *
 * Recognized references are the predefined entities (&quot; &amp; &apos;
 * &lt; &gt;) and numeric character references in decimal (&#167;) or
 * hexadecimal (&#xA7;) form. Every other piece of text, including unknown,
 * malformed, out-of-range or unterminated references, is copied verbatim.
 *
 * @param {string} input Text that may contain references.
 * @returns {string} The text with every recognized reference replaced by
 * the character it stands for.
 */
const parseEntities = input => input.replace(
    REFERENCE_PATTERN,
    (reference, body) => {
        if (body[0] !== '#') {
            return PREDEFINED_ENTITIES[body];
        }

        // The pattern guarantees that only valid digits follow, so parseInt
        // can neither return NaN nor silently stop at a stray character.
        const value = body[1] === 'x'
            ? parseInt(body.slice(2), 16)
            : parseInt(body.slice(1), 10);

        // fromCodePoint (unlike fromCharCode) handles astral code points,
        // but throws on values beyond the Unicode range: keep those as-is.
        return value <= MAX_CODE_POINT
            ? String.fromCodePoint(value)
            : reference;
    }
);
|
||
/**
 * Plugin entry point: attaches the entity decoder to the object it is given
 * as a `parseEntities` property.
 *
 * @param {Object} Saxophone Object that receives the `parseEntities` property.
 */
const installParseEntities = Saxophone => {
    Saxophone.parseEntities = parseEntities;
};

module.exports = installParseEntities;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
const test = require('tape'); | ||
const Saxophone = require('../'); | ||
|
||
// Each of the five predefined XML entities must be replaced by the character
// it stands for, alone or combined. The inputs are written with escaped
// references so that the decoder is actually exercised.
test('should normalize character entity references', assert => {
    assert.equal(Saxophone.parseEntities('&quot;Run!&quot;, he said'), '"Run!", he said', 'normalize &quot;');
    assert.equal(Saxophone.parseEntities('&amp; On &amp; On &amp; On'), '& On & On & On', 'normalize &amp;');
    assert.equal(Saxophone.parseEntities('J&apos;irai demain'), "J'irai demain", 'normalize &apos;');
    assert.equal(Saxophone.parseEntities('&lt;thisIsNotATag&gt;'), '<thisIsNotATag>', 'normalize &gt; and &lt;');
    assert.equal(Saxophone.parseEntities('&lt;&gt;&quot;&amp;&amp;&quot;&apos;&gt;'), '<>"&&"\'>', 'normalize several');
    assert.end();
});
|
||
// Numeric character references, in hexadecimal (&#x…;) and decimal (&#…;)
// form, must be replaced by the character with that code point.
test('should normalize numeric character references', assert => {
    assert.equal(Saxophone.parseEntities('&#xA7;'), '§', 'normalize hexadecimal entities');
    assert.equal(Saxophone.parseEntities('&#167;'), '§', 'normalize decimal entities');
    assert.equal(Saxophone.parseEntities('&#x2042;&#9746;&#x3003;&#11835;'), '⁂☒〃⸻', 'normalize mixed entities');
    assert.end();
});
|
||
// References that are unknown, unterminated or not numeric must come out of
// the decoder exactly as they went in.
test('should ignore invalid character entity references', assert => {
    assert.equal(Saxophone.parseEntities('&unknown;'), '&unknown;', 'ignore unknown entity references');
    assert.equal(Saxophone.parseEntities('&amp'), '&amp', 'ignore unterminated entity references');
    assert.equal(Saxophone.parseEntities('&#notanumber;'), '&#notanumber;', 'ignore non-numeric decimal character refrences');
    assert.equal(Saxophone.parseEntities('&#xnotanumber;'), '&#xnotanumber;', 'ignore non-numeric hexa character refrences');
    assert.end();
});