From 7b381da5bceee3c36be079e27a2bad7b3cc412c2 Mon Sep 17 00:00:00 2001 From: Titus Wormer Date: Sat, 30 Oct 2021 18:00:17 +0200 Subject: [PATCH] Fix to not escape some character in certain spans Related to remarkjs/remark#885. --- lib/unsafe.js | 51 ++++++++++++++++++++++++++++++++++++++++----------- test/index.js | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 79 insertions(+), 17 deletions(-) diff --git a/lib/unsafe.js b/lib/unsafe.js index 185f91a..4555d1a 100644 --- a/lib/unsafe.js +++ b/lib/unsafe.js @@ -2,6 +2,21 @@ * @typedef {import('./types.js').Unsafe} Unsafe */ +/** + * List of constructs that occur in phrasing (paragraphs, headings), but cannot + * contain things like attention (emphasis, strong), images, or links. + * So they sort of cancel each other out. + * Note: could use a better name. + */ +const fullPhrasingSpans = [ + 'autolink', + 'destinationLiteral', + 'destinationRaw', + 'reference', + 'titleQuote', + 'titleApostrophe' +] + /** @type {Array.<Unsafe>} */ export const unsafe = [ {character: '\t', after: '[\\r\\n]', inConstruct: 'phrasing'}, @@ -40,7 +55,12 @@ export const unsafe = [ }, // An exclamation mark can start an image, if it is followed by a link or // a link reference. - {character: '!', after: '\\[', inConstruct: 'phrasing'}, + { + character: '!', + after: '\\[', + inConstruct: 'phrasing', + notInConstruct: fullPhrasingSpans + }, // A quote can break out of a title. {character: '"', inConstruct: 'titleQuote'}, // A number sign could start an ATX heading if it starts a line. @@ -53,14 +73,20 @@ export const unsafe = [ {character: "'", inConstruct: 'titleApostrophe'}, // A left paren could break out of a destination raw. {character: '(', inConstruct: 'destinationRaw'}, - {before: '\\]', character: '(', inConstruct: 'phrasing'}, + // A left paren preceded by `]` could make something into a link or image. 
+ { + before: '\\]', + character: '(', + inConstruct: 'phrasing', + notInConstruct: fullPhrasingSpans + }, // A right paren could start a list item or break out of a destination // raw. {atBreak: true, before: '\\d+', character: ')'}, {character: ')', inConstruct: 'destinationRaw'}, // An asterisk can start thematic breaks, list items, emphasis, strong. {atBreak: true, character: '*'}, - {character: '*', inConstruct: 'phrasing'}, + {character: '*', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans}, // A plus sign could start a list item. {atBreak: true, character: '+'}, // A dash can start thematic breaks, list items, and setext heading @@ -75,7 +101,12 @@ export const unsafe = [ // An autolink also starts with a letter. // Finally, it could break out of a destination literal. {atBreak: true, character: '<', after: '[!/?A-Za-z]'}, - {character: '<', after: '[!/?A-Za-z]', inConstruct: 'phrasing'}, + { + character: '<', + after: '[!/?A-Za-z]', + inConstruct: 'phrasing', + notInConstruct: fullPhrasingSpans + }, {character: '<', inConstruct: 'destinationLiteral'}, // An equals to can start setext heading underlines. {atBreak: true, character: '='}, @@ -86,7 +117,8 @@ export const unsafe = [ // Question mark and at sign are not used in markdown for constructs. // A left bracket can start definitions, references, labels, {atBreak: true, character: '['}, - {character: '[', inConstruct: ['phrasing', 'label', 'reference']}, + {character: '[', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans}, + {character: '[', inConstruct: ['label', 'reference']}, // A backslash can start an escape (when followed by punctuation) or a // hard break (when followed by an eol). // Note: typical escapes are handled in `safe`! @@ -96,18 +128,15 @@ export const unsafe = [ // Caret is not used in markdown for constructs. // An underscore can start emphasis, strong, or a thematic break. 
{atBreak: true, character: '_'}, - {character: '_', inConstruct: 'phrasing'}, + {character: '_', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans}, // A grave accent can start code (fenced or text), or it can break out of // a grave accent code fence. {atBreak: true, character: '`'}, { character: '`', - inConstruct: [ - 'codeFencedLangGraveAccent', - 'codeFencedMetaGraveAccent', - 'phrasing' - ] + inConstruct: ['codeFencedLangGraveAccent', 'codeFencedMetaGraveAccent'] }, + {character: '`', inConstruct: 'phrasing', notInConstruct: fullPhrasingSpans}, // Left brace, vertical bar, right brace are not used in markdown for // constructs. // A tilde can start code (fenced). diff --git a/test/index.js b/test/index.js index d40b904..322514d 100644 --- a/test/index.js +++ b/test/index.js @@ -790,6 +790,44 @@ test('code (flow)', (t) => { 'should use an indent if the value is indented' ) + t.equal( + to({type: 'link', url: 'a b![c](d*e_f[g_h`i', children: []}), + '[](<a b![c](d*e_f[g_h`i>)\n', + 'should not escape unneeded characters in a `destinationLiteral`' + ) + + t.equal( + to({type: 'link', url: 'a![b](c*d_e[f_g`h { children: [ { type: 'paragraph', - children: [ - { - type: 'text', - value: 'bar' - } - ] + children: [{type: 'text', value: 'bar'}] } ] }