markdoc · rpaul-stripe · Sep 20, 2022 · Sep 13, 2022 · Sep 13, 2022 · Sep 19, 2022
diff --git a/spec/marktest/index.ts b/spec/marktest/index.ts
@@ -18,7 +18,10 @@ class Loader extends yaml.loader.Loader {
   }
 }
 
-const tokenizer = new markdoc.Tokenizer({ allowIndentation: true });
+const tokenizer = new markdoc.Tokenizer({
+  allowIndentation: true,
+  allowComments: true,
+});
 
 function parse(content: string, file?: string) {
   const tokens = tokenizer.tokenize(content);

diff --git a/spec/marktest/tests.yaml b/spec/marktest/tests.yaml
@@ -438,6 +438,51 @@
   expected: |
     <article><h1 class="foo bar">This is a test </h1></article>
 
+- name: Ignoring tags in fenced code blocks
+  code: |
+    ```javascript {% process=false %}
+    foo
+    {% bar %}
+    ```
+  expected:
+    - tag: pre
+      attributes:
+        data-language: javascript
+      children:
+        - "foo\n{% bar %}\n"
+
+- name: Using a backtick in a fenced code block string attribute
+  config:
+    nodes:
+      fence:
+        render: pre
+        attributes:
+          content:
+            type: String
+            render: false
+            required: true
+          language:
+            type: String
+            render: 'data-language'
+          process:
+            type: Boolean
+            render: false
+            default: true
+          title:
+            type: String
+
+  code: |
+    ~~~yaml {% title="this is a `test`" %}
+    example
+    ~~~
+  expected:
+    - tag: pre
+      attributes:
+        data-language: yaml
+        title: 'this is a `test`'
+      children:
+        - "example\n"
+
 - name: Conditional and variable in code example
   config:
     variables:
@@ -1503,3 +1548,47 @@
       children:
         - tag: p
           children: [testing]
+
+- name: Ignoring comments
+  code: |
+    # Example <!-- foo -->
+
+    This is a test <!-- bar
+    -->
+
+    <!--
+    baz
+    -->
+  expected:
+    - tag: h1
+      children: ['Example ']
+    - tag: p
+      children: ['This is a test ']
+
+- name: Escaped quotes in tag strings
+  config:
+    tags:
+      foo:
+        render: foo
+        attributes:
+          bar:
+            type: String
+  code: |
+    {% foo bar="this is a test of \"quoted\" strings" /%}
+  expected:
+    - tag: foo
+      attributes:
+        bar: 'this is a test of "quoted" strings'
+
+- name: Escaped quotes in tag strings with html renderer
+  renderer: html
+  config:
+    tags:
+      foo:
+        render: foo
+        attributes:
+          bar:
+            type: String
+  code: |
+    {% foo bar="this is a test of \"quoted\" strings" /%}
+  expected: <article><foo bar="this is a test of &quot;quoted&quot; strings"></foo></article>
diff --git a/src/parser.test.ts b/src/parser.test.ts
@@ -7,7 +7,7 @@ import { any } from 'deep-assert';
 
 describe('Markdown parser', function () {
   const fence = '```';
-  const tokenizer = new Tokenizer();
+  const tokenizer = new Tokenizer({ allowComments: true });
 
   function convert(example) {
     const content = example.replace(/\n\s+/gm, '\n').trim();
@@ -636,4 +636,20 @@ describe('Markdown parser', function () {
     `);
     }).not.toThrow();
   });
+
+  it('parsing comments', function () { // A comment on its own after a paragraph parses into a separate `comment` node.
+    const example = convert(`
+    this is a test
+
+    <!-- foo -->
+    `);
+
+    expect(example).toDeepEqualSubset({
+      type: 'document',
+      children: [
+        { type: 'paragraph' },
+        { type: 'comment', attributes: { content: 'foo' } }, // content is the comment text without the markers or padding
+      ],
+    });
+  });
 });
diff --git a/src/parser.ts b/src/parser.ts
@@ -45,6 +45,7 @@ function handleAttrs(token: Token, type: string) {
     }
     case 'text':
     case 'code':
+    case 'comment':
       return { content: (token.meta || {}).variable || token.content };
     case 'fence': {
       const [language] = token.info.split(' ', 1);

diff --git a/src/schema.ts b/src/schema.ts
@@ -11,6 +11,7 @@ export const document: Schema = {
     'tag',
     'fence',
     'blockquote',
+    'comment',
     'list',
     'hr',
   ],
@@ -189,6 +190,7 @@ export const inline: Schema = {
     'image',
     'hardbreak',
     'softbreak',
+    'comment',
   ],
 };
 
@@ -231,5 +233,11 @@ export const softbreak: Schema = {
   },
 };
 
+export const comment = { // Schema for `comment` nodes (source text written as `<!-- ... -->`).
+  attributes: {
+    content: { type: String, required: true }, // The comment text; the parser fills it from the token's content.
+  },
+};
+
 export const error = {};
 export const node = {};
diff --git a/src/tokenizer/index.ts b/src/tokenizer/index.ts
@@ -1,13 +1,17 @@
 import MarkdownIt from 'markdown-it/lib';
 import annotations from './plugins/annotations';
 import frontmatter from './plugins/frontmatter';
+import comments from './plugins/comments';
 import type Token from 'markdown-it/lib/token';
 
 export default class Tokenizer {
   private parser: MarkdownIt;
 
   constructor(
-    config: MarkdownIt.Options & { allowIndentation?: boolean } = {}
+    config: MarkdownIt.Options & {
+      allowIndentation?: boolean;
+      allowComments?: boolean;
+    } = {}
   ) {
     this.parser = new MarkdownIt(config);
     this.parser.use(annotations, 'annotations', {});
@@ -17,6 +21,8 @@ export default class Tokenizer {
       // Disable indented `code_block` support https://spec.commonmark.org/0.30/#indented-code-block
       'code',
     ]);
+
+    if (config.allowComments) this.parser.use(comments, 'comments', {});
   }
 
   tokenize(content: string): Token[] {

diff --git a/src/tokenizer/plugins/comments.test.ts b/src/tokenizer/plugins/comments.test.ts
@@ -0,0 +1,94 @@
+import Tokenizer from '..';
+
+describe('MarkdownIt Comments plugin', function () {
+  const tokenizer = new Tokenizer({ allowComments: true });
+
+  // Normalizes a template-literal fixture and tokenizes it. Each newline plus
+  // the whitespace run after it collapses to a single newline, which strips
+  // the fixture's indentation and also removes blank lines between blocks.
+  function parse(example) {
+    const normalized = example.replace(/\n\s+/gm, '\n').trim();
+    const tokens = tokenizer.tokenize(normalized);
+    return tokens;
+  }
+
+  describe('inline comments', function () { // Comments inside paragraph text become children of the `inline` token.
+    const output = [ // Expected token stream, shared by both cases below.
+      { type: 'paragraph_open' },
+      {
+        type: 'inline',
+        children: [
+          { type: 'text', content: 'this is a test ' },
+          { type: 'comment', content: 'example comment' }, // markers and surrounding whitespace are not part of the content
+          { type: 'text', content: ' foo' },
+        ],
+      },
+      { type: 'paragraph_close' },
+    ];
+
+    it('simple inline comment', function () {
+      const example = parse(`
+      this is a test <!-- example comment --> foo
+      `);
+
+      expect(example).toDeepEqualSubset(output);
+    });
+
+    it('inline comment with a newline', function () { // The comment may span several source lines within one paragraph.
+      const example = parse(`
+      this is a test <!-- 
+        example comment
+        --> foo
+      `);
+
+      expect(example).toDeepEqualSubset(output);
+    });
+  });
+
+  // Comments that open at the start of a line are tokenized as their own
+  // block-level `comment` token rather than as part of a paragraph. The
+  // `parse` helper strips blank lines from each fixture, so in every case the
+  // comment directly follows the paragraph line and has to interrupt it.
+  describe('block comments', function () {
+    // Expected token stream, shared by all three layouts of the same comment.
+    const output = [
+      { type: 'paragraph_open' },
+      { type: 'inline' },
+      { type: 'paragraph_close' },
+      { type: 'comment', content: 'example comment' },
+      { type: 'paragraph_open' },
+      { type: 'inline', content: 'foo' },
+      { type: 'paragraph_close' },
+    ];
+
+    // Opener, text and closer each on their own line.
+    it('simple block comment after a paragraph', function () {
+      const example = parse(`
+      this is a test
+
+      <!--
+      example comment
+      -->
+
+      foo
+      `);
+
+      expect(example).toDeepEqualSubset(output);
+    });
+
+    // Closer shares a line with the comment text.
+    it('block comment with ending on same line as content', function () {
+      const example = parse(`
+      this is a test
+
+      <!--
+      example comment -->
+
+      foo
+      `);
+
+      expect(example).toDeepEqualSubset(output);
+    });
+
+    // Opener, text and closer all on a single line.
+    it('block comment on one line', function () {
+      const example = parse(`
+      this is a test
+
+      <!-- example comment -->
+
+      foo
+      `);
+
+      expect(example).toDeepEqualSubset(output);
+    });
+  });
+});
diff --git a/src/tokenizer/plugins/comments.ts b/src/tokenizer/plugins/comments.ts
@@ -0,0 +1,51 @@
+import type MarkdownIt from 'markdown-it/lib';
+import type StateBlock from 'markdown-it/lib/rules_block/state_block';
+import type StateInline from 'markdown-it/lib/rules_inline/state_inline';
+
+const OPEN = '<!--';
+const CLOSE = '-->';
+
+/**
+ * Block rule for HTML-style comments.
+ *
+ * Matches when `<!--` is the first non-whitespace text on `startLine` and a
+ * closing `-->` exists within the lines the rule may consume. On a match it
+ * pushes one `comment` token whose content is the trimmed text between the
+ * two markers and advances the parser past the line holding the closer.
+ *
+ * @param state - markdown-it block parser state
+ * @param startLine - index of the line on which the comment must open
+ * @param endLine - end of the current block scope (exclusive, per markdown-it's
+ *   block rule convention)
+ * @param silent - when true, only report whether the rule matches; no token
+ *   is pushed and `state` is not modified
+ * @returns true when a comment was recognized
+ */
+function block(
+  state: StateBlock,
+  startLine: number,
+  endLine: number,
+  silent: boolean
+): boolean {
+  const start = state.bMarks[startLine] + state.tShift[startLine];
+  if (!state.src.startsWith(OPEN, start)) return false;
+
+  const close = state.src.indexOf(CLOSE, start);
+
+  // `indexOf` signals "not found" with -1, which is truthy, so it must be
+  // compared explicitly. A plain falsiness check lets an unterminated `<!--`
+  // through, and the slice below would then swallow everything after the
+  // opener up to the last character of the source.
+  if (close === -1) return false;
+
+  // The closer has to sit inside the lines this rule is allowed to consume;
+  // otherwise `state.line` would be pushed past `endLine`.
+  if (close + CLOSE.length > state.eMarks[endLine - 1]) return false;
+  if (silent) return true;
+
+  const content = state.src.slice(start + OPEN.length, close);
+  // The content ends at the closer, so its line count is the number of source
+  // lines the comment occupies.
+  const lines = content.split('\n').length;
+  const nextLine = startLine + lines;
+  const token = state.push('comment', '', 0);
+  token.content = content.trim();
+  token.map = [startLine, nextLine];
+  // NOTE(review): any text after `-->` on the closing line is consumed along
+  // with the comment and does not reach the output — confirm this is intended.
+  state.line = nextLine;
+
+  return true;
+}
+
+/**
+ * Inline rule for HTML-style comments.
+ *
+ * Matches when `<!--` starts at the current position and a closing `-->`
+ * ends within the current inline range. On a match it moves `state.pos` past
+ * the closer and, unless `silent`, pushes one `comment` token whose content
+ * is the trimmed text between the two markers.
+ *
+ * @param state - markdown-it inline parser state
+ * @param silent - when true, skip over the comment without pushing a token
+ * @returns true when a comment was recognized
+ */
+function inline(state: StateInline, silent: boolean): boolean {
+  if (!state.src.startsWith(OPEN, state.pos)) return false;
+
+  const close = state.src.indexOf(CLOSE, state.pos);
+
+  // `indexOf` signals "not found" with -1, which is truthy, so it must be
+  // compared explicitly. Accepting -1 would set `state.pos` to 2, which can
+  // move the parser backwards and make it re-scan the same text forever.
+  if (close === -1) return false;
+
+  const end = close + CLOSE.length;
+
+  // Never consume past the range the inline parser is currently working on.
+  if (end > state.posMax) return false;
+
+  // Silent mode suppresses token output only. The position must still
+  // advance, because markdown-it's `skipToken` relies on a matching rule
+  // moving `state.pos` forward (e.g. while scanning a link label).
+  if (!silent) {
+    const content = state.src.slice(state.pos + OPEN.length, close);
+    const token = state.push('comment', '', 0);
+    token.content = content.trim();
+  }
+
+  state.pos = end;
+
+  return true;
+}
+
+/**
+ * markdown-it plugin that adds `comment` tokenization.
+ *
+ * Registers the block rule ahead of the `table` rule, with `paragraph` listed
+ * as an alternate chain, and appends the inline rule to the end of the inline
+ * rule list.
+ *
+ * @param md - the markdown-it instance to extend
+ */
+export default function plugin(md: MarkdownIt) {
+  const blockRuleOptions = { alt: ['paragraph'] };
+
+  md.block.ruler.before('table', 'comment', block, blockRuleOptions);
+  md.inline.ruler.push('comment', inline);
+}