From 9e7ab801bc79aac62e0ca54978fca91de2296519 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Sun, 3 Jul 2022 22:52:44 -0500 Subject: [PATCH 1/7] fix: return values from walkTokens --- src/Lexer.js | 3 +- src/Tokenizer.js | 51 +++++------- src/marked.js | 45 +++++++---- test/bench.js | 196 +++++++++++++++++------------------------------ 4 files changed, 123 insertions(+), 172 deletions(-) diff --git a/src/Lexer.js b/src/Lexer.js index 3c0a2c7e01..c4bbf41a83 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -316,8 +316,9 @@ export class Lexer { return tokens; } - inline(src, tokens) { + inline(src, tokens = []) { this.inlineQueue.push({ src, tokens }); + return tokens; } /** diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 0f77a01050..338355afaa 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -19,7 +19,7 @@ function outputLink(cap, link, raw, lexer) { href, title, text, - tokens: lexer.inlineTokens(text, []) + tokens: lexer.inlineTokens(text) }; lexer.state.inLink = false; return token; @@ -125,15 +125,13 @@ export class Tokenizer { } } - const token = { + return { type: 'heading', raw: cap[0], depth: cap[1].length, text, - tokens: [] + tokens: this.lexer.inline(text) }; - this.lexer.inline(token.text, token.tokens); - return token; } } @@ -354,10 +352,10 @@ export class Tokenizer { text: cap[0] }; if (this.options.sanitize) { + const text = this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0]); token.type = 'paragraph'; - token.text = this.options.sanitizer ? 
this.options.sanitizer(cap[0]) : escape(cap[0]); - token.tokens = []; - this.lexer.inline(token.text, token.tokens); + token.text = text; + token.tokens = this.lexer.inline(text); } return token; } @@ -415,8 +413,7 @@ export class Tokenizer { // header child tokens l = item.header.length; for (j = 0; j < l; j++) { - item.header[j].tokens = []; - this.lexer.inline(item.header[j].text, item.header[j].tokens); + item.header[j].tokens = this.lexer.inline(item.header[j].text); } // cell child tokens @@ -424,8 +421,7 @@ export class Tokenizer { for (j = 0; j < l; j++) { row = item.rows[j]; for (k = 0; k < row.length; k++) { - row[k].tokens = []; - this.lexer.inline(row[k].text, row[k].tokens); + row[k].tokens = this.lexer.inline(row[k].text); } } @@ -437,45 +433,40 @@ export class Tokenizer { lheading(src) { const cap = this.rules.block.lheading.exec(src); if (cap) { - const token = { + return { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 1 : 2, text: cap[1], - tokens: [] + tokens: this.lexer.inline(cap[1]) }; - this.lexer.inline(token.text, token.tokens); - return token; } } paragraph(src) { const cap = this.rules.block.paragraph.exec(src); if (cap) { - const token = { + const text = cap[1].charAt(cap[1].length - 1) === '\n' + ? cap[1].slice(0, -1) + : cap[1]; + return { type: 'paragraph', raw: cap[0], - text: cap[1].charAt(cap[1].length - 1) === '\n' - ? 
cap[1].slice(0, -1) - : cap[1], - tokens: [] + text, + tokens: this.lexer.inline(text) }; - this.lexer.inline(token.text, token.tokens); - return token; } } text(src) { const cap = this.rules.block.text.exec(src); if (cap) { - const token = { + return { type: 'text', raw: cap[0], text: cap[0], - tokens: [] + tokens: this.lexer.inline(cap[0]) }; - this.lexer.inline(token.text, token.tokens); - return token; } } @@ -644,7 +635,7 @@ export class Tokenizer { type: 'em', raw: src.slice(0, lLength + match.index + rLength + 1), text, - tokens: this.lexer.inlineTokens(text, []) + tokens: this.lexer.inlineTokens(text) }; } @@ -654,7 +645,7 @@ export class Tokenizer { type: 'strong', raw: src.slice(0, lLength + match.index + rLength + 1), text, - tokens: this.lexer.inlineTokens(text, []) + tokens: this.lexer.inlineTokens(text) }; } } @@ -695,7 +686,7 @@ export class Tokenizer { type: 'del', raw: cap[0], text: cap[2], - tokens: this.lexer.inlineTokens(cap[2], []) + tokens: this.lexer.inlineTokens(cap[2]) }; } } diff --git a/src/marked.js b/src/marked.js index 10f543336c..feb500397f 100644 --- a/src/marked.js +++ b/src/marked.js @@ -105,13 +105,7 @@ export function marked(src, opt, callback) { return; } - try { - const tokens = Lexer.lex(src, opt); - if (opt.walkTokens) { - marked.walkTokens(tokens, opt.walkTokens); - } - return Parser.parse(tokens, opt); - } catch (e) { + function onError(e) { e.message += '\nPlease report this to https://github.com/markedjs/marked.'; if (opt.silent) { return '

An error occurred:

'
@@ -120,6 +114,23 @@ export function marked(src, opt, callback) {
     }
     throw e;
   }
+
+  try {
+    const tokens = Lexer.lex(src, opt);
+    if (opt.walkTokens) {
+      if (opt.async) {
+        return Promise.all(marked.walkTokens(tokens, opt.walkTokens))
+          .then(() => {
+            return Parser.parse(tokens, opt);
+          })
+          .catch(onError);
+      }
+      marked.walkTokens(tokens, opt.walkTokens);
+    }
+    return Parser.parse(tokens, opt);
+  } catch (e) {
+    onError(e);
+  }
 }
 
 /**
@@ -236,10 +247,12 @@ marked.use = function(...args) {
     if (pack.walkTokens) {
       const walkTokens = marked.defaults.walkTokens;
       opts.walkTokens = function(token) {
-        pack.walkTokens.call(this, token);
+        let values = []; // return values (possibly promises) of this pack's walkTokens and the previous default one
+        values.push(pack.walkTokens.call(this, token));
         if (walkTokens) {
-          walkTokens.call(this, token);
+          values = values.concat(walkTokens.call(this, token)); // concat (not push) so an array result from the previous walkTokens is flattened
         }
+        return values;
       };
     }
 
@@ -256,35 +269,37 @@ marked.use = function(...args) {
  */
 
 marked.walkTokens = function(tokens, callback) {
+  let values = []; // flat list of everything the callback returned, in visit order
   for (const token of tokens) {
-    callback.call(marked, token);
+    values = values.concat(callback.call(marked, token)); // concat flattens an array return value into the list
     switch (token.type) {
       case 'table': {
         for (const cell of token.header) {
-          marked.walkTokens(cell.tokens, callback);
+          values = values.concat(marked.walkTokens(cell.tokens, callback));
         }
         for (const row of token.rows) {
           for (const cell of row) {
-            marked.walkTokens(cell.tokens, callback);
+            values = values.concat(marked.walkTokens(cell.tokens, callback));
           }
         }
         break;
       }
       case 'list': {
-        marked.walkTokens(token.items, callback);
+        values = values.concat(marked.walkTokens(token.items, callback));
         break;
       }
       default: {
         if (marked.defaults.extensions && marked.defaults.extensions.childTokens && marked.defaults.extensions.childTokens[token.type]) { // Walk any extensions
           marked.defaults.extensions.childTokens[token.type].forEach(function(childTokens) {
-            marked.walkTokens(token[childTokens], callback);
+            values = values.concat(marked.walkTokens(token[childTokens], callback));
           });
         } else if (token.tokens) {
-          marked.walkTokens(token.tokens, callback);
+          values = values.concat(marked.walkTokens(token.tokens, callback));
         }
       }
     }
   }
+  return values; // marked() hands this list to Promise.all when opt.async is set
 };
 
 /**
diff --git a/test/bench.js b/test/bench.js
index 7afd24f0e6..7b3d9e71b2 100644
--- a/test/bench.js
+++ b/test/bench.js
@@ -3,6 +3,7 @@ import { fileURLToPath } from 'url';
 import { isEqual } from './helpers/html-differ.js';
 import { loadFiles } from './helpers/load.js';
 
+import { marked as cjsMarked } from '../lib/marked.cjs';
 import { marked as esmMarked } from '../lib/marked.esm.js';
 
 const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -30,9 +31,10 @@ export function load() {
 export async function runBench(options) {
   options = options || {};
   const specs = load();
+  const tests = {};
 
   // Non-GFM, Non-pedantic
-  marked.setOptions({
+  cjsMarked.setOptions({
     gfm: false,
     breaks: false,
     pedantic: false,
@@ -40,9 +42,9 @@ export async function runBench(options) {
     smartLists: false
   });
   if (options.marked) {
-    marked.setOptions(options.marked);
+    cjsMarked.setOptions(options.marked);
   }
-  await bench('cjs marked', specs, marked.parse);
+  tests['cjs marked'] = cjsMarked.parse;
 
   esmMarked.setOptions({
     gfm: false,
@@ -54,113 +56,76 @@ export async function runBench(options) {
   if (options.marked) {
     esmMarked.setOptions(options.marked);
   }
-  await bench('esm marked', specs, esmMarked.parse);
+  tests['esm marked'] = esmMarked.parse;
 
-  // GFM
-  marked.setOptions({
-    gfm: true,
-    breaks: false,
-    pedantic: false,
-    sanitize: false,
-    smartLists: false
-  });
-  if (options.marked) {
-    marked.setOptions(options.marked);
-  }
-  await bench('cjs marked (gfm)', specs, marked.parse);
-
-  esmMarked.setOptions({
-    gfm: true,
-    breaks: false,
-    pedantic: false,
-    sanitize: false,
-    smartLists: false
-  });
-  if (options.marked) {
-    esmMarked.setOptions(options.marked);
-  }
-  await bench('esm marked (gfm)', specs, esmMarked.parse);
-
-  // Pedantic
-  marked.setOptions({
-    gfm: false,
-    breaks: false,
-    pedantic: true,
-    sanitize: false,
-    smartLists: false
-  });
-  if (options.marked) {
-    marked.setOptions(options.marked);
-  }
-  await bench('cjs marked (pedantic)', specs, marked.parse);
-
-  esmMarked.setOptions({
-    gfm: false,
-    breaks: false,
-    pedantic: true,
-    sanitize: false,
-    smartLists: false
-  });
-  if (options.marked) {
-    esmMarked.setOptions(options.marked);
-  }
-  await bench('esm marked (pedantic)', specs, esmMarked.parse);
+  // esmMarked.setOptions({
+  //   gfm: true,
+  //   breaks: false,
+  //   pedantic: false,
+  //   sanitize: false,
+  //   smartLists: false
+  // });
+  // if (options.marked) {
+  //   esmMarked.setOptions(options.marked);
+  // }
+  // tests['esm marked (gfm)'] = esmMarked.parse;
 
   try {
-    await bench('commonmark', specs, (await (async() => {
+    tests.commonmark = (await (async() => {
       const { Parser, HtmlRenderer } = await import('commonmark');
       const parser = new Parser();
       const writer = new HtmlRenderer();
       return function(text) {
         return writer.render(parser.parse(text));
       };
-    })()));
+    })());
   } catch (e) {
     console.error('Could not bench commonmark. (Error: %s)', e.message);
   }
 
   try {
-    await bench('markdown-it', specs, (await (async() => {
+    tests['markdown-it'] = (await (async() => {
       const MarkdownIt = (await import('markdown-it')).default;
       const md = new MarkdownIt();
       return md.render.bind(md);
-    })()));
+    })());
   } catch (e) {
     console.error('Could not bench markdown-it. (Error: %s)', e.message);
   }
+
+  await bench(tests, specs);
 }
 
-export async function bench(name, specs, engine) {
-  const before = process.hrtime();
-  for (let i = 0; i < 1e3; i++) {
-    for (const spec of specs) {
-      await engine(spec.markdown);
+export async function bench(tests, specs) {
+  const stats = {};
+  for (const name in tests) {
+    stats[name] = {
+      elapsed: 0n, // BigInt running total of process.hrtime.bigint() differences for this engine
+      correct: 0 // number of specs whose output isEqual() accepted
+    };
+  }
+
+  console.log();
+  for (let i = 0; i < specs.length; i++) {
+    const spec = specs[i];
+    process.stdout.write(`${(i * 100 / specs.length).toFixed(1).padStart(5)}% ${i.toString().padStart(specs.length.toString().length)} of ${specs.length}\r`);
+    for (const name in tests) {
+      const test = tests[name];
+      const before = process.hrtime.bigint();
+      for (let n = 0; n < 1e3; n++) { // 1000 timed runs of this spec for this engine
+        await test(spec.markdown);
+      }
+      const after = process.hrtime.bigint();
+      stats[name].elapsed += after - before;
+      stats[name].correct += (await isEqual(spec.html, await test(spec.markdown)) ? 1 : 0); // correctness run happens outside the timed region
     }
   }
-  const elapsed = process.hrtime(before);
-  const ms = prettyElapsedTime(elapsed).toFixed();
 
-  let correct = 0;
-  for (const spec of specs) {
-    if (await isEqual(spec.html, await engine(spec.markdown))) {
-      correct++;
-    }
+  for (const name in tests) {
+    const ms = prettyElapsedTime(stats[name].elapsed); // BigInt total converted to whole milliseconds
+    const percent = (stats[name].correct / specs.length * 100).toFixed(2);
+    console.log(`${name} completed in ${ms}ms and passed ${percent}%`);
   }
-  const percent = (correct / specs.length * 100).toFixed(2);
-
-  console.log('%s completed in %sms and passed %s%', name, ms, percent);
-}
-
-/**
- * A simple one-time benchmark
- */
-export async function time(options) {
-  options = options || {};
-  const specs = load();
-  if (options.marked) {
-    marked.setOptions(options.marked);
-  }
-  await bench('marked', specs, marked);
 }
 
 /**
@@ -204,35 +169,23 @@ function parseArg(argv) {
 
   while (argv.length) {
     const arg = getarg();
-    switch (arg) {
-      case '-t':
-      case '--time':
-        options.time = true;
-        break;
-      case '-m':
-      case '--minified':
-        options.minified = true;
-        break;
-      default:
-        if (arg.indexOf('--') === 0) {
-          const opt = camelize(arg.replace(/^--(no-)?/, ''));
-          if (!defaults.hasOwnProperty(opt)) {
-            continue;
-          }
-          options.marked = options.marked || {};
-          if (arg.indexOf('--no-') === 0) {
-            options.marked[opt] = typeof defaults[opt] !== 'boolean'
-              ? null
-              : false;
-          } else {
-            options.marked[opt] = typeof defaults[opt] !== 'boolean'
-              ? argv.shift()
-              : true;
-          }
-        } else {
-          orphans.push(arg);
-        }
-        break;
+    if (arg.indexOf('--') === 0) {
+      const opt = camelize(arg.replace(/^--(no-)?/, ''));
+      if (!defaults.hasOwnProperty(opt)) {
+        continue;
+      }
+      options.marked = options.marked || {};
+      if (arg.indexOf('--no-') === 0) {
+        options.marked[opt] = typeof defaults[opt] !== 'boolean'
+          ? null
+          : false;
+      } else {
+        options.marked[opt] = typeof defaults[opt] !== 'boolean'
+          ? argv.shift()
+          : true;
+      }
+    } else {
+      orphans.push(arg);
     }
   }
 
@@ -257,28 +210,19 @@ function camelize(text) {
  * Main
  */
 export default async function main(argv) {
-  marked = (await import('../lib/marked.cjs')).marked;
+  marked = cjsMarked; // statically imported CommonJS build; replaces the former dynamic import
 
   const opt = parseArg(argv);
 
-  if (opt.minified) {
-    marked = (await import('../marked.min.js')).marked;
-  }
-
-  if (opt.time) {
-    await time(opt);
-  } else {
-    await runBench(opt);
-  }
+  await runBench(opt); // the --time and --minified modes were removed, so the full benchmark always runs
 }
 
 /**
  * returns time to millisecond granularity
+ * @param {bigint} hrtimeElapsed elapsed nanoseconds, i.e. a difference of two process.hrtime.bigint() readings
  */
 function prettyElapsedTime(hrtimeElapsed) {
-  const seconds = hrtimeElapsed[0];
-  const frac = Math.round(hrtimeElapsed[1] / 1e3) / 1e3;
-  return seconds * 1e3 + frac;
+  return Number(hrtimeElapsed / 1_000_000n); // BigInt division truncates, so the result is whole milliseconds
 }
 
 process.title = 'marked bench';

From 2a761cbe0a487ed1994b57dd9cbc2e496f0a0bb2 Mon Sep 17 00:00:00 2001
From: Tony Brix 
Date: Sun, 14 Aug 2022 19:35:25 -0500
Subject: [PATCH 2/7] docs: add async docs

---
 docs/USING_ADVANCED.md |  1 +
 docs/USING_PRO.md      | 72 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)

diff --git a/docs/USING_ADVANCED.md b/docs/USING_ADVANCED.md
index e2adba9d43..09bc892ab9 100644
--- a/docs/USING_ADVANCED.md
+++ b/docs/USING_ADVANCED.md
@@ -44,6 +44,7 @@ console.log(marked.parse(markdownString));
 
 |Member      |Type      |Default  |Since    |Notes         |
 |:-----------|:---------|:--------|:--------|:-------------|
+|async       |`boolean` |`false`  |4.0.19   |If true, `walkTokens` functions can be async and `marked.parse` will return a promise that resolves when all walk tokens functions resolve.|
 |baseUrl     |`string`  |`null`   |0.3.9    |A prefix url for any relative link. |
 |breaks      |`boolean` |`false`  |v0.2.7   |If true, add `
` on a single line break (copies GitHub behavior on comments, but not on rendered markdown files). Requires `gfm` be `true`.| |gfm |`boolean` |`true` |v0.2.1 |If true, use approved [GitHub Flavored Markdown (GFM) specification](https://github.github.com/gfm/).| diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index c621f505fd..0fcd4adcb0 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -438,6 +438,78 @@ console.log(marked.parse('A Description List:\n' *** +

Async Marked : async

+ +Marked will return a promise if the `async` options is true. The `async` option will tell marked to await any `walkTokens` functions before parsing the tokens and returning an HTML string. + +Simple Example: + +```js +const walkTokens = async (token) => { + if (token.type === 'link') { + try { + await fetch(token.href); + } catch (ex) { + token.title = 'invalid'; + } + } +}; + +marked.use({ walkTokens, async: true }); + +const markdown = ` +[valid link](https://example.com) + +[invalid link](https://invalidurl.com) +`; + +const html = await marked.parse(markdown); +``` + +Custom Extension Example: + +```js +const importUrl = { + extensions: [{ + name: 'importUrl', + level: 'block', + start(src) { return src.indexOf('\n:'); }, + tokenizer(src) { + const rule = /^:(https?:\/\/.+?):/; + const match = rule.exec(src); + if (match) { + return { + type: 'importUrl', + raw: match[0], + url: match[1], + html: '' // will be replaced in walkTokens + }; + } + }, + renderer(token) { + return token.html; + } + }], + async: true, // needed to tell marked to return a promise + async walkTokens(token) { + if (token.type === 'importUrl') { + const res = await fetch(token.url); + token.html = await res.text(); + } + } +}; + +marked.use(importUrl); + +const markdown = ` +# example.com + +:https://example.com: +`; + +const html = await marked.parse(markdown); +``` +

The Lexer

The lexer takes a markdown string and calls the tokenizer functions. From 5326c41eebc0e3984c703274c39bf8b0e9915325 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Sun, 14 Aug 2022 20:00:02 -0500 Subject: [PATCH 3/7] test: add async test --- src/defaults.js | 1 + test/unit/marked-spec.js | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/src/defaults.js b/src/defaults.js index 3a68802cdf..e295ad2ebc 100644 --- a/src/defaults.js +++ b/src/defaults.js @@ -1,5 +1,6 @@ export function getDefaults() { return { + async: false, baseUrl: null, breaks: false, extensions: null, diff --git a/test/unit/marked-spec.js b/test/unit/marked-spec.js index e59b5cc497..15ea8751d4 100644 --- a/test/unit/marked-spec.js +++ b/test/unit/marked-spec.js @@ -1059,4 +1059,21 @@ br }); expect(marked('*text*').trim()).toBe('

text walked

'); }); + + it('should wait for async `walkTokens` function', async() => { + marked.use({ + async: true, + async walkTokens(token) { + if (token.type === 'em') { + await new Promise((resolve) => { + setTimeout(resolve, 100); + }); + token.text += ' walked'; + token.tokens = this.Lexer.lexInline(token.text); + } + } + }); + const html = await marked('*text*'); + expect(html.trim()).toBe('

text walked

'); + }); }); From 3f36b41b5c063ae7409259c7899b11b1d91895d8 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Sun, 14 Aug 2022 20:09:27 -0500 Subject: [PATCH 4/7] docs: add nav to async --- docs/_document.html | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/_document.html b/docs/_document.html index 3527ebc1e9..c3dea889cc 100644 --- a/docs/_document.html +++ b/docs/_document.html @@ -51,6 +51,7 @@

Marked Documentation

  • Tokenizer
  • Walk Tokens
  • Custom Extensions
  • +
  • Async Marked
  • Lexer
  • Parser
  • From b9d7c293c7fb4a908d2e774127b8f2988fe39e3c Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Wed, 24 Aug 2022 20:26:04 -0500 Subject: [PATCH 5/7] Update docs/USING_PRO.md Co-authored-by: Steven --- docs/USING_PRO.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index 0fcd4adcb0..e162bdf437 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -440,7 +440,7 @@ console.log(marked.parse('A Description List:\n'

    Async Marked : async

    -Marked will return a promise if the `async` options is true. The `async` option will tell marked to await any `walkTokens` functions before parsing the tokens and returning an HTML string. +Marked will return a promise if the `async` option is true. The `async` option will tell marked to await any `walkTokens` functions before parsing the tokens and returning an HTML string. Simple Example: From 13e805d31e6c57404b697a63db0790a7167dc2fe Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Wed, 24 Aug 2022 20:59:19 -0500 Subject: [PATCH 6/7] test: expect promise --- test/unit/marked-spec.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/unit/marked-spec.js b/test/unit/marked-spec.js index 15ea8751d4..08f689728d 100644 --- a/test/unit/marked-spec.js +++ b/test/unit/marked-spec.js @@ -1073,7 +1073,9 @@ br } } }); - const html = await marked('*text*'); + const promise = marked('*text*'); + expect(promise).toBeInstanceOf(Promise); + const html = await promise; expect(html.trim()).toBe('

    text walked

    '); }); }); From 67ee2bcd6904f86bb78236ef13f9c433f1c9ad13 Mon Sep 17 00:00:00 2001 From: Tony Brix Date: Wed, 24 Aug 2022 21:01:11 -0500 Subject: [PATCH 7/7] Update docs/USING_ADVANCED.md Co-authored-by: Steven --- docs/USING_ADVANCED.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/USING_ADVANCED.md b/docs/USING_ADVANCED.md index 09bc892ab9..f49d320b4d 100644 --- a/docs/USING_ADVANCED.md +++ b/docs/USING_ADVANCED.md @@ -44,7 +44,7 @@ console.log(marked.parse(markdownString)); |Member |Type |Default |Since |Notes | |:-----------|:---------|:--------|:--------|:-------------| -|async |`boolean` |`false` |4.0.19 |If true, `walkTokens` functions can be async and `marked.parse` will return a promise that resolves when all walk tokens functions resolve.| +|async |`boolean` |`false` |4.1.0 |If true, `walkTokens` functions can be async and `marked.parse` will return a promise that resolves when all walk tokens functions resolve.| |baseUrl |`string` |`null` |0.3.9 |A prefix url for any relative link. | |breaks |`boolean` |`false` |v0.2.7 |If true, add `
    ` on a single line break (copies GitHub behavior on comments, but not on rendered markdown files). Requires `gfm` be `true`.| |gfm |`boolean` |`true` |v0.2.1 |If true, use approved [GitHub Flavored Markdown (GFM) specification](https://github.github.com/gfm/).|