experimental - missing one test - parse markdown inside of html

mapmeld · Feb 25, 2018 · a8df1aa · a8df1aa
1 parent 56d1bcf
commit a8df1aa
Show file tree

Hide file tree

Showing 3 changed files with 92 additions and 18 deletions.
diff --git a/lib/marked.js b/lib/marked.js
@@ -20,7 +20,10 @@ var block = {
   nptable: noop,
   blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
   list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
-  html: /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/,
+  htmlcomment: /^ *(?:comment *(?:\n|\s*$))/,
+  htmlstart: /^ *(?:(anystart))/,
+  htmlend: /^\<\/.*>/,
+  html: /^ *(?:comment *(?:\n|\s*$)|(anystart)|(anyend)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/,
   def: /^ {0,3}\[(label)\]: *\n? *<?([^\s>]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/,
   table: noop,
   lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/,
@@ -52,13 +55,18 @@ block._tag = '(?!(?:'
   + '|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo'
   + '|span|br|wbr|ins|del|img)\\b)\\w+(?!:|[^\\w\\s@]*@)\\b';
 
-block.html = edit(block.html)
+block.htmlcomment = edit(block.htmlcomment)
   .replace('comment', /<!--[\s\S]*?-->/)
-  .replace('closed', /<(tag)[\s\S]+?<\/\1>/)
-  .replace('closing', /<tag(?:"[^"]*"|'[^']*'|\s[^'"\/>]*)*?\/?>/)
+  .getRegex();
+
+block.htmlstart = edit(block.htmlstart)
+  .replace('anystart', /<(tag)(?:[^"]*"|'[^']*'|\s[^'"\/>]*)*?>(?!:(?<=["']>)|(?=["']))/)
   .replace(/tag/g, block._tag)
   .getRegex();
 
+block.htmlend = edit(block.htmlend)
+  .getRegex();
+
 block.paragraph = edit(block.paragraph)
   .replace('hr', block.hr)
   .replace('heading', block.heading)
@@ -164,7 +172,8 @@ Lexer.prototype.token = function(src, top) {
       space,
       i,
       tag,
-      l;
+      l,
+      withinHtml = 0;
 
   while (src) {
     // newline
@@ -178,16 +187,18 @@ Lexer.prototype.token = function(src, top) {
     }
 
     // code
-    if (cap = this.rules.code.exec(src)) {
-      src = src.substring(cap[0].length);
-      cap = cap[0].replace(/^ {4}/gm, '');
-      this.tokens.push({
-        type: 'code',
-        text: !this.options.pedantic
-          ? cap.replace(/\n+$/, '')
-          : cap
-      });
-      continue;
+    if (withinHtml === 0) {
+      if (cap = this.rules.code.exec(src)) {
+        src = src.substring(cap[0].length);
+        cap = cap[0].replace(/^ {4}/gm, '');
+        this.tokens.push({
+          type: 'code',
+          text: !this.options.pedantic
+            ? cap.replace(/\n+$/, '')
+            : cap
+        });
+        continue;
+      }
     }
 
     // fences (gfm)
@@ -349,13 +360,47 @@ Lexer.prototype.token = function(src, top) {
       continue;
     }
 
-    // html
-    if (cap = this.rules.html.exec(src)) {
+    if (cap = this.rules.htmlcomment.exec(src)) {
       src = src.substring(cap[0].length);
       this.tokens.push({
         type: this.options.sanitize
           ? 'paragraph'
           : 'html',
+        htmltype: 'start',
+        pre: !this.options.sanitizer
+          && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
+        text: cap[0]
+      });
+      continue;
+    }
+
+    if (cap = this.rules.htmlstart.exec(src)) {
+      // only real opening tags deepen the nesting; self-closing tags (`/>`) and `<hr>` do not
+      if (cap[0].indexOf('/>') === -1 && cap[1].toLowerCase() !== 'hr') {
+        withinHtml++;
+      }
+      src = src.substring(cap[0].length);
+      this.tokens.push({
+        type: this.options.sanitize
+          ? 'paragraph'
+          : 'html',
+        htmltype: ((cap[0].indexOf('/>') === -1 && cap[1].toLowerCase() !== 'hr') ? 'start' : 'self-closing'), // same test as the nesting check above
+        pre: !this.options.sanitizer
+          && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
+        text: cap[0]
+      });
+      continue;
+    }
+
+    if (cap = this.rules.htmlend.exec(src)) {
+      // this might be counting </a> in Markdown links
+      withinHtml--;
+      src = src.substring(cap[0].length);
+      this.tokens.push({
+        type: this.options.sanitize
+          ? 'paragraph'
+          : 'html',
+        htmltype: 'end',
         pre: !this.options.sanitizer
           && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'),
         text: cap[0]
@@ -424,9 +469,22 @@ Lexer.prototype.token = function(src, top) {
 
     // top-level paragraph
     if (top && (cap = this.rules.paragraph.exec(src))) {
+      // this needs to be fixed in the paragraph regex
+      var tags = ['p', 'div', 'li', 'ul', 'a', 'h1', 'h2', 'h3', 'strong', 'hr'];
+      tags.forEach((tag) => {
+        if (cap[0].indexOf('</' + tag) > 0) {
+          cap[0] = cap[0].substring(0, cap[0].indexOf('</' + tag));
+        }
+        if (cap[1].indexOf('</' + tag) > 0) {
+          cap[1] = cap[1].substring(0, cap[1].indexOf('</' + tag));
+        }
+      });
       src = src.substring(cap[0].length);
+      if (!cap[1].trim().length) {
+        continue;
+      }
       this.tokens.push({
-        type: 'paragraph',
+        type: ((withinHtml > 0) ? 'text2' : 'paragraph'),
         text: cap[1].charAt(cap[1].length - 1) === '\n'
           ? cap[1].slice(0, -1)
           : cap[1]
@@ -438,6 +496,9 @@ Lexer.prototype.token = function(src, top) {
     if (cap = this.rules.text.exec(src)) {
       // Top-level should never reach here.
       src = src.substring(cap[0].length);
+      if (!cap[0].trim().length) {
+        continue;
+      }
       this.tokens.push({
         type: 'text',
         text: cap[0]
@@ -849,6 +910,10 @@ Renderer.prototype.paragraph = function(text) {
   return '<p>' + text + '</p>\n';
 };
 
+Renderer.prototype.text2 = function(text) { // renders a 'text2' token: the text followed by a newline, with no <p> wrapper
+  return text + '\n';
+};
+
 Renderer.prototype.table = function(header, body) {
   return '<table>\n'
     + '<thead>\n'
@@ -1139,6 +1204,9 @@ Parser.prototype.tok = function() {
     case 'text': {
       return this.renderer.paragraph(this.parseText());
     }
+    case 'text2': {
+      return this.renderer.text2(this.parseText());
+    }
   }
 };
 

diff --git a/test/new/list_inside_div.html b/test/new/list_inside_div.html
@@ -0,0 +1 @@
+<div><ul><li>list</li><li>inside</li></ul></div>
diff --git a/test/new/list_inside_div.md b/test/new/list_inside_div.md
@@ -0,0 +1,5 @@
+<div>
+* list
+* inside
+
+</div>