From 51bbf62a2f57f1ded5778e45e3b96faca27bc874 Mon Sep 17 00:00:00 2001
From: Simon Holthausen <simon.holthausen@accso.de>
Date: Fri, 22 Oct 2021 10:57:45 +0200
Subject: [PATCH] (fix) add open tag checks to html scanner

The HTML parser treats a "<" character inside the body of another tag as the start of a new tag. This is a common situation inside {#if ...} control flow tags (e.g. `{#if a < b}`), so the contents of these tags were previously blanked out inside extractTags. However, that blanking had no effect when an already-parsed HTML document was passed in. Therefore, move the handling of "<" inside control flow tags into the preprocess scanner, so it applies to every parse.
---
 .../src/lib/documents/parseHtml.ts            | 12 ++---
 .../src/lib/documents/utils.ts                | 44 +++++++++----------
 .../test/lib/documents/parseHtml.test.ts      | 29 ++++++++++++
 3 files changed, 57 insertions(+), 28 deletions(-)
diff --git a/packages/language-server/src/lib/documents/parseHtml.ts b/packages/language-server/src/lib/documents/parseHtml.ts
index cfa65ebe4..552c49765 100644
--- a/packages/language-server/src/lib/documents/parseHtml.ts
+++ b/packages/language-server/src/lib/documents/parseHtml.ts
@@ -42,7 +42,11 @@ function preprocess(text: string) {
         const offset = scanner.getTokenOffset();
 
         if (token === TokenType.StartTagOpen) {
-            currentStartTagStart = offset;
+            if (shouldBlankStartOrEndTagLike(offset)) {
+                blankStartOrEndTagLike(offset);
+            } else {
+                currentStartTagStart = offset;
+            }
         }
 
         if (token === TokenType.StartTagClose) {
@@ -74,11 +78,7 @@ function preprocess(text: string) {
     return text;
 
     function shouldBlankStartOrEndTagLike(offset: number) {
-        // not null rather than falsy, otherwise it won't work on first tag(0)
-        return (
-            currentStartTagStart !== null &&
-            isInsideMoustacheTag(text, currentStartTagStart, offset)
-        );
+        return isInsideMoustacheTag(text, currentStartTagStart, offset);
     }
 
     function blankStartOrEndTagLike(offset: number) {
diff --git a/packages/language-server/src/lib/documents/utils.ts b/packages/language-server/src/lib/documents/utils.ts
index 2e6f706be..50f59cb53 100644
--- a/packages/language-server/src/lib/documents/utils.ts
+++ b/packages/language-server/src/lib/documents/utils.ts
@@ -40,7 +40,6 @@ function parseAttributes(
 }
 
 const regexIf = new RegExp('{#if\\s.*?}', 'gms');
-const regexIfElseIf = new RegExp('{:else if\\s.*?}', 'gms');
 const regexIfEnd = new RegExp('{/if}', 'gms');
 const regexEach = new RegExp('{#each\\s.*?}', 'gms');
 const regexEachEnd = new RegExp('{/each}', 'gms');
@@ -48,23 +47,6 @@ const regexAwait = new RegExp('{#await\\s.*?}', 'gms');
 const regexAwaitEnd = new RegExp('{/await}', 'gms');
 const regexHtml = new RegExp('{@html\\s.*?', 'gms');
 
-/**
- * if-blocks can contain the `<` operator, which mistakingly is
- * parsed as a "open tag" character by the html parser.
- * To prevent this, just replace the whole content inside the if with whitespace.
- */
-function blankIfBlocks(text: string): string {
-    return text
-        .replace(regexIf, (substr) => {
-            return '{#if' + substr.replace(/[^\n]/g, ' ').substring(4, substr.length - 1) + '}';
-        })
-        .replace(regexIfElseIf, (substr) => {
-            return (
-                '{:else if' + substr.replace(/[^\n]/g, ' ').substring(9, substr.length - 1) + '}'
-            );
-        });
-}
-
 /**
  * Extracts a tag (style or script) from the given text
  * and returns its start, end and the attributes on that tag.
@@ -77,7 +59,6 @@ function extractTags(
     tag: 'script' | 'style' | 'template',
     html?: HTMLDocument
 ): TagInformation[] {
-    text = blankIfBlocks(text);
     const rootNodes = html?.roots || parseHtml(text).roots;
     const matchedNodes = rootNodes
         .filter((node) => node.tag === tag)
@@ -404,7 +385,26 @@ export function getLangAttribute(...tags: Array<TagInformation | null>): string
     return attribute.replace(/^text\//, '');
 }
 
-export function isInsideMoustacheTag(html: string, tagStart: number, position: number) {
-    const charactersInNode = html.substring(tagStart, position);
-    return charactersInNode.lastIndexOf('{') > charactersInNode.lastIndexOf('}');
+/**
+ * Checks whether given position is inside a moustache tag (which includes control flow tags)
+ * using a simple bracket matching heuristic which might fail under conditions like
+ * `{#if {a: true}.a}`
+ */
+export function isInsideMoustacheTag(html: string, tagStart: number | null, position: number) {
+    if (tagStart === null) {
+        // Not inside <tag ... >
+        const charactersBeforePosition = html.substring(0, position);
+        return (
+            Math.max(
+                // TODO make this just check for '{'?
+                // Theoretically, someone could do {a < b} in a simple moustache tag
+                charactersBeforePosition.lastIndexOf('{#'),
+                charactersBeforePosition.lastIndexOf('{:')
+            ) > charactersBeforePosition.lastIndexOf('}')
+        );
+    } else {
+        // Inside <tag ... >
+        const charactersInNode = html.substring(tagStart, position);
+        return charactersInNode.lastIndexOf('{') > charactersInNode.lastIndexOf('}');
+    }
 }
diff --git a/packages/language-server/test/lib/documents/parseHtml.test.ts b/packages/language-server/test/lib/documents/parseHtml.test.ts
index 0d6d814a5..875717f85 100644
--- a/packages/language-server/test/lib/documents/parseHtml.test.ts
+++ b/packages/language-server/test/lib/documents/parseHtml.test.ts
@@ -37,6 +37,22 @@ describe('parseHtml', () => {
         );
     });
 
+    it('ignore less than operator inside control flow moustache', () => {
+        testRootElements(
+            parseHtml(
+                `<Foo>
+                    {#if 1 < 2 && innWidth <= 700}
+                        <Foo>
+                            <SelfClosing />
+                        </Foo>
+                        <div>hi</div>
+                    {/if}
+                </Foo>
+                <style></style>`
+            )
+        );
+    });
+
     it('ignore less than operator inside moustache with tag not self closed', () => {
         testRootElements(
             parseHtml(
@@ -65,6 +81,19 @@ describe('parseHtml', () => {
         );
     });
 
+    it('parse baseline html with control flow moustache', () => {
+        testRootElements(
+            parseHtml(
+                `<Foo>
+                    {#if true}
+                        foo
+                    {/if}
+                </Foo>
+                <style></style>`
+            )
+        );
+    });
+
     it('parse baseline html with possibly un-closed start tag', () => {
         testRootElements(
             parseHtml(