From 4c3c6cade5eecee65c76d82a8996ffe406dd00b3 Mon Sep 17 00:00:00 2001
From: Bronley <bronley@gmail.com>
Date: Fri, 24 Sep 2021 08:01:49 -0400
Subject: [PATCH 1/6] Basic regex literal lexer support.

---
 src/lexer/Lexer.spec.ts | 22 ++++++++++++
 src/lexer/Lexer.ts      | 75 ++++++++++++++++++++++++++++++++++++-----
 src/lexer/TokenKind.ts  |  1 +
 3 files changed, 90 insertions(+), 8 deletions(-)

diff --git a/src/lexer/Lexer.spec.ts b/src/lexer/Lexer.spec.ts
index c12a6ba30..c5ac046fc 100644
--- a/src/lexer/Lexer.spec.ts
+++ b/src/lexer/Lexer.spec.ts
@@ -1222,4 +1222,26 @@ describe('lexer', () => {
             TokenKind.Eof
         ]);
     });
+
+    describe('regular expression literals', () => {
+        function testRegex(...regexps) {
+            const results = [] as string[];
+            for (const regexp of regexps) {
+                const { tokens } = Lexer.scan(regexp);
+                results.push(tokens[0].text);
+            }
+            expect(results).to.eql(regexps);
+        }
+
+        it('recognizes regex literals', () => {
+            testRegex(
+                '/simple/',
+                '/SimpleWithValidFlags/imsx',
+                '/UnknownFlags/VUI',
+                '/with spaces/andflags',
+                '/with(parens)and[squarebraces]/',
+                '/*()^$@/'
+            );
+        });
+    });
 });
diff --git a/src/lexer/Lexer.ts b/src/lexer/Lexer.ts
index 962f4ab1d..d10c2c95f 100644
--- a/src/lexer/Lexer.ts
+++ b/src/lexer/Lexer.ts
@@ -199,14 +199,17 @@ export class Lexer {
             }
         },
         '/': function (this: Lexer) {
-            switch (this.peek()) {
-                case '=':
-                    this.advance();
-                    this.addToken(TokenKind.ForwardslashEqual);
-                    break;
-                default:
-                    this.addToken(TokenKind.Forwardslash);
-                    break;
+            //try capturing a regex literal. If that doesn't work, fall back to normal handling
+            if (!this.regexLiteral()) {
+                switch (this.peek()) {
+                    case '=':
+                        this.advance();
+                        this.addToken(TokenKind.ForwardslashEqual);
+                        break;
+                    default:
+                        this.addToken(TokenKind.Forwardslash);
+                        break;
+                }
             }
         },
         '\\': function (this: Lexer) {
@@ -384,6 +387,19 @@ export class Lexer {
         this.columnEnd++;
     }
 
+    private lookaheadStack = [] as Array<{ current: number; columnEnd: number }>;
+    private pushLookahead() {
+        this.lookaheadStack.push({
+            current: this.current,
+            columnEnd: this.columnEnd
+        });
+    }
+    private popLookahead() {
+        const { current, columnEnd } = this.lookaheadStack.pop();
+        this.current = current;
+        this.columnEnd = columnEnd;
+    }
+
     /**
      * Returns the character at position `current` or a null character if we've reached the end of
      * input.
@@ -835,6 +851,17 @@ export class Lexer {
         return candidates.includes(this.source.charAt(this.current));
     }
 
+    /**
+     * Advance if the current token matches one of the candidates
+     */
+    private advanceIf(...candidates: string[]) {
+        if (this.check(...candidates)) {
+            this.advance();
+            return true;
+        }
+        return false;
+    }
+
     /**
      * Check the previous character
      */
@@ -927,6 +954,38 @@ export class Lexer {
         }
     }
 
+    /**
+     * Capture a regex literal token. Returns false if not found.
+     * This is lookahead lexing which might technically belong in the parser,
+     * but it's easy enough to do here in the lexer
+     */
+    private regexLiteral() {
+        this.pushLookahead();
+
+        //finite loop to prevent infinite loop if something went wrong
+        for (let i = this.current; i < this.source.length; i++) {
+
+            //if we reached the end of the regex, consume any flags
+            if (this.advanceIf('/')) {
+                //consume all flag-like chars (let the parser validate the actual values)
+                while (/[a-z]/i.exec(this.peek())) {
+                    this.advance();
+                }
+                //finalize the regex literal and EXIT
+                this.addToken(TokenKind.RegexLiteral);
+                return true;
+
+                //if we found a non-escaped newline, there's a syntax error with this regex (or it's not a regex), so quit
+            } else if (this.check('\n')) {
+                break;
+            } else {
+                this.advance();
+            }
+        }
+        this.popLookahead();
+        return false;
+    }
+
     /**
      * Creates a `Token` and adds it to the `tokens` array.
      * @param kind the type of token to produce.
diff --git a/src/lexer/TokenKind.ts b/src/lexer/TokenKind.ts
index f01bb4b0c..70ed8d7df 100644
--- a/src/lexer/TokenKind.ts
+++ b/src/lexer/TokenKind.ts
@@ -52,6 +52,7 @@ export enum TokenKind {
     DoubleLiteral = 'DoubleLiteral',
     LongIntegerLiteral = 'LongIntegerLiteral',
     EscapedCharCodeLiteral = 'EscapedCharCodeLiteral', //this is used to capture things like `\n`, `\r\n` in template strings
+    RegexLiteral = 'RegexLiteral',
 
     //types
     Void = 'Void',

From d71e614c3990c304479783e21660298f0127cce0 Mon Sep 17 00:00:00 2001
From: Bronley <bronley@gmail.com>
Date: Fri, 24 Sep 2021 09:11:09 -0400
Subject: [PATCH 2/6] Add lexer support for escaped regexp chars

---
 src/lexer/Lexer.spec.ts | 30 ++++++++++++++++++++++--------
 src/lexer/Lexer.ts      | 22 +++++++++-------------
 2 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/src/lexer/Lexer.spec.ts b/src/lexer/Lexer.spec.ts
index c5ac046fc..24dab7192 100644
--- a/src/lexer/Lexer.spec.ts
+++ b/src/lexer/Lexer.spec.ts
@@ -1224,10 +1224,11 @@ describe('lexer', () => {
     });
 
     describe('regular expression literals', () => {
-        function testRegex(...regexps) {
+        function testRegex(...regexps: Array<string | RegExp>) {
+            regexps = regexps.map(x => x.toString());
             const results = [] as string[];
             for (const regexp of regexps) {
-                const { tokens } = Lexer.scan(regexp);
+                const { tokens } = Lexer.scan(regexp as string);
                 results.push(tokens[0].text);
             }
             expect(results).to.eql(regexps);
@@ -1235,12 +1236,25 @@ describe('lexer', () => {
 
         it('recognizes regex literals', () => {
             testRegex(
-                '/simple/',
-                '/SimpleWithValidFlags/imsx',
-                '/UnknownFlags/VUI',
-                '/with spaces/andflags',
-                '/with(parens)and[squarebraces]/',
-                '/*()^$@/'
+                /simple/,
+                /SimpleWithValidFlags/g,
+                /UnknownFlags/gi,
+                /with spaces/s,
+                /with(parens)and[squarebraces]/,
+                //lots of special characters
+                /.*()^$@/
+            );
+        });
+
+        it('handles escape characters properly', () => {
+            testRegex(
+                //an escaped forward slash right next to the end-regexp forwardslash
+                /\//,
+                /\r/,
+                /\n/,
+                /\r\n/,
+                //a literal backslash in front of an escape backslash
+                /\\\n/
             );
         });
     });
diff --git a/src/lexer/Lexer.ts b/src/lexer/Lexer.ts
index d10c2c95f..ae159303f 100644
--- a/src/lexer/Lexer.ts
+++ b/src/lexer/Lexer.ts
@@ -851,17 +851,6 @@ export class Lexer {
         return candidates.includes(this.source.charAt(this.current));
     }
 
-    /**
-     * Advance if the current token matches one of the candidates
-     */
-    private advanceIf(...candidates: string[]) {
-        if (this.check(...candidates)) {
-            this.advance();
-            return true;
-        }
-        return false;
-    }
-
     /**
      * Check the previous character
      */
@@ -962,11 +951,14 @@ export class Lexer {
     private regexLiteral() {
         this.pushLookahead();
 
+        let nextCharNeedsEscaped = false; //true only while the previous char was an unescaped backslash
+
         //finite loop to prevent infinite loop if something went wrong
         for (let i = this.current; i < this.source.length; i++) {
 
             //if we reached the end of the regex, consume any flags
-            if (this.advanceIf('/')) {
+            if (this.check('/') && !nextCharNeedsEscaped) {
+                this.advance();
                 //consume all flag-like chars (let the parser validate the actual values)
                 while (/[a-z]/i.exec(this.peek())) {
                     this.advance();
@@ -976,10 +968,14 @@ export class Lexer {
                 return true;
 
                 //if we found a non-escaped newline, there's a syntax error with this regex (or it's not a regex), so quit
-            } else if (this.check('\n')) {
+            } else if (this.check('\n') || this.isAtEnd()) {
                 break;
+            } else if (this.check('\\')) {
+                this.advance();
+                nextCharNeedsEscaped = !nextCharNeedsEscaped; //toggle: an escaped backslash (`\\`) is a literal and must not escape the char after it (e.g. `/\\/`)
             } else {
                 this.advance();
+                nextCharNeedsEscaped = false;
             }
         }
         this.popLookahead();

From f8a6c1d7ced24d7649691eae1df782161c29e380 Mon Sep 17 00:00:00 2001
From: Bronley <bronley@gmail.com>
Date: Fri, 24 Sep 2021 10:29:28 -0400
Subject: [PATCH 3/6] Add parser and transpile functionality

---
 src/parser/Expression.ts                      | 40 ++++++++++++++
 src/parser/Parser.ts                          |  9 ++++
 .../expression/RegexLiteralExpression.spec.ts | 54 +++++++++++++++++++
 3 files changed, 103 insertions(+)
 create mode 100644 src/parser/tests/expression/RegexLiteralExpression.spec.ts

diff --git a/src/parser/Expression.ts b/src/parser/Expression.ts
index aef007f9a..073002b3e 100644
--- a/src/parser/Expression.ts
+++ b/src/parser/Expression.ts
@@ -1406,6 +1406,46 @@ export class NullCoalescingExpression extends Expression {
     }
 }
 
+export class RegexLiteralExpression extends Expression {
+    public constructor(
+        public tokens: {
+            regexLiteral: Token;
+        }
+    ) {
+        super();
+    }
+
+    public get range() {
+        return this.tokens.regexLiteral.range;
+    }
+
+    public transpile(state: BrsTranspileState): TranspileResult {
+        let text = this.tokens.regexLiteral?.text ?? '';
+        let flags = '';
+        //get any flags from the end
+        const flagMatch = /\/([a-z]+)$/i.exec(text);
+        if (flagMatch) {
+            text = text.substring(0, flagMatch.index + 1);
+            flags = flagMatch[1];
+        }
+        //remove leading and trailing slashes
+        const pattern = text.substring(1, text.length - 1);
+
+        return [
+            state.sourceNode(this.tokens.regexLiteral, [
+                'CreateObject("roRegex", ',
+                `"${pattern}", `,
+                `"${flags}"`,
+                ')'
+            ])
+        ];
+    }
+
+    walk(visitor: WalkVisitor, options: WalkOptions) {
+        //nothing to walk
+    }
+}
+
 // eslint-disable-next-line @typescript-eslint/consistent-indexed-object-style
 type ExpressionValue = string | number | boolean | Expression | ExpressionValue[] | { [key: string]: ExpressionValue };
 
diff --git a/src/parser/Parser.ts b/src/parser/Parser.ts
index 795d7886b..54faa418f 100644
--- a/src/parser/Parser.ts
+++ b/src/parser/Parser.ts
@@ -93,6 +93,7 @@ import { Logger } from '../Logger';
 import { isAnnotationExpression, isCallExpression, isCallfuncExpression, isClassMethodStatement, isCommentStatement, isDottedGetExpression, isIfStatement, isIndexedGetExpression, isVariableExpression } from '../astUtils/reflection';
 import { createVisitor, WalkMode } from '../astUtils/visitors';
 import { createStringLiteral, createToken } from '../astUtils/creators';
+import { RegexLiteralExpression } from '.';
 
 export class Parser {
     /**
@@ -1396,6 +1397,12 @@ export class Parser {
         return new NullCoalescingExpression(test, questionQuestionToken, alternate);
     }
 
+    private regexLiteralExpression() {
+        return new RegexLiteralExpression({
+            regexLiteral: this.advance()
+        });
+    }
+
     private templateString(isTagged: boolean): TemplateStringExpression | TaggedTemplateStringExpression {
         this.warnIfNotBrighterScriptMode('template string');
 
@@ -2544,6 +2551,8 @@ export class Parser {
                 return new VariableExpression(token, this.currentNamespaceName);
             case this.checkAny(TokenKind.Function, TokenKind.Sub):
                 return this.anonymousFunction();
+            case this.check(TokenKind.RegexLiteral):
+                return this.regexLiteralExpression();
             case this.check(TokenKind.Comment):
                 return new CommentStatement([this.advance()]);
             default:
diff --git a/src/parser/tests/expression/RegexLiteralExpression.spec.ts b/src/parser/tests/expression/RegexLiteralExpression.spec.ts
new file mode 100644
index 000000000..1d0bb1f42
--- /dev/null
+++ b/src/parser/tests/expression/RegexLiteralExpression.spec.ts
@@ -0,0 +1,54 @@
+import { Program } from '../../../Program';
+import { standardizePath as s } from '../../../util';
+import { getTestTranspile } from '../../../testHelpers.spec';
+
+describe('RegexLiteralExpression', () => {
+    let rootDir = s`${process.cwd()}/rootDir`;
+    let program: Program;
+    let testTranspile = getTestTranspile(() => [program, rootDir]);
+
+    beforeEach(() => {
+        program = new Program({ rootDir: rootDir });
+    });
+    afterEach(() => {
+        program.dispose();
+    });
+
+    describe('transpile', () => {
+        it('captures flags', () => {
+            testTranspile(`
+                sub main()
+                    print /hello/gi
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "hello", "gi")
+                end sub
+            `);
+        });
+
+        it('handles when no flags', () => {
+            testTranspile(`
+                sub main()
+                    print /hello/
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "hello", "")
+                end sub
+            `);
+        });
+
+        it('handles weird escapes', () => {
+            testTranspile(`
+                sub main()
+                    print /\\r\\n\\//
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "\\r\\n\\/", "")
+                end sub
+            `);
+        });
+    });
+});

From 2dab1f159dca1f047faf1a306de61c7f34e9c5bb Mon Sep 17 00:00:00 2001
From: Bronley <bronley@gmail.com>
Date: Fri, 24 Sep 2021 10:51:52 -0400
Subject: [PATCH 4/6] Add very basic docs about regex literals

---
 docs/readme.md         |  1 +
 docs/regex-literals.md | 13 +++++++++++++
 2 files changed, 14 insertions(+)
 create mode 100644 docs/regex-literals.md

diff --git a/docs/readme.md b/docs/readme.md
index 65da5e877..f3e1a04f1 100644
--- a/docs/readme.md
+++ b/docs/readme.md
@@ -10,6 +10,7 @@ See the following pages for more information
  - [Namespaces](namespaces.md)
  - [Null-coalescing operator](null-coalescing-operator.md)
  - [Plugins](plugins.md)
+ - [Regular Expression Literals](regex-literals.md)
  - [Source Literals](source-literals.md)
  - [Template Strings (Template Literals)](template-strings.md)
  - [Ternary (Conditional) Operator](ternary-operator.md)
diff --git a/docs/regex-literals.md b/docs/regex-literals.md
new file mode 100644
index 000000000..a3feae1b2
--- /dev/null
+++ b/docs/regex-literals.md
@@ -0,0 +1,13 @@
+# Regular Expression Literals
+You can create a regular expression literal in BrighterScript. This simplifies pattern writing and improves readability.
+
+Example:
+```BrighterScript
+print /hello world/ig
+```
+
+transpiles to:
+
+```BrightScript
+print CreateObject("roRegex", "hello world", "ig")
+```

From a6f1d39188dfeda929d595d8b389ea4d4e3a6f6c Mon Sep 17 00:00:00 2001
From: Bronley <bronley@gmail.com>
Date: Fri, 24 Sep 2021 11:27:47 -0400
Subject: [PATCH 5/6] Verify lexer handles quotemark properly

---
 src/lexer/Lexer.spec.ts | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/lexer/Lexer.spec.ts b/src/lexer/Lexer.spec.ts
index 24dab7192..765c92c49 100644
--- a/src/lexer/Lexer.spec.ts
+++ b/src/lexer/Lexer.spec.ts
@@ -1242,7 +1242,9 @@ describe('lexer', () => {
                 /with spaces/s,
                 /with(parens)and[squarebraces]/,
                 //lots of special characters
-                /.*()^$@/
+                /.*()^$@/,
+                //captures quote char
+                /"/
             );
         });
 

From ad07a8882131d41977d8b0cba1614eac63455864 Mon Sep 17 00:00:00 2001
From: Bronley <bronley@gmail.com>
Date: Fri, 24 Sep 2021 13:13:52 -0400
Subject: [PATCH 6/6] Escape quotemarks

---
 src/parser/Expression.ts                            |  7 +++++--
 .../tests/expression/RegexLiteralExpression.spec.ts | 13 +++++++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/parser/Expression.ts b/src/parser/Expression.ts
index 073002b3e..279263be5 100644
--- a/src/parser/Expression.ts
+++ b/src/parser/Expression.ts
@@ -1428,8 +1428,11 @@ export class RegexLiteralExpression extends Expression {
             text = text.substring(0, flagMatch.index + 1);
             flags = flagMatch[1];
         }
-        //remove leading and trailing slashes
-        const pattern = text.substring(1, text.length - 1);
+        let pattern = text
+            //remove leading and trailing slashes
+            .substring(1, text.length - 1)
+            //escape quotemarks
+            .split('"').join('" + chr(34) + "');
 
         return [
             state.sourceNode(this.tokens.regexLiteral, [
diff --git a/src/parser/tests/expression/RegexLiteralExpression.spec.ts b/src/parser/tests/expression/RegexLiteralExpression.spec.ts
index 1d0bb1f42..be5542ad8 100644
--- a/src/parser/tests/expression/RegexLiteralExpression.spec.ts
+++ b/src/parser/tests/expression/RegexLiteralExpression.spec.ts
@@ -50,5 +50,18 @@ describe('RegexLiteralExpression', () => {
                 end sub
             `);
         });
+
+        it('escapes quotemark', () => {
+            testTranspile(`
+                sub main()
+                    print /"/
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "" + chr(34) + "", "")
+                end sub
+            `);
+        });
+
     });
 });