rokucommunity · TwitchBronBron · Sep 27, 2021 · Sep 24, 2021 · Sep 24, 2021 · Sep 24, 2021
diff --git a/docs/readme.md b/docs/readme.md
@@ -10,6 +10,7 @@ See the following pages for more information
  - [Namespaces](namespaces.md)
  - [Null-coalescing operator](null-coalescing-operator.md)
  - [Plugins](plugins.md)
+ - [Regular Expression Literals](regex-literals.md)
  - [Source Literals](source-literals.md)
  - [Template Strings (Template Literals)](template-strings.md)
  - [Ternary (Conditional) Operator](ternary-operator.md)
diff --git a/docs/regex-literals.md b/docs/regex-literals.md
@@ -0,0 +1,13 @@
+# Regular Expression Literals
+You can create a regular expression literal in BrighterScript. This simplifies pattern writing and improves readability.
+
+Example:
+```BrighterScript
+print /hello world/ig
+```
+
+transpiles to:
+
+```BrightScript
+print CreateObject("roRegex", "hello world", "ig")
+```
diff --git a/src/lexer/Lexer.spec.ts b/src/lexer/Lexer.spec.ts
@@ -1222,4 +1222,42 @@ describe('lexer', () => {
             TokenKind.Eof
         ]);
     });
+
+    describe('regular expression literals', () => {
+        /**
+         * Lexes every supplied pattern on its own and asserts that the first token's text
+         * is exactly the original pattern text (RegExp inputs are compared by their `toString()` form).
+         */
+        function testRegex(...regexps: Array<string | RegExp>) {
+            const expectedTexts = regexps.map(regexp => regexp.toString());
+            const actualTexts = expectedTexts.map(source => Lexer.scan(source).tokens[0].text);
+            expect(actualTexts).to.eql(expectedTexts);
+        }
+
+        it('recognizes regex literals', () => {
+            testRegex(
+                /simple/,
+                /SimpleWithValidFlags/g,
+                /UnknownFlags/gi,
+                /with spaces/s,
+                /with(parens)and[squarebraces]/,
+                //a handful of regex metacharacters
+                /.*()^$@/,
+                //a double-quote inside the pattern must stay part of the token
+                /"/
+            );
+        });
+
+        it('handles escape characters properly', () => {
+            testRegex(
+                //escaped forward slash directly before the closing forward slash
+                /\//,
+                /\r/,
+                /\n/,
+                /\r\n/,
+                //escaped (literal) backslash immediately followed by another escape sequence
+                /\\\n/
+            );
+        });
+    });
 });
diff --git a/src/lexer/Lexer.ts b/src/lexer/Lexer.ts
@@ -199,14 +199,17 @@ export class Lexer {
             }
         },
         '/': function (this: Lexer) {
-            switch (this.peek()) {
-                case '=':
-                    this.advance();
-                    this.addToken(TokenKind.ForwardslashEqual);
-                    break;
-                default:
-                    this.addToken(TokenKind.Forwardslash);
-                    break;
+            //a forward slash may open a regex literal, so try that first. `regexLiteral()` returns true
+            //when it has added a RegexLiteral token; when it returns false it has rewound the lexer,
+            //so fall back to the normal `/=` and `/` operator handling
+            if (!this.regexLiteral()) {
+                switch (this.peek()) {
+                    case '=':
+                        this.advance();
+                        this.addToken(TokenKind.ForwardslashEqual);
+                        break;
+                    default:
+                        this.addToken(TokenKind.Forwardslash);
+                        break;
+                }
             }
         },
         '\\': function (this: Lexer) {
@@ -384,6 +387,19 @@ export class Lexer {
         this.columnEnd++;
     }
 
+    /**
+     * Saved lexer positions (`current` and `columnEnd`) for speculative scans, most recent last.
+     */
+    private lookaheadStack = [] as Array<{ current: number; columnEnd: number }>;
+
+    /**
+     * Save the current `current` and `columnEnd` values so a speculative scan can be undone later.
+     */
+    private pushLookahead() {
+        const { current, columnEnd } = this;
+        this.lookaheadStack.push({ current, columnEnd });
+    }
+
+    /**
+     * Rewind `current` and `columnEnd` to the most recently saved values, removing them from the stack.
+     */
+    private popLookahead() {
+        const saved = this.lookaheadStack.pop();
+        this.current = saved.current;
+        this.columnEnd = saved.columnEnd;
+    }
+
     /**
      * Returns the character at position `current` or a null character if we've reached the end of
      * input.
@@ -927,6 +943,45 @@ export class Lexer {
         }
     }
 
+    /**
+     * Try to capture a regex literal token (for example `/hello world/ig`).
+     * Assumes the opening `/` has already been consumed by the caller.
+     *
+     * This is lookahead lexing which might technically belong in the parser,
+     * but it's easy enough to do here in the lexer.
+     *
+     * @returns `true` if a `RegexLiteral` token was added. `false` if no closing `/` was found before
+     * a newline or the end of input, in which case the lexer position is rewound to where it started.
+     */
+    private regexLiteral() {
+        //remember where we started so we can rewind if this turns out not to be a regex literal
+        this.pushLookahead();
+
+        //true when the previous character was an unescaped backslash (i.e. the current character is escaped)
+        let nextCharNeedsEscaped = false;
+
+        //bounded loop (at most one character is consumed per iteration) to prevent an infinite loop if something went wrong
+        for (let i = this.current; i < this.source.length; i++) {
+
+            //if we reached the end of the regex, consume any flags
+            if (this.check('/') && !nextCharNeedsEscaped) {
+                this.advance();
+                //consume all flag-like chars (let the parser validate the actual values)
+                while (/[a-z]/i.test(this.peek())) {
+                    this.advance();
+                }
+                //the lookahead succeeded, so discard the saved position rather than leaving it on the stack
+                this.lookaheadStack.pop();
+                //finalize the regex literal and EXIT
+                this.addToken(TokenKind.RegexLiteral);
+                return true;
+
+                //if we found a newline (or ran out of input), there's a syntax error with this regex (or it's not a regex), so quit
+            } else if (this.check('\n') || this.isAtEnd()) {
+                break;
+
+                //only an unescaped backslash escapes the next character. An escaped backslash (the second
+                //one in `\\`) is a literal character, so it must NOT escape whatever follows it
+            } else if (this.check('\\') && !nextCharNeedsEscaped) {
+                this.advance();
+                nextCharNeedsEscaped = true;
+            } else {
+                this.advance();
+                nextCharNeedsEscaped = false;
+            }
+        }
+        //not a regex literal: rewind so the caller can lex the `/` as an operator
+        this.popLookahead();
+        return false;
+    }
+
     /**
      * Creates a `Token` and adds it to the `tokens` array.
      * @param kind the type of token to produce.

diff --git a/src/lexer/TokenKind.ts b/src/lexer/TokenKind.ts
@@ -52,6 +52,7 @@ export enum TokenKind {
     DoubleLiteral = 'DoubleLiteral',
     LongIntegerLiteral = 'LongIntegerLiteral',
     EscapedCharCodeLiteral = 'EscapedCharCodeLiteral', //this is used to capture things like `\n`, `\r\n` in template strings
+    RegexLiteral = 'RegexLiteral',
 
     //types
     Void = 'Void',

diff --git a/src/parser/Expression.ts b/src/parser/Expression.ts
@@ -1406,6 +1406,49 @@ export class NullCoalescingExpression extends Expression {
     }
 }
 
+/**
+ * A regular expression literal such as `/hello/gi`.
+ * Transpiles to a `CreateObject("roRegex", <pattern>, <flags>)` call.
+ */
+export class RegexLiteralExpression extends Expression {
+    public constructor(public tokens: { regexLiteral: Token }) {
+        super();
+    }
+
+    /**
+     * The range of the regex literal token
+     */
+    public get range() {
+        return this.tokens.regexLiteral.range;
+    }
+
+    /**
+     * Split the token text into pattern and flags, then emit the `roRegex` constructor call.
+     */
+    public transpile(state: BrsTranspileState): TranspileResult {
+        const rawText = this.tokens.regexLiteral?.text ?? '';
+
+        //trailing letters after the final slash are the flags
+        const flagMatch = /\/([a-z]+)$/i.exec(rawText);
+        const flags = flagMatch ? flagMatch[1] : '';
+        //the literal without its flags (still wrapped in the leading and trailing slashes)
+        const slashWrapped = flagMatch ? rawText.substring(0, flagMatch.index + 1) : rawText;
+
+        //drop the surrounding slashes
+        const unwrapped = slashWrapped.substring(1, slashWrapped.length - 1);
+        //a quotemark can't appear inside the emitted string literal, so splice in chr(34) by concatenation
+        const pattern = unwrapped.split('"').join('" + chr(34) + "');
+
+        const chunks = [
+            'CreateObject("roRegex", ',
+            `"${pattern}", `,
+            `"${flags}"`,
+            ')'
+        ];
+        return [
+            state.sourceNode(this.tokens.regexLiteral, chunks)
+        ];
+    }
+
+    walk(visitor: WalkVisitor, options: WalkOptions) {
+        //a regex literal has no child expressions, so there is nothing to walk
+    }
+}
+
 // eslint-disable-next-line @typescript-eslint/consistent-indexed-object-style
 type ExpressionValue = string | number | boolean | Expression | ExpressionValue[] | { [key: string]: ExpressionValue };
 

diff --git a/src/parser/Parser.ts b/src/parser/Parser.ts
@@ -93,6 +93,7 @@ import { Logger } from '../Logger';
 import { isAnnotationExpression, isCallExpression, isCallfuncExpression, isClassMethodStatement, isCommentStatement, isDottedGetExpression, isIfStatement, isIndexedGetExpression, isVariableExpression } from '../astUtils/reflection';
 import { createVisitor, WalkMode } from '../astUtils/visitors';
 import { createStringLiteral, createToken } from '../astUtils/creators';
+import { RegexLiteralExpression } from '.';
 
 export class Parser {
     /**
@@ -1396,6 +1397,12 @@ export class Parser {
         return new NullCoalescingExpression(test, questionQuestionToken, alternate);
     }
 
+    /**
+     * Build a `RegexLiteralExpression` from the current `RegexLiteral` token, consuming that token.
+     * Regex literals are a BrighterScript-only feature, so warn when not in BrighterScript mode
+     * (the same way `templateString` does).
+     */
+    private regexLiteralExpression() {
+        this.warnIfNotBrighterScriptMode('regular expression literal');
+
+        return new RegexLiteralExpression({
+            regexLiteral: this.advance()
+        });
+    }
+
     private templateString(isTagged: boolean): TemplateStringExpression | TaggedTemplateStringExpression {
         this.warnIfNotBrighterScriptMode('template string');
 
@@ -2544,6 +2551,8 @@ export class Parser {
                 return new VariableExpression(token, this.currentNamespaceName);
             case this.checkAny(TokenKind.Function, TokenKind.Sub):
                 return this.anonymousFunction();
+            case this.check(TokenKind.RegexLiteral):
+                return this.regexLiteralExpression();
             case this.check(TokenKind.Comment):
                 return new CommentStatement([this.advance()]);
             default:

diff --git a/src/parser/tests/expression/RegexLiteralExpression.spec.ts b/src/parser/tests/expression/RegexLiteralExpression.spec.ts
@@ -0,0 +1,67 @@
+import { Program } from '../../../Program';
+import { standardizePath as s } from '../../../util';
+import { getTestTranspile } from '../../../testHelpers.spec';
+
+//transpile tests for regex literals: each case feeds BrighterScript source through `testTranspile`
+//and compares against the expected BrightScript output
+describe('RegexLiteralExpression', () => {
+    let rootDir = s`${process.cwd()}/rootDir`;
+    let program: Program;
+    //the helper is handed a callback so that it picks up the fresh `program` created in `beforeEach`
+    let testTranspile = getTestTranspile(() => [program, rootDir]);
+
+    //every test gets its own Program instance, disposed again afterwards
+    beforeEach(() => {
+        program = new Program({ rootDir: rootDir });
+    });
+    afterEach(() => {
+        program.dispose();
+    });
+
+    describe('transpile', () => {
+        //trailing letters after the closing slash become the third `CreateObject` argument
+        it('captures flags', () => {
+            testTranspile(`
+                sub main()
+                    print /hello/gi
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "hello", "gi")
+                end sub
+            `);
+        });
+
+        //with no flags, an empty string is emitted for the flags argument
+        it('handles when no flags', () => {
+            testTranspile(`
+                sub main()
+                    print /hello/
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "hello", "")
+                end sub
+            `);
+        });
+
+        //the doubled backslashes below are JS template-string escapes; the BrighterScript source under
+        //test is `/\r\n\//`, and the backslash sequences are expected to be copied to the output unchanged
+        it('handles weird escapes', () => {
+            testTranspile(`
+                sub main()
+                    print /\\r\\n\\//
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "\\r\\n\\/", "")
+                end sub
+            `);
+        });
+
+        //a quotemark in the pattern is emitted as a `chr(34)` concatenation
+        it('escapes quotemark', () => {
+            testTranspile(`
+                sub main()
+                    print /"/
+                end sub
+            `, `
+                sub main()
+                    print CreateObject("roRegex", "" + chr(34) + "", "")
+                end sub
+            `);
+        });
+
+    });
+});