Skip to content

Commit

Permalink
Add regex literal support (#452)
Browse files Browse the repository at this point in the history
* Basic regex literal lexer support.

* Add lexer support for escaped regexp chars

* Add parser and transpile functionality

* Add very basic docs about regex literals

* Verify lexer handles quotemark properly

* Escape quotemarks
  • Loading branch information
TwitchBronBron committed Sep 27, 2021
1 parent 1818c36 commit 22c8a30
Show file tree
Hide file tree
Showing 8 changed files with 235 additions and 8 deletions.
1 change: 1 addition & 0 deletions docs/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ See the following pages for more information
- [Namespaces](namespaces.md)
- [Null-coalescing operator](null-coalescing-operator.md)
- [Plugins](plugins.md)
- [Regular Expression Literals](regex-literals.md)
- [Source Literals](source-literals.md)
- [Template Strings (Template Literals)](template-strings.md)
- [Ternary (Conditional) Operator](ternary-operator.md)
13 changes: 13 additions & 0 deletions docs/regex-literals.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Regular Expression Literals
You can create a regular expression literal in BrighterScript. This simplifies pattern writing and improves readability.

Example:
```BrighterScript
print /hello world/ig
```

transpiles to:

```BrightScript
print CreateObject("roRegex", "hello world", "ig")
```
38 changes: 38 additions & 0 deletions src/lexer/Lexer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1222,4 +1222,42 @@ describe('lexer', () => {
TokenKind.Eof
]);
});

describe('regular expression literals', () => {
    /**
     * Lexes each supplied pattern on its own and asserts that the text of the
     * first token produced equals the full pattern source (delimiters and flags included).
     */
    function testRegex(...regexps: Array<string | RegExp>) {
        const expected = regexps.map(pattern => pattern.toString());
        const actual = expected.map(source => Lexer.scan(source).tokens[0].text);
        expect(actual).to.eql(expected);
    }

    it('recognizes regex literals', () => {
        const patterns = [
            /simple/,
            /SimpleWithValidFlags/g,
            /UnknownFlags/gi,
            /with spaces/s,
            /with(parens)and[squarebraces]/,
            //lots of special characters
            /.*()^$@/,
            //captures quote char
            /"/
        ];
        testRegex(...patterns);
    });

    it('handles escape characters properly', () => {
        const patterns = [
            //an escaped forward slash right next to the end-regexp forwardslash
            /\//,
            /\r/,
            /\n/,
            /\r\n/,
            //a literal backslash in front of an escape backslash
            /\\\n/
        ];
        testRegex(...patterns);
    });
});
});
71 changes: 63 additions & 8 deletions src/lexer/Lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,17 @@ export class Lexer {
}
},
'/': function (this: Lexer) {
    //a regex literal gets first chance at the slash; if one was captured there is nothing left to do
    if (this.regexLiteral()) {
        return;
    }
    //not a regex literal, so this is either `/=` or a plain `/`
    if (this.peek() === '=') {
        this.advance();
        this.addToken(TokenKind.ForwardslashEqual);
    } else {
        this.addToken(TokenKind.Forwardslash);
    }
},
'\\': function (this: Lexer) {
Expand Down Expand Up @@ -384,6 +387,19 @@ export class Lexer {
this.columnEnd++;
}

/**
 * Stack of saved scanner positions (`current` and `columnEnd`).
 * Entries are added by `pushLookahead` and restored by `popLookahead`.
 */
private lookaheadStack = [] as Array<{ current: number; columnEnd: number }>;
/**
 * Save the scanner's `current` and `columnEnd` values on the lookahead stack
 * so a speculative scan can later be rewound with `popLookahead`.
 */
private pushLookahead() {
    const { current, columnEnd } = this;
    this.lookaheadStack.push({ current, columnEnd });
}
/**
 * Remove the most recently saved position from the lookahead stack and
 * rewind `current` and `columnEnd` to it.
 */
private popLookahead() {
    const saved = this.lookaheadStack.pop();
    this.current = saved.current;
    this.columnEnd = saved.columnEnd;
}

/**
* Returns the character at position `current` or a null character if we've reached the end of
* input.
Expand Down Expand Up @@ -927,6 +943,45 @@ export class Lexer {
}
}

/**
 * Capture a regex literal token. Returns false if not found.
 * This is lookahead lexing which might technically belong in the parser,
 * but it's easy enough to do here in the lexer.
 *
 * Scanning starts at the current position and runs to the next unescaped `/`
 * on the same line, then consumes any trailing flag letters. If a newline or the
 * end of input is reached first, the scanner is rewound to where it started.
 *
 * NOTE(review): the `/` handler tries this for every slash, so two division operators
 * on one line (`a / b / c`) look like a regex literal to this scan. Confirm whether
 * that needs a guard based on the preceding token.
 *
 * @returns true if a `RegexLiteral` token was added, false if nothing was consumed
 */
private regexLiteral() {
    this.pushLookahead();

    //true when the previous char was an unescaped backslash, meaning the current char is escaped
    let isEscaped = false;

    //bounded loop to prevent an infinite loop if something went wrong
    for (let i = this.current; i < this.source.length; i++) {

        //a newline (escaped or not) or the end of input before the closing slash means
        //there's a syntax error with this regex (or it's not a regex), so quit
        if (this.check('\n') || this.isAtEnd()) {
            break;
        }

        if (isEscaped) {
            //this char was escaped by the preceding backslash, so take it literally.
            //An escaped backslash (`\\`) must NOT escape the char that follows it
            this.advance();
            isEscaped = false;
        } else if (this.check('/')) {
            //we reached the end of the regex
            this.advance();
            //consume all flag-like chars (let the parser validate the actual values)
            while (/[a-z]/i.exec(this.peek())) {
                this.advance();
            }
            //the lookahead succeeded: drop the saved position without rewinding so the stack doesn't grow
            this.lookaheadStack.pop();
            //finalize the regex literal and EXIT
            this.addToken(TokenKind.RegexLiteral);
            return true;
        } else if (this.check('\\')) {
            this.advance();
            isEscaped = true;
        } else {
            this.advance();
        }
    }
    this.popLookahead();
    return false;
}

/**
* Creates a `Token` and adds it to the `tokens` array.
* @param kind the type of token to produce.
Expand Down
1 change: 1 addition & 0 deletions src/lexer/TokenKind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export enum TokenKind {
DoubleLiteral = 'DoubleLiteral',
LongIntegerLiteral = 'LongIntegerLiteral',
EscapedCharCodeLiteral = 'EscapedCharCodeLiteral', //this is used to capture things like `\n`, `\r\n` in template strings
RegexLiteral = 'RegexLiteral',

//types
Void = 'Void',
Expand Down
43 changes: 43 additions & 0 deletions src/parser/Expression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,49 @@ export class NullCoalescingExpression extends Expression {
}
}

/**
 * Expression node for a regular expression literal such as `/hello/gi`.
 * Transpiles to a `CreateObject("roRegex", pattern, flags)` call.
 */
export class RegexLiteralExpression extends Expression {
    public constructor(
        public tokens: {
            regexLiteral: Token;
        }
    ) {
        super();
    }

    /**
     * The range of the regex literal token
     */
    public get range() {
        const { regexLiteral } = this.tokens;
        return regexLiteral.range;
    }

    /**
     * Split the literal's text into pattern and flags, and emit the `roRegex` constructor call.
     */
    public transpile(state: BrsTranspileState): TranspileResult {
        const raw = this.tokens.regexLiteral?.text ?? '';

        //flags are any letters following the final slash
        const flagMatch = /\/([a-z]+)$/i.exec(raw);
        const flags = flagMatch ? flagMatch[1] : '';
        //drop the flags (if any) but keep the closing slash
        const delimited = flagMatch ? raw.substring(0, flagMatch.index + 1) : raw;

        const pattern = delimited
            //strip the leading and trailing slashes
            .substring(1, delimited.length - 1)
            //a quotemark can't appear inside the emitted string literal, so splice in chr(34) instead
            .replace(/"/g, '" + chr(34) + "');

        const call = state.sourceNode(this.tokens.regexLiteral, [
            'CreateObject("roRegex", ',
            `"${pattern}", `,
            `"${flags}"`,
            ')'
        ]);
        return [call];
    }

    walk(visitor: WalkVisitor, options: WalkOptions) {
        //a regex literal has no child expressions, so there is nothing to walk
    }
}

/**
 * A recursive value type: a string, number, boolean, or `Expression`,
 * or an array / string-keyed object whose members are themselves `ExpressionValue`s.
 */
// eslint-disable-next-line @typescript-eslint/consistent-indexed-object-style
type ExpressionValue = string | number | boolean | Expression | ExpressionValue[] | { [key: string]: ExpressionValue };

Expand Down
9 changes: 9 additions & 0 deletions src/parser/Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ import { Logger } from '../Logger';
import { isAnnotationExpression, isCallExpression, isCallfuncExpression, isClassMethodStatement, isCommentStatement, isDottedGetExpression, isIfStatement, isIndexedGetExpression, isVariableExpression } from '../astUtils/reflection';
import { createVisitor, WalkMode } from '../astUtils/visitors';
import { createStringLiteral, createToken } from '../astUtils/creators';
import { RegexLiteralExpression } from '.';

export class Parser {
/**
Expand Down Expand Up @@ -1396,6 +1397,12 @@ export class Parser {
return new NullCoalescingExpression(test, questionQuestionToken, alternate);
}

/**
 * Build a `RegexLiteralExpression` from the current token, consuming it.
 * The expression switch calls this only after checking that the current token is a `RegexLiteral`.
 */
private regexLiteralExpression() {
    //regex literals are BrighterScript syntax, so flag them the same way `templateString` does
    //NOTE(review): assumes warnIfNotBrighterScriptMode only reports a diagnostic and does not abort parsing - confirm
    this.warnIfNotBrighterScriptMode('regular expression literal');
    return new RegexLiteralExpression({
        regexLiteral: this.advance()
    });
}

private templateString(isTagged: boolean): TemplateStringExpression | TaggedTemplateStringExpression {
this.warnIfNotBrighterScriptMode('template string');

Expand Down Expand Up @@ -2544,6 +2551,8 @@ export class Parser {
return new VariableExpression(token, this.currentNamespaceName);
case this.checkAny(TokenKind.Function, TokenKind.Sub):
return this.anonymousFunction();
case this.check(TokenKind.RegexLiteral):
return this.regexLiteralExpression();
case this.check(TokenKind.Comment):
return new CommentStatement([this.advance()]);
default:
Expand Down
67 changes: 67 additions & 0 deletions src/parser/tests/expression/RegexLiteralExpression.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { Program } from '../../../Program';
import { standardizePath as s } from '../../../util';
import { getTestTranspile } from '../../../testHelpers.spec';

//Transpile tests for regex literal expressions: each case feeds a small BrighterScript
//program to `testTranspile` along with the output it is expected to produce.
describe('RegexLiteralExpression', () => {
let rootDir = s`${process.cwd()}/rootDir`;
let program: Program;
let testTranspile = getTestTranspile(() => [program, rootDir]);

//every test gets a fresh Program, which is disposed afterwards
beforeEach(() => {
program = new Program({ rootDir: rootDir });
});
afterEach(() => {
program.dispose();
});

describe('transpile', () => {
//flags after the closing slash become the third CreateObject argument
it('captures flags', () => {
testTranspile(`
sub main()
print /hello/gi
end sub
`, `
sub main()
print CreateObject("roRegex", "hello", "gi")
end sub
`);
});

//with no flags, an empty string is still passed as the flags argument
it('handles when no flags', () => {
testTranspile(`
sub main()
print /hello/
end sub
`, `
sub main()
print CreateObject("roRegex", "hello", "")
end sub
`);
});

//backslashes are doubled below because the fixtures are JS template literals;
//the source under test is a regex containing \r, \n and an escaped forward slash
it('handles weird escapes', () => {
testTranspile(`
sub main()
print /\\r\\n\\//
end sub
`, `
sub main()
print CreateObject("roRegex", "\\r\\n\\/", "")
end sub
`);
});

//a quotemark inside the pattern is emitted as a chr(34) concatenation
it('escapes quotemark', () => {
testTranspile(`
sub main()
print /"/
end sub
`, `
sub main()
print CreateObject("roRegex", "" + chr(34) + "", "")
end sub
`);
});

});
});

0 comments on commit 22c8a30

Please sign in to comment.