Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add regex literal support #452

Merged
merged 6 commits into from
Sep 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ See the following pages for more information
- [Namespaces](namespaces.md)
- [Null-coalescing operator](null-coalescing-operator.md)
- [Plugins](plugins.md)
- [Regular Expression Literals](regex-literals.md)
- [Source Literals](source-literals.md)
- [Template Strings (Template Literals)](template-strings.md)
- [Ternary (Conditional) Operator](ternary-operator.md)
13 changes: 13 additions & 0 deletions docs/regex-literals.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Regular Expression Literals
You can create a regular expression literal in BrighterScript. This simplifies pattern writing and improves readability.

Example:
```BrighterScript
print /hello world/ig
```

transpiles to:

```BrightScript
print CreateObject("roRegex", "hello world", "ig")
```
38 changes: 38 additions & 0 deletions src/lexer/Lexer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1222,4 +1222,42 @@ describe('lexer', () => {
TokenKind.Eof
]);
});

describe('regular expression literals', () => {
/**
 * Lexes every supplied pattern on its own and asserts that the first token
 * produced has exactly the same text as the pattern's source form.
 * `RegExp` arguments are converted with `toString()` so they can be written
 * as native literals in the test body.
 */
function testRegex(...regexps: Array<string | RegExp>) {
    const expected = regexps.map(pattern => pattern.toString());
    const actual = expected.map(source => Lexer.scan(source).tokens[0].text);
    expect(actual).to.eql(expected);
}

it('recognizes regex literals', () => {
testRegex(
/simple/,
/SimpleWithValidFlags/g,
/UnknownFlags/gi,
TwitchBronBron marked this conversation as resolved.
Show resolved Hide resolved
/with spaces/s,
/with(parens)and[squarebraces]/,
//lots of special characters
/.*()^$@/,
//captures quote char
/"/
);
});

it('handles escape characters properly', () => {
testRegex(
//an escaped forward slash right next to the end-regexp forwardslash
/\//,
/\r/,
/\n/,
/\r\n/,
//a literal backslash in front of an escape backslash
/\\\n/
);
});
});
});
71 changes: 63 additions & 8 deletions src/lexer/Lexer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,17 @@ export class Lexer {
}
},
'/': function (this: Lexer) {
switch (this.peek()) {
case '=':
this.advance();
this.addToken(TokenKind.ForwardslashEqual);
break;
default:
this.addToken(TokenKind.Forwardslash);
break;
//try capturing a regex literal. If that doesn't work, fall back to normal handling
//(regexLiteral() restores the scan position itself when it returns false, so the
//peek() below still looks at the character right after the slash)
//NOTE(review): the regex attempt runs for every `/`, so two division operators on the
//same line (e.g. `a / b / c`) look like they would be captured as one regex literal.
//Confirm whether the preceding token should be taken into account here.
if (!this.regexLiteral()) {
switch (this.peek()) {
//`/=` compound assignment
case '=':
this.advance();
this.addToken(TokenKind.ForwardslashEqual);
break;
//plain `/` operator
default:
this.addToken(TokenKind.Forwardslash);
break;
}
}
},
'\\': function (this: Lexer) {
Expand Down Expand Up @@ -384,6 +387,19 @@ export class Lexer {
this.columnEnd++;
}

/**
 * Saved scan positions for speculative (lookahead) lexing. Each entry records
 * the `current` offset and `columnEnd` at the moment `pushLookahead` was called.
 */
private lookaheadStack: Array<{ current: number; columnEnd: number }> = [];

/**
 * Remember the current scan position so it can be restored by `popLookahead`.
 */
private pushLookahead() {
    const { current, columnEnd } = this;
    this.lookaheadStack.push({ current, columnEnd });
}

/**
 * Rewind the scan position to the most recently saved lookahead entry and discard that entry.
 */
private popLookahead() {
    const snapshot = this.lookaheadStack.pop();
    this.current = snapshot.current;
    this.columnEnd = snapshot.columnEnd;
}

/**
* Returns the character at position `current` or a null character if we've reached the end of
* input.
Expand Down Expand Up @@ -927,6 +943,45 @@ export class Lexer {
}
}

/**
 * Capture a regex literal token. Returns false if not found.
 * This is lookahead lexing which might technically belong in the parser,
 * but it's easy enough to do here in the lexer.
 *
 * The scan position is saved before scanning. On failure (a newline or the end
 * of input is reached before an unescaped closing `/`) the position is restored
 * so the caller can lex the slash as an ordinary operator. On success a
 * `RegexLiteral` token is added and the saved position is discarded.
 *
 * @returns true if a regex literal token was added, false otherwise
 */
private regexLiteral() {
    this.pushLookahead();

    //true only when the previous character was a backslash that was NOT itself escaped
    let nextCharIsEscaped = false;

    //finite loop to prevent infinite loop if something went wrong
    for (let i = this.current; i < this.source.length; i++) {

        //a newline or end of input before the closing slash means there's a syntax error
        //with this regex (or it's not a regex), so quit
        if (this.check('\n') || this.isAtEnd()) {
            break;

        //this char was escaped by the previous backslash: consume it verbatim. Resetting the
        //flag here is what lets `\\` (an escaped backslash) be followed by the closing slash
        } else if (nextCharIsEscaped) {
            this.advance();
            nextCharIsEscaped = false;

        //if we reached the end of the regex, consume any flags
        } else if (this.check('/')) {
            this.advance();
            //consume all flag-like chars (let the parser validate the actual values)
            while (/[a-z]/i.test(this.peek())) {
                this.advance();
            }
            //the lookahead succeeded, so drop the saved position instead of leaking it on the stack
            this.lookaheadStack.pop();
            //finalize the regex literal and EXIT
            this.addToken(TokenKind.RegexLiteral);
            return true;

        //ordinary char; an unescaped backslash escapes whatever comes next
        } else {
            nextCharIsEscaped = this.check('\\');
            this.advance();
        }
    }
    this.popLookahead();
    return false;
}

/**
* Creates a `Token` and adds it to the `tokens` array.
* @param kind the type of token to produce.
Expand Down
1 change: 1 addition & 0 deletions src/lexer/TokenKind.ts
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ export enum TokenKind {
DoubleLiteral = 'DoubleLiteral',
LongIntegerLiteral = 'LongIntegerLiteral',
EscapedCharCodeLiteral = 'EscapedCharCodeLiteral', //this is used to capture things like `\n`, `\r\n` in template strings
RegexLiteral = 'RegexLiteral',

//types
Void = 'Void',
Expand Down
43 changes: 43 additions & 0 deletions src/parser/Expression.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,49 @@ export class NullCoalescingExpression extends Expression {
}
}

/**
 * Expression node for a regular expression literal such as `/hello/gi`.
 * Transpiles to a `CreateObject("roRegex", pattern, flags)` call.
 */
export class RegexLiteralExpression extends Expression {
    public constructor(
        public tokens: {
            regexLiteral: Token;
        }
    ) {
        super();
    }

    /**
     * The range of the literal is simply the range of its single token.
     */
    public get range() {
        return this.tokens.regexLiteral.range;
    }

    /**
     * Split the literal's text into pattern and flags and emit the `roRegex` construction call.
     * Double quotes inside the pattern are emitted as a `chr(34)` string concatenation.
     */
    public transpile(state: BrsTranspileState): TranspileResult {
        const literal = this.tokens.regexLiteral?.text ?? '';

        //flags are any letters following the final slash
        const trailingFlags = /\/([a-z]+)$/i.exec(literal);
        const flags = trailingFlags ? trailingFlags[1] : '';

        //the literal without its flags (still wrapped in slashes)
        const delimited = trailingFlags
            ? literal.substring(0, trailingFlags.index + 1)
            : literal;

        const pattern = delimited
            //remove leading and trailing slashes
            .substring(1, delimited.length - 1)
            //escape quotemarks
            .replace(/"/g, '" + chr(34) + "');

        return [
            state.sourceNode(this.tokens.regexLiteral, [
                'CreateObject("roRegex", ',
                `"${pattern}", `,
                `"${flags}"`,
                ')'
            ])
        ];
    }

    walk(visitor: WalkVisitor, options: WalkOptions) {
        //nothing to walk
    }
}

// eslint-disable-next-line @typescript-eslint/consistent-indexed-object-style
type ExpressionValue = string | number | boolean | Expression | ExpressionValue[] | { [key: string]: ExpressionValue };

Expand Down
9 changes: 9 additions & 0 deletions src/parser/Parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ import { Logger } from '../Logger';
import { isAnnotationExpression, isCallExpression, isCallfuncExpression, isClassMethodStatement, isCommentStatement, isDottedGetExpression, isIfStatement, isIndexedGetExpression, isVariableExpression } from '../astUtils/reflection';
import { createVisitor, WalkMode } from '../astUtils/visitors';
import { createStringLiteral, createToken } from '../astUtils/creators';
import { RegexLiteralExpression } from '.';

export class Parser {
/**
Expand Down Expand Up @@ -1396,6 +1397,12 @@ export class Parser {
return new NullCoalescingExpression(test, questionQuestionToken, alternate);
}

/**
 * Consume the current token (the caller has already verified it is a `RegexLiteral`)
 * and wrap it in a `RegexLiteralExpression`.
 */
private regexLiteralExpression() {
    //regex literals are a BrighterScript-only feature, so flag them the same way template strings are
    this.warnIfNotBrighterScriptMode('regular expression literal');
    const regexLiteral = this.advance();
    return new RegexLiteralExpression({ regexLiteral });
}

private templateString(isTagged: boolean): TemplateStringExpression | TaggedTemplateStringExpression {
this.warnIfNotBrighterScriptMode('template string');

Expand Down Expand Up @@ -2544,6 +2551,8 @@ export class Parser {
return new VariableExpression(token, this.currentNamespaceName);
case this.checkAny(TokenKind.Function, TokenKind.Sub):
return this.anonymousFunction();
case this.check(TokenKind.RegexLiteral):
return this.regexLiteralExpression();
case this.check(TokenKind.Comment):
return new CommentStatement([this.advance()]);
default:
Expand Down
67 changes: 67 additions & 0 deletions src/parser/tests/expression/RegexLiteralExpression.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import { Program } from '../../../Program';
import { standardizePath as s } from '../../../util';
import { getTestTranspile } from '../../../testHelpers.spec';

describe('RegexLiteralExpression', () => {
let rootDir = s`${process.cwd()}/rootDir`;
let program: Program;
let testTranspile = getTestTranspile(() => [program, rootDir]);

beforeEach(() => {
program = new Program({ rootDir: rootDir });
});
afterEach(() => {
program.dispose();
});

describe('transpile', () => {
it('captures flags', () => {
testTranspile(`
sub main()
print /hello/gi
end sub
`, `
sub main()
print CreateObject("roRegex", "hello", "gi")
end sub
`);
});

it('handles when no flags', () => {
testTranspile(`
sub main()
print /hello/
end sub
`, `
sub main()
print CreateObject("roRegex", "hello", "")
end sub
`);
});

it('handles weird escapes', () => {
testTranspile(`
sub main()
print /\\r\\n\\//
chrisdp marked this conversation as resolved.
Show resolved Hide resolved
end sub
`, `
sub main()
print CreateObject("roRegex", "\\r\\n\\/", "")
end sub
`);
});

it('escapes quotemark', () => {
testTranspile(`
sub main()
print /"/
end sub
`, `
sub main()
print CreateObject("roRegex", "" + chr(34) + "", "")
end sub
`);
});

});
});