Skip to content

Commit

Permalink
refactor: simplify lexer and parser
Browse files: browse the repository at this point in the history
  • Loading branch information
lppedd committed Mar 27, 2023
1 parent b6c5bd7 commit 56bfb3e
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 102 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ public void visitIdentifier(@NotNull final PomskyIdentifierPsiElement element) {
return;
}

final var message = "Unknown character class `%s`".formatted(element.getName());
final var message = "Unknown character class '%s'".formatted(element.getName());
holder.newAnnotation(HighlightSeverity.ERROR, message)
.range(element.getTextRange())
.create();
Expand Down
189 changes: 91 additions & 98 deletions src/main/java/com/github/lppedd/idea/pomsky/lang/lexer/pomsky.flex
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,9 @@ import com.intellij.psi.TokenType;
// Primitives
Whitespace = \s+
Number = [0-9_]+
NonPrintable = [nrtaef]
CodePoint = U{Whitespace}*\+{Whitespace}*[a-fA-F0-9]{1,6}
Identifier = [\p{Alpha}_][\p{Alpha}\p{N}_]*
GroupName = [\p{Alpha}\p{N}_-]* // This is a relaxed variant. The correct regexp is [a-zA-Z][a-zA-Z0-9]*
GroupName = [\p{Alpha}\p{N}_-]+ // This is a relaxed variant. The correct regexp is [a-zA-Z][a-zA-Z0-9]*

// Complex tokens
Comment = #.*
Expand All @@ -64,131 +63,125 @@ Keyword = let

%%

<YYINITIAL> {
{Whitespace} {
return TokenType.WHITE_SPACE;
}

{Comment} {
return PomskyTypes.COMMENT;
}
{Whitespace} {
return TokenType.WHITE_SPACE;
}

{Keyword} {
return PomskyTypes.KEYWORD;
}
{Comment} {
return PomskyTypes.COMMENT;
}

{CodePoint} {
return PomskyTypes.CODE_POINT;
}
{Keyword} {
return PomskyTypes.KEYWORD;
}

[\^$] | \!?% | Start | End {
return PomskyTypes.BOUNDARY;
}
{CodePoint} {
return PomskyTypes.CODE_POINT;
}

{Identifier} {
return PomskyTypes.IDENTIFIER;
}
[\^$] | \!?% | Start | End {
return PomskyTypes.BOUNDARY;
}

{NonPrintable} {
return PomskyTypes.NON_PRINTABLE;
}
{Identifier} {
return PomskyTypes.IDENTIFIER;
}

' {
yybegin(STRING_SINGLE);
}
' {
yybegin(STRING_SINGLE);
}

\" {
yybegin(STRING_DOUBLE);
}
\" {
yybegin(STRING_DOUBLE);
}

[0-9]+ {
return PomskyTypes.NUMBER;
}
[0-9]+ {
return PomskyTypes.NUMBER;
}

::({GroupName} | {Number})? {
return PomskyTypes.GROUP_REFERENCE;
}
::({GroupName} | {Number})? {
return PomskyTypes.GROUP_REFERENCE;
}

, {
return PomskyTypes.COMMA;
}
, {
return PomskyTypes.COMMA;
}

; {
return PomskyTypes.SEMICOLON;
}
; {
return PomskyTypes.SEMICOLON;
}

: {
yybegin(GROUP_EXPRESSION);
return PomskyTypes.COLON;
}
: {
yybegin(GROUP_EXPRESSION);
return PomskyTypes.COLON;
}

= {
return PomskyTypes.EQ;
}
= {
return PomskyTypes.EQ;
}

[*+?] {
return PomskyTypes.QUANTIFIER;
}
[*+?] {
return PomskyTypes.QUANTIFIER;
}

\| {
return PomskyTypes.UNION;
}
\| {
return PomskyTypes.UNION;
}

\[ {
return PomskyTypes.CLASS_BEGIN;
}
\[ {
return PomskyTypes.CLASS_BEGIN;
}

] {
return PomskyTypes.CLASS_END;
}
] {
return PomskyTypes.CLASS_END;
}

\( {
return PomskyTypes.GROUP_BEGIN;
}
\( {
return PomskyTypes.GROUP_BEGIN;
}

\) {
return PomskyTypes.GROUP_END;
}
\) {
return PomskyTypes.GROUP_END;
}

\{ {
return PomskyTypes.LBRACE;
}
\{ {
return PomskyTypes.LBRACE;
}

\} {
return PomskyTypes.RBRACE;
}
\} {
return PomskyTypes.RBRACE;
}

>> {
return PomskyTypes.LOOKAHEAD;
}
>> {
return PomskyTypes.LOOKAHEAD;
}

\<< {
return PomskyTypes.LOOKBEHIND;
}
\<< {
return PomskyTypes.LOOKBEHIND;
}

\!>> {
return PomskyTypes.LOOKAHEAD_NEGATED;
}
\!>> {
return PomskyTypes.LOOKAHEAD_NEGATED;
}

\!<< {
return PomskyTypes.LOOKBEHIND_NEGATED;
}
\!<< {
return PomskyTypes.LOOKBEHIND_NEGATED;
}

\! {
return PomskyTypes.NEGATION;
}
\! {
return PomskyTypes.NEGATION;
}

- {
return PomskyTypes.RANGE_SEPARATOR;
}
- {
return PomskyTypes.RANGE_SEPARATOR;
}

\. {
return PomskyTypes.DOT;
}
\. {
return PomskyTypes.DOT;
}

[^] {
return PlainTextTokenTypes.PLAIN_TEXT;
}
[^] {
return PlainTextTokenTypes.PLAIN_TEXT;
}

// A literal string in the form: 'example of string' or 'example of \'string\''
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@
BOUNDARY = "regexp:[\^$]|!?%|Start|End"
STRING = "regexp:'.*'"
NUMBER = "regexp:[0-9_]+"
NON_PRINTABLE = "regexp:[nrtaef]"
CODE_POINT = "regexp:U\+?[a-fA-F0-9]{1,6}"
IDENTIFIER = "regexp:[\p{Alpha}_][\p{Alpha}\p{N}_]*"
GROUP_NAME = "regexp:[a-zA-Z][a-zA-Z0-9]*"
Expand Down Expand Up @@ -193,7 +192,7 @@ private character_set_expression_inner ::=

private character_set_inner ::=
character_set_range
| NON_PRINTABLE
| IDENTIFIER
| CODE_POINT
| NEGATION? STRING
| NEGATION? IDENTIFIER
Expand All @@ -204,7 +203,7 @@ character_set_range ::=

private single_char ::=
STRING
| NON_PRINTABLE
| IDENTIFIER
| CODE_POINT

// *****************************************************************************
Expand Down

0 comments on commit 56bfb3e

Please sign in to comment.