Skip to content

Commit

Permalink
optimize Lexer performance (#65)
Browse files: browse the repository at this point in the history
  • Loading branch information
JanTvrdik authored and dg committed Oct 12, 2022
1 parent b821041 commit 5068cf8
Showing 1 changed file with 7 additions and 16 deletions.
23 changes: 7 additions & 16 deletions src/Neon/Lexer.php
Original file line number | Diff line number | Diff line change
Expand Up @@ -50,28 +50,19 @@ public function tokenize(string $input): TokenStream
{
$input = str_replace("\r", '', $input);
$pattern = '~(' . implode(')|(', self::Patterns) . ')~Amixu';
$res = preg_match_all($pattern, $input, $tokens, PREG_SET_ORDER | PREG_UNMATCHED_AS_NULL);
$res = preg_match_all($pattern, $input, $matches, PREG_SET_ORDER);
if ($res === false) {
throw new Exception('Invalid UTF-8 sequence.');
}

$types = array_keys(self::Patterns);
$offset = 0;
foreach ($tokens as &$token) {
$type = null;
for ($i = 1; $i <= count($types); $i++) {
if (isset($token[$i])) {
$type = $types[$i - 1];
if ($type === Token::Char) {
$type = $token[0];
}

break;
}
}

$token = new Token($token[0], $type);
$offset += strlen($token->value);

$tokens = [];
foreach ($matches as $match) {
$type = $types[count($match) - 2];
$tokens[] = new Token($match[0], $type === Token::Char ? $match[0] : $type);
$offset += strlen($match[0]);
}

$stream = new TokenStream($tokens);
Expand Down

0 comments on commit 5068cf8

Please sign in to comment.