From b5154c11374fdf348839a4b3581c308d01e81ba9 Mon Sep 17 00:00:00 2001 From: Thomas Nordahl Pedersen Date: Wed, 13 Jul 2022 16:29:51 +0200 Subject: [PATCH 1/3] PHP 8 upgrades + inspections --- src/Splitter.php | 2 +- src/Tokenizer.php | 122 ++++++++++++++++++++++++---------------------- 2 files changed, 66 insertions(+), 58 deletions(-) diff --git a/src/Splitter.php b/src/Splitter.php index 9255b42..b79a522 100644 --- a/src/Splitter.php +++ b/src/Splitter.php @@ -13,7 +13,7 @@ abstract class Splitter * * @return string[] list of SQL statements */ - public static function split(string $sql, bool $strip_comments = true) + public static function split(string $sql, bool $strip_comments = true): array { $tokens = Tokenizer::tokenize($sql); diff --git a/src/Tokenizer.php b/src/Tokenizer.php index abc1d1a..234d177 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -16,27 +16,18 @@ */ class Tokenizer { - /** - * @var int - */ - protected $offset = 0; + protected int $offset = 0; - /** - * @var string - */ - protected $input; + protected string $input; - /** - * @var string - */ - protected $delimiter_pattern = ";"; + protected string $delimiter_pattern = ";"; /** * @param string $input * * @return array tree-structure of SQL tokens */ - public static function tokenize(string $input) + public static function tokenize(string $input): array { $parser = new self($input); @@ -51,12 +42,14 @@ protected function __construct(string $input) /** * @return string[] */ - protected function statements() + protected function statements(): array { $statements = []; - while ($result = $this->statement()) { + $result = $this->statement(); + while ($result) { $statements[] = $result; + $result = $this->statement(); } return $statements; @@ -65,7 +58,7 @@ protected function statements() /** * @return string[]|null */ - protected function statement() + protected function statement(): ?array { $this->consume('\s*'); @@ -74,9 +67,11 @@ protected function statement() } $tokens = []; - - while ("" !== $token = $this->token()) { + $token = $this->token(); + while ($token !== "") { if (is_string($token) && preg_match('/^delimiter$/i', $token) === 1) { + // Omit DELIMITER command - it isn't part of SQL statement syntax + $this->consume('[ ]*'); $delimiter = trim($this->consume('.*?[\r\n]+')); @@ -87,61 +82,73 @@ protected function statement() $this->delimiter_pattern = preg_quote($delimiter); - continue; // omits DELIMITER command - it isn't part of SQL statement syntax + } else { + $tokens[] = $token; } - - $tokens[] = $token; + $token = $this->token(); } return $tokens; } /** + * TODO: Refactor this - cyclomatic complexity > 10 + * * @return array|string */ - protected function token() + protected function token(): array|string { if ($this->consume($this->delimiter_pattern)) { return ""; // end of statement } - if ("" !== $token = $this->consume('\w+')) { + $token = $this->consume('\w+'); + if ($token !== "") { return $token; } - if ($token = $this->consume('\s+')) { + $token = $this->consume('\s+'); + if ($token) { return $token; } - if ($token = $this->comment()) { + $token = $this->comment(); + if ($token) { return $token; } - if ($token = $this->consume('\@\w+')) { + $token = $this->consume('\@\w+'); + if ($token) { return $token; // @var } - if ($token = $this->consume(':\w+')) { - return $token; // :var (PDO placeholder) + $token = $this->consume(':\w+'); + if ($token) { + return $token; // PDO placeholder } - if ($token = $this->consume('[+\-\*\/.,!=^|&<>:@%~#]+')) { + $token = $this->consume('[+\-\*\/.,!=^|&<>:@%~#]+'); + if ($token) { return $token; // various operators } - if ($token = $this->consume(';')) { + $token = $this->consume(';'); + if ($token) { return $token; // statement separator (when $delimiter_pattern has been modified) } - if ($token = $this->quoted()) { + $token = $this->quoted(); + if ($token) { return $token; } - if ($tokens = $this->grouped()) { + $tokens = $this->grouped(); + if ($tokens) { return $tokens; } - if ($token = $this->dollarquoted()) { + $token = $this->dollarquoted(); + if ($token) { return $token; } @@ -152,18 +159,17 @@ protected function token() $this->fail("expected SQL token"); } - /** - * @return string|null - */ - protected function comment() + protected function comment(): ?string { - if ($start = $this->consume('--')) { + $start = $this->consume('--'); + if ($start) { $comment = $this->consume("[^\r\n]*"); return "{$start}{$comment}"; } - if ($start = $this->consume('\/\*')) { + $start = $this->consume('\/\*'); + if ($start) { $comment = $this->consume('.*?\*\/'); if ($comment) { @@ -176,12 +182,10 @@ protected function comment() return null; } - /** - * @return string|null - */ - protected function dollarquoted() + protected function dollarquoted(): ?string { - if ($delimiter = $this->consume('\$\w*\$')) { + $delimiter = $this->consume('\$\w*\$'); + if ($delimiter) { $end_delimiter = preg_quote($delimiter); $body = $this->consume(".*?{$end_delimiter}"); @@ -198,10 +202,7 @@ protected function dollarquoted() return null; } - /** - * @return array|null - */ - protected function grouped() + protected function grouped(): ?array { static $end = [ "(" => ")", @@ -209,7 +210,8 @@ protected function grouped() "[" => "]", ]; - if ($opening = $this->consume('[({\[]')) { + $opening = $this->consume('[({\[]'); + if ($opening) { $closing = $end[$opening]; $tokens = [$opening]; @@ -218,12 +220,13 @@ protected function grouped() if ($this->is($closing)) { $tokens[] = $closing; - $this->offset +=1; + $this->offset += 1; return $tokens; } - if ("" !== $token = $this->token()) { + $token = $this->token(); + if ($token !== "") { $tokens[] = $token; } else { $this->fail("expected token or group end: {$closing}"); @@ -237,9 +240,11 @@ protected function grouped() /** * @return string|null */ - protected function quoted() + protected function quoted(): ?string { - if ($quote = $this->consume('[`\'"]')) { + $quote = $this->consume('[`\'"]'); + + if ($quote) { $tokens = [$quote]; $not_quote = '[^' . preg_quote($quote) . '\\\\]*'; @@ -261,7 +266,9 @@ protected function quoted() return implode('', $tokens); } - if ("" !== $token = $this->consume($not_quote)) { + $token = $this->consume($not_quote); + + if ($token !== "") { $tokens[] = $token; continue; @@ -286,7 +293,7 @@ protected function is(string $exact): bool protected function matches(string $pattern): bool { - return preg_match("/{$pattern}/sA", $this->input, $matches, 0, $this->offset) === 1; + return preg_match(pattern: "/{$pattern}/sA", subject: $this->input, offset: $this->offset) === 1; } protected function consume(string $pattern): string @@ -300,8 +307,9 @@ protected function consume(string $pattern): string return ''; } - protected function fail(string $why) + protected function fail(string $why): void { - throw new RuntimeException("unexpected input: {$why}, at: {$this->offset}, got: \"" . substr($this->input, $this->offset, 1) . "\""); + throw new RuntimeException("unexpected input: {$why}, at: {$this->offset}, got: \"" . substr($this->input, + $this->offset, 1) . "\""); } } From 106f5ab078828fa6de836795bdcd128f864b4de7 Mon Sep 17 00:00:00 2001 From: Thomas Nordahl Pedersen Date: Thu, 11 Aug 2022 15:31:30 +0200 Subject: [PATCH 2/3] Removed docblocks that only had redundant typehint info --- src/Tokenizer.php | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Tokenizer.php b/src/Tokenizer.php index 2e2ec1e..e1924d4 100644 --- a/src/Tokenizer.php +++ b/src/Tokenizer.php @@ -106,8 +106,6 @@ protected function statement(): ?array /** * TODO: Refactor this - cyclomatic complexity > 10 - * - * @return array|string */ protected function token(): array|string { @@ -250,9 +248,6 @@ protected function grouped(): ?array return null; } - /** - * @return string|null - */ protected function quoted(): ?string { $quote = $this->consume('[`\'"]'); From 5c21595537c11635e7bb49e49a12fd258735fc3a Mon Sep 17 00:00:00 2001 From: Thomas Nordahl Pedersen Date: Thu, 11 Aug 2022 15:43:09 +0200 Subject: [PATCH 3/3] Documentation --- README.md | 2 -- UPGRADING.md | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index fc7421d..aeb8f57 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,7 @@ A simple parser to split SQL (and/or DDL) files into individual SQL queries and strip comments. -[![PHP Version](https://img.shields.io/badge/php-7.4%2B-blue.svg)](https://packagist.org/packages/kodus/sql-split) [![PHP Version](https://img.shields.io/badge/php-8.0%2B-blue.svg)](https://packagist.org/packages/kodus/sql-split) -[![PHP Version](https://img.shields.io/badge/php-8.1%2B-blue.svg)](https://packagist.org/packages/kodus/sql-split) [![Build Status](https://travis-ci.org/kodus/sql-split.svg?branch=master)](https://travis-ci.org/kodus/sql-split) ### Install via Composer diff --git a/UPGRADING.md b/UPGRADING.md index ec59571..28ba991 100644 --- a/UPGRADING.md +++ b/UPGRADING.md @@ -5,6 +5,11 @@ UPGRADING ### PHP 8.0 required From version 2.0.0, `kodus/sql-split` requires at least PHP version 8.0 +### Typehints added +Typehints were added to properties and method signatures of `Kodus\SQLSplit\Tokenizer` and `Kodus\SQLSplit\Splitter`. + +If you've made classes that extend those, you should update these classes with correct typehints as well. + ### Namespace changed From version 2.0.0, the namespace of the `Splitter` and `Tokenizer` has changed from `Kodus\SQL` to `Kodus\SQLSplit`.