Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@

A simple parser to split SQL (and/or DDL) files into individual SQL queries and strip comments.

[![PHP Version](https://img.shields.io/badge/php-7.4%2B-blue.svg)](https://packagist.org/packages/kodus/sql-split)
[![PHP Version](https://img.shields.io/badge/php-8.0%2B-blue.svg)](https://packagist.org/packages/kodus/sql-split)
[![PHP Version](https://img.shields.io/badge/php-8.1%2B-blue.svg)](https://packagist.org/packages/kodus/sql-split)
[![Build Status](https://travis-ci.org/kodus/sql-split.svg?branch=master)](https://travis-ci.org/kodus/sql-split)

### Install via Composer
Expand Down
5 changes: 5 additions & 0 deletions UPGRADING.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@ UPGRADING
### PHP 8.0 required
From version 2.0.0, `kodus/sql-split` requires PHP 8.0 or later.

### Typehints added
Typehints were added to properties and method signatures of `Kodus\SQLSplit\Tokenizer` and `Kodus\SQLSplit\Splitter`.

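If you have classes that extend either of them, add matching typehints to the properties and methods you override; PHP raises a fatal error when an overriding declaration is incompatible with the parent's.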

### Namespace changed
From version 2.0.0, the namespace of the `Splitter` and `Tokenizer` has changed from `Kodus\SQL` to `Kodus\SQLSplit`.

Expand Down
2 changes: 1 addition & 1 deletion src/Splitter.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ abstract class Splitter
*
* @return string[] list of SQL statements
*/
public static function split(string $sql, bool $strip_comments = true)
public static function split(string $sql, bool $strip_comments = true): array
{
$tokens = Tokenizer::tokenize($sql);

Expand Down
119 changes: 61 additions & 58 deletions src/Tokenizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,18 @@
*/
class Tokenizer
{
/**
* @var int
*/
protected $offset = 0;
protected int $offset = 0;

/**
* @var string
*/
protected $input;
protected string $input;

/**
* @var string
*/
protected $delimiter_pattern = ";";
protected string $delimiter_pattern = ";";

/**
* @param string $input
*
* @return array tree-structure of SQL tokens
*/
public static function tokenize(string $input)
public static function tokenize(string $input): array
{
$parser = new self($input);

Expand All @@ -51,7 +42,7 @@ protected function __construct(string $input)
/**
* @return string[]
*/
protected function statements()
protected function statements(): array
{
$statements = [];

Expand All @@ -62,6 +53,7 @@ protected function statements()
}
} while ($result !== null);


return $statements;
}

Expand All @@ -74,7 +66,7 @@ protected function statements()
*
* @return string[]|null
*/
protected function statement()
protected function statement(): ?array
{
$this->consume('\s*');

Expand All @@ -84,12 +76,15 @@ protected function statement()

$tokens = [];

while ("" !== $token = $this->token()) {
$token = $this->token();
while ($token !== "") {
/**
* DEV NOTE: This checks for DELIMITER statement, that changes delimiter from ; to something else.
* If detected, it will extract the new DELIMITER and reassign $this->delimiter_pattern.
*/
if (is_string($token) && preg_match('/^delimiter$/i', $token) === 1) {
// Omit DELIMITER command - it isn't part of SQL statement syntax

$this->consume('[ ]*');

$delimiter = trim($this->consume('.*?[\r\n]+'));
Expand All @@ -100,61 +95,71 @@ protected function statement()

$this->delimiter_pattern = preg_quote($delimiter);

continue; // omits DELIMITER command - it isn't part of SQL statement syntax
} else {
$tokens[] = $token;
}

$tokens[] = $token;
$token = $this->token();
}

return $tokens;
}

/**
* @return array|string
* TODO: Refactor this - cyclomatic complexity > 10
Comment thread
thomasnordahl-dk marked this conversation as resolved.
*/
protected function token()
protected function token(): array|string
{
if ($this->consume($this->delimiter_pattern)) {
return ""; // end of statement
}

if ("" !== $token = $this->consume('\w+')) {
$token = $this->consume('\w+');
if ($token !== "") {
return $token;
}

if ($token = $this->consume('\s+')) {
$token = $this->consume('\s+');
if ($token) {
return $token;
}

if ($token = $this->comment()) {
$token = $this->comment();
if ($token) {
return $token;
}

if ($token = $this->consume('\@\w+')) {
$token = $this->consume('\@\w+');
if ($token) {
return $token; // @var
}

if ($token = $this->consume(':\w+')) {
return $token; // :var (PDO placeholder)
$token = $this->consume(':\w+');
if ($token) {
return $token; // PDO placeholder
}

if ($token = $this->consume('[+\-\*\/.,!=^|&<>:@%~#]+')) {
$token = $this->consume('[+\-\*\/.,!=^|&<>:@%~#]+');
if ($token) {
return $token; // various operators
}

if ($token = $this->consume(';')) {
$token = $this->consume(';');
if ($token) {
return $token; // statement separator (when $delimiter_pattern has been modified)
}

if ($token = $this->quoted()) {
$token = $this->quoted();
if ($token) {
return $token;
}

if ($tokens = $this->grouped()) {
$tokens = $this->grouped();
if ($tokens) {
return $tokens;
}

if ($token = $this->dollarquoted()) {
$token = $this->dollarquoted();
if ($token) {
return $token;
}

Expand All @@ -165,18 +170,17 @@ protected function token()
$this->fail("expected SQL token");
}

/**
* @return string|null
*/
protected function comment()
protected function comment(): ?string
{
if ($start = $this->consume('--')) {
$start = $this->consume('--');
if ($start) {
$comment = $this->consume("[^\r\n]*");

return "{$start}{$comment}";
}

if ($start = $this->consume('\/\*')) {
$start = $this->consume('\/\*');
if ($start) {
$comment = $this->consume('.*?\*\/');

if ($comment) {
Expand All @@ -189,12 +193,10 @@ protected function comment()
return null;
}

/**
* @return string|null
*/
protected function dollarquoted()
protected function dollarquoted(): ?string
{
if ($delimiter = $this->consume('\$\w*\$')) {
$delimiter = $this->consume('\$\w*\$');
if ($delimiter) {
$end_delimiter = preg_quote($delimiter);

$body = $this->consume(".*?{$end_delimiter}");
Expand All @@ -211,18 +213,16 @@ protected function dollarquoted()
return null;
}

/**
* @return array|null
*/
protected function grouped()
protected function grouped(): ?array
{
static $end = [
"(" => ")",
"{" => "}",
"[" => "]",
];

if ($opening = $this->consume('[({\[]')) {
$opening = $this->consume('[({\[]');
if ($opening) {
$closing = $end[$opening];

$tokens = [$opening];
Expand All @@ -236,7 +236,8 @@ protected function grouped()
return $tokens;
}

if ("" !== $token = $this->token()) {
$token = $this->token();
if ($token !== "") {
$tokens[] = $token;
} else {
$this->fail("expected token or group end: {$closing}");
Expand All @@ -247,12 +248,11 @@ protected function grouped()
return null;
}

/**
* @return string|null
*/
protected function quoted()
protected function quoted(): ?string
{
if ($quote = $this->consume('[`\'"]')) {
$quote = $this->consume('[`\'"]');

if ($quote) {
$tokens = [$quote];

$not_quote = '[^' . preg_quote($quote) . '\\\\]*';
Expand All @@ -274,7 +274,9 @@ protected function quoted()
return implode('', $tokens);
}

if ("" !== $token = $this->consume($not_quote)) {
$token = $this->consume($not_quote);

if ($token !== "") {
$tokens[] = $token;

continue;
Expand All @@ -299,7 +301,7 @@ protected function is(string $exact): bool

protected function matches(string $pattern): bool
{
return preg_match("/{$pattern}/sA", $this->input, $matches, 0, $this->offset) === 1;
return preg_match(pattern: "/{$pattern}/sA", subject: $this->input, offset: $this->offset) === 1;
}

protected function consume(string $pattern): string
Expand All @@ -313,8 +315,9 @@ protected function consume(string $pattern): string
return '';
}

protected function fail(string $why)
protected function fail(string $why): void
{
throw new RuntimeException("unexpected input: {$why}, at: {$this->offset}, got: \"" . substr($this->input, $this->offset, 1) . "\"");
throw new RuntimeException("unexpected input: {$why}, at: {$this->offset}, got: \"" . substr($this->input,
$this->offset, 1) . "\"");
}
}