From f319c5d4e7004390c8f86257147c38a6441cacf3 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 25 Jun 2024 17:45:25 +0200 Subject: [PATCH 01/21] RegexArrayShapeMatcher - when all groups are optional return a more precise union, instead of a shape with optional offsets --- src/Type/Php/RegexArrayShapeMatcher.php | 16 ++++++++++++++-- .../Analyser/nsrt/preg_match_shapes_php.php | 15 ++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index bee04770b4..d9eb992f22 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -16,6 +16,7 @@ use PHPStan\Type\StringType; use PHPStan\Type\Type; use PHPStan\Type\TypeCombinator; +use PHPStan\Type\UnionType; use function array_reverse; use function count; use function in_array; @@ -97,13 +98,14 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $trailingOptionals++; } - for ($i = 0; $i < count($captureGroups); $i++) { + $countGroups = count($captureGroups); + for ($i = 0; $i < $countGroups; $i++) { $captureGroup = $captureGroups[$i]; if (!$wasMatched->yes()) { $optional = true; } else { - if ($i < count($captureGroups) - $trailingOptionals) { + if ($i < $countGroups - $trailingOptionals) { $optional = false; } else { $optional = $captureGroup->isOptional(); @@ -125,6 +127,16 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched ); } + // when all groups are optional return a more precise union, instead of a shape with optional offsets + if ($countGroups === $trailingOptionals && $wasMatched->yes()) { + $overallType = $builder->getArray(); + return TypeCombinator::union( + new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), + // same shape, but without optional keys + new ConstantArrayType($overallType->getKeyTypes(), $overallType->getValueTypes()), + ); + } + return $builder->getArray(); } diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php index 3d7194d994..0f759b2f2d 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php @@ -118,9 +118,9 @@ function doOffsetCapture(string $s): void { function doUnmatchedAsNull(string $s): void { if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{0: string, 1?: string|null, 2?: string|null, 3?: string|null}', $matches); + assertType('array{string, string|null, string|null, string|null}|array{string}', $matches); } - assertType('array{}|array{0: string, 1?: string|null, 2?: string|null, 3?: string|null}', $matches); + assertType('array{}|array{string, string|null, string|null, string|null}|array{string}', $matches); } function doUnknownFlags(string $s, int $flags): void { @@ -252,7 +252,7 @@ function doFoo(string $row): void assertType('array{0: string, 1: string, 2?: string}', $matches); } if (preg_match('~^(a(b)?)?$~', $row, $matches) === 1) { - assertType('array{0: string, 1?: string, 2?: string}', $matches); + assertType('array{string, string, string}|array{string}', $matches); } } @@ -273,3 +273,12 @@ function doFoo3(string $row): void assertType('array{string, string, string, string, string, string, string}', $matches); } + +function allGroupsOptional(string $size): void +{ + if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + + assertType('array{string, string, string}|array{string}', $matches); +} From d68b3aae0ddb6c058b721a3669d619ad6cd4e765 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 25 Jun 2024 17:47:38 +0200 Subject: [PATCH 02/21] Update RegexArrayShapeMatcher.php --- src/Type/Php/RegexArrayShapeMatcher.php | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index d9eb992f22..9a4e5255f2 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -16,7 +16,6 @@ use PHPStan\Type\StringType; use PHPStan\Type\Type; use PHPStan\Type\TypeCombinator; -use PHPStan\Type\UnionType; use function array_reverse; use function count; use function in_array; From 8760b9b9a1059b1c7b109f1f729ca9cef445c878 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 25 Jun 2024 17:59:17 +0200 Subject: [PATCH 03/21] fix phpstan errors --- src/Type/Php/RegexArrayShapeMatcher.php | 27 +++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 9a4e5255f2..4c09c619f2 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -126,17 +126,32 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched ); } + $overallType = $builder->getArray(); + $constantArrays = $overallType->getConstantArrays(); + // when all groups are optional return a more precise union, instead of a shape with optional offsets - if ($countGroups === $trailingOptionals && $wasMatched->yes()) { - $overallType = $builder->getArray(); - return TypeCombinator::union( + if ( + $countGroups === $trailingOptionals + && $wasMatched->yes() + && count($constantArrays) > 0 + ) { + $result = [ + // first item in matches contains the overall match. new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), + ]; + + foreach ($constantArrays as $constantArray) { // same shape, but without optional keys - new ConstantArrayType($overallType->getKeyTypes(), $overallType->getValueTypes()), - ); + $result[] = new ConstantArrayType( + $constantArray->getKeyTypes(), + $constantArray->getValueTypes(), + ); + } + + return TypeCombinator::union(...$result); } - return $builder->getArray(); + return $overallType; } private function getKeyType(int|string $key): Type From 77b4b55e9fa4fbb1736472a7dbe0517e81b0dda2 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 25 Jun 2024 18:04:20 +0200 Subject: [PATCH 04/21] more tests --- tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php index 0f759b2f2d..067da49328 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php @@ -279,6 +279,15 @@ function allGroupsOptional(string $size): void if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } + assertType('array{string, string, string}|array{string}', $matches); + if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{string, string, string}|array{string}', $matches); + + if (preg_match('~^a\.(?:(b)|(c))?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } assertType('array{string, string, string}|array{string}', $matches); } From 019fdd163b5da82a64ecd40a015ddd4a754b1c9b Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 25 Jun 2024 18:08:28 +0200 Subject: [PATCH 05/21] simplify --- src/Type/Php/RegexArrayShapeMatcher.php | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 4c09c619f2..a7263263bb 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -127,19 +127,21 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched } $overallType = $builder->getArray(); - $constantArrays = $overallType->getConstantArrays(); // when all groups are optional return a more precise union, instead of a shape with optional offsets if ( $countGroups === $trailingOptionals && $wasMatched->yes() - && count($constantArrays) > 0 ) { + $constantArrays = $overallType->getConstantArrays(); + if ($constantArrays === []) { + return $overallType; + } + $result = [ // first item in matches contains the overall match. new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), ]; - foreach ($constantArrays as $constantArray) { // same shape, but without optional keys $result[] = new ConstantArrayType( From 2b4b8a8f5e81f81a2ca13c1918e9db3541d6ab9a Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Tue, 25 Jun 2024 18:14:15 +0200 Subject: [PATCH 06/21] Update preg_match_shapes.php --- .../{preg_match_shapes_php.php => preg_match_shapes.php} | 5 ----- 1 file changed, 5 deletions(-) rename tests/PHPStan/Analyser/nsrt/{preg_match_shapes_php.php => preg_match_shapes.php} (98%) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php similarity index 98% rename from tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php rename to tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 067da49328..1360c9a0d4 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -285,9 +285,4 @@ function allGroupsOptional(string $size): void throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } assertType('array{string, string, string}|array{string}', $matches); - - if (preg_match('~^a\.(?:(b)|(c))?d~', $size, $matches) !== 1) { - throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); - } - assertType('array{string, string, string}|array{string}', $matches); } From 02dcd85ae5659cdf0c3c7819f01e8166a278bf83 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 09:03:39 +0200 Subject: [PATCH 07/21] Discard changes to src/Type/Php/RegexArrayShapeMatcher.php --- src/Type/Php/RegexArrayShapeMatcher.php | 34 +++---------------------- 1 file changed, 3 insertions(+), 31 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index a7263263bb..bee04770b4 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -97,14 +97,13 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $trailingOptionals++; } - $countGroups = count($captureGroups); - for ($i = 0; $i < $countGroups; $i++) { + for ($i = 0; $i < count($captureGroups); $i++) { $captureGroup = $captureGroups[$i]; if (!$wasMatched->yes()) { $optional = true; } else { - if ($i < $countGroups - $trailingOptionals) { + if ($i < count($captureGroups) - $trailingOptionals) { $optional = false; } else { $optional = $captureGroup->isOptional(); @@ -126,34 +125,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched ); } - $overallType = $builder->getArray(); - - // when all groups are optional return a more precise union, instead of a shape with optional offsets - if ( - $countGroups === $trailingOptionals - && $wasMatched->yes() - ) { - $constantArrays = $overallType->getConstantArrays(); - if ($constantArrays === []) { - return $overallType; - } - - $result = [ - // first item in matches contains the overall match. - new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), - ]; - foreach ($constantArrays as $constantArray) { - // same shape, but without optional keys - $result[] = new ConstantArrayType( - $constantArray->getKeyTypes(), - $constantArray->getValueTypes(), - ); - } - - return TypeCombinator::union(...$result); - } - - return $overallType; + return $builder->getArray(); } private function getKeyType(int|string $key): Type From 147677e1c7162ac53585a389a6066cb2017dd47a Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 09:27:26 +0200 Subject: [PATCH 08/21] refactor --- src/Type/Php/RegexArrayShapeMatcher.php | 5 +++-- src/Type/Php/RegexCapturingGroup.php | 21 +++++++++++++++------ 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index bee04770b4..d9b73a6a88 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -193,12 +193,13 @@ private function parseGroups(string $regex): ?array private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, array &$capturings): void { if ($ast->getId() === '#capturing') { - $capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0 || $inOptionalQuantification > 0); + $capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0, $inOptionalQuantification > 0); } elseif ($ast->getId() === '#namedcapturing') { $name = $ast->getChild(0)->getValue()['value']; $capturings[] = RegexCapturingGroup::named( $name, - $inAlternation > 0 || $inOptionalQuantification > 0, + $inAlternation > 0, + $inOptionalQuantification > 0, ); } diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index bc0b6bfdb0..b3a6387f90 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -5,23 +5,32 @@ class RegexCapturingGroup { - private function __construct(private ?string $name, private bool $optional) + private function __construct( + private ?string $name, + private bool $inAlternation, + private bool $inOptionalQuantification, + ) { } - public static function unnamed(bool $optional): self + public static function unnamed(bool $inAlternation, bool $inOptionalQuantification): self { - return new self(null, $optional); + return new self(null, $inAlternation, $inOptionalQuantification); } - public static function named(string $name, bool $optional): self + public static function named(string $name, bool $inAlternation, bool $inOptionalQuantification): self { - return new self($name, $optional); + return new self($name, $inAlternation, $inOptionalQuantification); } public function isOptional(): bool { - return $this->optional; + return $this->inAlternation || $this->inOptionalQuantification; + } + + public function inAlternation(): bool + { + return $this->inAlternation; } /** @phpstan-assert-if-true !null $this->getName() */ From 8aaf8fd1492cede8388866202cf35bd04230a76c Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 09:27:57 +0200 Subject: [PATCH 09/21] extract buildArrayType --- src/Type/Php/RegexArrayShapeMatcher.php | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index d9b73a6a88..a2462048d1 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -79,8 +79,25 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched return null; } - $builder = ConstantArrayTypeBuilder::createEmpty(); + $trailingOptionals = 0; + foreach (array_reverse($captureGroups) as $captureGroup) { + if (!$captureGroup->isOptional()) { + break; + } + $trailingOptionals++; + } + $valueType = $this->getValueType($flags ?? 0); + return $this->buildArrayType($captureGroups, $valueType, $wasMatched, $trailingOptionals); + } + + private function buildArrayType( + array $captureGroups, + Type $valueType, + TrinaryLogic $wasMatched, + int $trailingOptionals + ): Type { + $builder = ConstantArrayTypeBuilder::createEmpty(); // first item in matches contains the overall match. $builder->setOffsetValueType( @@ -89,14 +106,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched !$wasMatched->yes(), ); - $trailingOptionals = 0; - foreach (array_reverse($captureGroups) as $captureGroup) { - if (!$captureGroup->isOptional()) { - break; - } - $trailingOptionals++; - } - for ($i = 0; $i < count($captureGroups); $i++) { $captureGroup = $captureGroups[$i]; From 1f05ea28ebb4363a570a1d048a4e20f2c894137b Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 11:37:05 +0200 Subject: [PATCH 10/21] detect top level capturing groups --- src/Type/Php/RegexArrayShapeMatcher.php | 15 +++++++++++---- src/Type/Php/RegexCapturingGroup.php | 13 +++++++------ 2 files changed, 18 insertions(+), 10 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index a2462048d1..f902853600 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -191,7 +191,7 @@ private function parseGroups(string $regex): ?array } $capturings = []; - $this->walkRegexAst($ast, 0, 0, $capturings); + $this->walkRegexAst($ast, 0, 0, false, $capturings); return $capturings; } @@ -199,17 +199,24 @@ private function parseGroups(string $regex): ?array /** * @param list $capturings */ - private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, array &$capturings): void + private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, bool $inCapturing, array &$capturings): void { if ($ast->getId() === '#capturing') { - $capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0, $inOptionalQuantification > 0); + $capturings[] = RegexCapturingGroup::unnamed( + $inAlternation > 0, + $inOptionalQuantification > 0, + !$inCapturing + ); + $inCapturing = true; } elseif ($ast->getId() === '#namedcapturing') { $name = $ast->getChild(0)->getValue()['value']; $capturings[] = RegexCapturingGroup::named( $name, $inAlternation > 0, $inOptionalQuantification > 0, + !$inCapturing ); + $inCapturing = true; } if ($ast->getId() === '#alternation') { @@ -230,7 +237,7 @@ private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptional } foreach ($ast->getChildren() as $child) { - $this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $capturings); + $this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $inCapturing, $capturings); } } diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index b3a6387f90..4ae4e64c75 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -9,18 +9,19 @@ private function __construct( private ?string $name, private bool $inAlternation, private bool $inOptionalQuantification, + private bool $isTopLevel ) { } - public static function unnamed(bool $inAlternation, bool $inOptionalQuantification): self + public static function unnamed(bool $inAlternation, bool $inOptionalQuantification, bool $isTopLevel): self { - return new self(null, $inAlternation, $inOptionalQuantification); + return new self(null, $inAlternation, $inOptionalQuantification, $isTopLevel); } - public static function named(string $name, bool $inAlternation, bool $inOptionalQuantification): self + public static function named(string $name, bool $inAlternation, bool $inOptionalQuantification, bool $isTopLevel): self { - return new self($name, $inAlternation, $inOptionalQuantification); + return new self($name, $inAlternation, $inOptionalQuantification, $isTopLevel); } public function isOptional(): bool @@ -28,9 +29,9 @@ public function isOptional(): bool return $this->inAlternation || $this->inOptionalQuantification; } - public function inAlternation(): bool + public function isTopLevel(): bool { - return $this->inAlternation; + return $this->isTopLevel; } /** @phpstan-assert-if-true !null $this->getName() */ From 27485e9cdff5027cd8939eea445cf68fd931276e Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 11:37:10 +0200 Subject: [PATCH 11/21] refactor --- src/Type/Php/RegexArrayShapeMatcher.php | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index f902853600..59fb959adf 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -106,13 +106,14 @@ private function buildArrayType( !$wasMatched->yes(), ); - for ($i = 0; $i < count($captureGroups); $i++) { + $countGroups = count($captureGroups); + for ($i = 0; $i < $countGroups; $i++) { $captureGroup = $captureGroups[$i]; if (!$wasMatched->yes()) { $optional = true; } else { - if ($i < count($captureGroups) - $trailingOptionals) { + if ($i < $countGroups - $trailingOptionals) { $optional = false; } else { $optional = $captureGroup->isOptional(); From bea500e6b4a28eee507ff85e4bf6a50999786220 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 15:16:35 +0200 Subject: [PATCH 12/21] fix test --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 1360c9a0d4..210a682cdd 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -118,7 +118,7 @@ function doOffsetCapture(string $s): void { function doUnmatchedAsNull(string $s): void { if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { - assertType('array{string, string|null, string|null, string|null}|array{string}', $matches); + assertType('array{0: string, 1?: string|null, 2?: string|null, 3?: string|null}', $matches); } assertType('array{}|array{string, string|null, string|null, string|null}|array{string}', $matches); } From cbf42634243214297407b138d4df8272e2595a2c Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 15:19:29 +0200 Subject: [PATCH 13/21] intermediate --- src/Type/Php/RegexArrayShapeMatcher.php | 171 ++++++++++++++++++++---- src/Type/Php/RegexCapturingGroup.php | 17 ++- 2 files changed, 158 insertions(+), 30 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 59fb959adf..c9c665323c 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -7,6 +7,7 @@ use Hoa\Compiler\Llk\TreeNode; use Hoa\Exception\Exception; use Hoa\File\Read; +use PHPStan\Internal\CombinationsHelper; use PHPStan\TrinaryLogic; use PHPStan\Type\Constant\ConstantArrayType; use PHPStan\Type\Constant\ConstantArrayTypeBuilder; @@ -16,6 +17,7 @@ use PHPStan\Type\StringType; use PHPStan\Type\Type; use PHPStan\Type\TypeCombinator; +use PHPStan\Type\UnionType; use function array_reverse; use function count; use function in_array; @@ -73,30 +75,79 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was */ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type { - $captureGroups = $this->parseGroups($regex); - if ($captureGroups === null) { + $parseResult = $this->parseGroups($regex); + if ($parseResult === null) { // regex could not be parsed by Hoa/Regex return null; } + [$groupList, $combiGroupsIds] = $parseResult; $trailingOptionals = 0; - foreach (array_reverse($captureGroups) as $captureGroup) { + foreach (array_reverse($groupList) as $captureGroup) { if (!$captureGroup->isOptional()) { break; } $trailingOptionals++; } + $overallType = []; $valueType = $this->getValueType($flags ?? 0); - return $this->buildArrayType($captureGroups, $valueType, $wasMatched, $trailingOptionals); + if ( + count($groupList) === 1 + && !$groupList[0]->inAlternation() + && $wasMatched->yes() + ) { + $combiType = $this->buildArrayType( + $groupList, + $combiGroupsIds[0], + $valueType, + $wasMatched, + $trailingOptionals + ); + + $constantArrays = $combiType->getConstantArrays(); + if ($constantArrays === []) { + return $combiType; + } + + // first item in matches contains the overall match. + $overallType[] = new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]); + + foreach ($constantArrays as $constantArray) { + // same shape, but without optional keys + $overallType[] = new ConstantArrayType( + $constantArray->getKeyTypes(), + $constantArray->getValueTypes(), + ); + } + } else { + foreach($combiGroupsIds as $combiIds) { + $combiType = $this->buildArrayType( + $groupList, + $combiIds, + $valueType, + $wasMatched, + $trailingOptionals + ); + $overallType[] = $combiType; + } + } + + return TypeCombinator::union(...$overallType); } + /** + * @param list $combiIds + * @param list|null $captureGroups + */ private function buildArrayType( array $captureGroups, + array $combiIds, Type $valueType, TrinaryLogic $wasMatched, - int $trailingOptionals - ): Type { + int $trailingOptionals, + ): Type + { $builder = ConstantArrayTypeBuilder::createEmpty(); // first item in matches contains the overall match. @@ -113,7 +164,10 @@ private function buildArrayType( if (!$wasMatched->yes()) { $optional = true; } else { - if ($i < $countGroups - $trailingOptionals) { + if ( + $i < $countGroups - $trailingOptionals + || !in_array($captureGroup->getId(), $combiIds, true) + ) { $optional = false; } else { $optional = $captureGroup->isOptional(); @@ -176,7 +230,7 @@ private function getValueType(int $flags): Type } /** - * @return list|null + * @return array{list, list>}|null */ private function parseGroups(string $regex): ?array { @@ -191,54 +245,113 @@ private function parseGroups(string $regex): ?array return null; } - $capturings = []; - $this->walkRegexAst($ast, 0, 0, false, $capturings); + $capturingGroups = []; + $groupCombinations = []; + $alternationIndex = -1; + $this->walkRegexAst( + $ast, + false, + false, + false, + $alternationIndex, + 0, + $capturingGroups, + $groupCombinations + ); - return $capturings; + $allCombinations = iterator_to_array(CombinationsHelper::combinations($groupCombinations)); + $combiGroupsIds = []; + foreach($allCombinations as $combination) { + $combi = []; + foreach($combination as $groupIds) { + foreach($groupIds as $groupId) { + $combi[] = $groupId; + } + } + $combiGroupsIds[] = $combi; + } + + return [$capturingGroups, $combiGroupsIds]; } /** - * @param list $capturings + * @param list $capturingGroups */ - private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, bool $inCapturing, array &$capturings): void + private function walkRegexAst( + TreeNode $ast, + bool $inAlternation, + bool $inOptionalQuantification, + bool $inCapturing, + int &$alternationIndex, + int $combinationIndex, + array &$capturingGroups, + array &$groupCombinations + ): void { + $group = null; if ($ast->getId() === '#capturing') { - $capturings[] = RegexCapturingGroup::unnamed( - $inAlternation > 0, - $inOptionalQuantification > 0, - !$inCapturing + $group = RegexCapturingGroup::unnamed( + $inAlternation, + $inOptionalQuantification, + !$inCapturing, ); $inCapturing = true; } elseif ($ast->getId() === '#namedcapturing') { $name = $ast->getChild(0)->getValue()['value']; - $capturings[] = RegexCapturingGroup::named( + $group = RegexCapturingGroup::named( $name, - $inAlternation > 0, - $inOptionalQuantification > 0, - !$inCapturing + $inAlternation, + $inOptionalQuantification, + !$inCapturing, ); $inCapturing = true; } - if ($ast->getId() === '#alternation') { - $inAlternation++; - } - if ($ast->getId() === '#quantification') { $lastChild = $ast->getChild($ast->getChildrenNumber() - 1); $value = $lastChild->getValue(); if ($value['token'] === 'n_to_m' && str_contains($value['value'], '{0,')) { - $inOptionalQuantification++; + $inOptionalQuantification = true; } elseif ($value['token'] === 'zero_or_one') { - $inOptionalQuantification++; + $inOptionalQuantification = true; } elseif ($value['token'] === 'zero_or_more') { - $inOptionalQuantification++; + $inOptionalQuantification = true; } } + if ($ast->getId() === '#alternation') { + $alternationIndex++; + $inAlternation = true; + } + + if ($group !== null) { + $capturingGroups[] = $group; + + if (!array_key_exists($alternationIndex, $groupCombinations)) { + $groupCombinations[$alternationIndex] = []; + } + if (!array_key_exists($combinationIndex, $groupCombinations[$alternationIndex])) { + $groupCombinations[$alternationIndex][$combinationIndex] = []; + } + $groupCombinations[$alternationIndex][$combinationIndex][] = $group->getId(); + } + foreach ($ast->getChildren() as $child) { - $this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $inCapturing, $capturings); + $this->walkRegexAst( + $child, + $inAlternation, + $inOptionalQuantification, + $inCapturing, + $alternationIndex, + $combinationIndex, + $capturingGroups, + $groupCombinations, + ); + + if ($ast->getId() === '#alternation') { + $combinationIndex++; + } } } diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index 4ae4e64c75..4371b9b929 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -4,14 +4,19 @@ class RegexCapturingGroup { + private int $id; + + static private int $idGenerator = 1; private function __construct( private ?string $name, private bool $inAlternation, private bool $inOptionalQuantification, - private bool $isTopLevel + private bool $isTopLevel, ) { + $this->id = self::$idGenerator; + self::$idGenerator++; } public static function unnamed(bool $inAlternation, bool $inOptionalQuantification, bool $isTopLevel): self @@ -24,11 +29,21 @@ public static function named(string $name, bool $inAlternation, bool $inOptional return new self($name, $inAlternation, $inOptionalQuantification, $isTopLevel); } + public function getId(): int + { + return $this->id; + } + public function isOptional(): bool { return $this->inAlternation || $this->inOptionalQuantification; } + public function inAlternation(): bool + { + return $this->inAlternation; + } + public function isTopLevel(): bool { return $this->isTopLevel; From 7cce3a8cda874519512b75c0da7822049174e135 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sat, 29 Jun 2024 15:27:52 +0200 Subject: [PATCH 14/21] simplify --- src/Type/Php/RegexArrayShapeMatcher.php | 68 ++++--------------- .../Analyser/nsrt/preg_match_shapes.php | 2 +- 2 files changed, 13 insertions(+), 57 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index c9c665323c..33244bb7a8 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -75,12 +75,11 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was */ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type { - $parseResult = $this->parseGroups($regex); - if ($parseResult === null) { + $groupList = $this->parseGroups($regex); + if ($groupList === null) { // regex could not be parsed by Hoa/Regex return null; } - [$groupList, $combiGroupsIds] = $parseResult; $trailingOptionals = 0; foreach (array_reverse($groupList) as $captureGroup) { @@ -99,7 +98,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched ) { $combiType = $this->buildArrayType( $groupList, - $combiGroupsIds[0], $valueType, $wasMatched, $trailingOptionals @@ -120,20 +118,16 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $constantArray->getValueTypes(), ); } - } else { - foreach($combiGroupsIds as $combiIds) { - $combiType = $this->buildArrayType( - $groupList, - $combiIds, - $valueType, - $wasMatched, - $trailingOptionals - ); - $overallType[] = $combiType; - } + + return TypeCombinator::union(...$overallType); } - return TypeCombinator::union(...$overallType); + return $this->buildArrayType( + $groupList, + $valueType, + $wasMatched, + $trailingOptionals + ); } /** @@ -142,7 +136,6 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched */ private function buildArrayType( array $captureGroups, - array $combiIds, Type $valueType, TrinaryLogic $wasMatched, int $trailingOptionals, @@ -166,7 +159,6 @@ private function buildArrayType( } else { if ( $i < $countGroups - $trailingOptionals - || !in_array($captureGroup->getId(), $combiIds, true) ) { $optional = false; } else { @@ -246,32 +238,15 @@ private function parseGroups(string $regex): ?array } $capturingGroups = []; - $groupCombinations = []; - $alternationIndex = -1; $this->walkRegexAst( $ast, false, false, false, - $alternationIndex, - 0, - $capturingGroups, - $groupCombinations + $capturingGroups ); - $allCombinations = iterator_to_array(CombinationsHelper::combinations($groupCombinations)); - $combiGroupsIds = []; - foreach($allCombinations as $combination) { - $combi = []; - foreach($combination as $groupIds) { - foreach($groupIds as $groupId) { - $combi[] = $groupId; - } - } - $combiGroupsIds[] = $combi; - } - - return [$capturingGroups, $combiGroupsIds]; + return $capturingGroups; } /** @@ -282,10 +257,7 @@ private function walkRegexAst( bool $inAlternation, bool $inOptionalQuantification, bool $inCapturing, - int &$alternationIndex, - int $combinationIndex, array &$capturingGroups, - array &$groupCombinations ): void { $group = null; @@ -321,20 +293,11 @@ private function walkRegexAst( } if ($ast->getId() === '#alternation') { - $alternationIndex++; $inAlternation = true; } if ($group !== null) { $capturingGroups[] = $group; - - if (!array_key_exists($alternationIndex, $groupCombinations)) { - $groupCombinations[$alternationIndex] = []; - } - if (!array_key_exists($combinationIndex, $groupCombinations[$alternationIndex])) { - $groupCombinations[$alternationIndex][$combinationIndex] = []; - } - $groupCombinations[$alternationIndex][$combinationIndex][] = $group->getId(); } foreach ($ast->getChildren() as $child) { @@ -343,15 +306,8 @@ private function walkRegexAst( $inAlternation, $inOptionalQuantification, $inCapturing, - $alternationIndex, - $combinationIndex, $capturingGroups, - $groupCombinations, ); - - if ($ast->getId() === '#alternation') { - $combinationIndex++; - } } } diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 210a682cdd..f6b028cbcb 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -284,5 +284,5 @@ function allGroupsOptional(string $size): void if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } - assertType('array{string, string, string}|array{string}', $matches); + assertType('array{0: string, 1?: string, 2?: string}', $matches); } From 024b51ef72d7de4f041daa22589a0ac7b107e1a5 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:13:57 +0200 Subject: [PATCH 15/21] fix --- src/Type/Php/RegexArrayShapeMatcher.php | 71 ++++++++++++------- src/Type/Php/RegexCapturingGroup.php | 22 ++++-- .../Analyser/nsrt/preg_match_shapes.php | 13 ++-- 3 files changed, 68 insertions(+), 38 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 33244bb7a8..e29bab39f2 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -89,13 +89,17 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $trailingOptionals++; } - $overallType = []; $valueType = $this->getValueType($flags ?? 0); + $onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList); if ( - count($groupList) === 1 - && !$groupList[0]->inAlternation() - && $wasMatched->yes() + $wasMatched->yes() + && $onlyOptionalTopLevelGroup !== null ) { + // if only one top level capturing optional group exists + // we build a more precise constant union of a empty-match and a match with the group + + $onlyOptionalTopLevelGroup->removeOptionalQualification(); + $combiType = $this->buildArrayType( $groupList, $valueType, @@ -103,23 +107,10 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $trailingOptionals ); - $constantArrays = $combiType->getConstantArrays(); - if ($constantArrays === []) { - return $combiType; - } - - // first item in matches contains the overall match. - $overallType[] = new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]); - - foreach ($constantArrays as $constantArray) { - // same shape, but without optional keys - $overallType[] = new ConstantArrayType( - $constantArray->getKeyTypes(), - $constantArray->getValueTypes(), - ); - } - - return TypeCombinator::union(...$overallType); + return TypeCombinator::union( + new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), + $combiType + ); } return $this->buildArrayType( @@ -130,6 +121,31 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched ); } + /** + * @param list $captureGroups + */ + private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup + { + $group = null; + foreach ($captureGroups as $captureGroup) { + if (!$captureGroup->isTopLevel()) { + continue; + } + + if (!$captureGroup->isOptional()) { + return null; + } + + if ($group !== null) { + return null; + } + + $group = $captureGroup; + } + + return $group; + } + /** * @param list $combiIds * @param list|null $captureGroups @@ -242,7 +258,7 @@ private function parseGroups(string $regex): ?array $ast, false, false, - false, + null, $capturingGroups ); @@ -256,7 +272,7 @@ private function walkRegexAst( TreeNode $ast, bool $inAlternation, bool $inOptionalQuantification, - bool $inCapturing, + ?RegexCapturingGroup $inCapturing, array &$capturingGroups, ): void { @@ -265,20 +281,21 @@ private function walkRegexAst( $group = RegexCapturingGroup::unnamed( $inAlternation, $inOptionalQuantification, - !$inCapturing, + $inCapturing ); - $inCapturing = true; + $inCapturing = $group; } elseif ($ast->getId() === '#namedcapturing') { $name = $ast->getChild(0)->getValue()['value']; $group = RegexCapturingGroup::named( $name, $inAlternation, $inOptionalQuantification, - !$inCapturing, + $inCapturing ); - $inCapturing = true; + $inCapturing = $group; } + $inOptionalQuantification = false; if ($ast->getId() === '#quantification') { $lastChild = $ast->getChild($ast->getChildrenNumber() - 1); $value = $lastChild->getValue(); diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index 4371b9b929..9ccfa362ec 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -12,21 +12,26 @@ private function __construct( private ?string $name, private bool $inAlternation, private bool $inOptionalQuantification, - private bool $isTopLevel, + private ?RegexCapturingGroup $parent, ) { $this->id = self::$idGenerator; self::$idGenerator++; } - public static function unnamed(bool $inAlternation, bool $inOptionalQuantification, bool $isTopLevel): self + public static function unnamed(bool $inAlternation, bool $inOptionalQuantification, ?RegexCapturingGroup $parent): self { - return new self(null, $inAlternation, $inOptionalQuantification, $isTopLevel); + return new self(null, $inAlternation, $inOptionalQuantification, $parent); } - public static function named(string $name, bool $inAlternation, bool $inOptionalQuantification, bool $isTopLevel): self + public static function named(string $name, bool $inAlternation, bool $inOptionalQuantification, ?RegexCapturingGroup $parent): self { - return new self($name, $inAlternation, $inOptionalQuantification, $isTopLevel); + return new self($name, $inAlternation, $inOptionalQuantification, $parent); + } + + public function removeOptionalQualification(): void + { + $this->inOptionalQuantification = false; } public function getId(): int @@ -36,7 +41,10 @@ public function getId(): int public function isOptional(): bool { - return $this->inAlternation || $this->inOptionalQuantification; + return + $this->inAlternation + || $this->inOptionalQuantification + || ($this->parent !== null && $this->parent->isOptional()); } public function inAlternation(): bool @@ -46,7 +54,7 @@ public function inAlternation(): bool public function isTopLevel(): bool { - return $this->isTopLevel; + return $this->parent === null; } /** @phpstan-assert-if-true !null $this->getName() */ diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index f6b028cbcb..52878bc521 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -104,9 +104,9 @@ function doNamedSubpattern(string $s): void { assertType('array{}|array{0: string, name: string, 1: string}', $matches); if (preg_match('/^(?\S+::\S+)(?:(? with data set (?:#\d+|"[^"]+"))\s\()?/', $s, $matches)) { - assertType('array{0: string, name: string, 1: string, dataname?: string, 2?: string}', $matches); + assertType('array{0: string, name: string, 1: string, dataname: string, 2: string}', $matches); } - assertType('array{}|array{0: string, name: string, 1: string, dataname?: string, 2?: string}', $matches); + assertType('array{}|array{0: string, name: string, 1: string, dataname: string, 2: string}', $matches); } function doOffsetCapture(string $s): void { @@ -120,7 +120,7 @@ function doUnmatchedAsNull(string $s): void { if (preg_match('/(foo)?(bar)?(baz)?/', $s, $matches, PREG_UNMATCHED_AS_NULL)) { assertType('array{0: string, 1?: string|null, 2?: string|null, 3?: string|null}', $matches); } - assertType('array{}|array{string, string|null, string|null, string|null}|array{string}', $matches); + assertType('array{}|array{0: string, 1?: string|null, 2?: string|null, 3?: string|null}', $matches); } function doUnknownFlags(string $s, int $flags): void { @@ -252,7 +252,7 @@ function doFoo(string $row): void assertType('array{0: string, 1: string, 2?: string}', $matches); } if (preg_match('~^(a(b)?)?$~', $row, $matches) === 1) { - assertType('array{string, string, string}|array{string}', $matches); + assertType('array{0: string, 1?: string, 2?: string}', $matches); } } @@ -281,6 +281,11 @@ function allGroupsOptional(string $size): void } assertType('array{string, string, string}|array{string}', $matches); + if (preg_match('~^a\.b(c(\d+))d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{string, string, string}', $matches); + if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } From 827e266d8c4dfbb97b56d4e864e15810b18e1ab1 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:18:50 +0200 Subject: [PATCH 16/21] fix --- src/Type/Php/RegexArrayShapeMatcher.php | 19 ++++++++----------- src/Type/Php/RegexCapturingGroup.php | 6 +++--- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index e29bab39f2..e798b58281 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -7,7 +7,6 @@ use Hoa\Compiler\Llk\TreeNode; use Hoa\Exception\Exception; use Hoa\File\Read; -use PHPStan\Internal\CombinationsHelper; use PHPStan\TrinaryLogic; use PHPStan\Type\Constant\ConstantArrayType; use PHPStan\Type\Constant\ConstantArrayTypeBuilder; @@ -17,7 +16,6 @@ use PHPStan\Type\StringType; use PHPStan\Type\Type; use PHPStan\Type\TypeCombinator; -use PHPStan\Type\UnionType; use function array_reverse; use function count; use function in_array; @@ -104,12 +102,12 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $groupList, $valueType, $wasMatched, - $trailingOptionals + $trailingOptionals, ); return TypeCombinator::union( new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), - $combiType + $combiType, ); } @@ -117,7 +115,7 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched $groupList, $valueType, $wasMatched, - $trailingOptionals + $trailingOptionals, ); } @@ -147,8 +145,7 @@ private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCaptu } /** - * @param list $combiIds - * @param list|null $captureGroups + * @param list $captureGroups */ private function buildArrayType( array $captureGroups, @@ -238,7 +235,7 @@ private function getValueType(int $flags): Type } /** - * @return array{list, list>}|null + * @return list|null */ private function parseGroups(string $regex): ?array { @@ -259,7 +256,7 @@ private function parseGroups(string $regex): ?array false, false, null, - $capturingGroups + $capturingGroups, ); return $capturingGroups; @@ -281,7 +278,7 @@ private function walkRegexAst( $group = RegexCapturingGroup::unnamed( $inAlternation, $inOptionalQuantification, - $inCapturing + $inCapturing, ); $inCapturing = $group; } elseif ($ast->getId() === '#namedcapturing') { @@ -290,7 +287,7 @@ private function walkRegexAst( $name, $inAlternation, $inOptionalQuantification, - $inCapturing + $inCapturing, ); $inCapturing = $group; } diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index 9ccfa362ec..bd7dcfadae 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -4,9 +4,10 @@ class RegexCapturingGroup { + private int $id; - static private int $idGenerator = 1; + private static int $idGenerator = 1; private function __construct( private ?string $name, @@ -41,8 +42,7 @@ public function getId(): int public function isOptional(): bool { - return - $this->inAlternation + return $this->inAlternation || $this->inOptionalQuantification || ($this->parent !== null && $this->parent->isOptional()); } From ec604feb37b385c1c91b0543f026561743d9f573 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:21:51 +0200 Subject: [PATCH 17/21] simplify --- src/Type/Php/RegexCapturingGroup.php | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index bd7dcfadae..b914bec640 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -5,10 +5,6 @@ class RegexCapturingGroup { - private int $id; - - private static int $idGenerator = 1; - private function __construct( private ?string $name, private bool $inAlternation, @@ -16,8 +12,6 @@ private function __construct( private ?RegexCapturingGroup $parent, ) { - $this->id = self::$idGenerator; - self::$idGenerator++; } public static function unnamed(bool $inAlternation, bool $inOptionalQuantification, ?RegexCapturingGroup $parent): self @@ -35,11 +29,6 @@ public function removeOptionalQualification(): void $this->inOptionalQuantification = false; } - public function getId(): int - { - return $this->id; - } - public function isOptional(): bool { return $this->inAlternation @@ -47,11 +36,6 @@ public function isOptional(): bool || ($this->parent !== null && $this->parent->isOptional()); } - public function inAlternation(): bool - { - return $this->inAlternation; - } - public function isTopLevel(): bool { return $this->parent === null; From f468b68d288027bd6c73cb6c6ad415414cec3b4a Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:27:54 +0200 Subject: [PATCH 18/21] cs --- src/Type/Php/RegexArrayShapeMatcher.php | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index e798b58281..939ebea229 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -170,9 +170,7 @@ private function buildArrayType( if (!$wasMatched->yes()) { $optional = true; } else { - if ( - $i < $countGroups - $trailingOptionals - ) { + if ($i < $countGroups - $trailingOptionals) { $optional = false; } else { $optional = $captureGroup->isOptional(); From 2abf16fe5494e4c74c8cddb752b11a5623efce34 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:40:26 +0200 Subject: [PATCH 19/21] handle optional non-capturing groups --- src/Type/Php/RegexArrayShapeMatcher.php | 20 ++++++++----- src/Type/Php/RegexCapturingGroup.php | 15 ++++++++-- src/Type/Php/RegexNonCapturingGroup.php | 29 +++++++++++++++++++ .../Analyser/nsrt/preg_match_shapes.php | 4 +-- 4 files changed, 56 insertions(+), 12 deletions(-) create mode 100644 src/Type/Php/RegexNonCapturingGroup.php diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index 939ebea229..9eba6560d6 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -267,7 +267,7 @@ private function walkRegexAst( TreeNode $ast, bool $inAlternation, bool $inOptionalQuantification, - ?RegexCapturingGroup $inCapturing, + RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup, array &$capturingGroups, ): void { @@ -276,18 +276,24 @@ private function walkRegexAst( $group = RegexCapturingGroup::unnamed( $inAlternation, $inOptionalQuantification, - $inCapturing, + $parentGroup, ); - $inCapturing = $group; + $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { $name = $ast->getChild(0)->getValue()['value']; $group = RegexCapturingGroup::named( $name, $inAlternation, $inOptionalQuantification, - $inCapturing, + $parentGroup, ); - $inCapturing = $group; + $parentGroup = $group; + } elseif ($ast->getId() === '#noncapturing') { + $group = RegexNonCapturingGroup::create( + $inOptionalQuantification, + $parentGroup, + ); + $parentGroup = $group; } $inOptionalQuantification = false; @@ -308,7 +314,7 @@ private function walkRegexAst( $inAlternation = true; } - if ($group !== null) { + if ($group instanceof RegexCapturingGroup) { $capturingGroups[] = $group; } @@ -317,7 +323,7 @@ private function walkRegexAst( $child, $inAlternation, $inOptionalQuantification, - $inCapturing, + $parentGroup, $capturingGroups, ); } diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index b914bec640..9e5885564f 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -9,17 +9,26 @@ private function __construct( private ?string $name, private bool $inAlternation, private bool $inOptionalQuantification, - private ?RegexCapturingGroup $parent, + private RegexCapturingGroup|RegexNonCapturingGroup|null $parent, ) { } - public static function unnamed(bool $inAlternation, bool $inOptionalQuantification, ?RegexCapturingGroup $parent): self + public static function unnamed( + bool $inAlternation, + bool $inOptionalQuantification, + RegexCapturingGroup|RegexNonCapturingGroup|null $parent, + ): self { return new self(null, $inAlternation, $inOptionalQuantification, $parent); } - public static function named(string $name, bool $inAlternation, bool $inOptionalQuantification, ?RegexCapturingGroup $parent): self + public static function named( + string $name, + bool $inAlternation, + bool $inOptionalQuantification, + RegexCapturingGroup|RegexNonCapturingGroup|null $parent, + ): self { return new self($name, $inAlternation, $inOptionalQuantification, $parent); } diff --git a/src/Type/Php/RegexNonCapturingGroup.php b/src/Type/Php/RegexNonCapturingGroup.php new file mode 100644 index 0000000000..f897e0d26a --- /dev/null +++ b/src/Type/Php/RegexNonCapturingGroup.php @@ -0,0 +1,29 @@ +inOptionalQuantification + || ($this->parent !== null && $this->parent->isOptional()); + } + +} diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 52878bc521..14e028300f 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -104,9 +104,9 @@ function doNamedSubpattern(string $s): void { assertType('array{}|array{0: string, name: string, 1: string}', $matches); if (preg_match('/^(?\S+::\S+)(?:(? with data set (?:#\d+|"[^"]+"))\s\()?/', $s, $matches)) { - assertType('array{0: string, name: string, 1: string, dataname: string, 2: string}', $matches); + assertType('array{0: string, name: string, 1: string, dataname?: string, 2?: string}', $matches); } - assertType('array{}|array{0: string, name: string, 1: string, dataname: string, 2: string}', $matches); + assertType('array{}|array{0: string, name: string, 1: string, dataname?: string, 2?: string}', $matches); } function doOffsetCapture(string $s): void { From f199130fd5839dd9f0180e3890ce474756c02bf3 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:44:02 +0200 Subject: [PATCH 20/21] tests --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 14e028300f..afe1d981cc 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -274,13 +274,23 @@ function doFoo3(string $row): void assertType('array{string, string, string, string, string, string, string}', $matches); } -function allGroupsOptional(string $size): void +function groupsOptional(string $size): void { if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } assertType('array{string, string, string}|array{string}', $matches); + if (preg_match('~^a\.b(c(\d+)?)d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{0: string, 1: string, 2?: string}', $matches); + + if (preg_match('~^a\.b(c(\d+)?)?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{0: string, 1?: string, 2?: string}', $matches); + if (preg_match('~^a\.b(c(\d+))d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } From 379e99f3e633ce5cc9553f4d339bcde0ddc0a34f Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Sun, 30 Jun 2024 12:55:58 +0200 Subject: [PATCH 21/21] Update preg_match_shapes.php --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index afe1d981cc..dfc7eaedc6 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -276,6 +276,11 @@ function doFoo3(string $row): void function groupsOptional(string $size): void { + if (preg_match('~^a\.b(c(\d+)(\d+)(\s+))?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{string, string, string, string, string}|array{string}', $matches); + if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); }