diff --git a/src/Type/Php/RegexArrayShapeMatcher.php b/src/Type/Php/RegexArrayShapeMatcher.php index bee04770b4..9eba6560d6 100644 --- a/src/Type/Php/RegexArrayShapeMatcher.php +++ b/src/Type/Php/RegexArrayShapeMatcher.php @@ -73,14 +73,88 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was */ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type { - $captureGroups = $this->parseGroups($regex); - if ($captureGroups === null) { + $groupList = $this->parseGroups($regex); + if ($groupList === null) { // regex could not be parsed by Hoa/Regex return null; } - $builder = ConstantArrayTypeBuilder::createEmpty(); + $trailingOptionals = 0; + foreach (array_reverse($groupList) as $captureGroup) { + if (!$captureGroup->isOptional()) { + break; + } + $trailingOptionals++; + } + $valueType = $this->getValueType($flags ?? 0); + $onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList); + if ( + $wasMatched->yes() + && $onlyOptionalTopLevelGroup !== null + ) { + // if only one top level capturing optional group exists + // we build a more precise constant union of an empty-match and a match with the group + + $onlyOptionalTopLevelGroup->removeOptionalQualification(); + + $combiType = $this->buildArrayType( + $groupList, + $valueType, + $wasMatched, + $trailingOptionals, + ); + + return TypeCombinator::union( + new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]), + $combiType, + ); + } + + return $this->buildArrayType( + $groupList, + $valueType, + $wasMatched, + $trailingOptionals, + ); + } + + /** + * @param list $captureGroups + */ + private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup + { + $group = null; + foreach ($captureGroups as $captureGroup) { + if (!$captureGroup->isTopLevel()) { + continue; + } + + if (!$captureGroup->isOptional()) { + return null; + } + + if ($group !== null) { + return null; + } + + $group = $captureGroup; + 
} + + return $group; + } + + /** + * @param list $captureGroups + */ + private function buildArrayType( + array $captureGroups, + Type $valueType, + TrinaryLogic $wasMatched, + int $trailingOptionals, + ): Type + { + $builder = ConstantArrayTypeBuilder::createEmpty(); // first item in matches contains the overall match. $builder->setOffsetValueType( @@ -89,21 +163,14 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched !$wasMatched->yes(), ); - $trailingOptionals = 0; - foreach (array_reverse($captureGroups) as $captureGroup) { - if (!$captureGroup->isOptional()) { - break; - } - $trailingOptionals++; - } - - for ($i = 0; $i < count($captureGroups); $i++) { + $countGroups = count($captureGroups); + for ($i = 0; $i < $countGroups; $i++) { $captureGroup = $captureGroups[$i]; if (!$wasMatched->yes()) { $optional = true; } else { - if ($i < count($captureGroups) - $trailingOptionals) { + if ($i < $countGroups - $trailingOptionals) { $optional = false; } else { $optional = $captureGroup->isOptional(); @@ -181,46 +248,84 @@ private function parseGroups(string $regex): ?array return null; } - $capturings = []; - $this->walkRegexAst($ast, 0, 0, $capturings); + $capturingGroups = []; + $this->walkRegexAst( + $ast, + false, + false, + null, + $capturingGroups, + ); - return $capturings; + return $capturingGroups; } /** - * @param list $capturings + * @param list $capturingGroups */ - private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, array &$capturings): void + private function walkRegexAst( + TreeNode $ast, + bool $inAlternation, + bool $inOptionalQuantification, + RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup, + array &$capturingGroups, + ): void { + $group = null; if ($ast->getId() === '#capturing') { - $capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0 || $inOptionalQuantification > 0); + $group = RegexCapturingGroup::unnamed( + $inAlternation, + 
$inOptionalQuantification, + $parentGroup, + ); + $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { $name = $ast->getChild(0)->getValue()['value']; - $capturings[] = RegexCapturingGroup::named( + $group = RegexCapturingGroup::named( $name, - $inAlternation > 0 || $inOptionalQuantification > 0, + $inAlternation, + $inOptionalQuantification, + $parentGroup, ); + $parentGroup = $group; + } elseif ($ast->getId() === '#noncapturing') { + $group = RegexNonCapturingGroup::create( + $inOptionalQuantification, + $parentGroup, + ); + $parentGroup = $group; } - if ($ast->getId() === '#alternation') { - $inAlternation++; - } - + $inOptionalQuantification = false; if ($ast->getId() === '#quantification') { $lastChild = $ast->getChild($ast->getChildrenNumber() - 1); $value = $lastChild->getValue(); if ($value['token'] === 'n_to_m' && str_contains($value['value'], '{0,')) { - $inOptionalQuantification++; + $inOptionalQuantification = true; } elseif ($value['token'] === 'zero_or_one') { - $inOptionalQuantification++; + $inOptionalQuantification = true; } elseif ($value['token'] === 'zero_or_more') { - $inOptionalQuantification++; + $inOptionalQuantification = true; } } + if ($ast->getId() === '#alternation') { + $inAlternation = true; + } + + if ($group instanceof RegexCapturingGroup) { + $capturingGroups[] = $group; + } + foreach ($ast->getChildren() as $child) { - $this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $capturings); + $this->walkRegexAst( + $child, + $inAlternation, + $inOptionalQuantification, + $parentGroup, + $capturingGroups, + ); } } diff --git a/src/Type/Php/RegexCapturingGroup.php b/src/Type/Php/RegexCapturingGroup.php index bc0b6bfdb0..9e5885564f 100644 --- a/src/Type/Php/RegexCapturingGroup.php +++ b/src/Type/Php/RegexCapturingGroup.php @@ -5,23 +5,49 @@ class RegexCapturingGroup { - private function __construct(private ?string $name, private bool $optional) + private function __construct( + private ?string $name, + 
private bool $inAlternation, + private bool $inOptionalQuantification, + private RegexCapturingGroup|RegexNonCapturingGroup|null $parent, + ) { } - public static function unnamed(bool $optional): self + public static function unnamed( + bool $inAlternation, + bool $inOptionalQuantification, + RegexCapturingGroup|RegexNonCapturingGroup|null $parent, + ): self { - return new self(null, $optional); + return new self(null, $inAlternation, $inOptionalQuantification, $parent); } - public static function named(string $name, bool $optional): self + public static function named( + string $name, + bool $inAlternation, + bool $inOptionalQuantification, + RegexCapturingGroup|RegexNonCapturingGroup|null $parent, + ): self { - return new self($name, $optional); + return new self($name, $inAlternation, $inOptionalQuantification, $parent); + } + + public function removeOptionalQualification(): void + { + $this->inOptionalQuantification = false; } public function isOptional(): bool { - return $this->optional; + return $this->inAlternation + || $this->inOptionalQuantification + || ($this->parent !== null && $this->parent->isOptional()); + } + + public function isTopLevel(): bool + { + return $this->parent === null; } /** @phpstan-assert-if-true !null $this->getName() */ diff --git a/src/Type/Php/RegexNonCapturingGroup.php b/src/Type/Php/RegexNonCapturingGroup.php new file mode 100644 index 0000000000..f897e0d26a --- /dev/null +++ b/src/Type/Php/RegexNonCapturingGroup.php @@ -0,0 +1,29 @@ +inOptionalQuantification + || ($this->parent !== null && $this->parent->isOptional()); + } + +} diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php similarity index 88% rename from tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php rename to tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 3d7194d994..dfc7eaedc6 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes_php.php +++ 
b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -273,3 +273,36 @@ function doFoo3(string $row): void assertType('array{string, string, string, string, string, string, string}', $matches); } + +function groupsOptional(string $size): void +{ + if (preg_match('~^a\.b(c(\d+)(\d+)(\s+))?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{string, string, string, string, string}|array{string}', $matches); + + if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{string, string, string}|array{string}', $matches); + + if (preg_match('~^a\.b(c(\d+)?)d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{0: string, 1: string, 2?: string}', $matches); + + if (preg_match('~^a\.b(c(\d+)?)?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{0: string, 1?: string, 2?: string}', $matches); + + if (preg_match('~^a\.b(c(\d+))d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{string, string, string}', $matches); + + if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) { + throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); + } + assertType('array{0: string, 1?: string, 2?: string}', $matches); +}