Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
163 changes: 134 additions & 29 deletions src/Type/Php/RegexArrayShapeMatcher.php
Original file line number Diff line number Diff line change
Expand Up @@ -73,14 +73,88 @@ public function matchType(Type $patternType, ?Type $flagsType, TrinaryLogic $was
*/
private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched): ?Type
{
$captureGroups = $this->parseGroups($regex);
if ($captureGroups === null) {
$groupList = $this->parseGroups($regex);
if ($groupList === null) {
// regex could not be parsed by Hoa/Regex
return null;
}

$builder = ConstantArrayTypeBuilder::createEmpty();
$trailingOptionals = 0;
foreach (array_reverse($groupList) as $captureGroup) {
if (!$captureGroup->isOptional()) {
break;
}
$trailingOptionals++;
}

$valueType = $this->getValueType($flags ?? 0);
$onlyOptionalTopLevelGroup = $this->getOnlyOptionalTopLevelGroup($groupList);
if (
$wasMatched->yes()
&& $onlyOptionalTopLevelGroup !== null
) {
// if exactly one top-level capturing group exists and it is optional,
// we build a more precise constant union of an empty match and a match with the group
Comment on lines +96 to +97
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am limiting this to this specific case for now, because whether $matches contains an offset or not depends on many factors. I already did this for more cases, but it was easily breaking stuff.

I will bring back more sophisticated stuff in future PRs.


$onlyOptionalTopLevelGroup->removeOptionalQualification();

$combiType = $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
);

return TypeCombinator::union(
new ConstantArrayType([new ConstantIntegerType(0)], [new StringType()]),
$combiType,
);
}

return $this->buildArrayType(
$groupList,
$valueType,
$wasMatched,
$trailingOptionals,
);
}

/**
 * Finds the one top-level capturing group of the pattern, provided it is optional.
 *
 * Nested groups (those that are not top-level) are ignored. The result is null
 * when there is no top-level group, when any top-level group is not optional,
 * or when more than one top-level group exists.
 *
 * @param list<RegexCapturingGroup> $captureGroups
 */
private function getOnlyOptionalTopLevelGroup(array $captureGroups): ?RegexCapturingGroup
{
    $candidate = null;

    foreach ($captureGroups as $current) {
        if ($current->isTopLevel()) {
            // A mandatory top-level group, or a second optional one, rules out the special case.
            if (!$current->isOptional() || $candidate !== null) {
                return null;
            }

            $candidate = $current;
        }
    }

    return $candidate;
}

/**
* @param list<RegexCapturingGroup> $captureGroups
*/
private function buildArrayType(
array $captureGroups,
Type $valueType,
TrinaryLogic $wasMatched,
int $trailingOptionals,
): Type
{
$builder = ConstantArrayTypeBuilder::createEmpty();

// first item in matches contains the overall match.
$builder->setOffsetValueType(
Expand All @@ -89,21 +163,14 @@ private function matchRegex(string $regex, ?int $flags, TrinaryLogic $wasMatched
!$wasMatched->yes(),
);

$trailingOptionals = 0;
foreach (array_reverse($captureGroups) as $captureGroup) {
if (!$captureGroup->isOptional()) {
break;
}
$trailingOptionals++;
}

for ($i = 0; $i < count($captureGroups); $i++) {
$countGroups = count($captureGroups);
for ($i = 0; $i < $countGroups; $i++) {
$captureGroup = $captureGroups[$i];

if (!$wasMatched->yes()) {
$optional = true;
} else {
if ($i < count($captureGroups) - $trailingOptionals) {
if ($i < $countGroups - $trailingOptionals) {
$optional = false;
} else {
$optional = $captureGroup->isOptional();
Expand Down Expand Up @@ -181,46 +248,84 @@ private function parseGroups(string $regex): ?array
return null;
}

$capturings = [];
$this->walkRegexAst($ast, 0, 0, $capturings);
$capturingGroups = [];
$this->walkRegexAst(
$ast,
false,
false,
null,
$capturingGroups,
);

return $capturings;
return $capturingGroups;
}

/**
* @param list<RegexCapturingGroup> $capturings
* @param list<RegexCapturingGroup> $capturingGroups
*/
private function walkRegexAst(TreeNode $ast, int $inAlternation, int $inOptionalQuantification, array &$capturings): void
private function walkRegexAst(
TreeNode $ast,
bool $inAlternation,
bool $inOptionalQuantification,
RegexCapturingGroup|RegexNonCapturingGroup|null $parentGroup,
array &$capturingGroups,
): void
{
$group = null;
if ($ast->getId() === '#capturing') {
$capturings[] = RegexCapturingGroup::unnamed($inAlternation > 0 || $inOptionalQuantification > 0);
$group = RegexCapturingGroup::unnamed(
$inAlternation,
$inOptionalQuantification,
$parentGroup,
);
$parentGroup = $group;
} elseif ($ast->getId() === '#namedcapturing') {
$name = $ast->getChild(0)->getValue()['value'];
$capturings[] = RegexCapturingGroup::named(
$group = RegexCapturingGroup::named(
$name,
$inAlternation > 0 || $inOptionalQuantification > 0,
$inAlternation,
$inOptionalQuantification,
$parentGroup,
);
$parentGroup = $group;
} elseif ($ast->getId() === '#noncapturing') {
$group = RegexNonCapturingGroup::create(
$inOptionalQuantification,
$parentGroup,
);
$parentGroup = $group;
}

if ($ast->getId() === '#alternation') {
$inAlternation++;
}

$inOptionalQuantification = false;
if ($ast->getId() === '#quantification') {
$lastChild = $ast->getChild($ast->getChildrenNumber() - 1);
$value = $lastChild->getValue();

if ($value['token'] === 'n_to_m' && str_contains($value['value'], '{0,')) {
$inOptionalQuantification++;
$inOptionalQuantification = true;
} elseif ($value['token'] === 'zero_or_one') {
$inOptionalQuantification++;
$inOptionalQuantification = true;
} elseif ($value['token'] === 'zero_or_more') {
$inOptionalQuantification++;
$inOptionalQuantification = true;
}
}

if ($ast->getId() === '#alternation') {
$inAlternation = true;
}

if ($group instanceof RegexCapturingGroup) {
$capturingGroups[] = $group;
}

foreach ($ast->getChildren() as $child) {
$this->walkRegexAst($child, $inAlternation, $inOptionalQuantification, $capturings);
$this->walkRegexAst(
$child,
$inAlternation,
$inOptionalQuantification,
$parentGroup,
$capturingGroups,
);
}
}

Expand Down
38 changes: 32 additions & 6 deletions src/Type/Php/RegexCapturingGroup.php
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,49 @@
class RegexCapturingGroup
{

private function __construct(private ?string $name, private bool $optional)
private function __construct(
private ?string $name,
private bool $inAlternation,
private bool $inOptionalQuantification,
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
)
{
}

public static function unnamed(bool $optional): self
public static function unnamed(
bool $inAlternation,
bool $inOptionalQuantification,
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
): self
{
return new self(null, $optional);
return new self(null, $inAlternation, $inOptionalQuantification, $parent);
}

public static function named(string $name, bool $optional): self
public static function named(
string $name,
bool $inAlternation,
bool $inOptionalQuantification,
RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
): self
{
return new self($name, $optional);
return new self($name, $inAlternation, $inOptionalQuantification, $parent);
}

public function removeOptionalQualification(): void
{
$this->inOptionalQuantification = false;
}

public function isOptional(): bool
{
return $this->optional;
return $this->inAlternation
|| $this->inOptionalQuantification
|| ($this->parent !== null && $this->parent->isOptional());
}

public function isTopLevel(): bool
{
return $this->parent === null;
}

/** @phpstan-assert-if-true !null $this->getName() */
Expand Down
29 changes: 29 additions & 0 deletions src/Type/Php/RegexNonCapturingGroup.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<?php declare(strict_types = 1);

namespace PHPStan\Type\Php;

class RegexNonCapturingGroup
Copy link
Contributor Author

@staabm staabm Jun 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The optional-group detection now also takes non-capturing groups into account. These do not capture results themselves, but they might impact nested capturing groups.

{

/**
 * Private so that instances are only built through the static create() factory.
 *
 * @param bool $inOptionalQuantification flag consulted by isOptional() for this group itself
 * @param RegexCapturingGroup|RegexNonCapturingGroup|null $parent enclosing group whose optionality is inherited, or null when there is none
 */
private function __construct(
private bool $inOptionalQuantification,
private RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
)
{
}

/**
 * Named constructor for a non-capturing group.
 *
 * @param bool $inOptionalQuantification whether the group itself is flagged as optionally quantified
 * @param RegexCapturingGroup|RegexNonCapturingGroup|null $parent enclosing group, or null when there is none
 */
public static function create(
    bool $inOptionalQuantification,
    RegexCapturingGroup|RegexNonCapturingGroup|null $parent,
): self
{
    $group = new self($inOptionalQuantification, $parent);

    return $group;
}

/**
 * Tells whether this group counts as optional.
 *
 * A group is optional when its own optional-quantification flag is set, or
 * when it has a parent group that reports itself as optional.
 */
public function isOptional(): bool
{
    if ($this->inOptionalQuantification) {
        return true;
    }

    if ($this->parent === null) {
        return false;
    }

    // Optionality is inherited from the enclosing group.
    return $this->parent->isOptional();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -273,3 +273,36 @@ function doFoo3(string $row): void

assertType('array{string, string, string, string, string, string, string}', $matches);
}

/**
 * Type-inference fixture for nested and optional capturing groups.
 *
 * Each case runs preg_match(), throws unless it returned 1, and then asserts
 * the inferred shape of $matches for the successful-match path.
 */
function groupsOptional(string $size): void
{
// One optional top-level group wrapping three nested groups:
// expected shape is either all groups present or only the full match.
if (preg_match('~^a\.b(c(\d+)(\d+)(\s+))?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, string, string, string, string}|array{string}', $matches);

// One optional top-level group with a single nested group: same union form.
if (preg_match('~^a\.b(c(\d+))?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, string, string}|array{string}', $matches);

// Mandatory outer group, optional nested group: only offset 2 is optional.
if (preg_match('~^a\.b(c(\d+)?)d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1: string, 2?: string}', $matches);

// Optional outer group and optional nested group: offsets 1 and 2 are both optional.
if (preg_match('~^a\.b(c(\d+)?)?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1?: string, 2?: string}', $matches);

// No optional groups at all: every offset is present.
if (preg_match('~^a\.b(c(\d+))d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, string, string}', $matches);

// Two optional top-level groups: no union is expected, both offsets are optional.
if (preg_match('~^a\.(b)?(c)?d~', $size, $matches) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{0: string, 1?: string, 2?: string}', $matches);
}