diff --git a/composer.json b/composer.json index f13429c..9583281 100644 --- a/composer.json +++ b/composer.json @@ -11,7 +11,10 @@ }, "autoload-dev": { "psr-4": { - "Flyfinder\\": ["tests/unit/"] + "Flyfinder\\": [ + "tests/integration/", + "tests/unit/" + ] } }, "require": { diff --git a/phpcs.xml.dist b/phpcs.xml.dist index d9873c7..ef52a2c 100644 --- a/phpcs.xml.dist +++ b/phpcs.xml.dist @@ -17,12 +17,15 @@ */src/Finder.php */src/Specification/SpecificationInterface.php + */src/Specification/PrunableInterface.php + */src/Specification/CompositeSpecification.php */tests/unit/Specification/GlobTest.php */src/Specification/SpecificationInterface.php + */src/Specification/PrunableInterface.php diff --git a/src/Finder.php b/src/Finder.php index 218002c..6f77363 100644 --- a/src/Finder.php +++ b/src/Finder.php @@ -13,6 +13,7 @@ namespace Flyfinder; +use Flyfinder\Specification\CompositeSpecification; use Flyfinder\Specification\SpecificationInterface; use Generator; use League\Flysystem\File; @@ -88,13 +89,13 @@ private function yieldFilesInPath(SpecificationInterface $specification, string yield $location; } - if ($location['type'] !== 'dir') { + if ($location['type'] !== 'dir' + || !CompositeSpecification::thatCanBeSatisfiedBySomethingBelow($specification, $location) + ) { continue; } - foreach ($this->yieldFilesInPath($specification, $location['path']) as $returnedLocation) { - yield $returnedLocation; - } + yield from $this->yieldFilesInPath($specification, $location['path']); } } } diff --git a/src/Specification/AndSpecification.php b/src/Specification/AndSpecification.php index 75ca6a8..40a3628 100644 --- a/src/Specification/AndSpecification.php +++ b/src/Specification/AndSpecification.php @@ -42,4 +42,18 @@ public function isSatisfiedBy(array $value) : bool { return $this->one->isSatisfiedBy($value) && $this->other->isSatisfiedBy($value); } + + /** {@inheritDoc} */ + public function canBeSatisfiedBySomethingBelow(array $value) : bool + { + return self::thatCanBeSatisfiedBySomethingBelow($this->one, $value) + && self::thatCanBeSatisfiedBySomethingBelow($this->other, $value); + } + + /** {@inheritDoc} */ + public function willBeSatisfiedByEverythingBelow(array $value) : bool + { + return self::thatWillBeSatisfiedByEverythingBelow($this->one, $value) + && self::thatWillBeSatisfiedByEverythingBelow($this->other, $value); + } } diff --git a/src/Specification/CompositeSpecification.php b/src/Specification/CompositeSpecification.php index e1829c4..97bbf77 100644 --- a/src/Specification/CompositeSpecification.php +++ b/src/Specification/CompositeSpecification.php @@ -19,7 +19,7 @@ * * @psalm-immutable */ -abstract class CompositeSpecification implements SpecificationInterface +abstract class CompositeSpecification implements SpecificationInterface, PrunableInterface { /** * Returns a specification that satisfies the original specification @@ -47,4 +47,47 @@ public function notSpecification() : NotSpecification { return new NotSpecification($this); } + + /** {@inheritDoc} */ + public function canBeSatisfiedBySomethingBelow(array $value) : bool + { + return true; + } + + /** {@inheritDoc} */ + public function willBeSatisfiedByEverythingBelow(array $value) : bool + { + return false; + } + + /** + * Provide default {@see canBeSatisfiedBySomethingBelow()} logic for specification classes + * that don't implement PrunableInterface + * + * @param mixed[] $value + * + * @psalm-param array{basename: string, path: string, stream: resource, dirname: string, type: string, extension: string} $value + * @psalm-mutation-free + */ + public static function thatCanBeSatisfiedBySomethingBelow(SpecificationInterface $that, array $value) : bool + { + return $that instanceof PrunableInterface + ? $that->canBeSatisfiedBySomethingBelow($value) + : true; + } + + /** + * Provide default {@see willBeSatisfiedByEverythingBelow()} logic for specification classes + * that don't implement PrunableInterface + * + * @param mixed[] $value + * + * @psalm-param array{basename: string, path: string, stream: resource, dirname: string, type: string, extension: string} $value + * @psalm-mutation-free + */ + public static function thatWillBeSatisfiedByEverythingBelow(SpecificationInterface $that, array $value) : bool + { + return $that instanceof PrunableInterface + && $that->willBeSatisfiedByEverythingBelow($value); + } } diff --git a/src/Specification/Glob.php b/src/Specification/Glob.php index e07977a..6ea58c5 100644 --- a/src/Specification/Glob.php +++ b/src/Specification/Glob.php @@ -17,10 +17,18 @@ namespace Flyfinder\Specification; use InvalidArgumentException; +use function array_slice; +use function count; +use function explode; +use function implode; +use function max; +use function min; use function preg_match; +use function rtrim; use function sprintf; use function strlen; use function strpos; +use function substr; /** * Glob specification class @@ -40,10 +48,29 @@ final class Glob extends CompositeSpecification */ private $staticPrefix; + /** + * The "bounded prefix" is the part of the glob up to the first recursive wildcard "**". + * It is the longest prefix for which the number of directory segments in the partial match + * is known. If the glob does not contain the recursive wildcard "**", the full glob is returned. + * + * @var string + */ + private $boundedPrefix; + + /** + * The "total prefix" is the part of the glob before the trailing catch-all wildcard sequence if the glob + * ends with one, otherwise null. It is needed for implementing the A-quantifier pruning hint. + * + * @var string|null + */ + private $totalPrefix; + public function __construct(string $glob) { - $this->regex = self::toRegEx($glob); - $this->staticPrefix = self::getStaticPrefix($glob); + $this->regex = self::toRegEx($glob); + $this->staticPrefix = self::getStaticPrefix($glob); + $this->boundedPrefix = self::getBoundedPrefix($glob); + $this->totalPrefix = self::getTotalPrefix($glob); } /** @@ -80,12 +107,7 @@ public function isSatisfiedBy(array $value) : bool */ private static function getStaticPrefix(string $glob) : string { - if (strpos($glob, '/') !== 0 && strpos($glob, '://') === false) { - throw new InvalidArgumentException(sprintf( - 'The glob "%s" is not absolute and not a URI.', - $glob - )); - } + self::assertValidGlob($glob); $prefix = ''; $length = strlen($glob); for ($i = 0; $i < $length; ++$i) { @@ -103,27 +125,38 @@ private static function getStaticPrefix(string $glob) : string case '[': break 2; case '\\': - if (isset($glob[$i + 1])) { - switch ($glob[$i + 1]) { - case '*': - case '?': - case '{': - case '}': - case '[': - case ']': - case '-': - case '^': - case '$': - case '~': - case '\\': - $prefix .= $glob[$i + 1]; - ++$i; - break; - default: - $prefix .= '\\'; - } + [$unescaped, $consumedChars] = self::scanBackslashSequence($glob, $i); + $prefix .= $unescaped; + $i += $consumedChars; + break; + default: + $prefix .= $c; + break; + } + } + return $prefix; + } + + private static function getBoundedPrefix(string $glob) : string + { + self::assertValidGlob($glob); + $prefix = ''; + $length = strlen($glob); + + for ($i = 0; $i < $length; ++$i) { + $c = $glob[$i]; + switch ($c) { + case '/': + $prefix .= '/'; + if (self::isRecursiveWildcard($glob, $i)) { + break 2; } break; + case '\\': + [$unescaped, $consumedChars] = self::scanBackslashSequence($glob, $i); + $prefix .= $unescaped; + $i += $consumedChars; + break; default: $prefix .= $c; break; @@ -132,6 +165,61 @@ private static function getStaticPrefix(string $glob) : string return $prefix; } + private static function getTotalPrefix(string $glob) : ?string + { + self::assertValidGlob($glob); + $matches = []; + return preg_match('~(?boundedPrefix, '/')); + $howManySegmentsToConsider = min(count($valueSegments), count($boundedPrefixSegments)); + $boundedPrefixGlob = implode('/', array_slice($boundedPrefixSegments, 0, $howManySegmentsToConsider)); + $valuePathPrefix = implode('/', array_slice($valueSegments, 1, max($howManySegmentsToConsider-1, 0))); + $prefixValue = $value; + $prefixValue['path'] = $valuePathPrefix; + $spec = new Glob($boundedPrefixGlob); + return $spec->isSatisfiedBy($prefixValue); + } + + /** @inheritDoc */ + public function willBeSatisfiedByEverythingBelow(array $value) : bool + { + if ($this->totalPrefix === null) { + return false; + } + $spec = new Glob(rtrim($this->totalPrefix, '/') . '/**/*'); + $terminatedValue = $value; + $terminatedValue['path'] = rtrim($terminatedValue['path'], '/') . '/x/x'; + return $spec->isSatisfiedBy($terminatedValue); + } } diff --git a/src/Specification/InPath.php b/src/Specification/InPath.php index 258ede2..4395c59 100644 --- a/src/Specification/InPath.php +++ b/src/Specification/InPath.php @@ -14,7 +14,12 @@ namespace Flyfinder\Specification; use Flyfinder\Path; +use function array_slice; +use function count; +use function explode; +use function implode; use function in_array; +use function min; use function preg_match; use function str_replace; @@ -85,4 +90,20 @@ public function isSatisfiedBy(array $value) : bool return false; } + + /** @inheritDoc */ + public function canBeSatisfiedBySomethingBelow(array $value) : bool + { + $pathSegments = explode('/', (string) $this->path); + $valueSegments = explode('/', $value['path']); + $pathPrefixSegments = array_slice($pathSegments, 0, min(count($pathSegments), count($valueSegments))); + $spec = new InPath(new Path(implode('/', $pathPrefixSegments))); + return $spec->isSatisfiedBy($value); + } + + /** @inheritDoc */ + public function willBeSatisfiedByEverythingBelow(array $value) : bool + { + return $this->isSatisfiedBy($value); + } } diff --git a/src/Specification/NotSpecification.php b/src/Specification/NotSpecification.php index a2aa5a0..ae31915 100644 --- a/src/Specification/NotSpecification.php +++ b/src/Specification/NotSpecification.php @@ -38,4 +38,16 @@ public function isSatisfiedBy(array $value) : bool { return !$this->wrapped->isSatisfiedBy($value); } + + /** @inheritDoc */ + public function canBeSatisfiedBySomethingBelow(array $value) : bool + { + return !self::thatWillBeSatisfiedByEverythingBelow($this->wrapped, $value); + } + + /** @inheritDoc */ + public function willBeSatisfiedByEverythingBelow(array $value) : bool + { + return !self::thatCanBeSatisfiedBySomethingBelow($this->wrapped, $value); + } } diff --git a/src/Specification/OrSpecification.php b/src/Specification/OrSpecification.php index 516b9f0..fecc6cd 100644 --- a/src/Specification/OrSpecification.php +++ b/src/Specification/OrSpecification.php @@ -42,4 +42,18 @@ public function isSatisfiedBy(array $value) : bool { return $this->one->isSatisfiedBy($value) || $this->other->isSatisfiedBy($value); } + + /** @inheritDoc */ + public function canBeSatisfiedBySomethingBelow(array $value) : bool + { + return self::thatCanBeSatisfiedBySomethingBelow($this->one, $value) + || self::thatCanBeSatisfiedBySomethingBelow($this->other, $value); + } + + /** @inheritDoc */ + public function willBeSatisfiedByEverythingBelow(array $value) : bool + { + return self::thatWillBeSatisfiedByEverythingBelow($this->one, $value) + || self::thatWillBeSatisfiedByEverythingBelow($this->other, $value); + } } diff --git a/src/Specification/PrunableInterface.php b/src/Specification/PrunableInterface.php new file mode 100644 index 0000000..8393ab1 --- /dev/null +++ b/src/Specification/PrunableInterface.php @@ -0,0 +1,32 @@ +assertSame('find', $this->fixture->getMethod()); } + public function testIfNotHiddenLetsSubpathsThrough() : void + { + $files = [ 'foo/bar/.hidden/baz/not-hidden.txt' ]; + $this->fixture->setFilesystem($this->mockFileSystem($files)); + $notHidden = (new IsHidden())->notSpecification(); + $this->assertEquals( + $files, + $this->generatorToFileList($this->fixture->handle($notHidden)) + ); + } + + public function testIfDoubleNotHiddenLetsSubpathsThrough() : void + { + $files = [ '.foo/.bar/not-hidden/.baz/.hidden.txt' ]; + $this->fixture->setFilesystem($this->mockFileSystem($files)); + $notHidden = (new IsHidden())->notSpecification()->notSpecification(); + $this->assertEquals( + $files, + $this->generatorToFileList($this->fixture->handle($notHidden)) + ); + } + + public function testIfNeitherHiddenNorExtLetsSubpathsThrough() : void + { + $files = [ 'foo/bar/.hidden/baz.ext/neither-hidden-nor.ext.zzz' ]; + $this->fixture->setFilesystem($this->mockFileSystem($files)); + + $neitherHiddenNorExt = + (new IsHidden())->notSpecification() + ->andSpecification((new HasExtension(['ext']))->notSpecification()); + $this->assertEquals( + $files, + $this->generatorToFileList($this->fixture->handle($neitherHiddenNorExt)) + ); + + $neitherHiddenNorExtDeMorgan = (new IsHidden())->orSpecification(new HasExtension(['ext']))->notSpecification(); + $this->assertEquals( + $files, + $this->generatorToFileList($this->fixture->handle($neitherHiddenNorExtDeMorgan)) + ); + } + + public function testIfNegatedOrCullsExactMatches() : void + { + $files = [ + 'foo/bar/baz/whatever.txt', + 'foo/gen/pics/bottle.jpg', + 'foo/lou/time.txt', + ]; + $this->fixture->setFilesystem($this->mockFileSystem($files, ['foo/bar', 'foo/gen'])); + + $negatedOr = + (new InPath(new Path('foo/gen'))) + ->orSpecification(new InPath(new Path('foo/bar'))) + ->notSpecification(); + + $this->assertEquals( + ['foo/lou/time.txt'], + $this->generatorToFileList($this->fixture->handle($negatedOr)) + ); + + $negatedOrDeMorgan = + (new InPath(new Path('foo/gen')))->notSpecification() + ->andSpecification((new InPath(new Path('foo/bar')))->notSpecification()); + + $this->assertEquals( + ['foo/lou/time.txt'], + $this->generatorToFileList($this->fixture->handle($negatedOrDeMorgan)) + ); + } + + public function testIfNegatedAndCullsExactMatches() : void + { + $files = [ + 'foo/bar/baz/whatever.txt', + 'foo/gen/pics/bottle.jpg', + 'foo/lou/time.txt', + ]; + $expected = [ + 'foo/gen/pics/bottle.jpg', + 'foo/lou/time.txt', + ]; + $this->fixture->setFilesystem($this->mockFileSystem($files, ['foo/bar'])); + + $negatedAnd = + (new InPath(new Path('foo/*'))) + ->andSpecification(new InPath(new Path('*/bar'))) + ->notSpecification(); + + $this->assertEquals( + $expected, + $this->generatorToFileList($this->fixture->handle($negatedAnd)) + ); + + $negatedAndDeMorgan = + (new InPath(new Path('foo/*')))->notSpecification() + ->orSpecification((new InPath(new Path('*/bar')))->notSpecification()); + + $this->assertEquals( + $expected, + $this->generatorToFileList($this->fixture->handle($negatedAndDeMorgan)) + ); + } + + /** * @covers ::handle * @covers ::setFilesystem @@ -97,6 +214,10 @@ public function testIfCorrectFilesAreBeingYielded() : void ->with($listContents1[0]) ->andReturn(true); + $isHidden->shouldReceive('canBeSatisfiedBySomethingBelow') + ->with($listContents1[0]) + ->andReturn(true); + $isHidden->shouldReceive('isSatisfiedBy') ->with($listContents1[1]) ->andReturn(false); @@ -127,4 +248,212 @@ public function testIfCorrectFilesAreBeingYielded() : void $this->assertSame($expected, $result); } + + public function testSubtreePruningOptimization() : void + { + $filesystem = $this->mockFileSystem( + [ + 'foo/bar/baz/file.txt', + 'foo/bar/baz/file2.txt', + 'foo/bar/baz/excluded/excluded.txt', + 'foo/bar/baz/excluded/culled/culled.txt', + 'foo/bar/baz/excluded/important/reincluded.txt', + 'foo/bar/file3.txt', + 'foo/lou/someSubdir/file4.txt', + 'foo/irrelevant1/', + 'irrelevant2/irrelevant3/irrelevantFile.txt', + ], + [ + 'foo/irrelevant1', + 'irrelevant2', + 'foo/bar/baz/excluded/culled', + ] + ); + + $inFooBar = new InPath(new Path('foo/bar')); + $inFooLou = new InPath(new Path('foo/lou')); + $inExcl = new InPath(new Path('foo/bar/baz/excl*')); + $inReincl = new InPath(new Path('foo/bar/baz/*/important')); + $spec = + $inFooBar + ->orSpecification($inFooLou) + ->andSpecification($inExcl->notSpecification()) + ->orSpecification($inReincl); + + $finder = $this->fixture; + $finder->setFilesystem($filesystem); + $generator = $finder->handle($spec); + + $expected = [ + 'foo/bar/baz/file.txt', + 'foo/bar/baz/file2.txt', + 'foo/bar/file3.txt', + 'foo/bar/baz/excluded/important/reincluded.txt', + 'foo/lou/someSubdir/file4.txt', + ]; + sort($expected); + + $this->assertEquals($expected, $this->generatorToFileList($generator)); + } + + public function testGlobSubtreePruning() : void + { + $filesystem = $this->mockFileSystem( + [ + 'foo/bar/baz/file.txt', + 'foo/bar/baz/file2.txt', + 'foo/bar/baz/excluded/excluded.txt', + 'foo/bar/baz/excluded/culled/culled.txt', + 'foo/bar/baz/excluded/important/reincluded.txt', + 'foo/bar/file3.txt', + 'foo/lou/someSubdir/file4.txt', + 'foo/irrelevant1/', + 'irrelevant2/irrelevant3/irrelevantFile.txt', + ], + [ + 'foo/irrelevant1', + 'irrelevant2', + 'foo/bar/baz/excluded/culled', + ] + ); + $txtInFooBar = new Glob('/foo/bar/**/*.txt'); + $inFooLou = new Glob('/foo/lou/**/*'); + $inExcl = new Glob('/foo/bar/baz/excl*/**/*'); + $inReincl = new Glob('/foo/bar/baz/*/important/**/*'); + $spec = $txtInFooBar + ->orSpecification($inFooLou) + ->andSpecification($inExcl->notSpecification()) + ->orSpecification($inReincl); + + $finder = $this->fixture; + $finder->setFilesystem($filesystem); + $generator = $finder->handle($spec); + $expected = [ + 'foo/bar/baz/file.txt', + 'foo/bar/baz/file2.txt', + 'foo/bar/file3.txt', + 'foo/bar/baz/excluded/important/reincluded.txt', + 'foo/lou/someSubdir/file4.txt', + ]; + sort($expected); + + $this->assertEquals($expected, $this->generatorToFileList($generator)); + } + + /** + * @return string[] + */ + protected function generatorToFileList(Generator $generator) : array + { + $actual = array_values(array_map(static function ($v) { + return $v['path']; + }, iterator_to_array($generator))); + sort($actual); + return $actual; + } + + /** + * @param string[] $pathList + * + * @return mixed[] + */ + protected function mockFileTree(array $pathList) : array + { + $result = [ + '.' => [ + 'type' => 'dir', + 'path' => '', + 'dirname' => '.', + 'basename' => '.', + 'filename' => '.', + 'contents' => [], + ], + ]; + foreach ($pathList as $path) { + $isFile = substr($path, -1) !== '/'; + $child = null; + while (true) { + $info = pathinfo($path); + if ($isFile) { + $isFile = false; + $result[$path] = [ + 'type' => 'file', + 'path' => $path, + 'dirname' => $info['dirname'], + 'basename' => $info['basename'], + 'filename' => $info['filename'], + 'extension' => $info['extension'], + ]; + } else { + $existed = true; + if (!isset($result[$path])) { + $existed = false; + $result[$path] = [ + 'type' => 'dir', + 'path' => $path, + 'basename' => $info['basename'], + 'filename' => $info['filename'], + 'contents' => [], + ]; + } + if ($child!==null) { + $result[$path]['contents'][] = $child; + } + if ($existed) { + break; + } + } + $child = $info['basename']; + $path = $info['dirname']; + } + } + return $result; + } + + /** + * @param mixed[] $fileTreeMock + * + * @return mixed[] + */ + protected function mockListContents(array $fileTreeMock, string $path) : array + { + $path = trim($path, '/'); + if (substr($path . ' ', 0, 2)==='./') { + $path = substr($path, 2); + } + if ($path==='') { + $path = '.'; + } + + if (!isset($fileTreeMock[$path]) || $fileTreeMock[$path]['type'] === 'file') { + return []; + } + $result = []; + foreach ($fileTreeMock[$path]['contents'] as $basename) { + $childPath = ($path==='.' ? '' : $path . '/') . $basename; + if (!isset($fileTreeMock[$childPath])) { + continue; + } + + $result[$basename] = $fileTreeMock[$childPath]; + } + return $result; + } + + /** + * @param string[] $paths + * @param string[] $pathsThatShouldNotBeListed + */ + protected function mockFileSystem(array $paths, array $pathsThatShouldNotBeListed = []) : FilesystemInterface + { + $fsData = $this->mockFileTree($paths); + $filesystem = m::mock(Filesystem::class); + $filesystem->shouldReceive('listContents') + ->zeroOrMoreTimes() + ->andReturnUsing(function (string $path) use ($fsData, $pathsThatShouldNotBeListed) : array { + $this->assertNotContains($path, $pathsThatShouldNotBeListed); + return array_values($this->mockListContents($fsData, $path)); + }); + return $filesystem; + } }