From a2109c1c3a69f5231073ae819e49af7a1274b78f Mon Sep 17 00:00:00 2001 From: przepompownia Date: Wed, 3 Dec 2025 00:45:48 +0100 Subject: [PATCH] Add fuzzy matching to name completion Co-authored-by: @mamazu Resolves #2562 --- doc/reference/configuration.rst | 15 +++++ .../ReferenceFinder/IndexedNameSearcher.php | 8 ++- lib/Indexer/Extension/IndexerExtension.php | 7 +- lib/Indexer/Model/Query/Criteria.php | 6 ++ .../Query/Criteria/ShortNameMatchesTo.php | 65 +++++++++++++++++++ .../IndexedNameSearcherTest.php | 12 ++-- .../Criteria/ShortNameMatchesToBench.php | 54 +++++++++++++++ .../Query/Criteria/ShortNameMatchesToTest.php | 47 ++++++++++++++ 8 files changed, 204 insertions(+), 10 deletions(-) create mode 100644 lib/Indexer/Model/Query/Criteria/ShortNameMatchesTo.php create mode 100644 lib/Indexer/Tests/Benchmark/Model/Query/Criteria/ShortNameMatchesToBench.php create mode 100644 lib/Indexer/Tests/Unit/Model/Query/Criteria/ShortNameMatchesToTest.php diff --git a/doc/reference/configuration.rst b/doc/reference/configuration.rst index f8df942c19..e331abe676 100644 --- a/doc/reference/configuration.rst +++ b/doc/reference/configuration.rst @@ -1896,6 +1896,21 @@ When searching the index exclude records whose fully qualified names match any o **Default**: ``[]`` +.. _param_indexer.searcher_semi_fuzzy: + + +``indexer.searcher_semi_fuzzy`` +""""""""""""""""""""""""""""""" + + +Type: boolean + + +How to match short names: by default only the leading part is matched (case insensitive). If true, the leading parts of subsequent subwords also match (camel/underscore, case sensitive). For example `InEx` and `index` match `IndexerExtension` but `inex` does not, `arw` matches `array_walk`. + + +**Default**: ``false`` + .. _ObjectRendererExtension: diff --git a/lib/Indexer/Adapter/ReferenceFinder/IndexedNameSearcher.php b/lib/Indexer/Adapter/ReferenceFinder/IndexedNameSearcher.php index c631d4e48f..1dc0e6a3ad 100644 --- a/lib/Indexer/Adapter/ReferenceFinder/IndexedNameSearcher.php +++ b/lib/Indexer/Adapter/ReferenceFinder/IndexedNameSearcher.php @@ -15,8 +15,10 @@ class IndexedNameSearcher implements NameSearcher { - public function __construct(private SearchClient $client) - { + public function __construct( + private SearchClient $client, + private bool $semiFuzzy, + ) { } /** @@ -30,7 +32,7 @@ public function search(string $name, ?string $type = null): Generator $fullyQualified = str_starts_with($name, '\\'); - $criteria = $fullyQualified ? Criteria::fqnBeginsWith(substr($name, 1)) : Criteria::shortNameBeginsWith($name); + $criteria = $fullyQualified ? Criteria::fqnBeginsWith(substr($name, 1)) : Criteria::shortNameMatchesTo($name, $this->semiFuzzy); $typeCriteria = $this->resolveTypeCriteria($type); diff --git a/lib/Indexer/Extension/IndexerExtension.php b/lib/Indexer/Extension/IndexerExtension.php index 868f27591f..d9f37c87ee 100644 --- a/lib/Indexer/Extension/IndexerExtension.php +++ b/lib/Indexer/Extension/IndexerExtension.php @@ -67,6 +67,7 @@ class IndexerExtension implements Extension public const PARAM_IMPLEMENTATIONS_DEEP_REFERENCES = 'indexer.implementation_finder.deep'; public const PARAM_STUB_PATHS = 'indexer.stub_paths'; public const PARAM_SUPPORTED_EXTENSIONS = 'indexer.supported_extensions'; + public const PARAM_SEARCHER_SEMI_FUZZY = 'indexer.searcher_semi_fuzzy'; public const TAG_WATCHER = 'indexer.watcher'; private const SERVICE_INDEXER_EXCLUDE_PATTERNS = 'indexer.exclude_patterns'; private const SERVICE_INDEXER_INCLUDE_PATTERNS = 'indexer.include_patterns'; @@ -101,6 +102,7 @@ public function configure(Resolver $schema): void self::PARAM_IMPLEMENTATIONS_DEEP_REFERENCES => true, self::PARAM_SUPPORTED_EXTENSIONS => ['php', 'phar'], self::PARAM_SEARCH_INCLUDE_PATTERNS => [], + self::PARAM_SEARCHER_SEMI_FUZZY => false, ]); $schema->setDescriptions([ self::PARAM_ENABLED_WATCHERS => 'List of allowed watchers. The first watcher that supports the current system will be used', @@ -117,6 +119,7 @@ public function configure(Resolver $schema): void self::PARAM_IMPLEMENTATIONS_DEEP_REFERENCES => 'Recurse over class implementations to resolve all class implementations (not just the classes directly implementing the subject)', self::PARAM_SUPPORTED_EXTENSIONS => 'File extensions (e.g. `php`) for files that should be indexed', self::PARAM_SEARCH_INCLUDE_PATTERNS => 'When searching the index exclude records whose fully qualified names match any of these regex patterns (use to exclude suggestions from search results). Namespace separators must be escaped as `\\\\\\\\` for example `^Foo\\\\\\\\` to include all namespaces whose first segment is `Foo`', + self::PARAM_SEARCHER_SEMI_FUZZY => 'How to match short names: by default only the leading part is matched (case insensitive). If true, the leading parts of subsequent subwords also match (camel/underscore, case sensitive). For example `InEx` and `index` match `IndexerExtension` but `inex` does not, `arw` matches `array_walk`.', ]); $schema->setTypes([ self::PARAM_ENABLED_WATCHERS => 'array', @@ -133,6 +136,7 @@ public function configure(Resolver $schema): void self::PARAM_IMPLEMENTATIONS_DEEP_REFERENCES => 'boolean', self::PARAM_SUPPORTED_EXTENSIONS => 'array', self::PARAM_SEARCH_INCLUDE_PATTERNS => 'array', + self::PARAM_SEARCHER_SEMI_FUZZY => 'boolean', ]); } @@ -296,7 +300,8 @@ private function registerReferenceFinderAdapters(ContainerBuilder $container): v $container->register(IndexedNameSearcher::class, function (Container $container) { return new IndexedNameSearcher( - $container->get(SearchClient::class) + $container->get(SearchClient::class), + $container->parameter(self::PARAM_SEARCHER_SEMI_FUZZY)->bool(), ); }, [ ReferenceFinderExtension::TAG_NAME_SEARCHER => []]); } diff --git a/lib/Indexer/Model/Query/Criteria.php b/lib/Indexer/Model/Query/Criteria.php index 0952ea22fa..ad0a22145c 100644 --- a/lib/Indexer/Model/Query/Criteria.php +++ b/lib/Indexer/Model/Query/Criteria.php @@ -6,6 +6,7 @@ use Phpactor\Indexer\Model\Query\Criteria\FileAbsolutePathBeginsWith; use Phpactor\Indexer\Model\Query\Criteria\HasFlags; use Phpactor\Indexer\Model\Query\Criteria\IsClassType; +use Phpactor\Indexer\Model\Query\Criteria\ShortNameMatchesTo; use Phpactor\Indexer\Model\Query\Criteria\ShortNameContains; use Phpactor\Indexer\Model\Query\Criteria\ExactShortName; use Phpactor\Indexer\Model\Query\Criteria\FqnBeginsWith; @@ -32,6 +33,11 @@ public static function shortNameBeginsWith(string $name): ShortNameBeginsWith return new ShortNameBeginsWith($name); } + public static function shortNameMatchesTo(string $name, bool $semiFuzzy): ShortNameMatchesTo + { + return new ShortNameMatchesTo($name, $semiFuzzy); + } + public static function fqnBeginsWith(string $name): FqnBeginsWith { return new FqnBeginsWith($name); diff --git a/lib/Indexer/Model/Query/Criteria/ShortNameMatchesTo.php b/lib/Indexer/Model/Query/Criteria/ShortNameMatchesTo.php new file mode 100644 index 0000000000..6000a00aeb --- /dev/null +++ b/lib/Indexer/Model/Query/Criteria/ShortNameMatchesTo.php @@ -0,0 +1,65 @@ +name) { + return false; + } + + if (str_starts_with(mb_strtolower($record->shortName()), mb_strtolower($this->name))) { + return true; + } + + if (false === $this->semiFuzzy) { + return false; + } + + return $this->semiFuzzySearch($this->name, $record->shortName()); + } + + private function semiFuzzySearch(string $search, string $subject): bool + { + $index = -1; + + foreach (mb_str_split($search) as $char) { + $newIndex = mb_strpos($subject, $char, $index + 1); + + if (false === $newIndex) { + return false; + } + + if ($newIndex === $index + 1 || ctype_upper($char) || $char === '_') { + $index = $newIndex; + continue; + } + + $underscoreIndex = mb_strpos($subject, '_', $index + 1); + + if (false === $underscoreIndex || $newIndex !== $underscoreIndex + 1) { + return false; + } + + $index = $newIndex; + } + + return true; + } +} diff --git a/lib/Indexer/Tests/Adapter/ReferenceFinder/IndexedNameSearcherTest.php b/lib/Indexer/Tests/Adapter/ReferenceFinder/IndexedNameSearcherTest.php index 8f865b9f92..9b5d6a8b58 100644 --- a/lib/Indexer/Tests/Adapter/ReferenceFinder/IndexedNameSearcherTest.php +++ b/lib/Indexer/Tests/Adapter/ReferenceFinder/IndexedNameSearcherTest.php @@ -30,7 +30,7 @@ public function testSearcherWithAbsolute(): void $this->workspace()->put('project/Barfoo.php', 'indexAgent(); $agent->indexer()->getJob()->run(); - $searcher = new IndexedNameSearcher($agent->search()); + $searcher = new IndexedNameSearcher($agent->search(), false); $results = iterator_to_array($searcher->search('\Foo')); @@ -42,7 +42,7 @@ public function testSearcher(): void $this->workspace()->put('project/Foobar.php', 'indexAgent(); $agent->indexer()->getJob()->run(); - $searcher = new IndexedNameSearcher($agent->search()); + $searcher = new IndexedNameSearcher($agent->search(), false); foreach ($searcher->search('Foo') as $result) { assert($result instanceof NameSearchResult); @@ -57,7 +57,7 @@ public function testSearcherForInterface(): void $this->workspace()->put('project/Foobar.php', 'indexAgent(); $agent->indexer()->getJob()->run(); - $searcher = new IndexedNameSearcher($agent->search()); + $searcher = new IndexedNameSearcher($agent->search(), false); foreach ($searcher->search('Foo', NameSearcherType::INTERFACE) as $result) { assert($result instanceof NameSearchResult); @@ -75,7 +75,7 @@ public function testSearcherForEnum(): void $this->workspace()->put('project/Foobar.php', 'indexAgent(); $agent->indexer()->getJob()->run(); - $searcher = new IndexedNameSearcher($agent->search()); + $searcher = new IndexedNameSearcher($agent->search(), false); foreach ($searcher->search('Foo', NameSearcherType::ENUM) as $result) { assert($result instanceof NameSearchResult); @@ -93,7 +93,7 @@ public function testSearcherForTrait(): void $this->workspace()->put('project/Foobar.php', 'indexAgent(); $agent->indexer()->getJob()->run(); - $searcher = new IndexedNameSearcher($agent->search()); + $searcher = new IndexedNameSearcher($agent->search(), false); foreach ($searcher->search('Foo', NameSearcherType::TRAIT) as $result) { assert($result instanceof NameSearchResult); @@ -119,7 +119,7 @@ public function testSearcherForAttribute(string $query, string $type, array $exp $agent = $this->indexAgent(); $agent->indexer()->getJob()->run(); - $searcher = new IndexedNameSearcher($agent->search()); + $searcher = new IndexedNameSearcher($agent->search(), false); $resultPaths = []; $offset = 1 + mb_strlen($this->workspace()->path()); diff --git a/lib/Indexer/Tests/Benchmark/Model/Query/Criteria/ShortNameMatchesToBench.php b/lib/Indexer/Tests/Benchmark/Model/Query/Criteria/ShortNameMatchesToBench.php new file mode 100644 index 0000000000..5428150106 --- /dev/null +++ b/lib/Indexer/Tests/Benchmark/Model/Query/Criteria/ShortNameMatchesToBench.php @@ -0,0 +1,54 @@ +isSatisfiedBy($record); + } + + /** + * @ParamProviders("provideSearch") + * @Revs(1000) + * @Iterations(5) + * @param array{string, string} $data + */ + public function benchShortNameMatchesTo(array $data): void + { + $criteria = new ShortNameMatchesTo($data[0], true); + + $record = ClassRecord::fromName($data[1]); + + $criteria->isSatisfiedBy($record); + } + + /** + * @return Generator + */ + public function provideSearch(): Generator + { + yield 'leading substring' => ['Bag', 'Foobar\\Bagno']; + yield 'empty search' => ['', 'Foobar\\Bagno']; + yield 'subsequence' => ['bgn', 'Foobar\\Bagno']; + yield 'multibyte' => ['☠😼', 'Foobar\\😼☠k😼']; + } +} diff --git a/lib/Indexer/Tests/Unit/Model/Query/Criteria/ShortNameMatchesToTest.php b/lib/Indexer/Tests/Unit/Model/Query/Criteria/ShortNameMatchesToTest.php new file mode 100644 index 0000000000..a1ae9061a1 --- /dev/null +++ b/lib/Indexer/Tests/Unit/Model/Query/Criteria/ShortNameMatchesToTest.php @@ -0,0 +1,47 @@ +isSatisfiedBy($record)); + self::assertSame($expectedFuzzy, (new ShortNameMatchesTo($name, true))->isSatisfiedBy($record)); + } + + /** + * @return Generator + */ + public static function provideSearch(): Generator + { + yield 'empty search' => ['', 'Foobar\\Bagno', false, false]; + yield 'no match' => ['Barfoo', 'Foobar\\Bazfoo', false, false]; + yield 'matches exact' => ['Barfoo', 'Foobar\\Barfoo', true, true]; + yield 'substring' => ['Bag', 'Foobar\\Bagno', true, true]; + yield 'subsequence' => ['bgn', 'Foobar\\Bagno', false, false]; + yield 'negative camel 1' => ['Shame', 'ShortNameBeginsWith', false, false]; + yield 'tolower leading' => ['short', 'ShortNameBeginsWith', true, true]; + yield 'camel 1' => ['ShBeg', 'ShortNameBeginsWith', false, true]; + yield 'camel 2' => ['hBeg', 'ShortNameBeginsWith', false, false]; + yield 'camel 3' => ['BegWit', 'ShortNameBeginsWith', false, true]; + yield 'camel only upper' => ['SBW', 'ShortNameBeginsWith', false, true]; + yield 'underscore in subject and phrase' => ['fil_g_c', 'file_get_contents', false, true]; + yield 'underscore only in subject' => ['filgc', 'file_get_contents', false, true]; + yield 'underscore in subject, negative' => ['fits', 'file_get_contents', false, false]; + yield 'multibyte' => ['😼☠', 'Foobar\\😼☠k😼', true, true]; + yield 'lower first' => ['gNT', 'getDescendantNodesAndTokens', false, true]; + yield 'only upper in subject' => ['tr', 'TARGET_CLASS', false, false]; + yield 'only upper in subject 2' => ['tc', 'TARGET_CLASS', false, false]; + } +}