From f476ff16b77b6c714a29eea5925e9b7aab5658f2 Mon Sep 17 00:00:00 2001 From: Roman Parpalak Date: Tue, 14 Nov 2023 21:14:54 +0200 Subject: [PATCH] Optimizations in PHP code. --- src/S2/Rose/Entity/ExternalId.php | 2 +- src/S2/Rose/Entity/ResultTrace.php | 26 ++++----------- src/S2/Rose/Entity/WordPositionContainer.php | 35 ++++++++++++++++---- src/S2/Rose/Indexer.php | 2 +- src/S2/Rose/Storage/Database/PdoStorage.php | 10 ++++-- 5 files changed, 45 insertions(+), 30 deletions(-) diff --git a/src/S2/Rose/Entity/ExternalId.php b/src/S2/Rose/Entity/ExternalId.php index 349b1a2..0239d74 100644 --- a/src/S2/Rose/Entity/ExternalId.php +++ b/src/S2/Rose/Entity/ExternalId.php @@ -22,7 +22,7 @@ public function __construct($id, ?int $instanceId = null) throw new InvalidArgumentException('Instance id must be positive.'); } - if (!is_string($id) && !is_int($id) && !is_float($id)) { + if (!\is_string($id) && !\is_int($id) && !\is_float($id)) { throw new InvalidArgumentException('External id must be string or int or float.'); } diff --git a/src/S2/Rose/Entity/ResultTrace.php b/src/S2/Rose/Entity/ResultTrace.php index 91319fc..36ce2b0 100644 --- a/src/S2/Rose/Entity/ResultTrace.php +++ b/src/S2/Rose/Entity/ResultTrace.php @@ -8,15 +8,13 @@ class ResultTrace { - protected $data = []; + protected array $data = []; /** - * @param string $word - * @param string $serializedExtId * @param float[]|array $weights * @param int[] $positions */ - public function addWordWeight($word, $serializedExtId, array $weights, $positions) + public function addWordWeight(string $word, string $serializedExtId, array $weights, array $positions): void { $this->data[$serializedExtId]['fulltext ' . $word][] = [ sprintf( @@ -28,33 +26,21 @@ public function addWordWeight($word, $serializedExtId, array $weights, $position } /** - * @param string $word - * @param string $serializedExtId * @param float[]|array $weights */ - public function addKeywordWeight($word, $serializedExtId, array $weights) + public function addKeywordWeight(string $word, string $serializedExtId, array $weights): void { $this->data[$serializedExtId]['keyword ' . $word][] = [ (string)array_product($weights) => $weights, ]; } - /** - * @param string $word1 - * @param string $word2 - * @param string $serializedExtId - * @param float $weight - * @param int $distance - */ - public function addNeighbourWeight($word1, $word2, $serializedExtId, $weight, $distance) + public function addNeighbourWeight(string $word1, string $word2, string $serializedExtId, float $weight, int $distance): void { - $this->data[$serializedExtId]['fulltext ' . $word1 . ' - ' . $word2][] = sprintf('%s: matches are close (shift = %s)', $weight, $distance); + $this->data[$serializedExtId]['fulltext ' . $word1 . ' - ' . $word2][] = $weight . ': matches are close (shift = ' . $distance . ')'; } - /** - * @return array - */ - public function toArray() + public function toArray(): array { return $this->data; } diff --git a/src/S2/Rose/Entity/WordPositionContainer.php b/src/S2/Rose/Entity/WordPositionContainer.php index 8310e88..0fda7fa 100644 --- a/src/S2/Rose/Entity/WordPositionContainer.php +++ b/src/S2/Rose/Entity/WordPositionContainer.php @@ -27,13 +27,15 @@ public function addWordAt(string $word, int $position): self { $this->data[$word][] = $position; + sort($this->data[$word]); // TODO make more reliable requirement of input arrays to be sorted. + return $this; } public function compareWith(self $referenceContainer): array { $wordMap = array_keys($this->data); - $len = count($wordMap); + $len = \count($wordMap); $result = []; /** @noinspection ForeachInvariantsInspection */ @@ -57,17 +59,38 @@ public function compareWith(self $referenceContainer): array } /** + * This method uses linear algorithm, therefore input arrays must be sorted. + * Otherwise, the output is incorrect. + * * @param int[] $a1 * @param int[] $a2 + * + * @return int It's important to return a signed value, not an absolute value. */ protected static function compareArrays(array $a1, array $a2, int $shift): int { + $len1 = \count($a1); + $len2 = \count($a2); + $result = self::INFINITY; - foreach ($a1 as $x) { - foreach ($a2 as $y) { - if (abs($y - $x - $shift) < abs($result)) { - $result = $y - $x - $shift; - } + $index1 = 0; + $index2 = 0; + + while ($index1 < $len1 && $index2 < $len2) { + $diff = $a2[$index2] - $a1[$index1] - $shift; + + if ($diff === 0) { + return 0; + } + + if (abs($result) > abs($diff)) { + $result = $diff; + } + + if ($diff < 0) { + $index2++; + } else { + $index1++; } } diff --git a/src/S2/Rose/Indexer.php b/src/S2/Rose/Indexer.php index 9e6ff32..af53297 100644 --- a/src/S2/Rose/Indexer.php +++ b/src/S2/Rose/Indexer.php @@ -191,7 +191,7 @@ protected function doIndex(Indexable $indexable): void * * this method returns * - * [10 => 'well-known', '10.001' => 'well', '10.002' => 'known', 11 => 'fact'] + * [10 => 'well-known', 11 => 'fact', '10.001' => 'well', '10.002' => 'known'] * * @param array $words * @return array diff --git a/src/S2/Rose/Storage/Database/PdoStorage.php b/src/S2/Rose/Storage/Database/PdoStorage.php index 9955a7a..57d74f8 100644 --- a/src/S2/Rose/Storage/Database/PdoStorage.php +++ b/src/S2/Rose/Storage/Database/PdoStorage.php @@ -117,8 +117,14 @@ public function addToFulltextIndex(array $titleWords, array $keywords, array $co } $internalId = $this->getInternalIdFromExternalId($externalId); - $wordIds = $this->getWordIds(array_merge($contentWords, $titleWords, $keywords)); - + $wordIds = $this->getWordIds(array_merge(array_values($contentWords), array_values($titleWords), array_values($keywords))); + + /** + * @see \S2\Rose\Entity\WordPositionContainer::compareArrays for sorting requirement + */ + ksort($titleWords); + ksort($keywords); + ksort($contentWords); $this->getRepository()->insertFulltext($titleWords, $keywords, $contentWords, $wordIds, $internalId); }