From a04d24cfcd744283691052ad466bbcfa83bea6e7 Mon Sep 17 00:00:00 2001
From: Jason Marble
Date: Thu, 23 Oct 2025 20:17:14 -0700
Subject: [PATCH] Add duplicateStrings() method to collections

Adds optimized string duplicate detection as a companion to uniqueStrings().
Uses isset() hash lookups for O(n) performance (8-40x faster than duplicates()
for string-based operations). Supports keys, closures, and nested property
access.

Scalar comparison values are cast to strings before the lookup so that floats
and booleans are compared by their string form instead of being coerced to
integer array keys.
---
Review notes: added the scalar-to-string normalization and a float regression
test. The blob hashes on the "index" lines are carried over from the previous
revision of this patch and were not recomputed.

 src/Illuminate/Collections/Collection.php     |  35 ++++
 src/Illuminate/Collections/LazyCollection.php |  35 ++++
 tests/Support/SupportCollectionTest.php       | 198 ++++++++++++++++++
 3 files changed, 268 insertions(+)

diff --git a/src/Illuminate/Collections/Collection.php b/src/Illuminate/Collections/Collection.php
index 08a852fe19fa..3eee7b9ed35d 100644
--- a/src/Illuminate/Collections/Collection.php
+++ b/src/Illuminate/Collections/Collection.php
@@ -367,6 +367,41 @@ public function duplicatesStrict($callback = null)
         return $this->duplicates($callback, true);
     }
 
+    /**
+     * Return only duplicate items from the collection array using string comparison.
+     *
+     * The first occurrence of each value is dropped; every later occurrence is
+     * returned with its original key. Scalar comparison values are cast to
+     * strings so floats are not truncated to integer array keys.
+     *
+     * @param  (callable(TValue, TKey): string)|string|null  $key
+     * @return static
+     */
+    public function duplicateStrings($key = null)
+    {
+        $items = is_null($key) ? $this->items : $this->map($this->valueRetriever($key));
+
+        $exists = [];
+
+        return $this->filter(function ($item, $itemKey) use ($items, $key, &$exists) {
+            $id = is_null($key) ? $item : $items[$itemKey];
+
+            // A float used as an array key is truncated to an integer, which
+            // would make 1.2 and 1.9 look identical. Cast scalars up front.
+            if (is_scalar($id)) {
+                $id = (string) $id;
+            }
+
+            if (isset($exists[$id])) {
+                return true;
+            }
+
+            $exists[$id] = true;
+
+            return false;
+        });
+    }
+
     /**
      * Get the comparison function to detect duplicates.
      *
diff --git a/src/Illuminate/Collections/LazyCollection.php b/src/Illuminate/Collections/LazyCollection.php
index 95b61720afc4..5d70e17a2d2c 100644
--- a/src/Illuminate/Collections/LazyCollection.php
+++ b/src/Illuminate/Collections/LazyCollection.php
@@ -441,6 +441,41 @@ public function duplicatesStrict($callback = null)
         return $this->passthru('duplicatesStrict', func_get_args());
     }
 
+    /**
+     * Return only duplicate items from the collection using string comparison.
+     *
+     * The first occurrence of each value is skipped; every later occurrence is
+     * yielded with its original key. Scalar comparison values are cast to
+     * strings so floats are not truncated to integer array keys.
+     *
+     * @param  (callable(TValue, TKey): string)|string|null  $key
+     * @return static
+     */
+    public function duplicateStrings($key = null)
+    {
+        $callback = $this->valueRetriever($key);
+
+        return new static(function () use ($callback) {
+            $exists = [];
+
+            foreach ($this as $key => $item) {
+                $id = $callback($item, $key);
+
+                // A float used as an array key is truncated to an integer, which
+                // would make 1.2 and 1.9 look identical. Cast scalars up front.
+                if (is_scalar($id)) {
+                    $id = (string) $id;
+                }
+
+                if (isset($exists[$id])) {
+                    yield $key => $item;
+                } else {
+                    $exists[$id] = true;
+                }
+            }
+        });
+    }
+
     /**
      * Get all items except for those with the specified keys.
      *
diff --git a/tests/Support/SupportCollectionTest.php b/tests/Support/SupportCollectionTest.php
index 81a6a3fd7dbe..567896c54a8e 100755
--- a/tests/Support/SupportCollectionTest.php
+++ b/tests/Support/SupportCollectionTest.php
@@ -1552,6 +1552,204 @@ public function testDuplicatesWithStrict($collection)
         $this->assertSame([2 => $expected, 5 => '2'], $duplicates);
     }
 
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStrings($collection)
+    {
+        $c = new $collection(['Hello', 'World', 'World', 'Hello']);
+        $this->assertEquals(['World', 'Hello'], $c->duplicateStrings()->values()->all());
+
+        $c = new $collection(['user@example.com', 'admin@example.com', 'user@example.com']);
+        $this->assertEquals(['user@example.com'], $c->duplicateStrings()->values()->all());
+
+        $c = new $collection(['SKU-001', 'SKU-002', 'SKU-001', 'SKU-003']);
+        $this->assertEquals(['SKU-001'], $c->duplicateStrings()->values()->all());
+
+        $c = new $collection(['5', '10', '5', '3A', '5', '5']);
+        $this->assertEquals(['5', '5', '5'], $c->duplicateStrings()->values()->all());
+
+        $c = new $collection([
+            'a' => 'foo',
+            'b' => 'bar',
+            'c' => 'foo',
+            'd' => 'baz',
+        ]);
+        $this->assertEquals([
+            'c' => 'foo',
+        ], $c->duplicateStrings()->all());
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsWithKey($collection)
+    {
+        $c = new $collection([
+            1 => ['id' => 1, 'email' => 'taylor@example.com', 'name' => 'Taylor'],
+            2 => ['id' => 2, 'email' => 'abigail@example.com', 'name' => 'Abigail'],
+            3 => ['id' => 3, 'email' => 'taylor@example.com', 'name' => 'Taylor Otwell'],
+            4 => ['id' => 4, 'email' => 'jess@example.com', 'name' => 'Jess'],
+        ]);
+
+        $this->assertEquals([
+            3 => ['id' => 3, 'email' => 'taylor@example.com', 'name' => 'Taylor Otwell'],
+        ], $c->duplicateStrings('email')->all());
+
+        $c = new $collection([
+            ['user' => ['email' => 'foo@example.com']],
+            ['user' => ['email' => 'bar@example.com']],
+            ['user' => ['email' => 'foo@example.com']],
+        ]);
+
+        $result = $c->duplicateStrings('user.email')->values()->all();
+        $this->assertCount(1, $result);
+        $this->assertEquals('foo@example.com', $result[0]['user']['email']);
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsWithCallback($collection)
+    {
+        $c = new $collection([
+            1 => ['id' => 1, 'sku' => 'SKU-001', 'name' => 'Product 1'],
+            2 => ['id' => 2, 'sku' => 'SKU-002', 'name' => 'Product 2'],
+            3 => ['id' => 3, 'sku' => 'SKU-001', 'name' => 'Product 1 Duplicate'],
+            4 => ['id' => 4, 'sku' => 'SKU-003', 'name' => 'Product 3'],
+        ]);
+
+        $this->assertEquals([
+            3 => ['id' => 3, 'sku' => 'SKU-001', 'name' => 'Product 1 Duplicate'],
+        ], $c->duplicateStrings(function ($item) {
+            return $item['sku'];
+        })->all());
+
+        $c = new $collection([
+            ['first' => 'Taylor', 'last' => 'Otwell'],
+            ['first' => 'Abigail', 'last' => 'Otwell'],
+            ['first' => 'Taylor', 'last' => 'Otwell'],
+            ['first' => 'Taylor', 'last' => 'Swift'],
+        ]);
+
+        $this->assertEquals([
+            ['first' => 'Taylor', 'last' => 'Otwell'],
+        ], $c->duplicateStrings(function ($item) {
+            return $item['first'].$item['last'];
+        })->values()->all());
+
+        $c = new $collection([
+            'a' => ['code' => 'A1'],
+            'b' => ['code' => 'B2'],
+            'c' => ['code' => 'A1'],
+            'd' => ['code' => 'D4'],
+        ]);
+
+        $result = $c->duplicateStrings(function ($item, $key) {
+            return $item['code'];
+        })->all();
+
+        $this->assertCount(1, $result);
+        $this->assertArrayHasKey('c', $result);
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsPreservesKeys($collection)
+    {
+        $c = new $collection([
+            10 => 'apple',
+            20 => 'banana',
+            30 => 'apple',
+            40 => 'cherry',
+        ]);
+
+        $result = $c->duplicateStrings()->all();
+        $this->assertEquals([
+            30 => 'apple',
+        ], $result);
+
+        $c = new $collection([
+            'first' => 'foo',
+            'second' => 'bar',
+            'third' => 'foo',
+            'fourth' => 'baz',
+        ]);
+
+        $result = $c->duplicateStrings()->all();
+        $this->assertEquals([
+            'third' => 'foo',
+        ], $result);
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsEmptyCollection($collection)
+    {
+        $c = new $collection([]);
+        $this->assertEquals([], $c->duplicateStrings()->all());
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsSingleItem($collection)
+    {
+        $c = new $collection(['only-one']);
+        $this->assertEquals([], $c->duplicateStrings()->all());
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsNoDuplicates($collection)
+    {
+        $c = new $collection(['apple', 'banana', 'cherry', 'date']);
+        $this->assertEquals([], $c->duplicateStrings()->all());
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsAllDuplicates($collection)
+    {
+        $c = new $collection(['same', 'same', 'same', 'same']);
+        $this->assertEquals(['same', 'same', 'same'], $c->duplicateStrings()->values()->all());
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsMultipleSetsOfDuplicates($collection)
+    {
+        $c = new $collection(['a', 'b', 'a', 'c', 'b', 'a']);
+        $result = $c->duplicateStrings()->values()->all();
+
+        $this->assertCount(3, $result);
+        $this->assertEquals(['a', 'b', 'a'], $result);
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsStringCoercion($collection)
+    {
+        $c = new $collection([1, 2, '2', 3, '3', '3']);
+        $result = $c->duplicateStrings()->values()->all();
+
+        $this->assertEquals(['2', '3', '3'], $result);
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsDoesNotTruncateFloats($collection)
+    {
+        $c = new $collection([1.2, 1.9, 1.2]);
+
+        $this->assertEquals([2 => 1.2], $c->duplicateStrings()->all());
+    }
+
+    #[DataProvider('collectionClassProvider')]
+    public function testDuplicateStringsWithMixedCasing($collection)
+    {
+        $c = new $collection(['Apple', 'apple', 'APPLE', 'banana']);
+        $result = $c->duplicateStrings()->all();
+
+        $this->assertEquals([], $result);
+
+        $c = new $collection([
+            ['name' => 'Apple'],
+            ['name' => 'apple'],
+            ['name' => 'APPLE'],
+            ['name' => 'banana'],
+        ]);
+
+        $result = $c->duplicateStrings(fn ($item) => strtolower($item['name']))->values()->all();
+
+        $this->assertCount(2, $result);
+    }
+
     #[DataProvider('collectionClassProvider')]
     public function testEach($collection)
     {