Skip to content

Commit

Permalink
Make PrimitiveLongCollections.deduplicate work in a single pass.
Browse files Browse the repository at this point in the history
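This can dramatically improve the deduplication performance for large arrays, since the time complexity is now always O(N) instead of growing with the number of unique values found so far (up to O(N²)). The single pass compares each value only with the previously kept one, so it relies on equal values being adjacent, i.e. on sorted input.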
  • Loading branch information
chrisvest committed Sep 5, 2018
1 parent c6a9755 commit 8a9c4a0
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 14 deletions.
Expand Up @@ -349,24 +349,23 @@ private static <T, C extends Collection<T>> void addUnique( C collection, T item
*/
// NOTE(review): this span is a rendered diff whose +/- markers were lost, so it is not
// compilable as written. Statements tagged [old] belong to the removed implementation
// (locals `unique`, `value`, `j`: every element is compared against all values kept so far);
// statements tagged [new] belong to the added single-pass implementation (locals `lastValue`,
// `uniqueIndex`, `currentValue`). Untagged brace lines are shared or belong to the nearest
// tagged statement.
// Both versions compact the unique values to the front of `values` in place and return
// `values` itself when nothing was removed, otherwise a truncated copy.
// The [new] version compares each element only with the previously kept one, so it removes
// adjacent duplicates only -- presumably callers pass sorted arrays; confirm against the Javadoc.
public static long[] deduplicate( long[] values )
{
int unique = 0; // [old] number of unique values kept so far
for ( int i = 0; i < values.length; i++ ) // [old] outer scan over every element
if ( values.length < 2 ) // [new] empty and single-element arrays cannot contain duplicates
{
long value = values[i]; // [old]
for ( int j = 0; j < unique; j++ ) // [old] inner scan over the values already kept
return values; // [new] returned as-is, no copy
}
long lastValue = values[0]; // [new] most recently kept value
int uniqueIndex = 1; // [new] next write position; values[0] is always kept
for ( int i = 1; i < values.length; i++ ) // [new] single pass starting at the second element
{
long currentValue = values[i]; // [new]
if ( currentValue != lastValue ) // [new] differs from the last kept value, so keep it
{
if ( value == values[j] ) // [old]
{
value = -1; // signal that this value is not unique ([old]; note a genuine -1 input is therefore dropped as well)
break; // we will not find more than one conflict ([old])
}
}
if ( value != -1 ) // [old]
{ // this has to be done outside the inner loop, otherwise we'd never accept a single one...
values[unique++] = values[i]; // [old] keep the value by writing it into the compacted prefix
values[uniqueIndex] = currentValue; // [new] write into the compacted prefix
lastValue = currentValue; // [new]
uniqueIndex++; // [new]
}
}
return unique < values.length ? Arrays.copyOf( values, unique ) : values; // [old]
return uniqueIndex < values.length ? Arrays.copyOf( values, uniqueIndex ) : values; // [new] copy only if something was removed
}

/**
Expand Down
Expand Up @@ -25,9 +25,12 @@
import org.junit.jupiter.api.Test;

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;
Expand Down Expand Up @@ -156,6 +159,31 @@ void shouldDeduplicate()
assertArrayEquals( new long[] {1L, 2L, 5L, 6L}, deduped );
}

/**
 * Checks {@code PrimitiveLongCollections.deduplicate} against a {@link TreeSet}-based reference
 * on several randomly generated, sorted arrays.
 */
@Test
void shouldDeduplicateWithRandomArrays()
{
    int arrayLength = 5000;
    int iterations = 10;
    for ( int i = 0; i < iterations; i++ )
    {
        // Values are drawn from [0, arrayLength), so duplicates are very likely; sorting makes them adjacent.
        long[] array = ThreadLocalRandom.current().longs( arrayLength, 0, arrayLength ).sorted().toArray();

        // Build the expected result BEFORE calling deduplicate: deduplicate writes into the array it is
        // given, so deriving the reference afterwards would make the oracle depend on the code under test.
        TreeSet<Long> set = new TreeSet<>();
        for ( long value : array )
        {
            set.add( value );
        }
        long[] dedupedExpected = new long[set.size()];
        Iterator<Long> itr = set.iterator();
        for ( int j = 0; j < dedupedExpected.length; j++ )
        {
            assertTrue( itr.hasNext() );
            dedupedExpected[j] = itr.next();
        }

        long[] dedupedActual = PrimitiveLongCollections.deduplicate( array );
        assertArrayEquals( dedupedExpected, dedupedActual );
    }
}

@Test
void shouldNotContinueToCallNextOnHasNextFalse()
{
Expand Down

0 comments on commit 8a9c4a0

Please sign in to comment.