Skip to content

Commit

Permalink
Fix array expansion in GroupByHash
Browse files Browse the repository at this point in the history
A subtle float rounding error in rehash() caused the array size in
GroupByHash to sometimes quadruple in size rather than double. Fixed
this by calling FastUtil's arraySize only once and doubling the size
every time capacity is reached.
  • Loading branch information
Raghav Sethi committed Dec 11, 2015
1 parent 81eb77b commit 459f8d4
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 22 deletions.
Expand Up @@ -15,6 +15,7 @@


import com.facebook.presto.spi.Page; import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.PageBuilder; import com.facebook.presto.spi.PageBuilder;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.Type;
Expand All @@ -26,6 +27,7 @@
import java.util.List; import java.util.List;
import java.util.Optional; import java.util.Optional;


import static com.facebook.presto.spi.StandardErrorCode.INSUFFICIENT_RESOURCES;
import static com.facebook.presto.spi.type.BigintType.BIGINT; import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; import static com.facebook.presto.spi.type.BooleanType.BOOLEAN;
import static com.facebook.presto.type.TypeUtils.NULL_HASH_CODE; import static com.facebook.presto.type.TypeUtils.NULL_HASH_CODE;
Expand All @@ -45,6 +47,7 @@ public class BigintGroupByHash
private final int maskChannel; private final int maskChannel;
private final boolean outputRawHash; private final boolean outputRawHash;


private int hashCapacity;
private int maxFill; private int maxFill;
private int mask; private int mask;


Expand All @@ -69,17 +72,17 @@ public BigintGroupByHash(int hashChannel, Optional<Integer> maskChannel, boolean
this.maskChannel = requireNonNull(maskChannel, "maskChannel is null").orElse(-1); this.maskChannel = requireNonNull(maskChannel, "maskChannel is null").orElse(-1);
this.outputRawHash = outputRawHash; this.outputRawHash = outputRawHash;


int hashSize = arraySize(expectedSize, FILL_RATIO); hashCapacity = arraySize(expectedSize, FILL_RATIO);


maxFill = calculateMaxFill(hashSize); maxFill = calculateMaxFill(hashCapacity);
mask = hashSize - 1; mask = hashCapacity - 1;
values = new LongBigArray(); values = new LongBigArray();
values.ensureCapacity(hashSize); values.ensureCapacity(hashCapacity);
groupIds = new IntBigArray(-1); groupIds = new IntBigArray(-1);
groupIds.ensureCapacity(hashSize); groupIds.ensureCapacity(hashCapacity);


valuesByGroupId = new LongBigArray(); valuesByGroupId = new LongBigArray();
valuesByGroupId.ensureCapacity(hashSize); valuesByGroupId.ensureCapacity(hashCapacity);
} }


@Override @Override
Expand Down Expand Up @@ -255,20 +258,24 @@ private int addNewGroup(int hashPosition, long value)


// increase capacity, if necessary // increase capacity, if necessary
if (nextGroupId >= maxFill) { if (nextGroupId >= maxFill) {
rehash(maxFill * 2); rehash();
} }
return groupId; return groupId;
} }


private void rehash(int size) private void rehash()
{ {
int newSize = arraySize(size + 1, FILL_RATIO); long newCapacityLong = hashCapacity * 2L;
if (newCapacityLong > Integer.MAX_VALUE) {
throw new PrestoException(INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries");
}
int newCapacity = (int) newCapacityLong;


int newMask = newSize - 1; int newMask = newCapacity - 1;
LongBigArray newValues = new LongBigArray(); LongBigArray newValues = new LongBigArray();
newValues.ensureCapacity(newSize); newValues.ensureCapacity(newCapacity);
IntBigArray newGroupIds = new IntBigArray(-1); IntBigArray newGroupIds = new IntBigArray(-1);
newGroupIds.ensureCapacity(newSize); newGroupIds.ensureCapacity(newCapacity);


for (int groupId = 0; groupId < nextGroupId; groupId++) { for (int groupId = 0; groupId < nextGroupId; groupId++) {
if (groupId == nullGroupId) { if (groupId == nullGroupId) {
Expand All @@ -288,7 +295,8 @@ private void rehash(int size)
} }


mask = newMask; mask = newMask;
maxFill = calculateMaxFill(newSize); hashCapacity = newCapacity;
maxFill = calculateMaxFill(hashCapacity);
values = newValues; values = newValues;
groupIds = newGroupIds; groupIds = newGroupIds;


Expand All @@ -302,12 +310,12 @@ private static int getHashPosition(long rawHash, int mask)


private static int calculateMaxFill(int hashSize) private static int calculateMaxFill(int hashSize)
{ {
checkArgument(hashSize > 0, "hashSize must greater than 0"); checkArgument(hashSize > 0, "hashCapacity must greater than 0");
int maxFill = (int) Math.ceil(hashSize * FILL_RATIO); int maxFill = (int) Math.ceil(hashSize * FILL_RATIO);
if (maxFill == hashSize) { if (maxFill == hashSize) {
maxFill--; maxFill--;
} }
checkArgument(hashSize > maxFill, "hashSize must be larger than maxFill"); checkArgument(hashSize > maxFill, "hashCapacity must be larger than maxFill");
return maxFill; return maxFill;
} }
} }
Expand Up @@ -15,6 +15,7 @@


import com.facebook.presto.spi.Page; import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.PageBuilder; import com.facebook.presto.spi.PageBuilder;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.block.Block; import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.BlockBuilder; import com.facebook.presto.spi.block.BlockBuilder;
import com.facebook.presto.spi.type.Type; import com.facebook.presto.spi.type.Type;
Expand All @@ -31,6 +32,7 @@
import static com.facebook.presto.operator.SyntheticAddress.decodePosition; import static com.facebook.presto.operator.SyntheticAddress.decodePosition;
import static com.facebook.presto.operator.SyntheticAddress.decodeSliceIndex; import static com.facebook.presto.operator.SyntheticAddress.decodeSliceIndex;
import static com.facebook.presto.operator.SyntheticAddress.encodeSyntheticAddress; import static com.facebook.presto.operator.SyntheticAddress.encodeSyntheticAddress;
import static com.facebook.presto.spi.StandardErrorCode.INSUFFICIENT_RESOURCES;
import static com.facebook.presto.spi.type.BigintType.BIGINT; import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.BooleanType.BOOLEAN; import static com.facebook.presto.spi.type.BooleanType.BOOLEAN;
import static com.facebook.presto.sql.gen.JoinCompiler.PagesHashStrategyFactory; import static com.facebook.presto.sql.gen.JoinCompiler.PagesHashStrategyFactory;
Expand Down Expand Up @@ -287,7 +289,7 @@ private int addNewGroup(int hashPosition, int position, Page page, int rawHash)


// increase capacity, if necessary // increase capacity, if necessary
if (nextGroupId >= maxFill) { if (nextGroupId >= maxFill) {
rehash(maxFill * 2); rehash();
} }
return groupId; return groupId;
} }
Expand All @@ -304,14 +306,18 @@ private void startNewPage()
} }
} }


private void rehash(int size) private void rehash()
{ {
int newSize = arraySize(size + 1, FILL_RATIO); long newCapacityLong = this.groupIdsByHash.length * 2L;
if (newCapacityLong > Integer.MAX_VALUE) {
throw new PrestoException(INSUFFICIENT_RESOURCES, "Size of hash table cannot exceed 1 billion entries");
}
int newCapacity = (int) newCapacityLong;


int newMask = newSize - 1; int newMask = newCapacity - 1;
long[] newKey = new long[newSize]; long[] newKey = new long[newCapacity];
Arrays.fill(newKey, -1); Arrays.fill(newKey, -1);
int[] newValue = new int[newSize]; int[] newValue = new int[newCapacity];


int oldIndex = 0; int oldIndex = 0;
for (int groupId = 0; groupId < nextGroupId; groupId++) { for (int groupId = 0; groupId < nextGroupId; groupId++) {
Expand All @@ -336,7 +342,7 @@ private void rehash(int size)
} }


this.mask = newMask; this.mask = newMask;
this.maxFill = calculateMaxFill(newSize); this.maxFill = calculateMaxFill(newCapacity);
this.groupAddressByHash = newKey; this.groupAddressByHash = newKey;
this.groupIdsByHash = newValue; this.groupIdsByHash = newValue;
groupAddressByGroupId.ensureCapacity(maxFill); groupAddressByGroupId.ensureCapacity(maxFill);
Expand Down

0 comments on commit 459f8d4

Please sign in to comment.