Skip to content

Commit

Permalink
Elide VariableWidthBlock null array if all values non-null
Browse files Browse the repository at this point in the history
  • Loading branch information
dain committed Jun 19, 2018
1 parent 5936c77 commit 89673b0
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 38 deletions.
Expand Up @@ -91,13 +91,13 @@ public void testVarcharSerializedSize()
// empty page // empty page
Page page = new Page(builder.build()); Page page = new Page(builder.build());
int pageSize = serializedSize(ImmutableList.of(VARCHAR), page); int pageSize = serializedSize(ImmutableList.of(VARCHAR), page);
assertEquals(pageSize, 43); // page overhead assertEquals(pageSize, 44); // page overhead


// page with one value // page with one value
VARCHAR.writeString(builder, "alice"); VARCHAR.writeString(builder, "alice");
page = new Page(builder.build()); page = new Page(builder.build());
int firstValueSize = serializedSize(ImmutableList.of(VARCHAR), page) - pageSize; int firstValueSize = serializedSize(ImmutableList.of(VARCHAR), page) - pageSize;
assertEquals(firstValueSize, 4 + 5 + 1); // length + "alice" + null assertEquals(firstValueSize, 4 + 5); // length + "alice"


// page with two values // page with two values
VARCHAR.writeString(builder, "bob"); VARCHAR.writeString(builder, "bob");
Expand Down
Expand Up @@ -35,6 +35,7 @@
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Optional;


import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA; import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA; import static com.facebook.presto.orc.metadata.Stream.StreamKind.DICTIONARY_DATA;
Expand Down Expand Up @@ -79,7 +80,7 @@ public class SliceDictionaryStreamReader
@Nonnull @Nonnull
private int[] stripeDictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS; private int[] stripeDictionaryOffsetVector = EMPTY_DICTIONARY_OFFSETS;


private VariableWidthBlock dictionaryBlock = new VariableWidthBlock(1, Slices.wrappedBuffer(EMPTY_DICTIONARY_DATA), EMPTY_DICTIONARY_OFFSETS, new boolean[]{true}); private VariableWidthBlock dictionaryBlock = new VariableWidthBlock(1, Slices.wrappedBuffer(EMPTY_DICTIONARY_DATA), EMPTY_DICTIONARY_OFFSETS, Optional.of(new boolean[]{true}));
private byte[] currentDictionaryData = EMPTY_DICTIONARY_DATA; private byte[] currentDictionaryData = EMPTY_DICTIONARY_DATA;


@Nonnull @Nonnull
Expand Down Expand Up @@ -206,7 +207,7 @@ private void setDictionaryBlockData(byte[] dictionaryData, int[] dictionaryOffse
boolean[] isNullVector = new boolean[positionCount]; boolean[] isNullVector = new boolean[positionCount];
isNullVector[positionCount - 1] = true; isNullVector[positionCount - 1] = true;
dictionaryOffsets[positionCount] = dictionaryOffsets[positionCount - 1]; dictionaryOffsets[positionCount] = dictionaryOffsets[positionCount - 1];
dictionaryBlock = new VariableWidthBlock(positionCount, Slices.wrappedBuffer(dictionaryData), dictionaryOffsets, isNullVector); dictionaryBlock = new VariableWidthBlock(positionCount, Slices.wrappedBuffer(dictionaryData), dictionaryOffsets, Optional.of(isNullVector));
currentDictionaryData = dictionaryData; currentDictionaryData = dictionaryData;
} }
} }
Expand Down
Expand Up @@ -33,8 +33,8 @@
import javax.annotation.Nullable; import javax.annotation.Nullable;


import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.List; import java.util.List;
import java.util.Optional;


import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA; import static com.facebook.presto.orc.metadata.Stream.StreamKind.DATA;
import static com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH; import static com.facebook.presto.orc.metadata.Stream.StreamKind.LENGTH;
Expand Down Expand Up @@ -119,7 +119,7 @@ public Block readBlock(Type type)
} }


// create new isNullVector and offsetVector for VariableWidthBlock // create new isNullVector and offsetVector for VariableWidthBlock
boolean[] isNullVector = new boolean[nextBatchSize]; boolean[] isNullVector = null;
int[] offsetVector = new int[nextBatchSize + 1]; int[] offsetVector = new int[nextBatchSize + 1];


// lengthVector is reused across calls // lengthVector is reused across calls
Expand All @@ -131,22 +131,29 @@ public Block readBlock(Type type)
if (lengthStream == null) { if (lengthStream == null) {
throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present"); throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present");
} }
Arrays.fill(isNullVector, false);
lengthStream.nextIntVector(nextBatchSize, lengthVector); lengthStream.nextIntVector(nextBatchSize, lengthVector);
} }
else { else {
isNullVector = new boolean[nextBatchSize];
int nullValues = presentStream.getUnsetBits(nextBatchSize, isNullVector); int nullValues = presentStream.getUnsetBits(nextBatchSize, isNullVector);
if (nullValues != nextBatchSize) { if (nullValues != nextBatchSize) {
if (lengthStream == null) { if (lengthStream == null) {
throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present"); throw new OrcCorruptionException(streamDescriptor.getOrcDataSourceId(), "Value is not null but length stream is not present");
} }
lengthStream.nextIntVector(nextBatchSize, lengthVector, isNullVector);
if (nullValues == 0) {
isNullVector = null;
lengthStream.nextIntVector(nextBatchSize, lengthVector);
}
else {
lengthStream.nextIntVector(nextBatchSize, lengthVector, isNullVector);
}
} }
} }


long totalLength = 0; long totalLength = 0;
for (int i = 0; i < nextBatchSize; i++) { for (int i = 0; i < nextBatchSize; i++) {
if (!isNullVector[i]) { if (isNullVector == null || !isNullVector[i]) {
totalLength += lengthVector[i]; totalLength += lengthVector[i];
} }
} }
Expand All @@ -155,7 +162,7 @@ public Block readBlock(Type type)
readOffset = 0; readOffset = 0;
nextBatchSize = 0; nextBatchSize = 0;
if (totalLength == 0) { if (totalLength == 0) {
return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, isNullVector); return new VariableWidthBlock(currentBatchSize, EMPTY_SLICE, offsetVector, Optional.ofNullable(isNullVector));
} }
if (totalLength > ONE_GIGABYTE) { if (totalLength > ONE_GIGABYTE) {
throw new PrestoException(GENERIC_INTERNAL_ERROR, throw new PrestoException(GENERIC_INTERNAL_ERROR,
Expand All @@ -172,7 +179,7 @@ public Block readBlock(Type type)
// truncate string and update offsets // truncate string and update offsets
offsetVector[0] = 0; offsetVector[0] = 0;
for (int i = 0; i < currentBatchSize; i++) { for (int i = 0; i < currentBatchSize; i++) {
if (isNullVector[i]) { if (isNullVector != null && isNullVector[i]) {
offsetVector[i + 1] = offsetVector[i]; offsetVector[i + 1] = offsetVector[i];
continue; continue;
} }
Expand All @@ -189,7 +196,7 @@ public Block readBlock(Type type)
} }


// this can lead to over-retention but unlikely to happen given truncation rarely happens // this can lead to over-retention but unlikely to happen given truncation rarely happens
return new VariableWidthBlock(currentBatchSize, slice, offsetVector, isNullVector); return new VariableWidthBlock(currentBatchSize, slice, offsetVector, Optional.ofNullable(isNullVector));
} }


private void openRowGroup() private void openRowGroup()
Expand Down
Expand Up @@ -20,6 +20,7 @@
import org.testng.annotations.Test; import org.testng.annotations.Test;


import java.util.HashSet; import java.util.HashSet;
import java.util.Optional;
import java.util.Set; import java.util.Set;


import static io.airlift.slice.Slices.wrappedBuffer; import static io.airlift.slice.Slices.wrappedBuffer;
Expand All @@ -44,7 +45,7 @@ private class TestHashCollisionBlock
{ {
public TestHashCollisionBlock(int positionCount, Slice slice, int[] offsets, boolean[] valueIsNull) public TestHashCollisionBlock(int positionCount, Slice slice, int[] offsets, boolean[] valueIsNull)
{ {
super(positionCount, slice, offsets, valueIsNull); super(positionCount, slice, offsets, Optional.of(valueIsNull));
} }


@Override @Override
Expand Down
Expand Up @@ -131,15 +131,15 @@ public void writePositionTo(int position, BlockBuilder blockBuilder)
public Block getSingleValueBlock(int position) public Block getSingleValueBlock(int position)
{ {
if (isNull(position)) { if (isNull(position)) {
return new VariableWidthBlock(1, EMPTY_SLICE, new int[] {0, 0}, new boolean[] {true}); return new VariableWidthBlock(0, 1, EMPTY_SLICE, new int[] {0, 0}, new boolean[] {true});
} }


int offset = getPositionOffset(position); int offset = getPositionOffset(position);
int entrySize = getSliceLength(position); int entrySize = getSliceLength(position);


Slice copy = Slices.copyOf(getRawSlice(position), offset, entrySize); Slice copy = Slices.copyOf(getRawSlice(position), offset, entrySize);


return new VariableWidthBlock(1, copy, new int[] {0, copy.length()}, new boolean[] {false}); return new VariableWidthBlock(0, 1, copy, new int[] {0, copy.length()}, null);
} }


@Override @Override
Expand Down
Expand Up @@ -18,6 +18,9 @@
import io.airlift.slice.Slices; import io.airlift.slice.Slices;
import org.openjdk.jol.info.ClassLayout; import org.openjdk.jol.info.ClassLayout;


import javax.annotation.Nullable;

import java.util.Optional;
import java.util.function.BiConsumer; import java.util.function.BiConsumer;


import static com.facebook.presto.spi.block.BlockUtil.checkArrayRange; import static com.facebook.presto.spi.block.BlockUtil.checkArrayRange;
Expand All @@ -36,14 +39,15 @@ public class VariableWidthBlock
private final int positionCount; private final int positionCount;
private final Slice slice; private final Slice slice;
private final int[] offsets; private final int[] offsets;
@Nullable
private final boolean[] valueIsNull; private final boolean[] valueIsNull;


private final long retainedSizeInBytes; private final long retainedSizeInBytes;
private final long sizeInBytes; private final long sizeInBytes;


public VariableWidthBlock(int positionCount, Slice slice, int[] offsets, boolean[] valueIsNull) public VariableWidthBlock(int positionCount, Slice slice, int[] offsets, Optional<boolean[]> valueIsNull)
{ {
this(0, positionCount, slice, offsets, valueIsNull); this(0, positionCount, slice, offsets, valueIsNull.orElse(null));
} }


VariableWidthBlock(int arrayOffset, int positionCount, Slice slice, int[] offsets, boolean[] valueIsNull) VariableWidthBlock(int arrayOffset, int positionCount, Slice slice, int[] offsets, boolean[] valueIsNull)
Expand All @@ -67,7 +71,7 @@ public VariableWidthBlock(int positionCount, Slice slice, int[] offsets, boolean
} }
this.offsets = offsets; this.offsets = offsets;


if (valueIsNull.length - arrayOffset < positionCount) { if (valueIsNull != null && valueIsNull.length - arrayOffset < positionCount) {
throw new IllegalArgumentException("valueIsNull length is less than positionCount"); throw new IllegalArgumentException("valueIsNull length is less than positionCount");
} }
this.valueIsNull = valueIsNull; this.valueIsNull = valueIsNull;
Expand All @@ -89,10 +93,16 @@ public int getSliceLength(int position)
return getPositionOffset(position + 1) - getPositionOffset(position); return getPositionOffset(position + 1) - getPositionOffset(position);
} }


@Override
public boolean mayHaveNull()
{
return valueIsNull != null;
}

@Override @Override
protected boolean isEntryNull(int position) protected boolean isEntryNull(int position)
{ {
return valueIsNull[position + arrayOffset]; return valueIsNull != null && valueIsNull[position + arrayOffset];
} }


@Override @Override
Expand Down Expand Up @@ -124,7 +134,9 @@ public void retainedBytesForEachPart(BiConsumer<Object, Long> consumer)
{ {
consumer.accept(slice, slice.getRetainedSize()); consumer.accept(slice, slice.getRetainedSize());
consumer.accept(offsets, sizeOf(offsets)); consumer.accept(offsets, sizeOf(offsets));
consumer.accept(valueIsNull, sizeOf(valueIsNull)); if (valueIsNull != null) {
consumer.accept(valueIsNull, sizeOf(valueIsNull));
}
consumer.accept(this, (long) INSTANCE_SIZE); consumer.accept(this, (long) INSTANCE_SIZE);
} }


Expand All @@ -139,19 +151,22 @@ public Block copyPositions(int[] positions, int offset, int length)
} }
SliceOutput newSlice = Slices.allocate(finalLength).getOutput(); SliceOutput newSlice = Slices.allocate(finalLength).getOutput();
int[] newOffsets = new int[length + 1]; int[] newOffsets = new int[length + 1];
boolean[] newValueIsNull = new boolean[length]; boolean[] newValueIsNull = null;
if (valueIsNull != null) {
newValueIsNull = new boolean[length];
}


for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
int position = positions[offset + i]; int position = positions[offset + i];
if (isEntryNull(position)) { if (!isEntryNull(position)) {
newValueIsNull[i] = true;
}
else {
newSlice.appendBytes(slice.getBytes(getPositionOffset(position), getSliceLength(position))); newSlice.appendBytes(slice.getBytes(getPositionOffset(position), getSliceLength(position)));
} }
else if (newValueIsNull != null) {
newValueIsNull[i] = true;
}
newOffsets[i + 1] = newSlice.size(); newOffsets[i + 1] = newSlice.size();
} }
return new VariableWidthBlock(length, newSlice.slice(), newOffsets, newValueIsNull); return new VariableWidthBlock(0, length, newSlice.slice(), newOffsets, newValueIsNull);
} }


@Override @Override
Expand All @@ -176,12 +191,12 @@ public Block copyRegion(int positionOffset, int length)


int[] newOffsets = compactOffsets(offsets, positionOffset, length); int[] newOffsets = compactOffsets(offsets, positionOffset, length);
Slice newSlice = compactSlice(slice, offsets[positionOffset], newOffsets[length]); Slice newSlice = compactSlice(slice, offsets[positionOffset], newOffsets[length]);
boolean[] newValueIsNull = compactArray(valueIsNull, positionOffset, length); boolean[] newValueIsNull = valueIsNull == null ? null : compactArray(valueIsNull, positionOffset, length);


if (newOffsets == offsets && newSlice == slice && newValueIsNull == valueIsNull) { if (newOffsets == offsets && newSlice == slice && newValueIsNull == valueIsNull) {
return this; return this;
} }
return new VariableWidthBlock(length, newSlice, newOffsets, newValueIsNull); return new VariableWidthBlock(0, length, newSlice, newOffsets, newValueIsNull);
} }


@Override @Override
Expand Down
Expand Up @@ -54,6 +54,7 @@ public class VariableWidthBlockBuilder


private SliceOutput sliceOutput = new DynamicSliceOutput(0); private SliceOutput sliceOutput = new DynamicSliceOutput(0);


private boolean hasNullValue;
// it is assumed that the offsets array is one position longer than the valueIsNull array // it is assumed that the offsets array is one position longer than the valueIsNull array
private boolean[] valueIsNull = new boolean[0]; private boolean[] valueIsNull = new boolean[0];
private int[] offsets = new int[1]; private int[] offsets = new int[1];
Expand Down Expand Up @@ -145,7 +146,10 @@ public Block copyPositions(int[] positions, int offset, int length)
} }
SliceOutput newSlice = Slices.allocate(finalLength).getOutput(); SliceOutput newSlice = Slices.allocate(finalLength).getOutput();
int[] newOffsets = new int[length + 1]; int[] newOffsets = new int[length + 1];
boolean[] newValueIsNull = new boolean[length]; boolean[] newValueIsNull = null;
if (hasNullValue) {
newValueIsNull = new boolean[length];
}


for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
int position = positions[offset + i]; int position = positions[offset + i];
Expand All @@ -157,7 +161,7 @@ public Block copyPositions(int[] positions, int offset, int length)
} }
newOffsets[i + 1] = newSlice.size(); newOffsets[i + 1] = newSlice.size();
} }
return new VariableWidthBlock(length, newSlice.slice(), newOffsets, newValueIsNull); return new VariableWidthBlock(0, length, newSlice.slice(), newOffsets, newValueIsNull);
} }


@Override @Override
Expand Down Expand Up @@ -230,6 +234,7 @@ public BlockBuilder appendNull()
throw new IllegalStateException("Current entry must be closed before a null can be written"); throw new IllegalStateException("Current entry must be closed before a null can be written");
} }


hasNullValue = true;
entryAdded(0, true); entryAdded(0, true);
return this; return this;
} }
Expand Down Expand Up @@ -278,6 +283,12 @@ private void updateArraysDataSize()
arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(offsets); arraysRetainedSizeInBytes = sizeOf(valueIsNull) + sizeOf(offsets);
} }


@Override
public boolean mayHaveNull()
{
return hasNullValue;
}

@Override @Override
protected boolean isEntryNull(int position) protected boolean isEntryNull(int position)
{ {
Expand All @@ -290,7 +301,7 @@ public Block getRegion(int positionOffset, int length)
int positionCount = getPositionCount(); int positionCount = getPositionCount();
checkValidRegion(positionCount, positionOffset, length); checkValidRegion(positionCount, positionOffset, length);


return new VariableWidthBlock(positionOffset, length, sliceOutput.slice(), offsets, valueIsNull); return new VariableWidthBlock(positionOffset, length, sliceOutput.slice(), offsets, hasNullValue ? valueIsNull : null);
} }


@Override @Override
Expand All @@ -300,10 +311,13 @@ public Block copyRegion(int positionOffset, int length)
checkValidRegion(positionCount, positionOffset, length); checkValidRegion(positionCount, positionOffset, length);


int[] newOffsets = compactOffsets(offsets, positionOffset, length); int[] newOffsets = compactOffsets(offsets, positionOffset, length);
boolean[] newValueIsNull = compactArray(valueIsNull, positionOffset, length); boolean[] newValueIsNull = null;
if (hasNullValue) {
newValueIsNull = compactArray(valueIsNull, positionOffset, length);
}
Slice slice = compactSlice(sliceOutput.getUnderlyingSlice(), offsets[positionOffset], newOffsets[length]); Slice slice = compactSlice(sliceOutput.getUnderlyingSlice(), offsets[positionOffset], newOffsets[length]);


return new VariableWidthBlock(length, slice, newOffsets, newValueIsNull); return new VariableWidthBlock(0, length, slice, newOffsets, newValueIsNull);
} }


@Override @Override
Expand All @@ -312,7 +326,7 @@ public Block build()
if (currentEntrySize > 0) { if (currentEntrySize > 0) {
throw new IllegalStateException("Current entry must be closed before the block can be built"); throw new IllegalStateException("Current entry must be closed before the block can be built");
} }
return new VariableWidthBlock(positions, sliceOutput.slice(), offsets, valueIsNull); return new VariableWidthBlock(0, positions, sliceOutput.slice(), offsets, hasNullValue ? valueIsNull : null);
} }


@Override @Override
Expand Down
Expand Up @@ -65,11 +65,11 @@ public Block readBlock(BlockEncodingSerde blockEncodingSerde, SliceInput sliceIn
int[] offsets = new int[positionCount + 1]; int[] offsets = new int[positionCount + 1];
sliceInput.readBytes(Slices.wrappedIntArray(offsets), SIZE_OF_INT, positionCount * SIZE_OF_INT); sliceInput.readBytes(Slices.wrappedIntArray(offsets), SIZE_OF_INT, positionCount * SIZE_OF_INT);


boolean[] valueIsNull = decodeNullBits(sliceInput, positionCount).orElseGet(() -> new boolean[positionCount]); boolean[] valueIsNull = decodeNullBits(sliceInput, positionCount).orElse(null);


int blockSize = sliceInput.readInt(); int blockSize = sliceInput.readInt();
Slice slice = sliceInput.readSlice(blockSize); Slice slice = sliceInput.readSlice(blockSize);


return new VariableWidthBlock(positionCount, slice, offsets, valueIsNull); return new VariableWidthBlock(0, positionCount, slice, offsets, valueIsNull);
} }
} }
Expand Up @@ -19,6 +19,7 @@
import it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap; import it.unimi.dsi.fastutil.objects.Object2LongOpenCustomHashMap;
import org.testng.annotations.Test; import org.testng.annotations.Test;


import java.util.Optional;
import java.util.concurrent.atomic.AtomicLong; import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer; import java.util.function.BiConsumer;


Expand Down Expand Up @@ -186,6 +187,6 @@ private static Block createVariableWidthBlock(int entries)
dynamicSliceOutput.writeByte(i); dynamicSliceOutput.writeByte(i);
offsets[i + 1] = dynamicSliceOutput.size(); offsets[i + 1] = dynamicSliceOutput.size();
} }
return new VariableWidthBlock(entries, dynamicSliceOutput.slice(), offsets, new boolean[entries]); return new VariableWidthBlock(entries, dynamicSliceOutput.slice(), offsets, Optional.empty());
} }
} }
Expand Up @@ -24,6 +24,7 @@


import java.util.Arrays; import java.util.Arrays;
import java.util.Objects; import java.util.Objects;
import java.util.Optional;


import static com.facebook.presto.connector.thrift.api.datatypes.PrestoThriftTypeUtils.calculateOffsets; import static com.facebook.presto.connector.thrift.api.datatypes.PrestoThriftTypeUtils.calculateOffsets;
import static com.facebook.presto.connector.thrift.api.datatypes.PrestoThriftTypeUtils.sameSizeIfPresent; import static com.facebook.presto.connector.thrift.api.datatypes.PrestoThriftTypeUtils.sameSizeIfPresent;
Expand Down Expand Up @@ -76,7 +77,7 @@ public Block toBlock(Type desiredType)
numberOfRecords, numberOfRecords,
values, values,
calculateOffsets(sizes, nulls, numberOfRecords), calculateOffsets(sizes, nulls, numberOfRecords),
nulls == null ? new boolean[numberOfRecords] : nulls); Optional.ofNullable(nulls));
} }


@Override @Override
Expand Down

0 comments on commit 89673b0

Please sign in to comment.