Skip to content

Commit

Permalink
Fix bug in OrcOutputBuffer leading to severe undercompression of larg…
Browse files Browse the repository at this point in the history
…e blobs
  • Loading branch information
sdruzkin authored and ARUNACHALAM THIRUPATHI committed Feb 28, 2022
1 parent ae9f559 commit dd673a9
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 2 deletions.
Expand Up @@ -445,7 +445,7 @@ private void writeChunkToOutputStream(byte[] chunk, int offset, int length)
return;
}

checkArgument(length <= buffer.length, "Write chunk length must be less than compression buffer size");
checkArgument(length <= maxBufferSize, "Write chunk length must be less than max compression buffer size");

boolean isCompressed = false;
byte[] compressionBuffer = null;
Expand Down Expand Up @@ -497,7 +497,7 @@ private void writeDirectlyToOutputStream(byte[] bytes, int bytesOffset, int leng
}

while (length > 0) {
int chunkSize = Integer.min(length, buffer.length);
int chunkSize = Integer.min(length, maxBufferSize);
writeChunkToOutputStream(bytes, bytesOffset, chunkSize);
length -= chunkSize;
bytesOffset += chunkSize;
Expand Down
Expand Up @@ -20,10 +20,13 @@

import java.util.Arrays;
import java.util.Optional;
import java.util.OptionalInt;

import static io.airlift.slice.Slices.wrappedBuffer;
import static io.airlift.units.DataSize.Unit.BYTE;
import static io.airlift.units.DataSize.Unit.KILOBYTE;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;

public class TestOrcOutputBuffer
{
Expand All @@ -48,6 +51,30 @@ public void testWriteHugeByteChucks()
assertEquals(output.slice(), wrappedBuffer(largeByteArray, 100, size - 100));
}

/**
 * Writes a 1 MiB array filled with a single repeated byte value through an
 * {@link OrcOutputBuffer} configured with ZSTD (level 7) and a 256KB maximum
 * compression buffer, and checks that the data written out stays below 200 bytes.
 *
 * <p>Both the {@code byte[]} and the slice-based {@code writeBytes} overloads are exercised.
 */
@Test
public void testWriteHugeByteChucksUsesMaxCompressionBufferSizeChunks()
{
    int totalBytes = 1024 * 1024;
    int compressedSizeLimit = 200;

    // A constant-valued payload, so the compressed size mostly reflects the chunk size used for compression.
    byte[] payload = new byte[totalBytes];
    Arrays.fill(payload, (byte) 0xA);

    ColumnWriterOptions writerOptions = ColumnWriterOptions.builder()
            .setCompressionKind(CompressionKind.ZSTD)
            .setCompressionLevel(OptionalInt.of(7))
            .setCompressionMaxBufferSize(new DataSize(256, KILOBYTE))
            .build();
    OrcOutputBuffer orcBuffer = new OrcOutputBuffer(writerOptions, Optional.empty());
    DynamicSliceOutput sink = new DynamicSliceOutput(totalBytes);

    // Per the original note: before the fix the compressed result was around 90KB,
    // after the fix it went down to 117 bytes.

    // Path 1: raw byte[] overload.
    orcBuffer.writeBytes(payload, 10, totalBytes - 10);
    assertTrue(orcBuffer.writeDataTo(sink) < compressedSizeLimit);

    // Start from a clean buffer and sink before exercising the second overload.
    orcBuffer.reset();
    sink.reset();

    // Path 2: slice-based overload.
    orcBuffer.writeBytes(wrappedBuffer(payload), 100, totalBytes - 100);
    assertTrue(orcBuffer.writeDataTo(sink) < compressedSizeLimit);
}

@Test
public void testGrowCapacity()
{
Expand Down

0 comments on commit dd673a9

Please sign in to comment.