Use 8-byte offsets in chunk-based raw index creator (apache#5285)
* Use 8-byte offsets in chunk-based raw index creator

* cleanup

* fixed tests

* Fix tests and address review comments

* Use 8-byte offset for fixed-byte chunk writer.
Add backward compatibility test

Co-authored-by: Siddharth Teotia <steotia@steotia-mn1.linkedin.biz>
siddharthteotia and Siddharth Teotia committed May 29, 2020
1 parent 7798182 commit 3751f11
Showing 13 changed files with 127 additions and 46 deletions.
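For context: the point of moving from 4-byte to 8-byte chunk offsets is presumably to let the raw data region grow past what an int can address (Integer.MAX_VALUE, roughly 2 GB). A minimal standalone sketch of that failure mode, with purely illustrative chunk counts and sizes (not code from this commit):

public class OffsetOverflowSketch {
  public static void main(String[] args) {
    int chunkSizeBytes = 1_000_000; // ~1 MB per compressed chunk (illustrative)
    int numChunks = 3_000;          // illustrative; total raw data ~3 GB

    int intOffset = 0;   // 4-byte bookkeeping, as in v1/v2 headers
    long longOffset = 0; // 8-byte bookkeeping, as in v3 headers
    for (int i = 0; i < numChunks; i++) {
      intOffset += chunkSizeBytes;  // silently wraps negative once past ~2.1 GB
      longOffset += chunkSizeBytes; // stays correct
    }
    System.out.println(intOffset);  // -1294967296 (overflowed)
    System.out.println(longOffset); // 3000000000
  }
}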
@@ -18,12 +18,15 @@
*/
package org.apache.pinot.core.io.reader.impl.v1;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.nio.ByteBuffer;
import org.apache.pinot.core.io.compression.ChunkCompressorFactory;
import org.apache.pinot.core.io.compression.ChunkDecompressor;
import org.apache.pinot.core.io.reader.BaseSingleColumnSingleValueReader;
import org.apache.pinot.core.io.reader.impl.ChunkReaderContext;
import org.apache.pinot.core.io.writer.impl.v1.BaseChunkSingleValueWriter;
import org.apache.pinot.core.io.writer.impl.v1.VarByteChunkSingleValueWriter;
import org.apache.pinot.core.segment.memory.PinotDataBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -47,6 +50,8 @@ public abstract class BaseChunkSingleValueReader extends BaseSingleColumnSingleV
protected final int _numDocsPerChunk;
protected final int _numChunks;
protected final int _lengthOfLongestEntry;
private final int _version;
private final int _headerEntryChunkOffsetSize;

/**
* Constructor for the class.
@@ -57,7 +62,7 @@ public BaseChunkSingleValueReader(PinotDataBuffer pinotDataBuffer) {
_dataBuffer = pinotDataBuffer;

int headerOffset = 0;
int version = _dataBuffer.getInt(headerOffset);
_version = _dataBuffer.getInt(headerOffset);
headerOffset += Integer.BYTES;

_numChunks = _dataBuffer.getInt(headerOffset);
@@ -70,7 +75,7 @@ public BaseChunkSingleValueReader(PinotDataBuffer pinotDataBuffer) {
headerOffset += Integer.BYTES;

int dataHeaderStart = headerOffset;
if (version > 1) {
if (_version > 1) {
_dataBuffer.getInt(headerOffset); // Total docs
headerOffset += Integer.BYTES;

@@ -87,9 +92,10 @@ public BaseChunkSingleValueReader(PinotDataBuffer pinotDataBuffer) {
}

_chunkSize = (_lengthOfLongestEntry * _numDocsPerChunk);
_headerEntryChunkOffsetSize = BaseChunkSingleValueWriter.getHeaderEntryChunkOffsetSize(_version);

// Slice out the header from the data buffer.
int dataHeaderLength = _numChunks * Integer.BYTES;
int dataHeaderLength = _numChunks * _headerEntryChunkOffsetSize;
int rawDataStart = dataHeaderStart + dataHeaderLength;
_dataHeader = _dataBuffer.view(dataHeaderStart, rawDataStart);

@@ -120,14 +126,14 @@ protected ByteBuffer getChunkForRow(int row, ChunkReaderContext context) {
}

int chunkSize;
int chunkPosition = getChunkPosition(chunkId);
long chunkPosition = getChunkPosition(chunkId);

// Size of a chunk can be determined using the next chunk's offset, or the end of the data buffer for the last chunk.
if (chunkId == (_numChunks - 1)) { // Last chunk.
chunkSize = (int) (_dataBuffer.size() - chunkPosition);
} else {
int nextChunkOffset = getChunkPosition(chunkId + 1);
chunkSize = nextChunkOffset - chunkPosition;
long nextChunkOffset = getChunkPosition(chunkId + 1);
chunkSize = (int) (nextChunkOffset - chunkPosition);
}

ByteBuffer decompressedBuffer = context.getChunkBuffer();
@@ -145,12 +151,15 @@ protected ByteBuffer getChunkForRow(int row, ChunkReaderContext context) {

/**
* Helper method to get the offset of the chunk in the data.
*
* @param chunkId Id of the chunk for which to return the position.
* @return Position (offset) of the chunk in the data.
*/
protected int getChunkPosition(int chunkId) {
return _dataHeader.getInt(chunkId * Integer.BYTES);
protected long getChunkPosition(int chunkId) {
if (_headerEntryChunkOffsetSize == Integer.BYTES) {
return _dataHeader.getInt(chunkId * _headerEntryChunkOffsetSize);
} else {
return _dataHeader.getLong(chunkId * _headerEntryChunkOffsetSize);
}
}
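To tie the two reader-side changes together, a small self-contained sketch (illustrative names, not the Pinot classes) of looking up a chunk's position from the version-dependent offset entries and deriving its size from the next entry, falling back to the end of the data buffer for the last chunk:

import java.nio.ByteBuffer;

final class ChunkOffsetSketch {
  // Entry size is Integer.BYTES for file versions 1 and 2, Long.BYTES for version 3.
  static long chunkPosition(ByteBuffer dataHeader, int entrySize, int chunkId) {
    return (entrySize == Integer.BYTES)
        ? dataHeader.getInt(chunkId * entrySize)
        : dataHeader.getLong(chunkId * entrySize);
  }

  // Size of a chunk is the gap between consecutive offsets; the last chunk ends at the data size.
  static int chunkSize(ByteBuffer dataHeader, int entrySize, int numChunks, long dataSize, int chunkId) {
    long position = chunkPosition(dataHeader, entrySize, chunkId);
    long end = (chunkId == numChunks - 1) ? dataSize : chunkPosition(dataHeader, entrySize, chunkId + 1);
    // A single chunk is bounded by the configured chunk size, so the difference fits in an int.
    return (int) (end - position);
  }
}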

@@ -55,7 +55,7 @@ public String getString(int row, ChunkReaderContext context) {
int chunkRowId = row % _numDocsPerChunk;
ByteBuffer chunkBuffer = getChunkForRow(row, context);

int rowOffset = chunkBuffer.getInt(chunkRowId * Integer.BYTES);
int rowOffset = chunkBuffer.getInt(chunkRowId * VarByteChunkSingleValueWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE);
int nextRowOffset = getNextRowOffset(chunkRowId, chunkBuffer);

int length = nextRowOffset - rowOffset;
@@ -77,7 +77,7 @@ public byte[] getBytes(int row, ChunkReaderContext context) {
int chunkRowId = row % _numDocsPerChunk;
ByteBuffer chunkBuffer = getChunkForRow(row, context);

int rowOffset = chunkBuffer.getInt(chunkRowId * Integer.BYTES);
int rowOffset = chunkBuffer.getInt(chunkRowId * VarByteChunkSingleValueWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE);
int nextRowOffset = getNextRowOffset(chunkRowId, chunkBuffer);

int length = nextRowOffset - rowOffset;
@@ -109,7 +109,7 @@ private int getNextRowOffset(int currentRowId, ByteBuffer chunkBuffer) {
// Last row in this chunk.
nextRowOffset = chunkBuffer.limit();
} else {
nextRowOffset = chunkBuffer.getInt((currentRowId + 1) * Integer.BYTES);
nextRowOffset = chunkBuffer.getInt((currentRowId + 1) * VarByteChunkSingleValueWriter.CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE);
// For incomplete chunks, the next string's offset will be 0, as row offsets for absent rows are 0.
if (nextRowOffset == 0) {
nextRowOffset = chunkBuffer.limit();
@@ -18,6 +18,7 @@
*/
package org.apache.pinot.core.io.writer.impl.v1;

import com.google.common.base.Preconditions;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
@@ -37,6 +38,8 @@
*/
public abstract class BaseChunkSingleValueWriter implements SingleColumnSingleValueWriter {
private static final Logger LOGGER = LoggerFactory.getLogger(BaseChunkSingleValueWriter.class);
private static final int FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V1V2 = Integer.BYTES;
private static final int FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V3 = Long.BYTES;

protected final FileChannel _dataFile;
protected ByteBuffer _header;
@@ -45,7 +48,9 @@ public abstract class BaseChunkSingleValueWriter implements SingleColumnSingleVa
protected final ChunkCompressor _chunkCompressor;

protected int _chunkSize;
protected int _dataOffset;
protected long _dataOffset;

private final int _headerEntryChunkOffsetSize;

/**
* Constructor for the class.
@@ -64,13 +69,25 @@ protected BaseChunkSingleValueWriter(File file, ChunkCompressorFactory.Compressi
throws FileNotFoundException {
_chunkSize = chunkSize;
_chunkCompressor = ChunkCompressorFactory.getCompressor(compressionType);

_headerEntryChunkOffsetSize = getHeaderEntryChunkOffsetSize(version);
_dataOffset = writeHeader(compressionType, totalDocs, numDocsPerChunk, sizeOfEntry, version);
_chunkBuffer = ByteBuffer.allocateDirect(chunkSize);
_compressedBuffer = ByteBuffer.allocateDirect(chunkSize * 2);
_dataFile = new RandomAccessFile(file, "rw").getChannel();
}

public static int getHeaderEntryChunkOffsetSize(int version) {
switch (version) {
case 1:
case 2:
return FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V1V2;
case 3:
return FILE_HEADER_ENTRY_CHUNK_OFFSET_SIZE_V3;
default:
throw new IllegalStateException("Invalid version: " + version);
}
}
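A worked example of the header-size arithmetic used in writeHeader below: seven fixed 4-byte fields followed by one offset entry per chunk, whose width now comes from getHeaderEntryChunkOffsetSize. The document counts are illustrative, not taken from the commit:

public class HeaderSizeSketch {
  public static void main(String[] args) {
    int totalDocs = 1_000_000;                                           // illustrative
    int numDocsPerChunk = 1_000;                                         // illustrative
    int numChunks = (totalDocs + numDocsPerChunk - 1) / numDocsPerChunk; // 1000 chunks
    int entrySize = Long.BYTES;                                          // version 3: 8-byte offsets
    int headerSize = (7 * Integer.BYTES) + (numChunks * entrySize);
    System.out.println(headerSize);                                      // 28 + 8000 = 8028 bytes
  }
}

With 4-byte entries (versions 1 and 2) the same layout would need only 28 + 4000 = 4028 bytes; the larger header is the trade-off the version bump accepts for addressing data beyond 2 GB.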

@Override
public void setChar(int row, char ch) {
throw new UnsupportedOperationException();
@@ -139,7 +156,7 @@ public void close()
private int writeHeader(ChunkCompressorFactory.CompressionType compressionType, int totalDocs, int numDocsPerChunk,
int sizeOfEntry, int version) {
int numChunks = (totalDocs + numDocsPerChunk - 1) / numDocsPerChunk;
int headerSize = (numChunks + 7) * Integer.BYTES; // 7 items written before chunk indexing.
int headerSize = (7 * Integer.BYTES) + (numChunks * _headerEntryChunkOffsetSize);

_header = ByteBuffer.allocateDirect(headerSize);

@@ -196,7 +213,12 @@ protected void writeChunk() {
throw new RuntimeException(e);
}

_header.putInt(_dataOffset);
if (_headerEntryChunkOffsetSize == Integer.BYTES) {
_header.putInt((int) _dataOffset);
} else if (_headerEntryChunkOffsetSize == Long.BYTES) {
_header.putLong(_dataOffset);
}

_dataOffset += sizeToWrite;

_chunkBuffer.clear();
@@ -40,7 +40,8 @@
* <li> Integer: Total number of docs (version 2 onwards). </li>
* <li> Integer: Compression type enum value (version 2 onwards). </li>
* <li> Integer: Start offset of data header (version 2 onwards). </li>
* <li> Integer array: Integer offsets for all chunks in the data .</li>
* <li> Integer array: Integer offsets for all chunks in the data (up to version 2),
* Long array: Long offsets for all chunks in the data (version 3 onwards) </li>
* </ul>
*
* <p> Individual Chunks: </p>
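The chunk file header layout documented in these writer javadocs can be read back with plain ByteBuffer calls. A hedged reader-side sketch (illustrative, not the Pinot reader implementation), assuming the field order shown in the list above and in the reader constructor earlier in this diff:

import java.nio.ByteBuffer;

final class FileHeaderParseSketch {
  static void parse(ByteBuffer header) {
    int version = header.getInt();              // file format version
    int numChunks = header.getInt();            // total number of chunks
    int numDocsPerChunk = header.getInt();      // docs per chunk
    int lengthOfLongestEntry = header.getInt(); // in bytes
    if (version > 1) {
      int totalDocs = header.getInt();          // version 2 onwards
      int compressionType = header.getInt();    // version 2 onwards
      int dataHeaderStart = header.getInt();    // version 2 onwards
    }
    // numChunks offset entries follow: 4-byte ints up to version 2, 8-byte longs from version 3.
    long firstChunkOffset = (version <= 2) ? header.getInt() : header.getLong();
    System.out.println("first chunk starts at offset " + firstChunkOffset);
  }
}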
@@ -53,7 +54,7 @@
@NotThreadSafe
public class FixedByteChunkSingleValueWriter extends BaseChunkSingleValueWriter {

private static final int CURRENT_VERSION = 2;
private static final int CURRENT_VERSION = 3;
private int _chunkDataOffset;

/**
@@ -36,7 +36,11 @@
* <li> Integer: Total number of chunks. </li>
* <li> Integer: Number of docs per chunk. </li>
* <li> Integer: Length of longest entry (in bytes). </li>
* <li> Integer array: Integer offsets for all chunks in the data .</li>
* <li> Integer: Total number of docs (version 2 onwards). </li>
* <li> Integer: Compression type enum value (version 2 onwards). </li>
* <li> Integer: Start offset of data header (version 2 onwards). </li>
* <li> Integer array: Integer offsets for all chunks in the data (up to version 2),
* Long array: Long offsets for all chunks in the data (version 3 onwards) </li>
* </ul>
*
* <p> Individual Chunks: </p>
@@ -49,7 +53,7 @@
*/
@NotThreadSafe
public class VarByteChunkSingleValueWriter extends BaseChunkSingleValueWriter {
private static final int CURRENT_VERSION = 2;
private static final int CURRENT_VERSION = 3;
public static final int CHUNK_HEADER_ENTRY_ROW_OFFSET_SIZE = Integer.BYTES;

private final int _chunkHeaderSize;
@@ -43,7 +43,7 @@ static PinotNativeOrderLBuffer loadFile(File file, long offset, long size)
return buffer;
}

static PinotNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
public static PinotNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
throws IOException {
if (readOnly) {
return new PinotNativeOrderLBuffer(new MMapBuffer(file, offset, size, MMapMode.READ_ONLY), true, false);
@@ -43,7 +43,7 @@ static PinotNonNativeOrderLBuffer loadFile(File file, long offset, long size)
return buffer;
}

static PinotNonNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
public static PinotNonNativeOrderLBuffer mapFile(File file, boolean readOnly, long offset, long size)
throws IOException {
if (readOnly) {
return new PinotNonNativeOrderLBuffer(new MMapBuffer(file, offset, size, MMapMode.READ_ONLY), true, false);
@@ -260,25 +260,32 @@ public void testBytes(ChunkCompressorFactory.CompressionType compressionType)
* @throws IOException
*/
@Test
public void testBackwardCompatibility()
throws IOException {
// Get v1 from resources folder
public void testBackwardCompatibilityV1()
throws Exception {
testBackwardCompatibilityHelper("data/fixedByteSVRDoubles.v1", 10009, 0);
}

@Test
public void testBackwardCompatibilityV2()
throws Exception {
testBackwardCompatibilityHelper("data/fixedByteCompressed.v2", 2000, 100.2356);
testBackwardCompatibilityHelper("data/fixedByteRaw.v2", 2000, 100.2356);
}

private void testBackwardCompatibilityHelper(String fileName, int numDocs, double startValue)
throws Exception {
ClassLoader classLoader = getClass().getClassLoader();
String fileName = "data/fixedByteSVRDoubles.v1";
URL resource = classLoader.getResource(fileName);
if (resource == null) {
throw new RuntimeException("Input file not found: " + fileName);
}

File file = new File(resource.getFile());
try (FixedByteChunkSingleValueReader reader = new FixedByteChunkSingleValueReader(
PinotDataBuffer.mapReadOnlyBigEndianFile(file))) {
ChunkReaderContext context = reader.createContext();

int numEntries = 10009; // Number of entries in the input file.
for (int i = 0; i < numEntries; i++) {
for (int i = 0; i < numDocs; i++) {
double actual = reader.getDouble(i, context);
Assert.assertEquals(actual, (double) i);
Assert.assertEquals(actual, i + startValue);
}
}
}