[CARBONDATA-2099] Refactor query scan process to improve readability
Unified concepts in scan process flow:

1. QueryModel contains all parameters for a scan; it is created by an API in CarbonTable. (In the future, CarbonTable will be the entry point for various table operations.) A sketch of this flow follows below.
2. Use the term ColumnChunk to represent one column in one blocklet, and use ChunkIndex in the reader to read a specified column chunk.
3. Use the term ColumnPage to represent one page in one ColumnChunk.
4. Rename QueryColumn => ProjectionColumn, indicating it is used for projection.

This closes apache#1874
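
To make the renamed concepts concrete, here is a minimal, self-contained sketch of the intended flow. It is illustrative only: the nested types below are placeholders invented for this example and do not reproduce the real CarbonData signatures; they only mirror the naming scheme the commit introduces (CarbonTable builds a QueryModel, and the scan then walks ColumnChunks one ColumnPage at a time over ProjectionColumns).

// Hypothetical sketch only: these nested types are stand-ins, not the real CarbonData API.
// (Java 16+ records are used for brevity.)
import java.util.List;

final class ScanFlowSketch {

  // Formerly "QueryColumn"; the new name says it is a projection-side concept.
  record ProjectionColumn(String columnName) {}

  // Carries every parameter the scan needs (projection, filter, ...).
  record QueryModel(List<ProjectionColumn> projection, String filterExpression) {}

  // CarbonTable as the single entry point that produces the QueryModel.
  static final class CarbonTable {
    QueryModel createQueryModel(List<ProjectionColumn> projection, String filter) {
      return new QueryModel(projection, filter);
    }
  }

  public static void main(String[] args) {
    CarbonTable table = new CarbonTable();
    QueryModel model = table.createQueryModel(
        List.of(new ProjectionColumn("name"), new ProjectionColumn("salary")),
        "salary > 1000");
    // A reader would then locate each ColumnChunk (one column per blocklet) through its
    // ChunkIndex and decode it one ColumnPage at a time.
    System.out.println(model);
  }
}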
jackylk authored and ravipesala committed Mar 8, 2018
1 parent 47872e8 commit bceb121
Showing 191 changed files with 2,862 additions and 4,418 deletions.
@@ -260,8 +260,7 @@ private void loadDictionaryData(DictionaryInfo dictionaryInfo,
DictionaryCacheLoader dictionaryCacheLoader =
new DictionaryCacheLoaderImpl(dictionaryColumnUniqueIdentifier);
dictionaryCacheLoader
.load(dictionaryInfo, dictionaryColumnUniqueIdentifier.getColumnIdentifier(),
dictionaryChunkStartOffset, dictionaryChunkEndOffset, loadSortIndex);
.load(dictionaryInfo, dictionaryChunkStartOffset, dictionaryChunkEndOffset, loadSortIndex);
}

/**
@@ -19,16 +19,13 @@

import java.io.IOException;

import org.apache.carbondata.core.metadata.ColumnIdentifier;

public interface DictionaryCacheLoader {

/**
* This method will load the dictionary data for a given columnIdentifier
*
* @param dictionaryInfo dictionary info object which will hold the required data
* for a given column
* @param columnIdentifier column unique identifier
* @param dictionaryChunkStartOffset start offset from where dictionary file has to
* be read
* @param dictionaryChunkEndOffset end offset till where dictionary file has to
@@ -37,7 +34,7 @@ public interface DictionaryCacheLoader {
* read in memory after dictionary loading
* @throws IOException
*/
void load(DictionaryInfo dictionaryInfo, ColumnIdentifier columnIdentifier,
long dictionaryChunkStartOffset, long dictionaryChunkEndOffset, boolean loadSortIndex)
void load(DictionaryInfo dictionaryInfo, long dictionaryChunkStartOffset,
long dictionaryChunkEndOffset, boolean loadSortIndex)
throws IOException;
}
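
Why columnIdentifier could be dropped: the loader already receives a DictionaryColumnUniqueIdentifier through its constructor (see the DictionaryCacheLoaderImpl change below), and that is exactly what the old call site unwrapped via getColumnIdentifier(). A hedged caller-side sketch of the narrowed signature, mirroring the loadDictionaryData hunk above; how the offsets and DictionaryInfo are produced is outside this diff and simply passed in, and the sketch assumes it sits in the same package as the now package-private DictionaryCacheLoaderImpl constructor.

// Sketch only: mirrors the new call shape shown in the loadDictionaryData hunk above.
// Assumes same-package access to the package-private DictionaryCacheLoaderImpl constructor.
private static void loadDictionary(
    DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier,
    DictionaryInfo dictionaryInfo,
    long dictionaryChunkStartOffset,
    long dictionaryChunkEndOffset,
    boolean loadSortIndex) throws IOException {
  // The column identifier no longer travels as a separate argument: it is already part of
  // the DictionaryColumnUniqueIdentifier handed to the loader's constructor.
  DictionaryCacheLoader dictionaryCacheLoader =
      new DictionaryCacheLoaderImpl(dictionaryColumnUniqueIdentifier);
  dictionaryCacheLoader.load(dictionaryInfo,
      dictionaryChunkStartOffset,   // start offset from where the dictionary file is read
      dictionaryChunkEndOffset,     // end offset up to which the dictionary file is read
      loadSortIndex);               // whether to also read the sort index after loading
}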
@@ -23,7 +23,6 @@
import java.util.List;

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.ColumnIdentifier;
import org.apache.carbondata.core.reader.CarbonDictionaryReader;
import org.apache.carbondata.core.reader.sortindex.CarbonDictionarySortIndexReader;
import org.apache.carbondata.core.service.CarbonCommonFactory;
@@ -43,8 +42,7 @@ public class DictionaryCacheLoaderImpl implements DictionaryCacheLoader {
/**
* @param dictionaryColumnUniqueIdentifier dictionary column identifier
*/
public DictionaryCacheLoaderImpl(
DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier) {
DictionaryCacheLoaderImpl(DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier) {
this.dictionaryColumnUniqueIdentifier = dictionaryColumnUniqueIdentifier;
}

@@ -53,7 +51,6 @@ public DictionaryCacheLoaderImpl(
*
* @param dictionaryInfo dictionary info object which will hold the required data
* for a given column
* @param columnIdentifier column unique identifier
* @param dictionaryChunkStartOffset start offset from where dictionary file has to
* be read
* @param dictionaryChunkEndOffset end offset till where dictionary file has to
@@ -62,9 +59,9 @@ public DictionaryCacheLoaderImpl(
* read in memory after dictionary loading
* @throws IOException
*/
@Override public void load(DictionaryInfo dictionaryInfo, ColumnIdentifier columnIdentifier,
long dictionaryChunkStartOffset, long dictionaryChunkEndOffset, boolean loadSortIndex)
throws IOException {
@Override
public void load(DictionaryInfo dictionaryInfo, long dictionaryChunkStartOffset,
long dictionaryChunkEndOffset, boolean loadSortIndex) throws IOException {
Iterator<byte[]> columnDictionaryChunkWrapper =
load(dictionaryColumnUniqueIdentifier, dictionaryChunkStartOffset,
dictionaryChunkEndOffset);
@@ -43,16 +43,10 @@ public BTreeBuilderInfo(List<DataFileFooter> footerList,
this.footerList = footerList;
}

/**
* @return the eachDimensionBlockSize
*/
public int[] getDimensionColumnValueSize() {
return dimensionColumnValueSize;
}

/**
* @return the footerList
*/
public List<DataFileFooter> getFooterList() {
return footerList;
}
@@ -18,7 +18,6 @@

import java.io.IOException;

import org.apache.carbondata.core.cache.update.BlockletLevelDeleteDeltaDataCache;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;

@@ -28,62 +27,63 @@
public interface DataRefNode {

/**
* Method to get the next block this can be used while scanning when
* Return the next data block in the tree, this can be used while scanning when
* iterator of this class can be used iterate over blocks
*
* @return next block
*/
DataRefNode getNextDataRefNode();

/**
* to get the number of keys tuples present in the block
*
* @return number of keys in the block
* Return the number of rows in the data block
*/
int nodeSize();
int numRows();

/**
* Method can be used to get the block index .This can be used when multiple
* thread can be used scan group of blocks in that can we can assign the
* Return the block index. This can be used when multiple
* thread can be used scan group of blocks in that can we can assign
* some of the blocks to one thread and some to other
*
* @return block number
*/
long nodeNumber();
long nodeIndex();

/**
* Return the blocklet index in the node
*/
short blockletIndex();

/**
* Method is used for retreiving the BlockletId.
* @return the blockletid related to the data block.
* Return the number of pages
*/
String blockletId();
int numberOfPages();

/**
* This method will be used to get the max value of all the columns this can
* Return the number of rows for a give page
*/
int getPageRowCount(int pageNumber);

/**
* Return the max value of all the columns, this can
* be used in case of filter query
*
*/
byte[][] getColumnsMaxValue();

/**
* This method will be used to get the min value of all the columns this can
* Return the min value of all the columns, this can
* be used in case of filter query
*
*/
byte[][] getColumnsMinValue();

/**
* Below method will be used to get the dimension chunks
*
* @param fileReader file reader to read the chunks from file
* @param blockIndexes range indexes of the blocks need to be read
* @param columnIndexRange range indexes of the blocks need to be read
* value can be {{0,10},{11,12},{13,13}}
* here 0 to 10 and 11 to 12 column blocks will be read in one
* IO operation 13th column block will be read separately
* This will be helpful to reduce IO by reading bigger chunk of
* data in On IO
* data in one IO operation
* @return dimension data chunks
*/
DimensionRawColumnChunk[] getDimensionChunks(FileHolder fileReader, int[][] blockIndexes)
DimensionRawColumnChunk[] readDimensionChunks(FileReader fileReader, int[][] columnIndexRange)
throws IOException;

/**
@@ -92,54 +92,31 @@ DimensionRawColumnChunk[] getDimensionChunks(FileHolder fileReader, int[][] bloc
* @param fileReader file reader to read the chunk from file
* @return dimension data chunk
*/
DimensionRawColumnChunk getDimensionChunk(FileHolder fileReader, int blockIndexes)
DimensionRawColumnChunk readDimensionChunk(FileReader fileReader, int columnIndex)
throws IOException;

/**
* Below method will be used to get the measure chunk
*
* @param fileReader file reader to read the chunk from file
* @param blockIndexes range indexes of the blocks need to be read
* @param columnIndexRange range indexes of the blocks need to be read
* value can be {{0,10},{11,12},{13,13}}
* here 0 to 10 and 11 to 12 column blocks will be read in one
* IO operation 13th column block will be read separately
* This will be helpful to reduce IO by reading bigger chunk of
* data in On IO
* data in one IO operation
* @return measure column data chunk
*/
MeasureRawColumnChunk[] getMeasureChunks(FileHolder fileReader, int[][] blockIndexes)
MeasureRawColumnChunk[] readMeasureChunks(FileReader fileReader, int[][] columnIndexRange)
throws IOException;

/**
* Below method will be used to read the measure chunk
*
* @param fileReader file read to read the file chunk
* @param blockIndex block index to be read from file
* @param columnIndex block index to be read from file
* @return measure data chunk
*/
MeasureRawColumnChunk getMeasureChunk(FileHolder fileReader, int blockIndex) throws IOException;
MeasureRawColumnChunk readMeasureChunk(FileReader fileReader, int columnIndex) throws IOException;

/**
* @param deleteDeltaDataCache
*/
void setDeleteDeltaDataCache(BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache);

/**
* @return
*/
BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache();

/**
* number of pages in blocklet
* @return
*/
int numberOfPages();

/**
* Return the number of rows for a give page
*
* @param pageNumber
* @return
*/
int getPageRowCount(int pageNumber);
}
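
To make the columnIndexRange contract concrete, here is a hedged sketch of how a scanner might drive the renamed DataRefNode API. How the DataRefNode and FileReader instances are obtained is not part of this diff and is taken as given; the method names and the range format {{0,10},{11,12},{13,13}} come straight from the interface above.

// Sketch only: exercises the renamed DataRefNode methods; node and fileReader are assumed inputs.
// Assumed imports: java.io.IOException plus the chunk and reader types shown in this diff.
static void scanBlocklet(DataRefNode node, FileReader fileReader) throws IOException {
  // Columns 0-10 and 11-12 are each fetched in one IO operation; column 13 is read separately.
  int[][] columnIndexRange = {{0, 10}, {11, 12}, {13, 13}};
  DimensionRawColumnChunk[] dimensionChunks =
      node.readDimensionChunks(fileReader, columnIndexRange);

  // Blocklet-level metadata through the renamed accessors.
  int totalRows = node.numRows();
  for (int page = 0; page < node.numberOfPages(); page++) {
    int rowsInPage = node.getPageRowCount(page);
    // decode each ColumnPage of every ColumnChunk for this page here ...
  }
}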
@@ -20,7 +20,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;

public interface FileHolder {
public interface FileReader {

/**
* This method will be used to reads the data to byteBuffer from file based on offset
@@ -98,10 +98,6 @@ ByteBuffer readByteBuffer(String filePath, long offset, int length)
*/
void finish() throws IOException;

void setQueryId(String queryId);

String getQueryId();

/**
* Set the flag to read data page by page instead of whole blocklet.
*
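
Finally, a hedged sketch of the renamed FileReader in use. readByteBuffer and finish are the methods visible in this hunk; the wrapping helper and its path/offset/length values are assumptions for illustration only.

// Sketch only: the two FileReader calls shown in this hunk, wrapped in an assumed helper.
// Assumed imports: java.io.IOException, java.nio.ByteBuffer.
static ByteBuffer readRawChunk(FileReader fileReader, String carbonDataFilePath,
    long offset, int length) throws IOException {
  // Read `length` bytes starting at `offset` from the given carbondata file.
  ByteBuffer buffer = fileReader.readByteBuffer(carbonDataFilePath, offset, length);
  // Once all chunks needed by the query have been read, release the reader's resources.
  fileReader.finish();
  return buffer;
}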
