[CARBONDATA-2099] Refactor query scan process to improve readability
Unified concepts in scan process flow:

1. QueryModel contains all parameters for a scan; it is created by an API in CarbonTable. (In the future, CarbonTable will be the entry point for various table operations.) A sketch of this flow follows below.
2. Use the term ColumnChunk to represent one column in one blocklet, and use ChunkIndex in the reader to read a specified column chunk.
3. Use the term ColumnPage to represent one page in one ColumnChunk.
4. Rename QueryColumn => ProjectionColumn, indicating it is used for projection.

This closes apache#1874
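
To make the renamed concepts concrete, here is a minimal, self-contained sketch of the intended flow. It is illustrative only: the nested types below are placeholders invented for this example and do not reproduce the real CarbonData signatures; they only mirror the naming scheme the commit introduces (CarbonTable builds a QueryModel, and the scan then walks ColumnChunks one ColumnPage at a time over ProjectionColumns).

// Hypothetical sketch only: these nested types are stand-ins, not the real CarbonData API.
// (Java 16+ records are used for brevity.)
import java.util.List;

final class ScanFlowSketch {

  // Formerly "QueryColumn"; the new name says it is a projection-side concept.
  record ProjectionColumn(String columnName) {}

  // Carries every parameter the scan needs (projection, filter, ...).
  record QueryModel(List<ProjectionColumn> projection, String filterExpression) {}

  // CarbonTable as the single entry point that produces the QueryModel.
  static final class CarbonTable {
    QueryModel createQueryModel(List<ProjectionColumn> projection, String filter) {
      return new QueryModel(projection, filter);
    }
  }

  public static void main(String[] args) {
    CarbonTable table = new CarbonTable();
    QueryModel model = table.createQueryModel(
        List.of(new ProjectionColumn("name"), new ProjectionColumn("salary")),
        "salary > 1000");
    // A reader would then locate each ColumnChunk (one column per blocklet) through its
    // ChunkIndex and decode it one ColumnPage at a time.
    System.out.println(model);
  }
}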
jackylk authored and ravipesala committed Mar 8, 2018
1 parent 47872e8 commit bceb121
Showing 191 changed files with 2,862 additions and 4,418 deletions.
@@ -260,8 +260,7 @@ private void loadDictionaryData(DictionaryInfo dictionaryInfo,
DictionaryCacheLoader dictionaryCacheLoader =
new DictionaryCacheLoaderImpl(dictionaryColumnUniqueIdentifier);
dictionaryCacheLoader
.load(dictionaryInfo, dictionaryColumnUniqueIdentifier.getColumnIdentifier(),
dictionaryChunkStartOffset, dictionaryChunkEndOffset, loadSortIndex);
.load(dictionaryInfo, dictionaryChunkStartOffset, dictionaryChunkEndOffset, loadSortIndex);
}

/**
@@ -19,16 +19,13 @@

import java.io.IOException;

import org.apache.carbondata.core.metadata.ColumnIdentifier;

public interface DictionaryCacheLoader {

/**
* This method will load the dictionary data for a given columnIdentifier
*
* @param dictionaryInfo dictionary info object which will hold the required data
* for a given column
* @param columnIdentifier column unique identifier
* @param dictionaryChunkStartOffset start offset from where dictionary file has to
* be read
* @param dictionaryChunkEndOffset end offset till where dictionary file has to
@@ -37,7 +34,7 @@ public interface DictionaryCacheLoader {
* read in memory after dictionary loading
* @throws IOException
*/
void load(DictionaryInfo dictionaryInfo, ColumnIdentifier columnIdentifier,
long dictionaryChunkStartOffset, long dictionaryChunkEndOffset, boolean loadSortIndex)
void load(DictionaryInfo dictionaryInfo, long dictionaryChunkStartOffset,
long dictionaryChunkEndOffset, boolean loadSortIndex)
throws IOException;
}
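
Why columnIdentifier could be dropped: the loader already receives a DictionaryColumnUniqueIdentifier through its constructor (see the DictionaryCacheLoaderImpl change below), and that is exactly what the old call site unwrapped via getColumnIdentifier(). A hedged caller-side sketch of the narrowed signature, mirroring the loadDictionaryData hunk above; how the offsets and DictionaryInfo are produced is outside this diff and simply passed in, and the sketch assumes it sits in the same package as the now package-private DictionaryCacheLoaderImpl constructor.

// Sketch only: mirrors the new call shape shown in the loadDictionaryData hunk above.
// Assumes same-package access to the package-private DictionaryCacheLoaderImpl constructor.
private static void loadDictionary(
    DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier,
    DictionaryInfo dictionaryInfo,
    long dictionaryChunkStartOffset,
    long dictionaryChunkEndOffset,
    boolean loadSortIndex) throws IOException {
  // The column identifier no longer travels as a separate argument: it is already part of
  // the DictionaryColumnUniqueIdentifier handed to the loader's constructor.
  DictionaryCacheLoader dictionaryCacheLoader =
      new DictionaryCacheLoaderImpl(dictionaryColumnUniqueIdentifier);
  dictionaryCacheLoader.load(dictionaryInfo,
      dictionaryChunkStartOffset,   // start offset from where the dictionary file is read
      dictionaryChunkEndOffset,     // end offset up to which the dictionary file is read
      loadSortIndex);               // whether to also read the sort index after loading
}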
@@ -23,7 +23,6 @@
import java.util.List;

import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.metadata.ColumnIdentifier;
import org.apache.carbondata.core.reader.CarbonDictionaryReader;
import org.apache.carbondata.core.reader.sortindex.CarbonDictionarySortIndexReader;
import org.apache.carbondata.core.service.CarbonCommonFactory;
@@ -43,8 +42,7 @@ public class DictionaryCacheLoaderImpl implements DictionaryCacheLoader {
/**
* @param dictionaryColumnUniqueIdentifier dictionary column identifier
*/
public DictionaryCacheLoaderImpl(
DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier) {
DictionaryCacheLoaderImpl(DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier) {
this.dictionaryColumnUniqueIdentifier = dictionaryColumnUniqueIdentifier;
}

@@ -53,7 +51,6 @@ public DictionaryCacheLoaderImpl(
*
* @param dictionaryInfo dictionary info object which will hold the required data
* for a given column
* @param columnIdentifier column unique identifier
* @param dictionaryChunkStartOffset start offset from where dictionary file has to
* be read
* @param dictionaryChunkEndOffset end offset till where dictionary file has to
@@ -62,9 +59,9 @@ public DictionaryCacheLoaderImpl(
* read in memory after dictionary loading
* @throws IOException
*/
@Override public void load(DictionaryInfo dictionaryInfo, ColumnIdentifier columnIdentifier,
long dictionaryChunkStartOffset, long dictionaryChunkEndOffset, boolean loadSortIndex)
throws IOException {
@Override
public void load(DictionaryInfo dictionaryInfo, long dictionaryChunkStartOffset,
long dictionaryChunkEndOffset, boolean loadSortIndex) throws IOException {
Iterator<byte[]> columnDictionaryChunkWrapper =
load(dictionaryColumnUniqueIdentifier, dictionaryChunkStartOffset,
dictionaryChunkEndOffset);
@@ -43,16 +43,10 @@ public BTreeBuilderInfo(List<DataFileFooter> footerList,
this.footerList = footerList;
}

/**
* @return the eachDimensionBlockSize
*/
public int[] getDimensionColumnValueSize() {
return dimensionColumnValueSize;
}

/**
* @return the footerList
*/
public List<DataFileFooter> getFooterList() {
return footerList;
}
@@ -18,7 +18,6 @@

import java.io.IOException;

import org.apache.carbondata.core.cache.update.BlockletLevelDeleteDeltaDataCache;
import org.apache.carbondata.core.datastore.chunk.impl.DimensionRawColumnChunk;
import org.apache.carbondata.core.datastore.chunk.impl.MeasureRawColumnChunk;

@@ -28,62 +27,63 @@
public interface DataRefNode {

/**
* Method to get the next block this can be used while scanning when
* Return the next data block in the tree, this can be used while scanning when
* iterator of this class can be used iterate over blocks
*
* @return next block
*/
DataRefNode getNextDataRefNode();

/**
* to get the number of keys tuples present in the block
*
* @return number of keys in the block
* Return the number of rows in the data block
*/
int nodeSize();
int numRows();

/**
* Method can be used to get the block index .This can be used when multiple
* thread can be used scan group of blocks in that can we can assign the
* Return the block index. This can be used when multiple
* thread can be used scan group of blocks in that can we can assign
* some of the blocks to one thread and some to other
*
* @return block number
*/
long nodeNumber();
long nodeIndex();

/**
* Return the blocklet index in the node
*/
short blockletIndex();

/**
* Method is used for retreiving the BlockletId.
* @return the blockletid related to the data block.
* Return the number of pages
*/
String blockletId();
int numberOfPages();

/**
* This method will be used to get the max value of all the columns this can
* Return the number of rows for a give page
*/
int getPageRowCount(int pageNumber);

/**
* Return the max value of all the columns, this can
* be used in case of filter query
*
*/
byte[][] getColumnsMaxValue();

/**
* This method will be used to get the min value of all the columns this can
* Return the min value of all the columns, this can
* be used in case of filter query
*
*/
byte[][] getColumnsMinValue();

/**
* Below method will be used to get the dimension chunks
*
* @param fileReader file reader to read the chunks from file
* @param blockIndexes range indexes of the blocks need to be read
* @param columnIndexRange range indexes of the blocks need to be read
* value can be {{0,10},{11,12},{13,13}}
* here 0 to 10 and 11 to 12 column blocks will be read in one
* IO operation 13th column block will be read separately
* This will be helpful to reduce IO by reading bigger chunk of
* data in On IO
* data in one IO operation
* @return dimension data chunks
*/
DimensionRawColumnChunk[] getDimensionChunks(FileHolder fileReader, int[][] blockIndexes)
DimensionRawColumnChunk[] readDimensionChunks(FileReader fileReader, int[][] columnIndexRange)
throws IOException;

/**
@@ -92,54 +92,31 @@ DimensionRawColumnChunk[] getDimensionChunks(FileHolder fileReader, int[][] bloc
* @param fileReader file reader to read the chunk from file
* @return dimension data chunk
*/
DimensionRawColumnChunk getDimensionChunk(FileHolder fileReader, int blockIndexes)
DimensionRawColumnChunk readDimensionChunk(FileReader fileReader, int columnIndex)
throws IOException;

/**
* Below method will be used to get the measure chunk
*
* @param fileReader file reader to read the chunk from file
* @param blockIndexes range indexes of the blocks need to be read
* @param columnIndexRange range indexes of the blocks need to be read
* value can be {{0,10},{11,12},{13,13}}
* here 0 to 10 and 11 to 12 column blocks will be read in one
* IO operation 13th column block will be read separately
* This will be helpful to reduce IO by reading bigger chunk of
* data in On IO
* data in one IO operation
* @return measure column data chunk
*/
MeasureRawColumnChunk[] getMeasureChunks(FileHolder fileReader, int[][] blockIndexes)
MeasureRawColumnChunk[] readMeasureChunks(FileReader fileReader, int[][] columnIndexRange)
throws IOException;

/**
* Below method will be used to read the measure chunk
*
* @param fileReader file read to read the file chunk
* @param blockIndex block index to be read from file
* @param columnIndex block index to be read from file
* @return measure data chunk
*/
MeasureRawColumnChunk getMeasureChunk(FileHolder fileReader, int blockIndex) throws IOException;
MeasureRawColumnChunk readMeasureChunk(FileReader fileReader, int columnIndex) throws IOException;

/**
* @param deleteDeltaDataCache
*/
void setDeleteDeltaDataCache(BlockletLevelDeleteDeltaDataCache deleteDeltaDataCache);

/**
* @return
*/
BlockletLevelDeleteDeltaDataCache getDeleteDeltaDataCache();

/**
* number of pages in blocklet
* @return
*/
int numberOfPages();

/**
* Return the number of rows for a give page
*
* @param pageNumber
* @return
*/
int getPageRowCount(int pageNumber);
}
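
To make the columnIndexRange contract concrete, here is a hedged sketch of how a scanner might drive the renamed DataRefNode API. How the DataRefNode and FileReader instances are obtained is not part of this diff and is taken as given; the method names and the range format {{0,10},{11,12},{13,13}} come straight from the interface above.

// Sketch only: exercises the renamed DataRefNode methods; node and fileReader are assumed inputs.
// Assumed imports: java.io.IOException plus the chunk and reader types shown in this diff.
static void scanBlocklet(DataRefNode node, FileReader fileReader) throws IOException {
  // Columns 0-10 and 11-12 are each fetched in one IO operation; column 13 is read separately.
  int[][] columnIndexRange = {{0, 10}, {11, 12}, {13, 13}};
  DimensionRawColumnChunk[] dimensionChunks =
      node.readDimensionChunks(fileReader, columnIndexRange);

  // Blocklet-level metadata through the renamed accessors.
  int totalRows = node.numRows();
  for (int page = 0; page < node.numberOfPages(); page++) {
    int rowsInPage = node.getPageRowCount(page);
    // decode each ColumnPage of every ColumnChunk for this page here ...
  }
}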
@@ -20,7 +20,7 @@
import java.io.IOException;
import java.nio.ByteBuffer;

public interface FileHolder {
public interface FileReader {

/**
* This method will be used to reads the data to byteBuffer from file based on offset
@@ -98,10 +98,6 @@ ByteBuffer readByteBuffer(String filePath, long offset, int length)
*/
void finish() throws IOException;

void setQueryId(String queryId);

String getQueryId();

/**
* Set the flag to read data page by page instead of whole blocklet.
*
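
Finally, a hedged sketch of the renamed FileReader in use. readByteBuffer and finish are the methods visible in this hunk; the wrapping helper and its path/offset/length values are assumptions for illustration only.

// Sketch only: the two FileReader calls shown in this hunk, wrapped in an assumed helper.
// Assumed imports: java.io.IOException, java.nio.ByteBuffer.
static ByteBuffer readRawChunk(FileReader fileReader, String carbonDataFilePath,
    long offset, int length) throws IOException {
  // Read `length` bytes starting at `offset` from the given carbondata file.
  ByteBuffer buffer = fileReader.readByteBuffer(carbonDataFilePath, offset, length);
  // Once all chunks needed by the query have been read, release the reader's resources.
  fileReader.finish();
  return buffer;
}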
