diff --git a/pixels-common/src/main/resources/pixels.properties b/pixels-common/src/main/resources/pixels.properties
index cdfad1b847..b9f81ddce6 100644
--- a/pixels-common/src/main/resources/pixels.properties
+++ b/pixels-common/src/main/resources/pixels.properties
@@ -367,7 +367,7 @@ index.rocksdb.target.file.size.base=67108864
# rocksdb file size multiplier (default to 1)
index.rocksdb.target.file.size.multiplier=1
# rocksdb key fixed prefix length
-index.rocksdb.prefix.length=12
+index.rocksdb.prefix.length=4
# rocksdb max subcompactions
index.rocksdb.max.subcompactions=1
# rocksdb compression type (e.g. NO_COMPRESSION, SNAPPY_COMPRESSION, ZLIB_COMPRESSION, BZ2_COMPRESSION, LZ4_COMPRESSION, LZ4HC_COMPRESSION, ZSTD_COMPRESSION)
@@ -391,7 +391,7 @@ index.cache.capacity=10000000
# The expiration time (in seconds) of cache entries
index.cache.expiration.seconds=3600
# whether each index corresponds to its own column family
-index.rocksdb.multicf=false
+index.rocksdb.multicf=true
index.bucket.num=128
# the directory where the sqlite files of main index are stored, each main index is stored as a sqlite file
index.sqlite.path=/tmp/sqlite
diff --git a/pixels-index/pixels-index-rocksdb/pom.xml b/pixels-index/pixels-index-rocksdb/pom.xml
index 2e019d349f..7cf0f69986 100644
--- a/pixels-index/pixels-index-rocksdb/pom.xml
+++ b/pixels-index/pixels-index-rocksdb/pom.xml
@@ -15,8 +15,6 @@
8
8
UTF-8
-
- 10.2.1
diff --git a/pixels-index/pixels-index-rocksdb/src/main/java/io/pixelsdb/pixels/index/rocksdb/RocksDBFactory.java b/pixels-index/pixels-index-rocksdb/src/main/java/io/pixelsdb/pixels/index/rocksdb/RocksDBFactory.java
index a6441818c4..5b1fd91ed9 100644
--- a/pixels-index/pixels-index-rocksdb/src/main/java/io/pixelsdb/pixels/index/rocksdb/RocksDBFactory.java
+++ b/pixels-index/pixels-index-rocksdb/src/main/java/io/pixelsdb/pixels/index/rocksdb/RocksDBFactory.java
@@ -189,7 +189,9 @@ private static ColumnFamilyDescriptor createCFDescriptor(byte[] name, Integer ke
int fixedLengthPrefix = Integer.parseInt(config.getProperty("index.rocksdb.prefix.length"));
if (keyLen != null)
{
- fixedLengthPrefix = keyLen + Long.BYTES; // key buffer + index id
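+ // Prefix must only cover the logical lookup key (plus the 8-byte index id when multiCF is false,
+ // i.e. when indexes do not each have their own column family); it must not include the encoded timestamp suffix.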
+ fixedLengthPrefix = keyLen + (multiCF ? 0 : Long.BYTES);
}
CompactionStyle compactionStyle = CompactionStyle.valueOf(config.getProperty("index.rocksdb.compaction.style"));
diff --git a/pixels-index/pixels-index-rocksdb/src/test/java/io/pixelsdb/pixels/index/rocksdb/TestRocksDBIndex.java b/pixels-index/pixels-index-rocksdb/src/test/java/io/pixelsdb/pixels/index/rocksdb/TestRocksDBIndex.java
index 67ed0306eb..4212bf054f 100644
--- a/pixels-index/pixels-index-rocksdb/src/test/java/io/pixelsdb/pixels/index/rocksdb/TestRocksDBIndex.java
+++ b/pixels-index/pixels-index-rocksdb/src/test/java/io/pixelsdb/pixels/index/rocksdb/TestRocksDBIndex.java
@@ -25,39 +25,52 @@
import io.pixelsdb.pixels.common.exception.SinglePointIndexException;
import io.pixelsdb.pixels.common.index.IndexOption;
import io.pixelsdb.pixels.common.index.SinglePointIndex;
+import io.pixelsdb.pixels.common.utils.ConfigFactory;
+import io.pixelsdb.pixels.common.utils.IndexUtils;
import io.pixelsdb.pixels.index.IndexProto;
import org.junit.jupiter.api.AfterEach;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
+import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDB;
import org.rocksdb.RocksDBException;
+import org.rocksdb.RocksIterator;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
+import static io.pixelsdb.pixels.index.rocksdb.RocksDBIndex.toBuffer;
import static io.pixelsdb.pixels.index.rocksdb.RocksDBIndex.toKeyBuffer;
+import static io.pixelsdb.pixels.index.rocksdb.RocksDBIndex.startsWith;
import static org.junit.jupiter.api.Assertions.*;
public class TestRocksDBIndex
{
+ private static final boolean MULTI_CF =
+ Boolean.parseBoolean(ConfigFactory.Instance().getProperty("index.rocksdb.multicf"));
+ private static final Logger log = LoggerFactory.getLogger(TestRocksDBIndex.class);
private RocksDB rocksDB;
private static final long TABLE_ID = 100L;
private static final long INDEX_ID = 100L;
+ private static final int VNODE_ID = 0;
private SinglePointIndex uniqueIndex;
private SinglePointIndex nonUniqueIndex;
-
+ private ColumnFamilyHandle columnFamilyHandle;
@BeforeEach
public void setUp() throws RocksDBException,SinglePointIndexException
{
IndexOption option = IndexOption.builder()
- .vNodeId(0)
+ .vNodeId(VNODE_ID)
.build();
uniqueIndex = new RocksDBIndex(TABLE_ID, INDEX_ID, true, option);
nonUniqueIndex = new RocksDBIndex(TABLE_ID, INDEX_ID + 1, false, option);
rocksDB = RocksDBFactory.getRocksDB();
+ columnFamilyHandle = RocksDBFactory.getOrCreateColumnFamily(TABLE_ID, INDEX_ID, VNODE_ID); // requested with the same ids as uniqueIndex; nonUniqueIndex uses INDEX_ID + 1
}
@AfterEach
@@ -77,7 +90,7 @@ public void tearDown() throws SinglePointIndexException
public void testPutEntry() throws RocksDBException, SinglePointIndexException
{
// Create Entry
- byte[] key = "testPutEntry".getBytes();
+ byte[] key = ByteBuffer.allocate(4).putInt(1).array();
long timestamp = 1000L;
long rowId = 100L;
@@ -91,7 +104,7 @@ public void testPutEntry() throws RocksDBException, SinglePointIndexException
ByteBuffer valueBuffer = RocksDBThreadResources.getValueBuffer();
ReadOptions readOptions = new ReadOptions();
// Assert index has been written to rocksDB
- int ret = rocksDB.get(readOptions, keyBuffer, valueBuffer);
+ int ret = rocksDB.get(columnFamilyHandle, readOptions, keyBuffer, valueBuffer);
assertTrue(ret != RocksDB.NOT_FOUND);
long storedRowId = valueBuffer.getLong();
@@ -172,6 +185,50 @@ public void testGetUniqueRowId() throws SinglePointIndexException
assertEquals(rowId2, result, "getUniqueRowId should return the rowId of the latest timestamp entry");
}
+    /**
+     * Seeking with a (key, timestamp) pair must land on the newest stored version whose timestamp
+     * does not exceed the seek timestamp, and the hit must still start with the seek key's bytes.
+     */
+    @Test
+    public void testSeekFindsNextVersionWithSameLogicalPrefix() throws Exception
+    {
+        // 4-byte key, matching the default index.rocksdb.prefix.length of 4
+        byte[] key = ByteBuffer.allocate(4).putInt(7).array();
+        long[] storedTimestamps = {1L, 3L, 5L, 7L, 9L};
+        long[] seekTimestamps = {1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L};
+        long[] expectedTimestamps = {1L, 1L, 3L, 3L, 5L, 5L, 7L, 7L, 9L, 9L};
+
+        for (long timestamp : storedTimestamps)
+        {
+            uniqueIndex.putEntry(indexKey(key, timestamp), timestamp);
+        }
+
+        // NOTE(review): these options come from RocksDBThreadResources and are mutated in place - confirm no other test on this thread relies on different settings
+        ReadOptions readOptions = RocksDBThreadResources.getReadOptions();
+        readOptions.setPrefixSameAsStart(true)
+                .setTotalOrderSeek(false)
+                .setVerifyChecksums(false);
+
+        for (int i = 0; i < seekTimestamps.length; i++)
+        {
+            long seekTimestamp = seekTimestamps[i];
+            long expectedTimestamp = expectedTimestamps[i];
+            ByteBuffer seekKey = toKeyBuffer(indexKey(key, seekTimestamp));
+
+            try (RocksIterator iterator = rocksDB.newIterator(columnFamilyHandle, readOptions))
+            {
+                iterator.seek(seekKey);
+                assertTrue(iterator.isValid(), "seek should find a version for timestamp " + seekTimestamp);
+                assertTrue(startsWith(ByteBuffer.wrap(iterator.key()), seekKey),
+                        "seek should remain within the same logical prefix for timestamp " + seekTimestamp);
+                long getTs = extractTimestampFromUniqueKey(iterator.key());
+                log.debug("Timestamp: {}", getTs);
+                assertEquals(expectedTimestamp, getTs,
+                        "seek should land on the closest stored version for timestamp " + seekTimestamp);
+            }
+        }
+    }
+
@Test
public void testGetRowIds() throws SinglePointIndexException
{
@@ -207,6 +264,21 @@ public void testGetRowIds() throws SinglePointIndexException
assertTrue(rowIds.containsAll(result) && result.containsAll(rowIds), "getRowIds should return the rowId of all entries");
}
+    /** Builds an IndexKey carrying INDEX_ID, a copy of the given key bytes and the given timestamp. */
+    private static IndexProto.IndexKey indexKey(byte[] key, long timestamp)
+    {
+        IndexProto.IndexKey.Builder builder = IndexProto.IndexKey.newBuilder().setIndexId(INDEX_ID);
+        builder.setKey(ByteString.copyFrom(key));
+        builder.setTimestamp(timestamp);
+        return builder.build();
+    }
+
+    private static long extractTimestampFromUniqueKey(byte[] encodedKey)
+    {
+        int suffixOffset = encodedKey.length - Long.BYTES; // start of the trailing 8 bytes of the encoded key
+        return Long.MAX_VALUE - ByteBuffer.wrap(encodedKey).getLong(suffixOffset);
+    }
+
@Test
public void testDeleteEntry() throws SinglePointIndexException
{
@@ -396,4 +468,4 @@ public void benchmarkDeleteEntry() throws SinglePointIndexException
double durationMs = (end - start) / 1_000_000.0;
System.out.printf("Deleted %,d entries in %.2f ms (%.2f ops/sec)%n", count, durationMs, count * 1000.0 / durationMs);
}
-}
\ No newline at end of file
+}
diff --git a/pom.xml b/pom.xml
index 78011c6a56..92e8d7c814 100644
--- a/pom.xml
+++ b/pom.xml
@@ -143,6 +143,8 @@
1.7.36
+ 10.2.1
+
4.13.2
1.8.2