Navigation Menu

Skip to content

Commit

Permalink
Support values > 32KB
Browse files Browse the repository at this point in the history
  • Loading branch information
justinsb committed Dec 10, 2013
1 parent d913414 commit 81a2ba6
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 16 deletions.
Expand Up @@ -11,7 +11,27 @@
import com.cloudata.keyvalue.KeyValueProto.KvAction;
import com.cloudata.util.Hex;
import com.google.common.collect.Lists;

import com.google.common.primitives.Shorts;

/**
* LeafPage stores a leaf of a btree
*
* The data format looks like this:
*
* short: # of entries
*
* (short short)*: start position of key and start position of value
*
* (short short): end position of last key and value
*
* key data
*
* value data
*
*
* There is a special-case format when the # of entries is 1: in place of the trailing (short short) holding the end
* positions, we store a single (int) holding the length of the value. This allows for values > 32KB.
*/
public class LeafPage extends Page {
private static final Logger log = LoggerFactory.getLogger(LeafPage.class);

Expand Down Expand Up @@ -215,6 +235,7 @@ List<Page> split(WriteTransaction transaction, LeafPage original) {
}

public int getSerializedSize() {
    // Layout: a 2-byte entry count, then one index slot per entry plus one trailing slot,
    // followed by the key bytes and the value bytes.
    // The single-entry alternate format swaps the trailing (short, short) slot for an int,
    // which takes up the same space, so the header size computed here is unchanged.
    int indexSlots = entries.size() + 1;
    int headerSize = 2 + (INDEX_ENTRY_SIZE * indexSlots);
    int payloadSize = totalKeySize + totalValueSize;
    return headerSize + payloadSize;
}
Expand All @@ -225,8 +246,8 @@ public void write(ByteBuffer buffer) {
int n = entries.size();
buffer.putShort((short) n);

short keyStart = (short) (2 + (INDEX_ENTRY_SIZE * (n + 1)));
short valueStart = (short) (keyStart + totalKeySize);
short keyStart = Shorts.checkedCast(2 + (INDEX_ENTRY_SIZE * (n + 1)));
short valueStart = Shorts.checkedCast(keyStart + totalKeySize);

for (int i = 0; i < n; i++) {
buffer.putShort(keyStart);
Expand All @@ -237,10 +258,16 @@ public void write(ByteBuffer buffer) {
valueStart += entry.value.remaining();
}

// Write a dummy tail entry so we know the total sizes
// TODO: We can't do this if we want to use this for overflow values (>64KB)
buffer.putShort(keyStart);
buffer.putShort(valueStart);
if (n == 1) {
// Special case: we write the value length to allow huge values
assert totalValueSize == entries.get(0).value.remaining();
buffer.putInt(totalValueSize);
} else {
// Write a dummy tail entry so we know the total sizes
// TODO: We can't do this if we want to use this for overflow values (>64KB)
buffer.putShort(keyStart);
buffer.putShort(valueStart);
}

for (int i = 0; i < n; i++) {
Entry entry = entries.get(i);
Expand Down Expand Up @@ -392,11 +419,23 @@ private int getEntryCount() {
private ByteBuffer getKey(int i) {
assert mutable == null;

ByteBuffer ret = buffer.duplicate();
int offset = 2 + (i * INDEX_ENTRY_SIZE);
int start = ret.getShort(offset);
int end = ret.getShort(offset + INDEX_ENTRY_SIZE);
int n = getEntryCount();

assert 0 <= i && i < n;

ByteBuffer ret = buffer.duplicate();
int start;
int end;
if (n == 1) {
// Alternate format: the first key ends where the first value begins
// TODO: Should we instead have a 'blob' page? Could mean less copying around of data..
start = ret.getShort(2);
end = ret.getShort(4);
} else {
int offset = 2 + (i * INDEX_ENTRY_SIZE);
start = ret.getShort(offset);
end = ret.getShort(offset + INDEX_ENTRY_SIZE);
}
ret.position(start);
ret.limit(end);

Expand All @@ -406,11 +445,21 @@ private ByteBuffer getKey(int i) {
private ByteBuffer getValue(int i) {
assert mutable == null;

ByteBuffer ret = buffer.duplicate();
int offset = 2 + (i * INDEX_ENTRY_SIZE) + 2;
int start = ret.getShort(offset);
int end = ret.getShort(offset + INDEX_ENTRY_SIZE);
int n = getEntryCount();
assert 0 <= i && i < n;

ByteBuffer ret = buffer.duplicate();
int start;
int end;
if (n == 1) {
// Alternate format: the value length is a 32 bit int
start = ret.getShort(4);
end = start + ret.getInt(6);
} else {
int offset = 2 + (i * INDEX_ENTRY_SIZE) + 2;
start = ret.getShort(offset);
end = ret.getShort(offset + INDEX_ENTRY_SIZE);
}
ret.position(start);
ret.limit(end);

Expand Down
Expand Up @@ -92,11 +92,12 @@ public void testSetAndGet() throws Exception {
byte[] expected = buildValue(i);
Assert.assertArrayEquals(expected, data);
}

}

@Test
public void testPageSplit() throws Exception {
// We set values that are too big for one page (32KB currently),
// but aren't individually bigger than a page
String url = SERVERS[0].getHttpUrl();

long logId = newLogId();
Expand All @@ -121,7 +122,35 @@ public void testPageSplit() throws Exception {
byte[] expected = buildValue(i * 1000);
Assert.assertArrayEquals(expected, data);
}
}

@Test
public void testHugeValues() throws Exception {
    // Round-trips values that are too big for a short-based length/offset.
    String serverUrl = SERVERS[0].getHttpUrl();
    long logId = newLogId();
    KeyValueClient kv = new KeyValueClient(serverUrl);

    final int limit = 30;

    // Write phase: keys are the decimal strings "1" .. "29"; each value is built from index * 10000.
    int index = 1;
    while (index < limit) {
        byte[] keyBytes = Integer.toString(index).getBytes();
        byte[] payload = buildValue(index * 10000);
        ByteString key = ByteString.copyFrom(keyBytes);
        ByteString value = ByteString.copyFrom(payload);
        kv.put(logId, key, value);
        index++;
    }

    // TODO: Remove the need for a sleep... wait for commit
    Thread.sleep(1000);

    // Read phase: every key must return exactly the bytes that were stored for it.
    index = 1;
    while (index < limit) {
        byte[] keyBytes = Integer.toString(index).getBytes();
        KeyValueEntry stored = kv.read(logId, ByteString.copyFrom(keyBytes));
        byte[] actual = stored.getValue().toByteArray();
        byte[] expected = buildValue(index * 10000);
        Assert.assertArrayEquals(expected, actual);
        index++;
    }
}

@Test
Expand Down

0 comments on commit 81a2ba6

Please sign in to comment.