Skip to content

Commit

Permalink
fix "special" chars prefix handling
Browse files Browse the repository at this point in the history
some utf-8 chars serialize to a _negative_ byte value, but prefix matching must be resilient to negative byte values.

this commit refactors `StoreKey` to handle the special prefix-match scenario. it was necessary to refactor `StoreKey` to handle the raw `byte[]` representation.
  • Loading branch information
snazy committed May 10, 2023
1 parent dd96b13 commit 4739676
Show file tree
Hide file tree
Showing 7 changed files with 104 additions and 42 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.AbstractIterator;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.Comparator;
Expand Down Expand Up @@ -516,7 +515,7 @@ static <V> StoreIndex<V> deserializeStoreIndex(ByteBuffer serialized, ElementSer
static int serializedSize(StoreKey key) {
// 1st byte: number of elements
int size = 2;
size += key.rawString().getBytes(StandardCharsets.UTF_8).length;
size += key.rawLength();
return size;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,14 @@
import static java.lang.Character.isSurrogate;
import static java.lang.Character.isSurrogatePair;
import static java.lang.Character.toCodePoint;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import java.nio.BufferOverflowException;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.List;
import org.projectnessie.nessie.relocated.protobuf.ByteString;

/**
* Represents a key in a {@link StoreIndex}, optimized for performance (CPU + heap).
Expand All @@ -39,24 +41,54 @@ public final class StoreKey implements Comparable<StoreKey> {
public static final int MAX_LENGTH = 500;

/** Contains the {@code char 0} separated key representation. */
private final String key;
private final byte[] key;

private StoreKey(String key) {
private final boolean prefixEnd;

private StoreKey(byte[] key, boolean prefixEnd) {
this.key = key;
this.prefixEnd = prefixEnd;
}

public byte[] rawBytes() {
return Arrays.copyOf(key, key.length);
}

public ByteString rawByteString() {
return ByteString.copyFrom(key);
}

public String rawString() {
return key;
return new String(key, UTF_8);
}

public int rawLength() {
return key.length;
}

@Override
public int compareTo(StoreKey that) {
return key.compareTo(that.key);
int diff = ByteBuffer.wrap(key).mismatch(ByteBuffer.wrap(that.key));
if (diff == -1) {
return 0;
}
if (diff >= key.length) {
if (prefixEnd && diff == key.length) {
return 1;
}
return -1;
}
if (diff >= that.key.length) {
return 1;
}
byte my = key[diff];
byte other = that.key[diff];
return Byte.compareUnsigned(my, other);
}

@Override
public int hashCode() {
return key.hashCode();
return Arrays.hashCode(key);
}

@Override
Expand All @@ -66,23 +98,43 @@ public boolean equals(Object obj) {
}

StoreKey that = (StoreKey) obj;
return this.key.equals(that.key);
return Arrays.equals(this.key, that.key);
}

public static StoreKey keyFromStringMax(String rawString) {
byte[] result = rawBytesFromString(rawString);
return new StoreKey(result, false);
}

public static StoreKey keyFromStringPrefixEnd(String rawString) {
byte[] result = rawBytesFromString(rawString);
return new StoreKey(result, true);
}

public static StoreKey keyFromString(String rawString) {
return new StoreKey(rawString);
byte[] result = rawBytesFromString(rawString);
return new StoreKey(result, false);
}

private static byte[] rawBytesFromString(String rawString) {
ByteBuffer temp = ByteBuffer.wrap(new byte[rawString.length() * 3]);
putString(temp, rawString);
byte[] result = new byte[temp.position()];
temp.flip();
temp.get(result);
return result;
}

public static StoreKey key(String... elements) {
for (String element : elements) {
checkElement(element);
}
return new StoreKey(String.join("\0", elements));
return keyFromString(String.join("\0", elements));
}

public static StoreKey key(List<String> elements) {
elements.forEach(StoreKey::checkElement);
return new StoreKey(String.join("\0", elements));
return keyFromString(String.join("\0", elements));
}

private static void checkElement(String element) {
Expand All @@ -92,7 +144,7 @@ private static void checkElement(String element) {
public ByteBuffer serialize(ByteBuffer keySerializationBuffer) {
keySerializationBuffer.clear();
try {
putString(keySerializationBuffer, key);
keySerializationBuffer.put(key);
keySerializationBuffer.put((byte) 0);
keySerializationBuffer.put((byte) 0);
} catch (BufferOverflowException e) {
Expand Down Expand Up @@ -190,18 +242,16 @@ public static StoreKey deserializeKey(ByteBuffer src) {
// ignore the trailing 0 of the element and trailing 0 of the "end of key"
int end = src.position() - 2;

String s;
int len = end - p0;
byte[] array = new byte[len];
if (src.hasArray()) {
s = new String(src.array(), src.arrayOffset() + p0, len, StandardCharsets.UTF_8);
System.arraycopy(src.array(), src.arrayOffset() + p0, array, 0, len);
} else {
byte[] array = new byte[len];
src.position(p0);
src.get(array);
s = new String(array, StandardCharsets.UTF_8);
}

return new StoreKey(s);
return new StoreKey(array, false);
}
break;
}
Expand All @@ -215,15 +265,16 @@ public static StoreKey deserializeKey(ByteBuffer src) {
* <p>This is not embedded into immutable's {@code .build()} check for performance reasons.
*/
public StoreKey check() {
checkState(key.length() <= MAX_LENGTH, "Key too long, max allowed length: %s", MAX_LENGTH);
checkState(key.length <= MAX_LENGTH, "Key too long, max allowed length: %s", MAX_LENGTH);
return this;
}

@Override
public String toString() {
StringBuilder sb = new StringBuilder(key.length());
for (int i = 0; i < key.length(); i++) {
char c = key.charAt(i);
StringBuilder sb = new StringBuilder(key.length);
String raw = rawString();
for (int i = 0; i < raw.length(); i++) {
char c = raw.charAt(i);
switch (c) {
case 0:
c = '/';
Expand All @@ -240,6 +291,13 @@ public String toString() {
}

public boolean startsWith(StoreKey prefix) {
return key.startsWith(prefix.key);
int diff = ByteBuffer.wrap(key).mismatch(ByteBuffer.wrap(prefix.key));
if (diff == -1) {
return true;
}
if (diff == prefix.key.length) {
return true;
}
return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static java.util.Objects.requireNonNull;
import static org.projectnessie.nessie.relocated.protobuf.ByteString.copyFromUtf8;
import static org.projectnessie.versioned.storage.common.indexes.StoreIndexElement.indexElement;
import static org.projectnessie.versioned.storage.common.indexes.StoreIndexes.indexFromStripes;
import static org.projectnessie.versioned.storage.common.indexes.StoreIndexes.newStoreIndex;
Expand Down Expand Up @@ -1032,7 +1031,7 @@ private StoreIndexElement<CommitOp> next(Iterator<StoreIndexElement<CommitOp>> i
@jakarta.annotation.Nonnull
@Override
public PagingToken tokenForKey(StoreKey key) {
return key != null ? pagingToken(copyFromUtf8(key.rawString())) : emptyPagingToken();
return key != null ? pagingToken(key.rawByteString()) : emptyPagingToken();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import static java.util.Objects.requireNonNull;
import static org.projectnessie.nessie.relocated.protobuf.ByteString.copyFromUtf8;
import static org.projectnessie.versioned.storage.common.indexes.StoreKey.key;
import static org.projectnessie.versioned.storage.common.indexes.StoreKey.keyFromString;
import static org.projectnessie.versioned.storage.common.logic.CommitConflict.ConflictType.KEY_EXISTS;
import static org.projectnessie.versioned.storage.common.logic.CommitRetry.commitRetry;
import static org.projectnessie.versioned.storage.common.logic.CreateCommit.Add.commitAdd;
Expand Down Expand Up @@ -212,7 +213,7 @@ public PagedResult<Reference, String> queryReferences(
pagingToken
.map(PagingToken::token)
.map(ByteString::toStringUtf8)
.map(StoreKey::key)
.map(StoreKey::keyFromString)
.orElse(prefix);

StoreIndex<CommitOp> index = createRefsIndexSupplier().get();
Expand Down Expand Up @@ -315,7 +316,7 @@ public void deleteReference(

Reference reference = persist.fetchReference(name);
if (reference == null) {
StoreKey nameKey = key(name);
StoreKey nameKey = keyFromString(name);
Supplier<StoreIndex<CommitOp>> indexSupplier = createRefsIndexSupplier();
StoreIndexElement<CommitOp> index = indexSupplier.get().get(nameKey);
if (index == null) {
Expand Down Expand Up @@ -389,7 +390,7 @@ CommitReferenceResult commitCreateReference(String name, ObjId pointer)
throw new RuntimeException(e);
}

StoreKey k = key(name);
StoreKey k = keyFromString(name);

Instant now = persist.config().clock().instant();
CreateCommit c =
Expand Down Expand Up @@ -419,7 +420,7 @@ CommitReferenceResult commitCreateReference(String name, ObjId pointer)
checkState(conflict.conflictType() == KEY_EXISTS, "Unexpected conflict type %s", conflict);

Supplier<StoreIndex<CommitOp>> indexSupplier = createRefsIndexSupplier();
StoreIndexElement<CommitOp> el = indexSupplier.get().get(key(name));
StoreIndexElement<CommitOp> el = indexSupplier.get().get(keyFromString(name));
checkNotNull(el, "Key %s missing in index", name);

Reference existing = persist.fetchReference(name);
Expand Down Expand Up @@ -466,7 +467,7 @@ void commitDeleteReference(Reference reference) throws RetryTimeoutException {
StoreIndex<CommitOp> index =
indexesLogic(persist).incrementalIndexForUpdate(commit, Optional.empty());

StoreKey key = key(reference.name());
StoreKey key = keyFromString(reference.name());

StoreIndexElement<CommitOp> indexElement = index.get(key);
if (indexElement != null) {
Expand Down Expand Up @@ -557,7 +558,7 @@ private Reference maybeRecover(
Reference ref,
@Nonnull @jakarta.annotation.Nonnull Supplier<StoreIndex<CommitOp>> refsIndexSupplier) {
if (ref == null) {
StoreIndexElement<CommitOp> commitOp = refsIndexSupplier.get().get(key(name));
StoreIndexElement<CommitOp> commitOp = refsIndexSupplier.get().get(keyFromString(name));

if (commitOp == null) {
// Reference not in index - nothing to do.
Expand Down Expand Up @@ -592,7 +593,7 @@ private Reference maybeRecover(
}

} else if (ref.deleted()) {
StoreIndexElement<CommitOp> commitOp = refsIndexSupplier.get().get(key(name));
StoreIndexElement<CommitOp> commitOp = refsIndexSupplier.get().get(keyFromString(name));

if (commitOp == null) {
throw new RuntimeException("Loaded Reference is marked as deleted, but not found in index");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
import static java.time.temporal.ChronoField.SECOND_OF_MINUTE;
import static java.time.temporal.ChronoField.YEAR;
import static java.util.Collections.emptyList;
import static org.projectnessie.versioned.storage.common.indexes.StoreKey.keyFromString;
import static org.projectnessie.versioned.storage.common.indexes.StoreKey.keyFromStringMax;
import static org.projectnessie.versioned.storage.common.indexes.StoreKey.keyFromStringPrefixEnd;
import static org.projectnessie.versioned.storage.common.objtypes.CommitHeaders.newCommitHeaders;

import com.google.common.base.Splitter;
Expand Down Expand Up @@ -135,40 +138,39 @@ public static StoreKey keyToStoreKeyVariant(
@Nonnull @jakarta.annotation.Nonnull ContentKey key, String discriminator) {
StringBuilder sb = keyToStoreKeyPrepare(key);
sb.append((char) 0).append(discriminator);
return StoreKey.keyFromString(sb.toString());
return keyFromString(sb.toString());
}

@Nonnull
@jakarta.annotation.Nonnull
public static StoreKey keyToStoreKeyMin(@Nonnull @jakarta.annotation.Nonnull ContentKey key) {
StringBuilder sb = keyToStoreKeyPrepare(key);
return StoreKey.keyFromString(sb.toString());
return keyFromString(sb.toString());
}

/** Computes the max-key. */
@Nonnull
@jakarta.annotation.Nonnull
public static StoreKey keyToStoreKeyMax(@Nonnull @jakarta.annotation.Nonnull ContentKey key) {
StringBuilder sb = keyToStoreKeyPrepare(key);
sb.append((char) 0).append((char) 255);
return StoreKey.keyFromString(sb.toString());
sb.append((char) 0).append((char) 127);
return keyFromStringMax(sb.toString());
}

@Nonnull
@jakarta.annotation.Nonnull
public static StoreKey keyToStoreKeyPrefixMin(
@Nonnull @jakarta.annotation.Nonnull ContentKey key) {
StringBuilder sb = keyToStoreKeyPrepare(key);
return StoreKey.keyFromString(sb.toString());
return keyFromString(sb.toString());
}

@Nonnull
@jakarta.annotation.Nonnull
public static StoreKey keyToStoreKeyPrefixEnd(
@Nonnull @jakarta.annotation.Nonnull ContentKey key) {
StringBuilder sb = keyToStoreKeyPrepare(key);
sb.append((char) 255);
return StoreKey.keyFromString(sb.toString());
return keyFromStringPrefixEnd(sb.toString());
}

@Nonnull
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -549,7 +549,7 @@ public String tokenForEntry(KeyEntry entry) {
}

private String token(StoreKey storeKey) {
return pagingToken(copyFromUtf8(storeKey.rawString())).asString();
return pagingToken(storeKey.rawByteString()).asString();
}
};
}
Expand Down Expand Up @@ -813,7 +813,7 @@ public String tokenForEntry(Diff entry) {
}

private String tokenFor(StoreKey storeKey) {
return pagingToken(copyFromUtf8(storeKey.rawString())).asString();
return pagingToken(storeKey.rawByteString()).asString();
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ public void max() {
soft.assertThat(keyRange.endStoreKey()).isLessThan(keyToStoreKey(ContentKey.of("bar", "baz")));
soft.assertThat(keyRange.endStoreKey()).isLessThan(keyToStoreKey(ContentKey.of("bar", "0")));
soft.assertThat(keyRange.endStoreKey()).isLessThan(keyToStoreKey(ContentKey.of("barbaz")));
soft.assertThat(keyRange.endStoreKey()).isLessThan(keyToStoreKey(ContentKey.of("bar\u03C0")));
soft.assertThat(keyRange.endStoreKey()).isLessThan(keyToStoreKey(ContentKey.of("bar0")));

soft.assertThat(keyRange.endStoreKey()).isLessThan(keyToStoreKey(ContentKey.of("bar0")));
Expand All @@ -104,11 +105,13 @@ public void prefix() {
.isGreaterThan(keyToStoreKey(ContentKey.of("bar", "baz")));
soft.assertThat(keyRange.endStoreKey()).isGreaterThan(keyToStoreKey(ContentKey.of("bar", "0")));
soft.assertThat(keyRange.endStoreKey()).isGreaterThan(keyToStoreKey(ContentKey.of("barbaz")));
soft.assertThat(keyRange.endStoreKey())
.isGreaterThan(keyToStoreKey(ContentKey.of("bar\u03C0")));
soft.assertThat(keyRange.endStoreKey()).isGreaterThan(keyToStoreKey(ContentKey.of("bar0")));

String storeKeyRawPrefix = MAIN_UNIVERSE + (char) 0 + "bar";
soft.assertThat(keyRange.endStoreKey()).isEqualTo(keyFromString(storeKeyRawPrefix));

soft.assertThat(keyRange.endStoreKey()).isGreaterThan(keyFromString(storeKeyRawPrefix));
soft.assertThat(keyRange.endStoreKey())
.isGreaterThan(keyFromString(storeKeyRawPrefix + (char) 1 + "baz"));
}
Expand Down

0 comments on commit 4739676

Please sign in to comment.