Skip to content

Commit

Permalink
Merge pull request #3824 from anuraaga/dev_rag
Browse files Browse the repository at this point in the history
[Java] Add a UTF-8 decoder that uses Unsafe to directly decode a byte buffer.
  • Loading branch information
xfxyjwf committed Nov 30, 2017
2 parents da89eb2 + 3e944ae commit 6de51ca
Show file tree
Hide file tree
Showing 6 changed files with 921 additions and 37 deletions.
1 change: 1 addition & 0 deletions Makefile.am
Expand Up @@ -286,6 +286,7 @@ java_EXTRA_DIST=
java/core/src/test/java/com/google/protobuf/CheckUtf8Test.java \
java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java \
java/core/src/test/java/com/google/protobuf/CodedOutputStreamTest.java \
java/core/src/test/java/com/google/protobuf/DecodeUtf8Test.java \
java/core/src/test/java/com/google/protobuf/DeprecatedFieldTest.java \
java/core/src/test/java/com/google/protobuf/DescriptorsTest.java \
java/core/src/test/java/com/google/protobuf/DiscardUnknownFieldsTest.java \
Expand Down
104 changes: 71 additions & 33 deletions java/core/src/main/java/com/google/protobuf/CodedInputStream.java
Expand Up @@ -64,6 +64,14 @@ public abstract class CodedInputStream {
// Integer.MAX_VALUE == 0x7FFFFFF == INT_MAX from limits.h
private static final int DEFAULT_SIZE_LIMIT = Integer.MAX_VALUE;

/**
* Whether to enable our custom UTF-8 decode codepath which does not use {@link StringCoding}.
* Enabled by default, disable by setting
* {@code -Dcom.google.protobuf.enableCustomutf8Decode=false} in JVM args.
*/
private static final boolean ENABLE_CUSTOM_UTF8_DECODE
= !"false".equals(System.getProperty("com.google.protobuf.enableCustomUtf8Decode"));

/** Visible for subclasses. See setRecursionLimit() */
int recursionDepth;

Expand Down Expand Up @@ -825,13 +833,19 @@ public String readString() throws IOException {
public String readStringRequireUtf8() throws IOException {
final int size = readRawVarint32();
if (size > 0 && size <= (limit - pos)) {
// TODO(martinrb): We could save a pass by validating while decoding.
if (!Utf8.isValidUtf8(buffer, pos, pos + size)) {
throw InvalidProtocolBufferException.invalidUtf8();
if (ENABLE_CUSTOM_UTF8_DECODE) {
String result = Utf8.decodeUtf8(buffer, pos, size);
pos += size;
return result;
} else {
// TODO(martinrb): We could save a pass by validating while decoding.
if (!Utf8.isValidUtf8(buffer, pos, pos + size)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
final int tempPos = pos;
pos += size;
return new String(buffer, tempPos, size, UTF_8);
}
final int tempPos = pos;
pos += size;
return new String(buffer, tempPos, size, UTF_8);
}

if (size == 0) {
Expand Down Expand Up @@ -1524,6 +1538,8 @@ public String readString() throws IOException {
final int size = readRawVarint32();
if (size > 0 && size <= remaining()) {
// TODO(nathanmittler): Is there a way to avoid this copy?
// TODO(anuraaga): It might be possible to share the optimized loop with
// readStringRequireUtf8 by implementing Java replacement logic there.
// The same as readBytes' logic
byte[] bytes = new byte[size];
UnsafeUtil.copyMemory(pos, bytes, 0, size);
Expand All @@ -1544,19 +1560,26 @@ public String readString() throws IOException {
@Override
public String readStringRequireUtf8() throws IOException {
final int size = readRawVarint32();
if (size >= 0 && size <= remaining()) {
// TODO(nathanmittler): Is there a way to avoid this copy?
// The same as readBytes' logic
byte[] bytes = new byte[size];
UnsafeUtil.copyMemory(pos, bytes, 0, size);
// TODO(martinrb): We could save a pass by validating while decoding.
if (!Utf8.isValidUtf8(bytes)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
if (size > 0 && size <= remaining()) {
if (ENABLE_CUSTOM_UTF8_DECODE) {
final int bufferPos = bufferPos(pos);
String result = Utf8.decodeUtf8(buffer, bufferPos, size);
pos += size;
return result;
} else {
// TODO(nathanmittler): Is there a way to avoid this copy?
// The same as readBytes' logic
byte[] bytes = new byte[size];
UnsafeUtil.copyMemory(pos, bytes, 0, size);
// TODO(martinrb): We could save a pass by validating while decoding.
if (!Utf8.isValidUtf8(bytes)) {
throw InvalidProtocolBufferException.invalidUtf8();
}

String result = new String(bytes, UTF_8);
pos += size;
return result;
String result = new String(bytes, UTF_8);
pos += size;
return result;
}
}

if (size == 0) {
Expand Down Expand Up @@ -2324,11 +2347,15 @@ public String readStringRequireUtf8() throws IOException {
bytes = readRawBytesSlowPath(size);
tempPos = 0;
}
// TODO(martinrb): We could save a pass by validating while decoding.
if (!Utf8.isValidUtf8(bytes, tempPos, tempPos + size)) {
throw InvalidProtocolBufferException.invalidUtf8();
if (ENABLE_CUSTOM_UTF8_DECODE) {
return Utf8.decodeUtf8(bytes, tempPos, size);
} else {
// TODO(martinrb): We could save a pass by validating while decoding.
if (!Utf8.isValidUtf8(bytes, tempPos, tempPos + size)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
return new String(bytes, tempPos, size, UTF_8);
}
return new String(bytes, tempPos, size, UTF_8);
}

@Override
Expand Down Expand Up @@ -3348,23 +3375,34 @@ public String readString() throws IOException {
public String readStringRequireUtf8() throws IOException {
final int size = readRawVarint32();
if (size > 0 && size <= currentByteBufferLimit - currentByteBufferPos) {
byte[] bytes = new byte[size];
UnsafeUtil.copyMemory(currentByteBufferPos, bytes, 0, size);
if (!Utf8.isValidUtf8(bytes)) {
throw InvalidProtocolBufferException.invalidUtf8();
if (ENABLE_CUSTOM_UTF8_DECODE) {
final int bufferPos = (int) (currentByteBufferPos - currentByteBufferStartPos);
String result = Utf8.decodeUtf8(currentByteBuffer, bufferPos, size);
currentByteBufferPos += size;
return result;
} else {
byte[] bytes = new byte[size];
UnsafeUtil.copyMemory(currentByteBufferPos, bytes, 0, size);
if (!Utf8.isValidUtf8(bytes)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String result = new String(bytes, UTF_8);
currentByteBufferPos += size;
return result;
}
String result = new String(bytes, UTF_8);
currentByteBufferPos += size;
return result;
}
if (size >= 0 && size <= remaining()) {
byte[] bytes = new byte[size];
readRawBytesTo(bytes, 0, size);
if (!Utf8.isValidUtf8(bytes)) {
throw InvalidProtocolBufferException.invalidUtf8();
if (ENABLE_CUSTOM_UTF8_DECODE) {
return Utf8.decodeUtf8(bytes, 0, size);
} else {
if (!Utf8.isValidUtf8(bytes)) {
throw InvalidProtocolBufferException.invalidUtf8();
}
String result = new String(bytes, UTF_8);
return result;
}
String result = new String(bytes, UTF_8);
return result;
}

if (size == 0) {
Expand Down
35 changes: 32 additions & 3 deletions java/core/src/main/java/com/google/protobuf/UnsafeUtil.java
Expand Up @@ -33,7 +33,6 @@
import java.lang.reflect.Field;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.security.AccessController;
import java.security.PrivilegedExceptionAction;
import java.util.logging.Level;
Expand Down Expand Up @@ -72,6 +71,8 @@ final class UnsafeUtil {

private static final long BUFFER_ADDRESS_OFFSET = fieldOffset(bufferAddressField());

private static final long STRING_VALUE_OFFSET = fieldOffset(stringValueField());

private UnsafeUtil() {}

static boolean hasUnsafeArrayOperations() {
Expand Down Expand Up @@ -259,6 +260,26 @@ static long addressOffset(ByteBuffer buffer) {
return MEMORY_ACCESSOR.getLong(buffer, BUFFER_ADDRESS_OFFSET);
}

/**
* Returns a new {@link String} backed by the given {@code chars}. The char array should not
* be mutated any more after calling this function.
*/
static String moveToString(char[] chars) {
if (STRING_VALUE_OFFSET == -1) {
// In the off-chance that this JDK does not implement String as we'd expect, just do a copy.
return new String(chars);
}
final String str;
try {
str = (String) UNSAFE.allocateInstance(String.class);
} catch (InstantiationException e) {
// This should never happen, but return a copy as a fallback just in case.
return new String(chars);
}
putObject(str, STRING_VALUE_OFFSET, chars);
return str;
}

static Object getStaticObject(Field field) {
return MEMORY_ACCESSOR.getStaticObject(field);
}
Expand Down Expand Up @@ -375,7 +396,12 @@ private static <T> Class<T> getClassForName(String name) {

/** Finds the address field within a direct {@link Buffer}. */
private static Field bufferAddressField() {
return field(Buffer.class, "address");
return field(Buffer.class, "address", long.class);
}

/** Finds the value field within a {@link String}. */
private static Field stringValueField() {
return field(String.class, "value", char[].class);
}

/**
Expand All @@ -390,11 +416,14 @@ private static long fieldOffset(Field field) {
* Gets the field with the given name within the class, or {@code null} if not found. If found,
* the field is made accessible.
*/
private static Field field(Class<?> clazz, String fieldName) {
private static Field field(Class<?> clazz, String fieldName, Class<?> expectedType) {
Field field;
try {
field = clazz.getDeclaredField(fieldName);
field.setAccessible(true);
if (!field.getType().equals(expectedType)) {
return null;
}
} catch (Throwable t) {
// Failed to access the fields.
field = null;
Expand Down

0 comments on commit 6de51ca

Please sign in to comment.