Skip to content

Commit

Permalink
Improve ByteBufUtil#lastIndexOf (#13942)
Browse files Browse the repository at this point in the history
Motivation:
The performance of `#lastIndexOf` could be enhanced by applying SWAR.

Modification:
Utilized `SWARUtil` for byte search.

Result:
Enhanced performance.
  • Loading branch information
jchrys committed Apr 8, 2024
1 parent c1d0fd2 commit a38a85c
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 4 deletions.
78 changes: 75 additions & 3 deletions buffer/src/main/java/io/netty/buffer/ByteBufUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -714,20 +714,92 @@ public static ByteBuf readBytes(ByteBufAllocator alloc, ByteBuf buffer, int leng
}
}

/**
 * Searches {@code buffer} backwards for {@code value} and returns the highest matching index in
 * {@code [toIndex, fromIndex)}, or {@code -1} if there is none.
 *
 * <p>{@code fromIndex} is the exclusive upper bound and is clamped to the buffer capacity;
 * {@code toIndex} is the inclusive lower bound. When {@code PlatformDependent.isUnaligned()} is
 * {@code false} the range is scanned byte by byte. Otherwise it is scanned eight bytes at a time
 * from the top of the range via {@code SWARUtil}, and the remaining {@code length & 7} bytes at
 * the bottom of the range are handed to {@code unrolledLastIndexOf}.
 *
 * @param buffer    the buffer to search
 * @param fromIndex exclusive upper bound of the search; callers must pass a value greater than
 *                  {@code toIndex} (asserted)
 * @param toIndex   inclusive lower bound of the search
 * @param value     the byte to look for
 * @return the index of the last occurrence of {@code value} in the range, or {@code -1}
 */
// NOTE(review): this listing is a diff view without +/- markers. The next line is the pre-change
// signature; it is superseded by the signature that follows it.
static int lastIndexOf(AbstractByteBuf buffer, int fromIndex, int toIndex, byte value) {
static int lastIndexOf(final AbstractByteBuf buffer, int fromIndex, final int toIndex, final byte value) {
assert fromIndex > toIndex;
final int capacity = buffer.capacity();
// Never start beyond the end of the buffer.
fromIndex = Math.min(fromIndex, capacity);
// NOTE(review): the next line is the pre-change guard; it is superseded by the guard that follows it.
if (fromIndex < 0 || capacity == 0) {
if (fromIndex <= 0) { // fromIndex is the exclusive upper bound.
return -1;
}
// NOTE(review): the next line is the pre-change bounds check; it is superseded by the two lines after it.
buffer.checkIndex(toIndex, fromIndex - toIndex);
final int length = fromIndex - toIndex;
// Presumably validates that [toIndex, toIndex + length) lies inside the buffer - confirm in AbstractByteBuf.
buffer.checkIndex(toIndex, length);
if (!PlatformDependent.isUnaligned()) {
// Fall back to the plain byte-by-byte scan instead of reading whole longs.
return linearLastIndexOf(buffer, fromIndex, toIndex, value);
}
// Number of complete 8-byte words in the range.
final int longCount = length >>> 3;
if (longCount > 0) {
final ByteOrder nativeOrder = ByteOrder.nativeOrder();
final boolean isNative = nativeOrder == buffer.order();
final boolean useLE = nativeOrder == ByteOrder.LITTLE_ENDIAN;
final long pattern = SWARUtil.compilePattern(value);
// Walk the words from the top of the range downwards: the first word covers
// [fromIndex - 8, fromIndex), and each iteration moves 8 bytes lower.
for (int i = 0, offset = fromIndex - Long.BYTES; i < longCount; i++, offset -= Long.BYTES) {
// use the faster available getLong
final long word = useLE? buffer._getLongLE(offset) : buffer._getLong(offset);
final long result = SWARUtil.applyPattern(word, pattern);
// A non-zero result is treated as "value occurs somewhere in this word".
if (result != 0) {
// Use the opposite endianness since we are looking for the last index.
// offset + Long.BYTES - 1 is the index of the word's last byte; the value returned by
// SWARUtil.getIndex is subtracted from it (presumably the match's distance from that
// last byte - confirm against SWARUtil).
return offset + Long.BYTES - 1 - SWARUtil.getIndex(result, !isNative);
}
}
}
// Fewer than 8 bytes remain at the bottom of the range, just below the words scanned above.
return unrolledLastIndexOf(buffer, fromIndex - (longCount << 3), length & 7, value);
}

/**
 * Scans {@code buffer} one byte at a time, starting at {@code fromIndex - 1} and moving down to
 * {@code toIndex} (inclusive).
 *
 * @return the first index encountered whose byte equals {@code value}, i.e. the highest matching
 *         index in {@code [toIndex, fromIndex)}, or {@code -1} if no byte matches
 */
private static int linearLastIndexOf(final AbstractByteBuf buffer, final int fromIndex, final int toIndex,
                                     final byte value) {
    int cursor = fromIndex;
    // Step down before each probe so that fromIndex itself is never read.
    while (--cursor >= toIndex) {
        if (buffer._getByte(cursor) == value) {
            return cursor;
        }
    }
    return -1;
}

/**
 * Checks at most seven bytes directly below {@code fromIndex}, highest index first, without a loop.
 *
 * @param buffer    the buffer to read from
 * @param fromIndex exclusive upper bound; the first byte inspected is {@code fromIndex - 1}
 * @param byteCount how many bytes to inspect; must be in {@code [0, 8)} (asserted)
 * @param value     the byte to look for
 * @return the highest inspected index holding {@code value}, or {@code -1} if none does
 */
private static int unrolledLastIndexOf(final AbstractByteBuf buffer, final int fromIndex, final int byteCount,
                                       final byte value) {
    assert 0 <= byteCount && byteCount < 8;
    if (byteCount == 0) {
        return -1;
    }
    // cursor always names the byte just probed; it steps down by one before every probe.
    int cursor = fromIndex;
    if (buffer._getByte(--cursor) == value) {
        return cursor;
    }
    if (byteCount == 1) {
        return -1;
    }
    if (buffer._getByte(--cursor) == value) {
        return cursor;
    }
    if (byteCount == 2) {
        return -1;
    }
    if (buffer._getByte(--cursor) == value) {
        return cursor;
    }
    if (byteCount == 3) {
        return -1;
    }
    if (buffer._getByte(--cursor) == value) {
        return cursor;
    }
    if (byteCount == 4) {
        return -1;
    }
    if (buffer._getByte(--cursor) == value) {
        return cursor;
    }
    if (byteCount == 5) {
        return -1;
    }
    if (buffer._getByte(--cursor) == value) {
        return cursor;
    }
    if (byteCount == 6) {
        return -1;
    }
    // Seventh and last probe (fromIndex - 7).
    return buffer._getByte(--cursor) == value ? cursor : -1;
}

Expand Down
42 changes: 41 additions & 1 deletion buffer/src/test/java/io/netty/buffer/AbstractByteBufTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2293,11 +2293,51 @@ public void testSWARIndexOf() {
buffer.writeByte((byte) 2);
buffer.writeByte((byte) 3);
buffer.writeByte((byte) 4);
buffer.writeByte((byte) 1);
buffer.writeByte((byte) 1); // 15
assertEquals(11, buffer.indexOf(0, 12, (byte) 1));
assertEquals(12, buffer.indexOf(0, 16, (byte) 2));
assertEquals(-1, buffer.indexOf(0, 11, (byte) 1));
assertEquals(11, buffer.indexOf(0, 16, (byte) 1));

// lastIndexOf
assertEquals(15, buffer.indexOf(16, 0, (byte) 1));
assertEquals(12, buffer.indexOf(16, 0, (byte) 2));
assertEquals(11, buffer.indexOf(15, 0, (byte) 1));
assertEquals(-1, buffer.indexOf(11, 0, (byte) 1));
buffer.release();
}

/**
 * Fills a 15-byte buffer so that the byte at index {@code i} has the value {@code i}, then checks
 * that every one of those bytes is located by both a forward search
 * ({@code indexOf(i, capacity, i)}) and a reverse search ({@code indexOf(capacity, 0, i)}).
 *
 * <p>With 15 bytes the range holds one full 8-byte word plus 7 leftover bytes. The loops run
 * over the whole capacity so that index 14, the very first byte a reverse scan inspects, is
 * covered as well.
 */
@Test
public void testUnrolledSWARIndexOf() {
    final ByteBuf buffer = newBuffer(15);
    try {
        buffer.clear();
        // Ensure the buffer is completely zeroed.
        buffer.setZero(0, buffer.capacity());
        // Index i holds value i: 0..7 form the first 8 bytes, 8..14 the remaining 7.
        for (int v = 0; v < 15; ++v) {
            buffer.writeByte(v);
        }
        assertEquals(15, buffer.capacity());
        final int capacity = buffer.capacity();

        // indexOf (forward search)
        for (int i = 0; i < capacity; ++i) {
            assertEquals(i, buffer.indexOf(i, capacity, (byte) i));
        }

        // lastIndexOf (reverse search: fromIndex > toIndex)
        for (int i = 0; i < capacity; ++i) {
            assertEquals(i, buffer.indexOf(capacity, 0, (byte) i));
        }
    } finally {
        // Release even when an assertion fails so the buffer is not leaked.
        buffer.release();
    }
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
/*
* Copyright 2024 The Netty Project
*
* The Netty Project licenses this file to you under the Apache License,
* version 2.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at:
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*/
package io.netty.microbench.buffer;

import io.netty.buffer.ByteBuf;
import io.netty.buffer.ByteBufAllocator;
import io.netty.buffer.PooledByteBufAllocator;
import io.netty.buffer.UnpooledByteBufAllocator;
import io.netty.microbench.util.AbstractMicrobenchmark;
import io.netty.util.internal.SuppressJava6Requirement;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.util.SplittableRandom;
import java.util.concurrent.TimeUnit;

/**
 * JMH benchmark for reverse-direction {@code ByteBuf.indexOf(size, 0, needleByte)} calls.
 *
 * <p>Each trial builds {@code 1 << logPermutations} buffers of {@code size} bytes filled with
 * seeded random data that never contains {@code needleByte}, then plants exactly one needle at a
 * random index below {@code min(8, size)}. The benchmark method cycles through these buffers.
 */
@State(Scope.Benchmark)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@Fork(2)
@Warmup(iterations = 5, time = 1)
@Measurement(iterations = 8, time = 1)
public class ByteBufLastIndexOfBenchmark extends AbstractMicrobenchmark {
    /** Capacity of every generated buffer, in bytes. */
    @Param({ "7", "16", "23", "32" })
    int size;

    /** Base-2 logarithm of the number of buffers to generate. */
    @Param({ "4", "11" })
    int logPermutations;

    /** Seed for the random generator, so data is reproducible across runs. */
    @Param({ "1" })
    int seed;

    /** Number of generated buffers; always a power of two ({@code 1 << logPermutations}). */
    int permutations;

    ByteBuf[] data;

    /** Running counter used to pick the next buffer. */
    private int next;

    /** The byte the benchmark searches for. */
    @Param({ "0" })
    private byte needleByte;

    /** Whether to allocate direct buffers ({@code true}) or heap buffers ({@code false}). */
    @Param({ "true", "false" })
    private boolean direct;

    /** Value written to the {@code io.netty.noUnsafe} system property during setup. */
    @Param({ "false", "true" })
    private boolean noUnsafe;

    /** Whether to use the pooled ({@code true}) or unpooled ({@code false}) default allocator. */
    @Param({ "false", "true" })
    private boolean pooled;

    /**
     * Sets the {@code io.netty.noUnsafe} property and generates the benchmark buffers.
     */
    @Setup(Level.Trial)
    @SuppressJava6Requirement(reason = "using SplittableRandom to reliably produce data")
    public void init() {
        System.setProperty("io.netty.noUnsafe", Boolean.toString(noUnsafe));
        final SplittableRandom random = new SplittableRandom(seed);
        permutations = 1 << logPermutations;
        data = new ByteBuf[permutations];

        final ByteBufAllocator allocator;
        if (pooled) {
            allocator = PooledByteBufAllocator.DEFAULT;
        } else {
            allocator = UnpooledByteBufAllocator.DEFAULT;
        }

        // Written in place of any random byte that happens to equal the needle.
        final int substitute = needleByte != 1 ? 1 : 0;
        // The needle is planted somewhere in [0, needleWindow).
        final int needleWindow = Math.min(8, size);

        for (int p = 0; p < permutations; ++p) {
            final ByteBuf buf = direct ? allocator.directBuffer(size, size) : allocator.heapBuffer(size, size);
            data[p] = buf;
            for (int pos = 0; pos < size; pos++) {
                final int candidate = random.nextInt(Byte.MIN_VALUE, Byte.MAX_VALUE + 1);
                buf.setByte(pos, candidate == needleByte ? substitute : candidate);
            }
            buf.setByte(random.nextInt(0, needleWindow), needleByte);
        }
    }

    /**
     * Returns the next buffer in round-robin order; the mask works because {@code permutations}
     * is a power of two.
     */
    private ByteBuf getData() {
        final ByteBuf current = data[next & (permutations - 1)];
        next++;
        return current;
    }

    /**
     * Searches the next buffer from {@code size} down to {@code 0} for {@code needleByte}.
     */
    @Benchmark
    public int lastIndexOf() {
        final ByteBuf haystack = getData();
        return haystack.indexOf(size, 0, needleByte);
    }

    /**
     * Releases every buffer allocated by {@link #init()}.
     */
    @TearDown
    public void releaseBuffers() {
        for (int p = 0; p < data.length; p++) {
            data[p].release();
        }
    }

}

0 comments on commit a38a85c

Please sign in to comment.