Performance improvements for BytesRefHash
Signed-off-by: Ketan Verma <ketan9495@gmail.com>
ketanv3 committed Jul 19, 2023
1 parent 1d3b006 commit 0f9171f
Showing 8 changed files with 1,004 additions and 0 deletions.
234 changes: 234 additions & 0 deletions benchmarks/src/main/java/org/opensearch/common/util/BytesRefHashBenchmark.java
@@ -0,0 +1,234 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.util;

import net.openhft.hashing.LongHashFunction;
import org.apache.lucene.util.BytesRef;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lease.Releasables;

import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Stream;

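/**
 * Compares the average cost of {@code add} across the baseline {@code BytesRefHash}
 * and the new compact and reorganizing variants, for a range of key-set sizes
 * and key lengths.
 */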
@Fork(value = 5)
@Warmup(iterations = 1, time = 2)
@Measurement(iterations = 3, time = 5)
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MILLISECONDS)
public class BytesRefHashBenchmark {
private static final int NUM_TABLES = 20; // run across many tables so that caches aren't effective
private static final int NUM_HITS = 1_000_000; // num hits per table

@Benchmark
public void add(Blackhole bh, Options opts) {
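// Cycle through the generated keys for NUM_HITS iterations per table; when
// size < NUM_HITS, most adds find an existing key, so both the insert and
// the lookup path are exercised.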
for (int hit = 0; hit < NUM_HITS; hit++) {
BytesRef key = opts.keys[hit % opts.keys.length];
for (HashTable table : opts.tables) {
bh.consume(table.add(key));
}
}
}

@State(Scope.Benchmark)
public static class Options {
@Param({ "baseline", "compact", "reorganizing" })
public String type;

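// Number of unique keys per table, in a roughly geometric progression so the
// benchmark sweeps a wide range of occupancies and load factors.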
@Param({
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"10",
"12",
"14",
"16",
"19",
"22",
"25",
"29",
"33",
"38",
"43",
"50",
"57",
"65",
"75",
"86",
"97",
"109",
"124",
"141",
"161",
"182",
"204",
"229",
"262",
"297",
"336",
"380",
"430",
"482",
"550",
"610",
"704",
"801",
"914",
"1042",
"1178",
"1343",
"1532",
"1716",
"1940",
"2173",
"2456",
"2751",
"3082",
"3514",
"4006",
"4487",
"5026",
"5730",
"6418",
"7317",
"8196",
"9180",
"10374",
"11723",
"13247",
"14837",
"16915",
"19114",
"21599",
"24623",
"28071",
"32001",
"36482",
"41590",
"46581" })
public Integer size;

@Param({ "8", "32", "128" })
public Integer length;

private HashTable[] tables;

private BytesRef[] keys;

@Setup
public void setup() {
assert size <= Math.pow(26, length) : "key length too small to generate the required number of keys";
tables = Stream.generate(this::newHashTable).limit(NUM_TABLES).toArray(HashTable[]::new);
Random random = new Random(0);
Set<BytesRef> seen = new HashSet<>();
keys = new BytesRef[size];
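// Generate 'size' distinct random keys; code points 97..122 are the lowercase
// letters 'a'..'z', so each key is a random alphabetic string of 'length' chars.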
for (int i = 0; i < size; i++) {
BytesRef key;
do {
key = new BytesRef(
random.ints(97, 123)
.limit(length)
.collect(StringBuilder::new, StringBuilder::appendCodePoint, StringBuilder::append)
.toString()
);
} while (seen.contains(key));
keys[i] = key;
seen.add(key);
}
}

@TearDown
public void tearDown() {
Releasables.close(tables);
}

private HashTable newHashTable() {
switch (type) {
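// "baseline" wraps the existing BytesRefHash; "compact" and "reorganizing" wrap
// the new implementations, both keyed by the 64-bit XXH3 hash
// (LongHashFunction.xx3()) from the zero-allocation-hashing library.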
case "baseline":
return new HashTable() {
private final BytesRefHash table = new BytesRefHash(1, 0.6f, BigArrays.NON_RECYCLING_INSTANCE);

@Override
public long add(BytesRef key) {
return table.add(key);
}

@Override
public void close() {
table.close();
}
};
case "compact":
return new HashTable() {
private final CompactBytesRefHash table = new CompactBytesRefHash(
1,
0.6f,
key -> LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length),
BigArrays.NON_RECYCLING_INSTANCE
);

@Override
public long add(BytesRef key) {
return table.add(key);
}

@Override
public void close() {
table.close();
}
};
case "reorganizing":
return new HashTable() {
private final ReorganizingBytesRefHash table = new ReorganizingBytesRefHash(
1,
0.6f,
key -> LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length),
BigArrays.NON_RECYCLING_INSTANCE
);

@Override
public long add(BytesRef key) {
return table.add(key);
}

@Override
public void close() {
table.close();
}
};
default:
throw new IllegalArgumentException("invalid hash table type: " + type);
}
}
}

private interface HashTable extends Releasable {
long add(BytesRef key);
}
}
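
For reference, a minimal standalone sketch (not part of this commit) of what the hasher lambda passed to the two new tables computes; LongHashFunction.xx3() is the XXH3 implementation from net.openhft:zero-allocation-hashing, and the key literal here is illustrative only:

import net.openhft.hashing.LongHashFunction;
import org.apache.lucene.util.BytesRef;

public class Xx3HashExample {
    public static void main(String[] args) {
        BytesRef key = new BytesRef("example-key"); // hypothetical key
        // Hash only the valid slice of the backing array, exactly as the
        // benchmark's lambda does: bytes[offset .. offset + length).
        long hash = LongHashFunction.xx3().hashBytes(key.bytes, key.offset, key.length);
        System.out.println(Long.toHexString(hash));
    }
}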
2 changes: 2 additions & 0 deletions buildSrc/version.properties
@@ -69,3 +69,5 @@ resteasy = 6.2.4.Final
# opentelemetry dependencies
opentelemetry = 1.26.0

# hashing dependencies
zero_allocation_hashing = 0.16
3 changes: 3 additions & 0 deletions server/build.gradle
@@ -160,6 +160,9 @@ dependencies {
//zstd
api "com.github.luben:zstd-jni:${versions.zstd}"

// hashing
api "net.openhft:zero-allocation-hashing:${versions.zero_allocation_hashing}"

testImplementation(project(":test:framework")) {
// tests use the locally compiled version of server
exclude group: 'org.opensearch', module: 'server'
1 change: 1 addition & 0 deletions server/licenses/zero-allocation-hashing-0.16.jar.sha1
@@ -0,0 +1 @@
0ca252f328160ed5d027f100a4fe525d6d21daaf
