Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Performance improvements for BytesRefHash #8788

Merged
merged 13 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Replace the deprecated IndexReader APIs with new storedFields() & termVectors() ([#7792](https://github.com/opensearch-project/OpenSearch/pull/7792))
- [Remote Store] Add support to restore only unassigned shards of an index ([#8792](https://github.com/opensearch-project/OpenSearch/pull/8792))
- Add safeguard limits for file cache during node level allocation ([#8208](https://github.com/opensearch-project/OpenSearch/pull/8208))
- Performance improvements for BytesRefHash ([#8788](https://github.com/opensearch-project/OpenSearch/pull/8788))
- Add support for aggregation profiler with concurrent aggregation ([#8801](https://github.com/opensearch-project/OpenSearch/pull/8801))
- [Remove] Deprecated Fractional ByteSizeValue support ([#9005](https://github.com/opensearch-project/OpenSearch/pull/9005))
- Add support for aggregation profiler with concurrent aggregation ([#8801](https://github.com/opensearch-project/OpenSearch/pull/8801))
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.hash;

import org.apache.lucene.util.StringHelper;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.infra.Blackhole;

import java.util.Random;

@Fork(value = 3)
@Warmup(iterations = 1, time = 1)
@Measurement(iterations = 3, time = 3)
@BenchmarkMode(Mode.Throughput)
public class HashFunctionBenchmark {

/**
 * Benchmark body: hashes the prepared payload with the hash function selected
 * by the current parameter set.
 *
 * @param bh   JMH blackhole that receives the computed hash value
 * @param opts benchmark state holding the selected {@link Type} and the input bytes
 */
@Benchmark
public void hash(Blackhole bh, Options opts) {
    // Compute first, then hand the value to the blackhole so the result is observed.
    final long digest = opts.type.hash(opts.data);
    bh.consume(digest);
}

/**
 * Benchmark state: the hash function under test and the input it is applied to.
 *
 * <p>The {@code length} values grow roughly geometrically from 1 byte up to
 * 393812 bytes, so the benchmark covers very short keys as well as large buffers.
 */
@State(Scope.Benchmark)
public static class Options {
    /** Hash function to measure. */
    @Param({ "MURMUR3", "T1HA1" })
    public Type type;

    /** Size of the input buffer, in bytes. */
    @Param({
        "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
        "12", "14", "16", "18", "21", "24", "28", "32", "36", "41",
        "47", "54", "62", "71", "81", "90", "100", "112", "125", "139",
        "156", "174", "194", "220", "245", "272", "302", "339", "384", "431",
        "488", "547", "608", "675", "763", "863", "967", "1084", "1225", "1372",
        "1537", "1737", "1929", "2142", "2378", "2664", "3011", "3343", "3778", "4232",
        "4783", "5310", "5895", "6662", "7529", "8508", "9444", "10483", "11741", "13150",
        "14597", "16495", "18475", "20877", "23383", "25956", "29071", "32560", "36142", "40841",
        "46151", "52151", "57888", "65414", "72610", "82050", "91076", "102006", "114247", "127957",
        "143312", "159077", "176576", "199531", "223475", "250292", "277825", "313943", "351617", "393812" })
    public Integer length;

    /** Input bytes handed to the hash function; populated by {@link #setup()}. */
    public byte[] data;

    /**
     * Allocates a buffer of {@code length} bytes and fills it with pseudo-random
     * content. The generator is seeded with the constant 0, so every run hashes
     * the same bytes for a given length.
     */
    @Setup
    public void setup() {
        final byte[] payload = new byte[length];
        final Random seeded = new Random(0);
        seeded.nextBytes(payload);
        data = payload;
    }
}

/**
 * Hash functions that can be selected for the benchmark. Each constant wraps
 * its implementation in a {@link Hasher} so that all of them are invoked the same way.
 */
public enum Type {
    /** Lucene's {@code StringHelper.murmurhash3_x86_32}, called with 0 as its last argument. */
    MURMUR3((bytes, from, count) -> StringHelper.murmurhash3_x86_32(bytes, from, count, 0)),

    /** {@code T1ha1.hash}, called with 0 as its last argument. */
    T1HA1((bytes, from, count) -> T1ha1.hash(bytes, from, count, 0));

    /** Implementation this constant delegates to. */
    private final Hasher hasher;

    Type(Hasher delegate) {
        this.hasher = delegate;
    }

    /**
     * Hashes the whole array, from index 0 through {@code data.length}.
     *
     * @param data bytes to hash
     * @return the value produced by the wrapped {@link Hasher}
     */
    public long hash(byte[] data) {
        final int start = 0;
        final int size = data.length;
        return this.hasher.hash(data, start, size);
    }
}

/**
 * Common call shape for the hash functions being benchmarked, so that each
 * {@code Type} constant can supply its implementation as a lambda.
 */
@FunctionalInterface
interface Hasher {
/**
 * Hashes a region of a byte array.
 *
 * @param data   array containing the bytes to hash
 * @param offset index of the first byte to hash
 * @param length number of bytes to hash
 * @return the hash value as a {@code long}
 */
long hash(byte[] data, int offset, int length);
}
}