Skip to content

Commit

Permalink
Refactor HashFunctionTestCase
Browse files Browse the repository at this point in the history
Signed-off-by: Ketan Verma <ketan9495@gmail.com>
  • Loading branch information
ketanv3 committed Aug 25, 2023
1 parent 379cbcb commit 8268950
Show file tree
Hide file tree
Showing 8 changed files with 233 additions and 142 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

package org.opensearch.common.hash;

import org.opensearch.common.annotation.InternalApi;

import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
Expand All @@ -33,6 +35,7 @@
*
* @opensearch.internal
*/
@InternalApi
public final class T1ha1 {
private static final long SEED = System.nanoTime();
private static final Mux64 MUX_64_IMPL = fastestMux64Impl();
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,12 @@

import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodType;
import java.lang.invoke.VarHandle;
import java.nio.ByteOrder;

public class T1Ha1Tests extends HashFunctionTestCase {
private static final VarHandle LONG_HANDLE = MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
private final byte[] scratch = new byte[8];

/**
* Inspired by the tests defined in the reference implementation:
Expand Down Expand Up @@ -282,6 +286,18 @@ public void testSelfCheck() {
}
}

/**
 * Hashes the whole {@code input} array with {@link T1ha1} and stores the 64-bit result
 * into the shared {@code scratch} buffer through the little-endian {@code LONG_HANDLE}.
 *
 * @param input the bytes to hash
 * @return the {@code scratch} array itself; it is overwritten by the next call, so callers
 *         that need to keep the value must copy it
 */
@Override
public byte[] hash(byte[] input) {
    final int length = input.length;
    final long digest = T1ha1.hash(input, 0, length);
    LONG_HANDLE.set(scratch, 0, digest); // reuse the 8-byte scratch buffer instead of allocating
    return scratch;
}

/**
 * Reports the width of the hash produced by {@code hash(byte[])}.
 *
 * @return the number of output bits, i.e. the size of a {@code long} (64)
 */
@Override
public int outputBits() {
    return Long.SIZE;
}

private static boolean hasUnsignedMultiplyHigh() {
try {
MethodHandles.publicLookup()
Expand All @@ -293,9 +309,4 @@ private static boolean hasUnsignedMultiplyHigh() {
throw new RuntimeException(e);
}
}

/**
 * Hashes the entire {@code input} array (offset 0, length {@code input.length}) with
 * {@link T1ha1} and returns the result as a {@code long}.
 *
 * NOTE(review): this looks like the pre-refactor variant of the method that the commit
 * replaces with a {@code byte[]}-returning version -- confirm it is not meant to coexist.
 *
 * @param input the bytes to hash
 * @return the hash value computed by {@code T1ha1.hash}
 */
@Override
public long hash(byte[] input) {
return T1ha1.hash(input, 0, input.length);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@

import org.apache.lucene.util.BytesRef;
import org.opensearch.common.Numbers;
import org.opensearch.common.annotation.InternalApi;
import org.opensearch.common.hash.T1ha1;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lease.Releasables;
Expand All @@ -52,6 +53,7 @@
*
* @opensearch.internal
*/
@InternalApi
public final class BytesRefHash implements Releasable {
private static final long MAX_CAPACITY = 1L << 32;
private static final long DEFAULT_INITIAL_CAPACITY = 32;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package org.opensearch.common.util;

import org.opensearch.common.Numbers;
import org.opensearch.common.annotation.InternalApi;
import org.opensearch.common.lease.Releasable;
import org.opensearch.common.lease.Releasables;

Expand All @@ -26,6 +27,7 @@
*
* @opensearch.internal
*/
@InternalApi
public class ReorganizingLongHash implements Releasable {
private static final long MAX_CAPACITY = 1L << 32;
private static final long DEFAULT_INITIAL_CAPACITY = 32;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.hash;

import java.util.Locale;

/**
* Represents the avalanche statistics of a hash function.
*/
/**
 * Represents the avalanche statistics of a hash function.
 * <p>
 * The statistics are derived from a matrix of flip counts, where {@code flips[i][o]} is the
 * number of trials (out of {@code iterations}) in which output bit {@code o} changed. An ideal
 * hash function flips every output bit in half of the trials, which yields a bias of zero.
 */
public class AvalancheStats {
    private final int inputBits;
    private final int outputBits;
    private final double bias;
    private final double sumOfSquaredErrors;

    /**
     * Computes the statistics from the observed flip counts.
     *
     * @param flips      matrix indexed as {@code [inputBit][outputBit]} holding flip counts;
     *                   the number of rows is taken as the input width and the length of the
     *                   first row as the output width
     * @param iterations number of trials the counts were collected over
     * @throws IllegalArgumentException if the matrix is null, empty or has a row that is null
     *                                  or narrower than the first row, or if {@code iterations}
     *                                  is not positive
     */
    public AvalancheStats(int[][] flips, int iterations) {
        if (flips == null || flips.length == 0 || flips[0] == null || flips[0].length == 0) {
            throw new IllegalArgumentException("flips must be a non-empty matrix");
        }
        if (iterations <= 0) {
            // A zero divisor would silently turn every statistic into NaN, and NaN never
            // compares as exceeding a threshold, so fail loudly instead.
            throw new IllegalArgumentException("iterations must be positive, got " + iterations);
        }

        this.inputBits = flips.length;
        this.outputBits = flips[0].length;
        double sumOfBiases = 0;
        double sumOfSquaredErrors = 0;

        for (int i = 0; i < inputBits; i++) {
            int[] row = flips[i];
            if (row == null || row.length < outputBits) {
                throw new IllegalArgumentException("flips[" + i + "] must have at least " + outputBits + " entries");
            }
            for (int o = 0; o < outputBits; o++) {
                double flipRate = (double) row[o] / iterations;
                sumOfSquaredErrors += Math.pow(0.5 - flipRate, 2); // squared distance from the ideal rate of 0.5
                sumOfBiases += 2 * flipRate - 1; // maps a rate in [0, 1] onto [-1, 1]
            }
        }

        // Multiply as doubles so that very wide matrices cannot overflow the int product.
        this.bias = Math.abs(sumOfBiases / ((double) inputBits * outputBits));
        this.sumOfSquaredErrors = sumOfSquaredErrors;
    }

    /**
     * @return the absolute value of the mean per-bit bias; zero when, on average, output bits
     *         flipped in exactly half of the trials
     */
    public double bias() {
        return bias;
    }

    /**
     * @return {@code 1 - bias()}
     */
    public double diffusion() {
        return 1 - bias;
    }

    /**
     * @return the sum over all input/output bit pairs of the squared difference between the
     *         observed flip rate and 0.5
     */
    public double sumOfSquaredErrors() {
        return sumOfSquaredErrors;
    }

    /**
     * Renders the statistics with bias and diffusion expressed as percentages.
     */
    @Override
    public String toString() {
        return String.format(
            Locale.ROOT,
            "AvalancheStats{inputBits=%d, outputBits=%d, bias=%.4f%%, diffusion=%.4f%%, sumOfSquaredErrors=%.2f}",
            inputBits,
            outputBits,
            bias() * 100,
            diffusion() * 100,
            sumOfSquaredErrors()
        );
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.common.hash;

import org.opensearch.common.Randomness;
import org.opensearch.test.OpenSearchTestCase;

import java.util.Arrays;
import java.util.Random;

/**
* Base class for testing the quality of hash functions.
*/
public abstract class HashFunctionTestCase extends OpenSearchTestCase {
    /** Input widths (in bits) that the avalanche simulation is run for. */
    private static final int[] INPUT_BITS = { 24, 32, 40, 48, 56, 64, 72, 80, 96, 112, 128, 160, 512, 1024 };
    /** Number of random inputs drawn per input width. */
    private static final int ITERATIONS = 1000;
    private static final double BIAS_THRESHOLD = 0.01; // 1%

    /**
     * Computes the hash of {@code input}. The returned array may be reused between calls;
     * the simulation copies the value it needs to keep.
     */
    public abstract byte[] hash(byte[] input);

    /**
     * Width of the hash in bits; must be a multiple of 8.
     */
    public abstract int outputBits();

    /**
     * Tests if the hash function shows an avalanche effect, i.e., flipping a single input bit
     * should flip half the output bits. Fails on the first input width whose bias reaches the
     * threshold.
     */
    public void testAvalanche() {
        for (int idx = 0; idx < INPUT_BITS.length; idx++) {
            final AvalancheStats stats = simulate(INPUT_BITS[idx]);
            final boolean tooBiased = stats.bias() >= BIAS_THRESHOLD;
            if (tooBiased) {
                fail("bias exceeds threshold: " + stats);
            }
        }
    }

    /**
     * Draws {@code ITERATIONS} random inputs of the given width and, for every input bit,
     * counts how often each output bit changes when that single input bit is toggled.
     */
    private AvalancheStats simulate(int inputBits) {
        final int hashBits = outputBits();
        assert inputBits % 8 == 0; // whole bytes only, for simplicity
        assert hashBits % 8 == 0; // whole bytes only, for simplicity
        final int hashBytes = hashBits >>> 3;
        final byte[] message = new byte[inputBits >>> 3];
        final Random rng = Randomness.get();
        final int[][] flipCounts = new int[inputBits][hashBits];

        int round = 0;
        while (round < ITERATIONS) {
            rng.nextBytes(message);
            // Snapshot the reference hash because implementations may hand back a reused buffer.
            final byte[] baseline = Arrays.copyOf(hash(message), hashBytes);

            for (int bit = 0; bit < inputBits; bit++) {
                toggleBit(message, bit); // perturb exactly one bit
                final byte[] mutated = hash(message); // ideally half of the output bits differ now
                toggleBit(message, bit); // restore the original message
                tally(flipCounts[bit], baseline, mutated);
            }
            round++;
        }

        return new AvalancheStats(flipCounts, ITERATIONS);
    }

    /** Adds one to {@code counts[o]} for every output bit {@code o} that differs between the two hashes. */
    private static void tally(int[] counts, byte[] before, byte[] after) {
        for (int out = 0; out < counts.length; out++) {
            counts[out] += bitAt(before, out) ^ bitAt(after, out);
        }
    }

    /** Inverts the bit at {@code position}, counting from the least significant bit of byte 0. */
    private static void toggleBit(byte[] bytes, int position) {
        final int mask = 1 << (position & 7);
        bytes[position / 8] ^= mask;
    }

    /** Reads the bit at {@code position} as 0 or 1, using the same numbering as {@code toggleBit}. */
    private static int bitAt(byte[] bytes, int position) {
        final byte holder = bytes[position / 8];
        return (holder >>> (position & 7)) & 1;
    }
}

0 comments on commit 8268950

Please sign in to comment.