Skip to content
Permalink
Browse files

Use Perl's Hash for String#hash

Replace Murmur2 with Perl's Hash.  Perl's Hash is 10**5 times faster
than Murmur2.  Our understanding is that, so far, Perl's Hash has not
been broken with regard to HashDoS attacks[1].

In case Perl's Hash is broken in the future, you can switch to
SipHash-2-4[2] by setting the "jruby.siphash.enabled=true" system property
(it is false by default).

[1] http://2012.appsec-forum.ch/conferences/#c17
[2] https://www.131002.net/siphash/
  • Loading branch information
nahi committed Dec 12, 2012
1 parent ab5b3bd commit b8b26d7d67c8f5ee416ed32a85c89a567d4a480e
@@ -287,14 +287,16 @@ private Ruby(RubyInstanceConfig config) {
this.out = config.getOutput();
this.err = config.getError();
this.objectSpaceEnabled = config.isObjectSpaceEnabled();
this.siphashEnabled = config.isSiphashEnabled();
this.profile = config.getProfile();
this.currentDirectory = config.getCurrentDirectory();
this.kcode = config.getKCode();
this.beanManager = BeanManagerFactory.create(this, config.isManagementEnabled());
this.jitCompiler = new JITCompiler(this);
this.parserStats = new ParserStats(this);

this.hashSeed = this.random.nextInt();
this.hashSeedK0 = this.random.nextLong();
this.hashSeedK1 = this.random.nextLong();

this.beanManager.register(new Config(this));
this.beanManager.register(parserStats);
@@ -3774,6 +3776,11 @@ public boolean isObjectSpaceEnabled() {
/**
 * Updates this runtime's objectSpaceEnabled flag.
 *
 * @param objectSpaceEnabled the new value stored in the field of the same name
 */
public void setObjectSpaceEnabled(boolean objectSpaceEnabled) {
this.objectSpaceEnabled = objectSpaceEnabled;
}

/**
 * Reports whether this runtime was configured to use SipHash for hashing.
 *
 * The flag is copied from RubyInstanceConfig when the runtime is constructed. There is
 * intentionally no setter on the runtime itself, so that a single runtime never mixes
 * two different hash functions.
 *
 * @return the value of the siphashEnabled field
 */
public boolean isSiphashEnabled() {
return siphashEnabled;
}

public long getStartTime() {
return startTime;
@@ -4037,8 +4044,12 @@ public boolean isBooting() {
return booting;
}

// NOTE: diff rendering. The next two lines are the pre-change accessor for the int hash
// seed, which this commit removes; the accessor that follows takes its place and the
// closing brace after it is shared by both versions of the hunk.
public int getHashSeed() {
return hashSeed;
// Added by this commit: returns the first 64-bit hash seed word (hashSeedK0).
public long getHashSeedK0() {
return hashSeedK0;
}

/**
 * Returns the second 64-bit hash seed word (hashSeedK1).
 *
 * @return the value of the hashSeedK1 field
 */
public long getHashSeedK1() {
return hashSeedK1;
}

public CoverageData getCoverageData() {
@@ -4063,13 +4074,15 @@ public CoverageData getCoverageData() {
/** The seed object for the seed value of the runtime-local PRNG */
private RubyInteger randomSeed;
// NOTE: diff rendering. The int hashSeed field below is the pre-change declaration that
// this commit removes; the two long fields that follow it are its replacement.
/** The runtime-local seed for hash randomization */
private int hashSeed = 0;
// First 64-bit seed word; assigned from this.random.nextLong() in the constructor.
private long hashSeedK0;
// Second 64-bit seed word; assigned from this.random.nextLong() in the constructor.
private long hashSeedK1;

private final List<EventHook> eventHooks = new Vector<EventHook>();
private boolean hasEventHooks;
private boolean globalAbortOnExceptionEnabled = false;
private boolean doNotReverseLookupEnabled = false;
private volatile boolean objectSpaceEnabled;
// Copied from RubyInstanceConfig.isSiphashEnabled() in the constructor; exposed read-only
// through isSiphashEnabled().
private boolean siphashEnabled;

private final Set<Script> jittedMethods = Collections.synchronizedSet(new WeakHashSet<Script>());

@@ -160,6 +160,8 @@ public boolean shouldPrecompileAll() {
private Profile profile = Profile.DEFAULT;
private boolean objectSpaceEnabled
= SafePropertyAccessor.getBoolean("jruby.objectspace.enabled", false);
// Initial value comes from the "jruby.siphash.enabled" property lookup, with false as the
// fallback; it can be overridden afterwards through setSiphashEnabled().
private boolean siphashEnabled
= SafePropertyAccessor.getBoolean("jruby.siphash.enabled", false);

private CompileMode compileMode = CompileMode.JIT;
private boolean runRubyInProcess = true;
@@ -921,6 +923,14 @@ public void setObjectSpaceEnabled(boolean newObjectSpaceEnabled) {
/**
 * Returns the objectSpaceEnabled setting held by this configuration.
 *
 * @return the value of the objectSpaceEnabled field
 */
public boolean isObjectSpaceEnabled() {
return objectSpaceEnabled;
}

/**
 * Overrides the siphashEnabled setting held by this configuration.
 *
 * @param newSiphashEnabled the value to store in the siphashEnabled field
 */
public void setSiphashEnabled(boolean newSiphashEnabled) {
siphashEnabled = newSiphashEnabled;
}

/**
 * Returns the siphashEnabled setting held by this configuration.
 *
 * @return the value of the siphashEnabled field
 */
public boolean isSiphashEnabled() {
return siphashEnabled;
}

public void setEnvironment(Map newEnvironment) {
if (newEnvironment == null) newEnvironment = new HashMap();
@@ -95,7 +95,9 @@
import org.jruby.util.MurmurHash;

This comment has been minimized.

Copy link
@voxik

voxik Dec 12, 2012

Isn't this line superfluous?

This comment has been minimized.

Copy link
@nahi

nahi Dec 12, 2012

Author Owner

Indeed. Removed at f330cd8
Thank you!

This comment has been minimized.

Copy link
@voxik

voxik Dec 12, 2012

And src/org/jruby/util/MurmurHash.java file should be removed as well. Sorry for late notice.

This comment has been minimized.

Copy link
@nahi

nahi Dec 13, 2012

Author Owner

Yeah, you're right, it should be safe to remove. I considered removing the MurmurHash implementation, but I left it in because keeping it wouldn't cause any problems. Thank you!

import org.jruby.util.Numeric;
import org.jruby.util.Pack;
import org.jruby.util.PerlHash;
import org.jruby.util.RegexpOptions;
import org.jruby.util.SipHashInline;
import org.jruby.util.Sprintf;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;
@@ -1181,11 +1183,15 @@ public int hashCode() {
}

// Computes an int hash code over this string's bytes, keyed by the given runtime's seed(s).
// NOTE: diff rendering. This hunk interleaves the lines removed by the commit with the lines
// that replace them; every removed line is flagged below.
private int strHashCode(Ruby runtime) {
// (removed by this commit) 32-bit MurmurHash seeded with runtime.getHashSeed().
int hash = MurmurHash.hash32(value.getUnsafeBytes(), value.getBegin(), value.getRealSize(), runtime.getHashSeed());
// (added) 64-bit hash: SipHashInline.hash24 keyed with (K0, K1) when the runtime reports
// siphash as enabled, otherwise PerlHash.hash keyed with K0 only.
long hash = runtime.isSiphashEnabled() ? SipHashInline.hash24(runtime.getHashSeedK0(),
runtime.getHashSeedK1(), value.getUnsafeBytes(), value.getBegin(),
value.getRealSize()) : PerlHash.hash(runtime.getHashSeedK0(),
value.getUnsafeBytes(), value.getBegin(), value.getRealSize());
if (runtime.is1_9()) {
// (removed by this commit) single-line form of the statement that follows.
hash ^= (value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT ? 0 : value.getEncoding().getIndex());
// (added) XORs in the encoding's index, or 0 (no change) when the encoding is
// ASCII-compatible and the scanned code range is CR_7BIT.
hash ^= (value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT ? 0
: value.getEncoding().getIndex());
}
// (removed by this commit) return of the old int-typed hash.
return hash;
// (added) the narrowing cast keeps only the low 32 bits of the 64-bit hash.
return (int) hash;
}

@Override
@@ -0,0 +1,20 @@
package org.jruby.util;

/**
 * Perl's Hash implementation, computed over a 64-bit {@code long} state.
 *
 * @author nahi@ruby-lang.org
 */
public class PerlHash {
    /**
     * Hashes {@code length} bytes of {@code src}, starting at {@code offset}.
     *
     * @param key    initial state (the seed) that the bytes are mixed into
     * @param src    array holding the bytes to hash
     * @param offset index of the first byte to hash
     * @param length number of bytes to hash; no bytes are read when it is zero or negative
     * @return the mixed 64-bit state
     */
    public static long hash(long key, byte[] src, int offset, int length) {
        long state = key;
        int pos = offset;
        int remaining = length;

        // Per-byte mixing step; each byte is treated as unsigned (0..255).
        while (remaining-- > 0) {
            state += src[pos++] & 0xFF;
            state += state << 10;
            state ^= state >>> 6;
        }

        // Final avalanche, applied even when no bytes were consumed.
        state += state << 3;
        state ^= state >>> 11;
        return state + (state << 15);
    }
}
@@ -0,0 +1,174 @@
package org.jruby.util;

/**
 * SipHash-2-4 implementation with the SIPROUND step inlined by hand.
 *
 * For details about SipHash, see:
 *   "a fast short-input PRF" https://www.131002.net/siphash/
 *
 * Every input byte is treated as an unsigned value (0..255). Java's {@code byte} is signed,
 * so each byte is masked with {@code 0xff} before it is packed into a 64-bit word; without
 * the mask a byte >= 0x80 would sign-extend and overwrite all higher bytes of the word.
 *
 * @author nahi@ruby-lang.org
 */
public class SipHashInline {

    /**
     * Hashes the whole of {@code data} with SipHash-2-4.
     *
     * @param k0   low 64 bits of the 128-bit key
     * @param k1   high 64 bits of the 128-bit key
     * @param data message bytes
     * @return the 64-bit SipHash-2-4 value
     */
    public static long hash24(long k0, long k1, byte[] data) {
        return hash24(k0, k1, data, 0, data.length);
    }

    /**
     * Hashes {@code length} bytes of {@code src}, starting at {@code offset}, with SipHash-2-4.
     *
     * @param k0     low 64 bits of the 128-bit key
     * @param k1     high 64 bits of the 128-bit key
     * @param src    array holding the message bytes
     * @param offset index of the first message byte
     * @param length number of message bytes
     * @return the 64-bit SipHash-2-4 value
     */
    public static long hash24(long k0, long k1, byte[] src, int offset, int length) {
        long v0 = 0x736f6d6570736575L ^ k0;
        long v1 = 0x646f72616e646f6dL ^ k1;
        long v2 = 0x6c7967656e657261L ^ k0;
        long v3 = 0x7465646279746573L ^ k1;
        long m;
        // index just past the last complete 8-byte block
        int last = offset + length / 8 * 8;
        int i = offset;

        // processing 8 bytes blocks in data
        while (i < last) {
            // pack a block to long, as LE 8 bytes (each byte masked to avoid sign extension)
            m = (src[i++] & 0xffL)
                | (src[i++] & 0xffL) <<  8
                | (src[i++] & 0xffL) << 16
                | (src[i++] & 0xffL) << 24
                | (src[i++] & 0xffL) << 32
                | (src[i++] & 0xffL) << 40
                | (src[i++] & 0xffL) << 48
                | (src[i++] & 0xffL) << 56;
            // MSGROUND {
            v3 ^= m;

            /* SIPROUND with hand reordering
             *
             * SIPROUND in siphash24.c:
             *   A: v0 += v1;
             *   B: v1=ROTL(v1,13);
             *   C: v1 ^= v0;
             *   D: v0=ROTL(v0,32);
             *   E: v2 += v3;
             *   F: v3=ROTL(v3,16);
             *   G: v3 ^= v2;
             *   H: v0 += v3;
             *   I: v3=ROTL(v3,21);
             *   J: v3 ^= v0;
             *   K: v2 += v1;
             *   L: v1=ROTL(v1,17);
             *   M: v1 ^= v2;
             *   N: v2=ROTL(v2,32);
             *
             * Each dependency:
             *   B -> A
             *   C -> A, B
             *   D -> C
             *   F -> E
             *   G -> E, F
             *   H -> D, G
             *   I -> H
             *   J -> H, I
             *   K -> C, G
             *   L -> K
             *   M -> K, L
             *   N -> M
             *
             * Dependency graph:
             *   D -> C -> B -> A
             *        G -> F -> E
             *   J -> I -> H -> D, G
             *   N -> M -> L -> K -> C, G
             *
             * Resulting parallel friendly execution order:
             *   -> ABCDHIJ
             *   -> EFGKLMN
             */

            // SIPROUND {
            v0 += v1;                    v2 += v3;
            v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
            v1 ^= v0;                    v3 ^= v2;
            v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
            v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
            v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
            v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
            // }
            // SIPROUND {
            v0 += v1;                    v2 += v3;
            v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
            v1 ^= v0;                    v3 ^= v2;
            v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
            v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
            v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
            v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
            // }
            v0 ^= m;
            // }
        }

        // packing the last block to long, as LE 0-7 bytes + the length in the top byte
        m = 0;
        for (i = offset + length - 1; i >= last; --i) {
            m <<= 8;
            m |= src[i] & 0xffL; // mask: a negative byte must not flood the higher bits
        }
        m |= (long) length << 56;
        // MSGROUND {
        v3 ^= m;
        // SIPROUND {
        v0 += v1;                    v2 += v3;
        v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
        v1 ^= v0;                    v3 ^= v2;
        v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
        v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
        v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
        v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
        // }
        // SIPROUND {
        v0 += v1;                    v2 += v3;
        v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
        v1 ^= v0;                    v3 ^= v2;
        v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
        v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
        v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
        v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
        // }
        v0 ^= m;
        // }

        // finishing: four SIPROUNDs after flipping the low byte of v2
        v2 ^= 0xff;
        // SIPROUND {
        v0 += v1;                    v2 += v3;
        v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
        v1 ^= v0;                    v3 ^= v2;
        v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
        v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
        v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
        v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
        // }
        // SIPROUND {
        v0 += v1;                    v2 += v3;
        v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
        v1 ^= v0;                    v3 ^= v2;
        v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
        v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
        v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
        v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
        // }
        // SIPROUND {
        v0 += v1;                    v2 += v3;
        v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
        v1 ^= v0;                    v3 ^= v2;
        v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
        v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
        v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
        v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
        // }
        // SIPROUND {
        v0 += v1;                    v2 += v3;
        v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
        v1 ^= v0;                    v3 ^= v2;
        v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
        v0 += v3;                    v1 = (v1 << 17) | v1 >>> 47;
        v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
        v3 ^= v0;                    v2 = (v2 << 32) | v2 >>> 32;
        // }
        return v0 ^ v1 ^ v2 ^ v3;
    }
}
@@ -0,0 +1,51 @@
package org.jruby.util;

import junit.framework.TestCase;

/**
 * Checks SipHashInline against the SipHash-2-4 reference test vectors.
 */
public class SipHashInlineTest extends TestCase {
    /** Key words used by the reference vectors (key bytes 00..0f, little-endian). */
    private static final long K0 = 0x0706050403020100L;
    private static final long K1 = 0x0f0e0d0c0b0a0908L;

    /** EXPECTED[n] is the hash of the n-byte message 00, 01, ..., n-1. */
    private static final long[] EXPECTED = {
        0x726fdb47dd0e0e31L, 0x74f839c593dc67fdL, 0x0d6c8009d9a94f5aL, 0x85676696d7fb7e2dL,
        0xcf2794e0277187b7L, 0x18765564cd99a68dL, 0xcbc9466e58fee3ceL, 0xab0200f58b01d137L,
        0x93f5f5799a932462L, 0x9e0082df0ba9e4b0L, 0x7a5dbbc594ddb9f3L, 0xf4b32f46226bada7L,
        0x751e8fbc860ee5fbL, 0x14ea5627c0843d90L, 0xf723ca908e7af2eeL, 0xa129ca6149be45e5L,
        0x3f2acc7f57c29bdbL, 0x699ae9f52cbe4794L, 0x4bc1b3f0968dd39cL, 0xbb6dc91da77961bdL,
        0xbed65cf21aa2ee98L, 0xd0f2cbb02e3b67c7L, 0x93536795e3a33e88L, 0xa80c038ccd5ccec8L,
        0xb8ad50c6f649af94L, 0xbce192de8a85b8eaL, 0x17d835b85bbb15f3L, 0x2f2e6163076bcfadL,
        0xde4daaaca71dc9a5L, 0xa6a2506687956571L, 0xad87a3535c49ef28L, 0x32d892fad841c342L,
        0x7127512f72f27cceL, 0xa7f32346f95978e3L, 0x12e0b01abb051238L, 0x15e034d40fa197aeL,
        0x314dffbe0815a3b4L, 0x027990f029623981L, 0xcadcd4e59ef40c4dL, 0x9abfd8766a33735cL,
        0x0e3ea96b5304a7d0L, 0xad0c42d6fc585992L, 0x187306c89bc215a9L, 0xd4a60abcf3792b95L,
        0xf935451de4f21df2L, 0xa9538f0419755787L, 0xdb9acddff56ca510L, 0xd06c98cd5c0975ebL,
        0xe612a3cb9ecba951L, 0xc766e62cfcadaf96L, 0xee64435a9752fe72L, 0xa192d576b245165aL,
        0x0a8787bf8ecb74b2L, 0x81b3e73d20b49b6fL, 0x7fa8220ba3b2eceaL, 0x245731c13ca42499L,
        0xb78dbfaf3a8d83bdL, 0xea1ad565322a1a0bL, 0x60e61c23a3795013L, 0x6606d7e446282b93L,
        0x6ca4ecb15c5f91e1L, 0x9f626da15c9625f3L, 0xe51b38608ef25f57L, 0x958a324ceb064572L
    };

    /**
     * Builds a buffer of {@code padding + size} bytes whose last {@code size} bytes are
     * 00, 01, ..., size-1; the leading padding bytes are left at zero.
     */
    private static byte[] sequentialMessage(int padding, int size) {
        byte[] buffer = new byte[padding + size];
        for (int k = 0; k < size; ++k) {
            buffer[padding + k] = (byte) k;
        }
        return buffer;
    }

    // Ported from test vectors in siphash24.c at
    // https://www.131002.net/siphash/siphash24.c
    public void testVectors() {
        for (int size = 0; size < EXPECTED.length; ++size) {
            byte[] message = sequentialMessage(0, size);
            assertEquals(EXPECTED[size], SipHashInline.hash24(K0, K1, message));
        }
    }

    // Same vectors, with the message placed at offsets 0..15 inside a larger buffer.
    public void testOffset() {
        for (int size = 0; size < EXPECTED.length; ++size) {
            for (int padding = 0; padding < 16; ++padding) {
                byte[] buffer = sequentialMessage(padding, size);
                assertEquals(EXPECTED[size], SipHashInline.hash24(K0, K1, buffer, padding, size));
            }
        }
    }
}

0 comments on commit b8b26d7

Please sign in to comment.
You can’t perform that action at this time.