Permalink
Browse files

Use Perl's Hash for String#hash

Replace Murmur2 with Perl's Hash.  Perl's Hash is 10**5 times faster
than Murmur2.  Our understanding is that Perl's Hash has not been broken
with regard to HashDoS attacks[1] so far.

In case Perl's Hash is broken in the future, you can switch to
SipHash-2-4[2] by setting the "jruby.siphash.enabled=true" system property
(false by default).

[1] http://2012.appsec-forum.ch/conferences/#c17
[2] https://www.131002.net/siphash/
  • Loading branch information...
nahi committed Dec 12, 2012
1 parent ab5b3bd commit b8b26d7d67c8f5ee416ed32a85c89a567d4a480e
View
@@ -287,14 +287,16 @@ private Ruby(RubyInstanceConfig config) {
this.out = config.getOutput();
this.err = config.getError();
this.objectSpaceEnabled = config.isObjectSpaceEnabled();
+ this.siphashEnabled = config.isSiphashEnabled();
this.profile = config.getProfile();
this.currentDirectory = config.getCurrentDirectory();
this.kcode = config.getKCode();
this.beanManager = BeanManagerFactory.create(this, config.isManagementEnabled());
this.jitCompiler = new JITCompiler(this);
this.parserStats = new ParserStats(this);
- this.hashSeed = this.random.nextInt();
+ this.hashSeedK0 = this.random.nextLong();
+ this.hashSeedK1 = this.random.nextLong();
this.beanManager.register(new Config(this));
this.beanManager.register(parserStats);
@@ -3774,6 +3776,11 @@ public boolean isObjectSpaceEnabled() {
public void setObjectSpaceEnabled(boolean objectSpaceEnabled) {
this.objectSpaceEnabled = objectSpaceEnabled;
}
+
+ // You cannot set siphashEnabled property except via RubyInstanceConfig to avoid mixing hash functions.
+ public boolean isSiphashEnabled() {
+ return siphashEnabled;
+ }
public long getStartTime() {
return startTime;
@@ -4037,8 +4044,12 @@ public boolean isBooting() {
return booting;
}
- public int getHashSeed() {
- return hashSeed;
+ public long getHashSeedK0() {
+ return hashSeedK0;
+ }
+
+ public long getHashSeedK1() {
+ return hashSeedK1;
}
public CoverageData getCoverageData() {
@@ -4063,13 +4074,15 @@ public CoverageData getCoverageData() {
/** The seed object for the seed value of the runtime-local PRNG */
private RubyInteger randomSeed;
/** The runtime-local seed for hash randomization */
- private int hashSeed = 0;
+ private long hashSeedK0;
+ private long hashSeedK1;
private final List<EventHook> eventHooks = new Vector<EventHook>();
private boolean hasEventHooks;
private boolean globalAbortOnExceptionEnabled = false;
private boolean doNotReverseLookupEnabled = false;
private volatile boolean objectSpaceEnabled;
+ private boolean siphashEnabled;
private final Set<Script> jittedMethods = Collections.synchronizedSet(new WeakHashSet<Script>());
@@ -160,6 +160,8 @@ public boolean shouldPrecompileAll() {
private Profile profile = Profile.DEFAULT;
private boolean objectSpaceEnabled
= SafePropertyAccessor.getBoolean("jruby.objectspace.enabled", false);
+ private boolean siphashEnabled
+ = SafePropertyAccessor.getBoolean("jruby.siphash.enabled", false);
private CompileMode compileMode = CompileMode.JIT;
private boolean runRubyInProcess = true;
@@ -921,6 +923,14 @@ public void setObjectSpaceEnabled(boolean newObjectSpaceEnabled) {
public boolean isObjectSpaceEnabled() {
return objectSpaceEnabled;
}
+
+ public void setSiphashEnabled(boolean newSiphashEnabled) {
+ siphashEnabled = newSiphashEnabled;
+ }
+
+ public boolean isSiphashEnabled() {
+ return siphashEnabled;
+ }
public void setEnvironment(Map newEnvironment) {
if (newEnvironment == null) newEnvironment = new HashMap();
@@ -95,7 +95,9 @@
import org.jruby.util.MurmurHash;

This comment has been minimized.

Show comment
Hide comment
@voxik

voxik Dec 12, 2012

Isn't this line superfluous?

@voxik

voxik Dec 12, 2012

Isn't this line superfluous?

This comment has been minimized.

Show comment
Hide comment
@nahi

nahi Dec 12, 2012

Owner

Indeed. Removed at f330cd8
Thank you!

@nahi

nahi Dec 12, 2012

Owner

Indeed. Removed at f330cd8
Thank you!

This comment has been minimized.

Show comment
Hide comment
@voxik

voxik Dec 12, 2012

And src/org/jruby/util/MurmurHash.java file should be removed as well. Sorry for late notice.

@voxik

voxik Dec 12, 2012

And src/org/jruby/util/MurmurHash.java file should be removed as well. Sorry for late notice.

This comment has been minimized.

Show comment
Hide comment
@nahi

nahi Dec 13, 2012

Owner

Yeah, you're right, it should be safe to remove. I considered to remove the MurmurHash impl but I left it because it wouldn't cause any problem if it keeps existing. Thank you!

@nahi

nahi Dec 13, 2012

Owner

Yeah, you're right, it should be safe to remove. I considered to remove the MurmurHash impl but I left it because it wouldn't cause any problem if it keeps existing. Thank you!

import org.jruby.util.Numeric;
import org.jruby.util.Pack;
+import org.jruby.util.PerlHash;
import org.jruby.util.RegexpOptions;
+import org.jruby.util.SipHashInline;
import org.jruby.util.Sprintf;
import org.jruby.util.StringSupport;
import org.jruby.util.TypeConverter;
@@ -1181,11 +1183,15 @@ public int hashCode() {
}
private int strHashCode(Ruby runtime) {
- int hash = MurmurHash.hash32(value.getUnsafeBytes(), value.getBegin(), value.getRealSize(), runtime.getHashSeed());
+ long hash = runtime.isSiphashEnabled() ? SipHashInline.hash24(runtime.getHashSeedK0(),
+ runtime.getHashSeedK1(), value.getUnsafeBytes(), value.getBegin(),
+ value.getRealSize()) : PerlHash.hash(runtime.getHashSeedK0(),
+ value.getUnsafeBytes(), value.getBegin(), value.getRealSize());
if (runtime.is1_9()) {
- hash ^= (value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT ? 0 : value.getEncoding().getIndex());
+ hash ^= (value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT ? 0
+ : value.getEncoding().getIndex());
}
- return hash;
+ return (int) hash;
}
@Override
@@ -0,0 +1,20 @@
+package org.jruby.util;
+
/**
 * Perl's Hash implementation.
 *
 * Mixes one byte at a time into a 64-bit accumulator (add, shift-add,
 * shift-xor) and applies a three-step avalanche once all bytes are consumed.
 *
 * @author nahi@ruby-lang.org
 */
public class PerlHash {
    /**
     * Hashes {@code length} bytes of {@code src} starting at {@code offset}.
     *
     * @param key    seed; used as the initial value of the accumulator
     * @param src    array holding the bytes to hash
     * @param offset index of the first byte to hash
     * @param length number of bytes to hash; nothing is read when it is not positive
     * @return the 64-bit hash value
     */
    public static long hash(long key, byte[] src, int offset, int length) {
        long acc = key;
        int cursor = offset;
        for (int remaining = length; remaining > 0; remaining--) {
            // Bytes are signed in Java; the mask makes each one count as 0..255.
            acc += src[cursor++] & 0xFF;
            acc += acc << 10;
            acc ^= acc >>> 6;
        }
        // Final avalanche.
        acc += acc << 3;
        acc ^= acc >>> 11;
        acc += acc << 15;
        return acc;
    }
}
@@ -0,0 +1,174 @@
+package org.jruby.util;
+
+/**
+ * SipHash implementation with hand inlining the SIPROUND.
+ *
+ * To know details about SipHash, see;
+ * "a fast short-input PRF" https://www.131002.net/siphash/
+ *
+ * @author nahi@ruby-lang.org
+ */
+public class SipHashInline {
+
+ public static long hash24(long k0, long k1, byte[] data) {
+ return hash24(k0, k1, data, 0, data.length);
+ }
+
+ public static long hash24(long k0, long k1, byte[] src, int offset, int length) {
+ long v0 = 0x736f6d6570736575L ^ k0;
+ long v1 = 0x646f72616e646f6dL ^ k1;
+ long v2 = 0x6c7967656e657261L ^ k0;
+ long v3 = 0x7465646279746573L ^ k1;
+ long m;
+ int last = offset + length / 8 * 8;
+ int i = offset;
+
+ // processing 8 bytes blocks in data
+ while (i < last) {
+ // pack a block to long, as LE 8 bytes
+ m = (long) src[i++] |
+ (long) src[i++] << 8 |
+ (long) src[i++] << 16 |
+ (long) src[i++] << 24 |
+ (long) src[i++] << 32 |
+ (long) src[i++] << 40 |
+ (long) src[i++] << 48 |
+ (long) src[i++] << 56 ;
+ // MSGROUND {
+ v3 ^= m;
+
+ /* SIPROUND wih hand reordering
+ *
+ * SIPROUND in siphash24.c:
+ * A: v0 += v1;
+ * B: v1=ROTL(v1,13);
+ * C: v1 ^= v0;
+ * D: v0=ROTL(v0,32);
+ * E: v2 += v3;
+ * F: v3=ROTL(v3,16);
+ * G: v3 ^= v2;
+ * H: v0 += v3;
+ * I: v3=ROTL(v3,21);
+ * J: v3 ^= v0;
+ * K: v2 += v1;
+ * L: v1=ROTL(v1,17);
+ * M: v1 ^= v2;
+ * N: v2=ROTL(v2,32);
+ *
+ * Each dependency:
+ * B -> A
+ * C -> A, B
+ * D -> C
+ * F -> E
+ * G -> E, F
+ * H -> D, G
+ * I -> H
+ * J -> H, I
+ * K -> C, G
+ * L -> K
+ * M -> K, L
+ * N -> M
+ *
+ * Dependency graph:
+ * D -> C -> B -> A
+ * G -> F -> E
+ * J -> I -> H -> D, G
+ * N -> M -> L -> K -> C, G
+ *
+ * Resulting parallel friendly execution order:
+ * -> ABCDHIJ
+ * -> EFGKLMN
+ */
+
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ v0 ^= m;
+ // }
+ }
+
+ // packing the last block to long, as LE 0-7 bytes + the length in the top byte
+ m = 0;
+ for (i = offset + length - 1; i >= last; --i) {
+ m <<= 8; m |= (long) src[i];
+ }
+ m |= (long) length << 56;
+ // MSGROUND {
+ v3 ^= m;
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ v0 ^= m;
+ // }
+
+ // finishing...
+ v2 ^= 0xff;
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ // SIPROUND {
+ v0 += v1; v2 += v3;
+ v1 = (v1 << 13) | v1 >>> 51; v3 = (v3 << 16) | v3 >>> 48;
+ v1 ^= v0; v3 ^= v2;
+ v0 = (v0 << 32) | v0 >>> 32; v2 += v1;
+ v0 += v3; v1 = (v1 << 17) | v1 >>> 47;
+ v3 = (v3 << 21) | v3 >>> 43; v1 ^= v2;
+ v3 ^= v0; v2 = (v2 << 32) | v2 >>> 32;
+ // }
+ return v0 ^ v1 ^ v2 ^ v3;
+ }
+}
@@ -0,0 +1,51 @@
+package org.jruby.util;
+
+import junit.framework.TestCase;
+
+public class SipHashInlineTest extends TestCase {
+ private long[] EXPECTED = new long[] { 0x726fdb47dd0e0e31L, 0x74f839c593dc67fdL,
+ 0x0d6c8009d9a94f5aL, 0x85676696d7fb7e2dL, 0xcf2794e0277187b7L, 0x18765564cd99a68dL,
+ 0xcbc9466e58fee3ceL, 0xab0200f58b01d137L, 0x93f5f5799a932462L, 0x9e0082df0ba9e4b0L,
+ 0x7a5dbbc594ddb9f3L, 0xf4b32f46226bada7L, 0x751e8fbc860ee5fbL, 0x14ea5627c0843d90L,
+ 0xf723ca908e7af2eeL, 0xa129ca6149be45e5L, 0x3f2acc7f57c29bdbL, 0x699ae9f52cbe4794L,
+ 0x4bc1b3f0968dd39cL, 0xbb6dc91da77961bdL, 0xbed65cf21aa2ee98L, 0xd0f2cbb02e3b67c7L,
+ 0x93536795e3a33e88L, 0xa80c038ccd5ccec8L, 0xb8ad50c6f649af94L, 0xbce192de8a85b8eaL,
+ 0x17d835b85bbb15f3L, 0x2f2e6163076bcfadL, 0xde4daaaca71dc9a5L, 0xa6a2506687956571L,
+ 0xad87a3535c49ef28L, 0x32d892fad841c342L, 0x7127512f72f27cceL, 0xa7f32346f95978e3L,
+ 0x12e0b01abb051238L, 0x15e034d40fa197aeL, 0x314dffbe0815a3b4L, 0x027990f029623981L,
+ 0xcadcd4e59ef40c4dL, 0x9abfd8766a33735cL, 0x0e3ea96b5304a7d0L, 0xad0c42d6fc585992L,
+ 0x187306c89bc215a9L, 0xd4a60abcf3792b95L, 0xf935451de4f21df2L, 0xa9538f0419755787L,
+ 0xdb9acddff56ca510L, 0xd06c98cd5c0975ebL, 0xe612a3cb9ecba951L, 0xc766e62cfcadaf96L,
+ 0xee64435a9752fe72L, 0xa192d576b245165aL, 0x0a8787bf8ecb74b2L, 0x81b3e73d20b49b6fL,
+ 0x7fa8220ba3b2eceaL, 0x245731c13ca42499L, 0xb78dbfaf3a8d83bdL, 0xea1ad565322a1a0bL,
+ 0x60e61c23a3795013L, 0x6606d7e446282b93L, 0x6ca4ecb15c5f91e1L, 0x9f626da15c9625f3L,
+ 0xe51b38608ef25f57L, 0x958a324ceb064572L };
+
+ // Ported from test vectors in siphash24.c at
+// https://www.131002.net/siphash/siphash24.c
+ public void testVectors() {
+ long k0 = 0x0706050403020100L;
+ long k1 = 0x0f0e0d0c0b0a0908L;
+ for (int i = 0; i < EXPECTED.length; ++i) {
+ byte[] msg = new byte[i];
+ for (int j = 0; j < i; ++j) {
+ msg[j] = (byte) j;
+ }
+ assertEquals(EXPECTED[i], SipHashInline.hash24(k0, k1, msg));
+ }
+ }
+
+ public void testOffset() {
+ long k0 = 0x0706050403020100L;
+ long k1 = 0x0f0e0d0c0b0a0908L;
+ for (int i = 0; i < EXPECTED.length; ++i) {
+ for (int of = 0; of < 16; ++of) {
+ byte[] msg = new byte[i + of];
+ for (int j = 0; j < i; ++j) {
+ msg[j + of] = (byte) j;
+ }
+ assertEquals(EXPECTED[i], SipHashInline.hash24(k0, k1, msg, of, i));
+ }
+ }
+ }
+}

0 comments on commit b8b26d7

Please sign in to comment.