Permalink
Browse files

Fixing hashing consistency across JVM instances.

Several of the core JRuby classes calculate hash codes based on Java or Ruby
object ids. This doesn't produce consistent hashing across JVM instances, which
is needed for distributed frameworks. For example, Hadoop uses hashCode values
to distribute keys from the map phase to the same reducer task (partitioning).

This commit adds hashCode (and Ruby's hash method) implementations for
RubyBoolean, RubyNil, and RubySymbol. RubyBoolean and RubyNil simply return
static, randomly-generated hashCode values that are hard-coded. This replaces
the default Java Object#hashCode.

For RubySymbol, the previous implementation of hashCode returned the symbol's
id, which could be different depending on the order in which symbols are
created. This updates it to calculate a hashCode based on the raw symbolBytes
like the RubyString implementation, but with a RubySymbol-specific seed and
without the encoding addition for 1.9. This value is calculated when symbols
are instantiated so the performance impact should be minimal.

This commit also adds a RubyInstanceConfig setting and CLI option for
consistent hashing, jruby.consistent.hashing.enabled, which controls whether
the Ruby runtime's hash seeds (k0 and k1) are generated randomly. When set to
true, they are set to static values. These hash seeds are used to hash
RubyString objects, so this will make string hash codes consistent across JVMs.
  • Loading branch information...
1 parent ed0a495 commit a74232aa9cc3acae3def628c03516eac76efcc97 @rdblue committed Mar 17, 2013
View
@@ -216,8 +216,14 @@ private Ruby(RubyInstanceConfig config) {
myRandom = new Random();
}
this.random = myRandom;
- this.hashSeedK0 = this.random.nextLong();
- this.hashSeedK1 = this.random.nextLong();
+
+ if (RubyInstanceConfig.CONSISTENT_HASHING_ENABLED) {
+ this.hashSeedK0 = -561135208506705104l;
+ this.hashSeedK1 = 7114160726623585955l;
+ } else {
+ this.hashSeedK0 = this.random.nextLong();
+ this.hashSeedK1 = this.random.nextLong();
+ }
this.beanManager.register(new Config(this));
this.beanManager.register(parserStats);
@@ -161,6 +161,20 @@ public static IRubyObject true_to_s(IRubyObject t) {
}
}
+ @JRubyMethod(name = "hash") // bound under the name "hash"
+ public RubyFixnum hash(ThreadContext context) {
+ return context.runtime.newFixnum(hashCode()); // wrap the fixed Java hashCode() value in a Fixnum
+ }
+
+ @Override
+ public int hashCode() { // returns one of two hard-coded constants instead of the default Object#hashCode
+ if ((flags & FALSE_F) == 0) { // FALSE_F bit clear -- presumably the `true` singleton; confirm against the flag's definition
+ return 155;
+ } else { // FALSE_F bit set -- presumably the `false` singleton
+ return -48;
+ }
+ }
+
@Override
public RubyFixnum id() {
if ((flags & FALSE_F) == 0) {
@@ -1653,6 +1653,14 @@ public boolean shouldPrecompileAll() {
public static final boolean COROUTINE_FIBERS = Options.FIBER_COROUTINES.load();
+ /**
+ * Whether to calculate consistent hashes across JVM instances, or to ensure
+ * unpredictable hash values using SecureRandom.
+ *
+ * Set with the <tt>jruby.consistent.hashing.enabled</tt> system property.
+ */
+ public static final boolean CONSISTENT_HASHING_ENABLED = Options.CONSISTENT_HASHING_ENABLED.load();
+
private static volatile boolean loadedNativeExtensions = false;
////////////////////////////////////////////////////////////////////////////
View
@@ -181,7 +181,17 @@ public static RubyBoolean op_xor(IRubyObject recv, IRubyObject obj) {
public IRubyObject nil_p() {
return getRuntime().getTrue();
}
-
+
+ @JRubyMethod(name = "hash") // bound under the name "hash"
+ public RubyFixnum hash(ThreadContext context) {
+ return context.runtime.newFixnum(hashCode()); // wrap the fixed Java hashCode() value in a Fixnum
+ }
+
+ @Override
+ public int hashCode() { // overrides the default Object#hashCode with a constant
+ return 34; // hard-coded, so the value does not depend on the JVM instance
+ }
+
@Override
public RubyFixnum id() {
return getRuntime().newFixnum(4);
@@ -65,15 +65,20 @@
import org.jruby.runtime.callsite.NormalCachingCallSite;
import org.jruby.runtime.marshal.UnmarshalStream;
import org.jruby.util.ByteList;
+import org.jruby.util.PerlHash;
+import org.jruby.util.SipHashInline;
/**
* Represents a Ruby symbol (e.g. :bar)
*/
@JRubyClass(name="Symbol")
public class RubySymbol extends RubyObject {
+ public static final long symbolHashSeedK0 = 5238926673095087190l;
+
private final String symbol;
private final int id;
private final ByteList symbolBytes;
+ private final int hashCode;
/**
*
@@ -99,6 +104,13 @@ private RubySymbol(Ruby runtime, String internedSymbol, ByteList symbolBytes) {
this.symbol = internedSymbol;
this.symbolBytes = symbolBytes;
this.id = runtime.allocSymbolId();
+
+ long hash = runtime.isSiphashEnabled() ? SipHashInline.hash24(
+ symbolHashSeedK0, 0, symbolBytes.getUnsafeBytes(),
+ symbolBytes.getBegin(), symbolBytes.getRealSize()) :
+ PerlHash.hash(symbolHashSeedK0, symbolBytes.getUnsafeBytes(),
+ symbolBytes.getBegin(), symbolBytes.getRealSize());
+ this.hashCode = (int) hash;
}
private RubySymbol(Ruby runtime, String internedSymbol) {
@@ -296,7 +308,7 @@ public RubyFixnum hash(ThreadContext context) {
@Override
public int hashCode() {
- return id;
+ return hashCode;
}
public int getId() {
@@ -213,6 +213,7 @@ public static String dump() {
public static final Option<Boolean> FIBER_COROUTINES = bool(MISCELLANEOUS, "fiber.coroutines", false, "Use JVM coroutines for Fiber.");
public static final Option<Boolean> GLOBAL_REQUIRE_LOCK = bool(MISCELLANEOUS, "global.require.lock", false, "Use a single global lock for requires.");
public static final Option<Boolean> NATIVE_EXEC = bool(MISCELLANEOUS, "native.exec", true, "Do a true process-obliterating native exec for Kernel#exec.");
+ public static final Option<Boolean> CONSISTENT_HASHING_ENABLED = bool(MISCELLANEOUS, "consistent.hashing.enabled", false, "Generate consistent object hashes across JVMs");
public static final Option<Boolean> DEBUG_LOADSERVICE = bool(DEBUG, "debug.loadService", false, "Log require/load file searches.");
public static final Option<Boolean> DEBUG_LOADSERVICE_TIMING = bool(DEBUG, "debug.loadService.timing", false, "Log require/load parse+evaluate times.");
@@ -38,6 +38,8 @@
import org.jruby.Ruby;
import org.jruby.RubyFixnum;
import org.jruby.RubyNil;
+import org.jruby.javasupport.util.RuntimeHelpers;
+import org.jruby.runtime.builtin.IRubyObject;
/**
* @author chadfowler
@@ -98,4 +100,14 @@ public void testOpXOr() {
assertTrue(RubyNil.op_xor(rubyNil, runtime.getTrue()).isTrue());
assertTrue(RubyNil.op_xor(rubyNil, runtime.getFalse()).isFalse());
}
+
+ public void testHash() { // checks the "hash" method result when invoked by name on rubyNil
+ IRubyObject hash = RuntimeHelpers.invoke(
+ runtime.getCurrentContext(), rubyNil, "hash");
+ assertEquals(RubyFixnum.newFixnum(runtime, 34), hash); // 34 is the constant returned by RubyNil.hashCode()
+ }
+
+ public void testHashCode() { // checks the Java-level hashCode() of rubyNil directly
+ assertEquals(34, rubyNil.hashCode()); // must match the constant hard-coded in RubyNil.hashCode()
+ }
}
@@ -62,4 +62,13 @@ public void testSymbolTable() throws Exception {
assertSame(another, st.getSymbol("another_name"));
assertSame(another, st.fastGetSymbol("another_name"));
}
+
+ public void testSymbolHashCode() { // checks that a symbol's hashCode is computed rather than taken from its id
+ RubySymbol sym = RubySymbol.newSymbol(runtime, "somename");
+ assertTrue(sym.hashCode() != 0);
+ assertTrue(sym.hashCode() != sym.getId()); // hashCode() previously returned the symbol id
+ if (runtime.isSiphashEnabled()) { // the expected value below is pinned only for the SipHash path
+ assertEquals(1706472664, sym.hashCode());
+ }
+ }
}

0 comments on commit a74232a

Please sign in to comment.