Skip to content
This repository
Browse code

Use MurmurHash 3 for String hashing

This gives us a fast hashing algorithm with good distribution.
It also accepts a seed value that randomizes the hash
output, which mitigates hash-collision DoS attacks.
  • Loading branch information...
commit 1d69526c484cc9435a7198e41b8995db6c3acf1a 1 parent 4aa59c4
Dirkjan Bussink authored February 27, 2012
2  vm/builtin/bignum.cpp
@@ -1276,7 +1276,7 @@ namespace rubinius {
1276 1276
        that unused memory.  This might only be a problem if calculations
1277 1277
        are leaving cruft in those unused bits.  However, since Bignums
1278 1278
        are immutable, this shouldn't happen to us. */
1279  
-    return String::hash_str((unsigned char *)a->dp, a->used * sizeof(mp_digit));
  1279
+    return String::hash_str(state, (unsigned char *)a->dp, a->used * sizeof(mp_digit));
1280 1280
   }
1281 1281
 
1282 1282
   size_t Bignum::managed_memory_size(STATE) {
4  vm/builtin/object.cpp
@@ -344,7 +344,7 @@ namespace rubinius {
344 344
   hashval Object::hash(STATE) {
345 345
     if(!reference_p()) {
346 346
 
347  
-#ifdef _LP64
  347
+#ifdef IS_X8664
348 348
       uintptr_t key = reinterpret_cast<uintptr_t>(this);
349 349
       key = (~key) + (key << 21); // key = (key << 21) - key - 1;
350 350
       key = key ^ (key >> 24);
@@ -372,7 +372,7 @@ namespace rubinius {
372 372
       } else if(Bignum* bignum = try_as<Bignum>(this)) {
373 373
         return bignum->hash_bignum(state);
374 374
       } else if(Float* flt = try_as<Float>(this)) {
375  
-        return String::hash_str((unsigned char *)(&(flt->val)), sizeof(double));
  375
+        return String::hash_str(state, (unsigned char *)(&(flt->val)), sizeof(double));
376 376
       } else {
377 377
         return id(state)->to_native();
378 378
       }
18  vm/builtin/randomizer.cpp
@@ -88,7 +88,7 @@ namespace rubinius {
88 88
     Randomizer* r = state->new_object<Randomizer>(G(randomizer));
89 89
     r->lock_ = RBX_SPINLOCK_INIT;
90 90
     r->rng_data(state, ByteArray::create(state, sizeof(struct random_state)));
91  
-
  91
+    random_init_single(r->rng_state(), 5489UL);
92 92
     return r;
93 93
   }
94 94
 
@@ -99,8 +99,6 @@ namespace rubinius {
99 99
       randomizer->klass(state, cls);
100 100
     }
101 101
 
102  
-    random_init_single(randomizer->rng_state(), 5489UL);
103  
-
104 102
     return randomizer;
105 103
   }
106 104
 
@@ -194,5 +192,19 @@ namespace rubinius {
194 192
   Float* Randomizer::rand_float(STATE) {
195 193
     return Float::create(state, rb_genrand_real());
196 194
   }
  195
+
  196
+  /*
  197
+   * Return a random 32-bit value without depending on a VM
  198
+   * or on any Randomizer instance (it takes no STATE argument),
  199
+   * so it can also be used outside the context of the VM.
  200
+   */
  201
+  uint32_t Randomizer::random_uint32() {
  202
+    uint32_t seed[] = { 0, 0, 0, 0 };  // presumably overwritten by random_seed() below; helper not shown here
  203
+    random_seed(seed, 4);
  204
+    struct random_state rng;  // generator state local to this call, discarded on return
  205
+    random_init_array(&rng, seed, 4);
  206
+    return random_gen_uint32(&rng);  // one value drawn from the freshly initialized generator
  207
+  }
  208
+
197 209
 }
198 210
 
2  vm/builtin/randomizer.hpp
@@ -52,6 +52,8 @@ namespace rubinius {
52 52
     // Rubinius.primitive :randomizer_rand_float
53 53
     Float* rand_float(STATE);
54 54
 
  55
+    static uint32_t random_uint32();
  56
+
55 57
     class Info : public TypeInfo {
56 58
     public:
57 59
       BASIC_TYPEINFO(TypeInfo)
49  vm/builtin/string.cpp
@@ -15,6 +15,8 @@
15 15
 #include "builtin/symbol.hpp"
16 16
 #include "builtin/tuple.hpp"
17 17
 
  18
+#include "util/murmur_hash3.hpp"
  19
+
18 20
 #include "configuration.hpp"
19 21
 #include "vm.hpp"
20 22
 #include "object_utils.hpp"
@@ -666,52 +668,21 @@ namespace rubinius {
666 668
 
667 669
     unsigned char* bp = (unsigned char*)(byte_address());
668 670
 
669  
-    hashval h = hash_str(bp, byte_size());
  671
+    hashval h = hash_str(state, bp, byte_size());
670 672
     hash_value(state, Fixnum::from(h));
671 673
 
672 674
     return h;
673 675
   }
674 676
 
675  
-  // see http://isthe.com/chongo/tech/comp/fnv/#FNV-param
676  
-#ifdef _LP64
677  
-  const static unsigned long FNVOffsetBasis = 14695981039346656037UL;
678  
-  const static unsigned long FNVHashPrime = 1099511628211UL;
  677
+  hashval String::hash_str(const unsigned char *bp, unsigned int sz, uint32_t seed) {  // MurmurHash3 of the sz bytes at bp, mixed with seed
  678
+#ifdef IS_X8664
  679
+    hashval hv[2];
  680
+    MurmurHash3_x64_128(bp, sz, seed, hv);  // NOTE(review): stores two uint64_t into hv, so this assumes hashval is 64 bits wide; sz is converted to int
679 681
 #else
680  
-  const static unsigned long FNVOffsetBasis = 2166136261UL;
681  
-  const static unsigned long FNVHashPrime = 16777619UL;
  682
+    hashval hv[1];
  683
+    MurmurHash3_x86_32(bp, sz, seed, hv);  // NOTE(review): stores a single uint32_t; if hashval is wider on a non-x86-64 target the rest of hv[0] stays uninitialized - confirm
682 684
 #endif
683  
-
684  
-  static inline unsigned long update_hash(unsigned long hv,
685  
-                                          unsigned char byte)
686  
-  {
687  
-    return (hv ^ byte) * FNVHashPrime;
688  
-  }
689  
-
690  
-  static inline unsigned long finish_hash(unsigned long hv) {
691  
-    return (hv>>FIXNUM_WIDTH) ^ (hv & FIXNUM_MAX);
692  
-  }
693  
-
694  
-  hashval String::hash_str(const char *bp) {
695  
-    hashval hv;
696  
-
697  
-    hv = FNVOffsetBasis;
698  
-
699  
-    while(*bp) {
700  
-      hv = update_hash(hv, *bp++);
701  
-    }
702  
-
703  
-    return finish_hash(hv);
704  
-  }
705  
-
706  
-  hashval String::hash_str(const unsigned char *bp, unsigned int sz) {
707  
-    unsigned char* be = (unsigned char*)bp + sz;
708  
-    hashval hv = FNVOffsetBasis;
709  
-
710  
-    while(bp < be) {
711  
-      hv = update_hash(hv, *bp++);
712  
-    }
713  
-
714  
-    return finish_hash(hv);
  685
+    return hv[0] & FIXNUM_MAX;  // only the first output word is used; masked with FIXNUM_MAX, presumably so it fits a Fixnum
715 686
   }
716 687
 
717 688
   Symbol* String::to_sym(STATE) {
9  vm/builtin/string.hpp
@@ -92,11 +92,12 @@ namespace rubinius {
92 92
     static String* create_pinned(STATE, Fixnum* size);
93 93
     static String* create_reserved(STATE, native_int bytes);
94 94
 
95  
-    // Hash the NUL-terminated string _bp_.
96  
-    static hashval hash_str(const char *bp);
97  
-
98 95
     // Hash the byte array _bp_ which contains _sz_ bytes.
99  
-    static hashval hash_str(const unsigned char *bp, unsigned int sz);
  96
+    static hashval hash_str(const unsigned char *bp, unsigned int sz, uint32_t seed);
  97
+
  98
+    static hashval hash_str(STATE, const unsigned char *bp, unsigned int sz) {  // convenience overload: hash with the seed held by state
  99
+      return hash_str(bp, sz, state->hash_seed());
  100
+    }
100 101
 
101 102
     // Rubinius.primitive :string_equal
102 103
     Object* equal(STATE, String* other) {
2  vm/llvm/state.cpp
@@ -732,7 +732,7 @@ namespace rubinius {
732 732
   }
733 733
 
734 734
   Symbol* LLVMState::symbol(const std::string sym) {
735  
-    return symbols_.lookup(sym);
  735
+    return symbols_.lookup(&shared_, sym);
736 736
   }
737 737
 
738 738
   std::string LLVMState::symbol_debug_str(const Symbol* sym) {
5  vm/shared_state.cpp
@@ -15,6 +15,7 @@
15 15
 
16 16
 #include "agent.hpp"
17 17
 #include "world_state.hpp"
  18
+#include "builtin/randomizer.hpp"
18 19
 
19 20
 #ifdef ENABLE_LLVM
20 21
 #include "llvm/state.hpp"
@@ -38,8 +39,8 @@ namespace rubinius {
38 39
     , tool_broker_(new tooling::ToolBroker)
39 40
     , ruby_critical_set_(false)
40 41
     , check_gc_(false)
41  
-
42 42
     , om(0)
  43
+
43 44
     , global_cache(new GlobalCache)
44 45
     , config(config)
45 46
     , user_variables(cp)
@@ -50,6 +51,8 @@ namespace rubinius {
50 51
     for(int i = 0; i < Primitives::cTotalPrimitives; i++) {
51 52
       primitive_hits_[i] = 0;
52 53
     }
  54
+
  55
+    hash_seed = Randomizer::random_uint32();
53 56
   }
54 57
 
55 58
   SharedState::~SharedState() {
1  vm/shared_state.hpp
@@ -102,6 +102,7 @@ namespace rubinius {
102 102
     SymbolTable symbols;
103 103
     LLVMState* llvm_state;
104 104
     Stats stats;
  105
+    uint32_t hash_seed;
105 106
 
106 107
   public:
107 108
     SharedState(Environment* env, Configuration& config, ConfigParser& cp);
4  vm/state.hpp
@@ -50,6 +50,10 @@ namespace rubinius {
50 50
       return vm_->symbol(str);
51 51
     }
52 52
 
  53
+    uint32_t hash_seed() {  // seed value used for string/symbol hashing
  54
+      return shared_.hash_seed;  // read from the shared state, so every State over the same shared state sees one seed
  55
+    }
  56
+
53 57
     template <class T>
54 58
       T* new_object(Class *cls) {
55 59
         return reinterpret_cast<T*>(vm_->new_object_typed(cls, sizeof(T), T::type));
12  vm/symboltable.cpp
@@ -62,7 +62,7 @@ namespace rubinius {
62 62
       return NULL;
63 63
     }
64 64
 
65  
-    return lookup(str, length);
  65
+    return lookup(str, length, state->hash_seed());
66 66
   }
67 67
 
68 68
   struct SpecialOperator {
@@ -96,15 +96,15 @@ namespace rubinius {
96 96
     return 0;
97 97
   }
98 98
 
99  
-  Symbol* SymbolTable::lookup(std::string str) {
100  
-    return lookup(str.data(), str.size());
  99
+  Symbol* SymbolTable::lookup(SharedState* shared, std::string str) {  // lookup for callers that hold a SharedState rather than a State
  100
+    return lookup(str.data(), str.size(), shared->hash_seed);  // forwards the raw bytes plus the shared hash seed
101 101
   }
102 102
 
103 103
   Symbol* SymbolTable::lookup(STATE, std::string str) {
104  
-    return lookup(str.data(), str.size());
  104
+    return lookup(str.data(), str.size(), state->hash_seed());  // forwards the raw bytes plus the seed obtained from state
105 105
   }
106 106
 
107  
-  Symbol* SymbolTable::lookup(const char* str, size_t length) {
  107
+  Symbol* SymbolTable::lookup(const char* str, size_t length, uint32_t seed) {
108 108
     size_t sym;
109 109
 
110 110
     if(const char* op = find_special(str, length)) {
@@ -112,7 +112,7 @@ namespace rubinius {
112 112
       length = strlen(str);
113 113
     }
114 114
 
115  
-    hashval hash = String::hash_str((unsigned char*)str, length);
  115
+    hashval hash = String::hash_str((unsigned char*)str, length, seed);
116 116
 
117 117
     // Symbols can be looked up by multiple threads at the same time.
118 118
     // This is fast operation, so we protect this with a spinlock.
6  vm/symboltable.hpp
@@ -29,6 +29,7 @@ namespace rubinius {
29 29
   class Array;
30 30
   class String;
31 31
   class Symbol;
  32
+  class SharedState;
32 33
 
33 34
   typedef std::vector<std::string> SymbolStrings;
34 35
   typedef std::vector<std::size_t> SymbolIds;
@@ -58,14 +59,15 @@ namespace rubinius {
58 59
     thread::SpinLock lock_;
59 60
     size_t bytes_used_;
60 61
 
  62
+    Symbol* lookup(const char* str, size_t length, uint32_t seed);
  63
+
61 64
   public:
62 65
     size_t& bytes_used() {
63 66
       return bytes_used_;
64 67
     }
65 68
 
66  
-    Symbol* lookup(std::string str);
  69
+    Symbol* lookup(SharedState* shared, std::string str);
67 70
     Symbol* lookup(STATE, std::string str);
68  
-    Symbol* lookup(const char* str, size_t length);
69 71
     Symbol* lookup(STATE, const char* str, size_t length);
70 72
     Symbol* lookup(STATE, String* str);
71 73
     String* lookup_string(STATE, const Symbol* sym);
4  vm/test/test_symboltable.hpp
@@ -74,8 +74,8 @@ class TestSymbolTable : public CxxTest::TestSuite, public VMTest {
74 74
     const char* str = "__uint_fast64_t";
75 75
     const char* str2 = "TkIF_MOD";
76 76
 
77  
-    TS_ASSERT_EQUALS(String::hash_str((unsigned char*)str, strlen(str)),
78  
-                     String::hash_str((unsigned char*)str2, strlen(str2)));
  77
+    TS_ASSERT_EQUALS(String::hash_str((unsigned char*)str, strlen(str), 0),
  78
+                     String::hash_str((unsigned char*)str2, strlen(str2), 0));
79 79
 
80 80
     sym  = symbols->lookup(state, std::string(str));
81 81
     sym2 = symbols->lookup(state, std::string(str2));
334  vm/util/murmur_hash3.cpp
... ...
@@ -0,0 +1,334 @@
  1
+//-----------------------------------------------------------------------------
  2
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
  3
+// domain. The author hereby disclaims copyright to this source code.
  4
+
  5
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
  6
+// algorithms are optimized for their respective platforms. You can still
  7
+// compile and run any of them on any platform, but your performance with the
  8
+// non-native version will be less than optimal.
  9
+
  10
+#include "murmur_hash3.hpp"
  11
+
  12
+//-----------------------------------------------------------------------------
  13
+// Platform-specific functions and macros
  14
+
  15
+// Microsoft Visual Studio
  16
+
  17
+#if defined(_MSC_VER)
  18
+
  19
+#define FORCE_INLINE    __forceinline
  20
+
  21
+#include <stdlib.h>
  22
+
  23
+#define ROTL32(x,y)     _rotl(x,y)
  24
+#define ROTL64(x,y)     _rotl64(x,y)
  25
+
  26
+#define BIG_CONSTANT(x) (x)
  27
+
  28
+// Other compilers
  29
+
  30
+#else   // defined(_MSC_VER)
  31
+
  32
+#define FORCE_INLINE __attribute__((always_inline))
  33
+
  34
+inline uint32_t rotl32 ( uint32_t x, int8_t r )  // rotate the 32-bit value x left by r bits
  35
+{
  36
+  return (x << r) | (x >> (32 - r));  // expects 0 < r < 32: r == 0 would shift right by the full 32-bit width
  37
+}
  38
+
  39
+inline uint64_t rotl64 ( uint64_t x, int8_t r )  // rotate the 64-bit value x left by r bits
  40
+{
  41
+  return (x << r) | (x >> (64 - r));  // expects 0 < r < 64: r == 0 would shift right by the full 64-bit width
  42
+}
  43
+
  44
+#define ROTL32(x,y)     rotl32(x,y)
  45
+#define ROTL64(x,y)     rotl64(x,y)
  46
+
  47
+#define BIG_CONSTANT(x) (x##LLU)
  48
+
  49
+#endif // !defined(_MSC_VER)
  50
+
  51
+//-----------------------------------------------------------------------------
  52
+// Block read - if your platform needs to do endian-swapping or can only
  53
+// handle aligned reads, do the conversion here
  54
+
  55
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )  // returns the i-th 32-bit word relative to p; i may be negative
  56
+{
  57
+  return p[i];  // plain load: no byte swapping and no handling of unaligned addresses is done here
  58
+}
  59
+
  60
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )  // returns the i-th 64-bit word relative to p
  61
+{
  62
+  return p[i];  // plain load: no byte swapping and no handling of unaligned addresses is done here
  63
+}
  64
+
  65
+//-----------------------------------------------------------------------------
  66
+// Finalization mix - force all bits of a hash block to avalanche
  67
+
  68
+FORCE_INLINE uint32_t fmix ( uint32_t h )  // 32-bit finalization mix: alternating xor-shift and multiply steps
  69
+{
  70
+  h ^= h >> 16;  // fold the high half into the low half
  71
+  h *= 0x85ebca6b;
  72
+  h ^= h >> 13;
  73
+  h *= 0xc2b2ae35;
  74
+  h ^= h >> 16;
  75
+
  76
+  return h;
  77
+}
  78
+
  79
+//----------
  80
+
  81
+FORCE_INLINE uint64_t fmix ( uint64_t k )  // 64-bit finalization mix: alternating xor-shift and multiply steps
  82
+{
  83
+  k ^= k >> 33;  // fold the upper bits into the lower bits
  84
+  k *= BIG_CONSTANT(0xff51afd7ed558ccd);
  85
+  k ^= k >> 33;
  86
+  k *= BIG_CONSTANT(0xc4ceb9fe1a85ec53);
  87
+  k ^= k >> 33;
  88
+
  89
+  return k;
  90
+}
  91
+
  92
+//-----------------------------------------------------------------------------
  93
+
  94
+void MurmurHash3_x86_32 ( const void * key, int len,  // hashes the len bytes starting at key
  95
+                          uint32_t seed, void * out )  // seed is the initial hash state; one uint32_t is stored at out
  96
+{
  97
+  const uint8_t * data = (const uint8_t*)key;
  98
+  const int nblocks = len / 4;  // number of complete 4-byte blocks
  99
+
  100
+  uint32_t h1 = seed;
  101
+
  102
+  uint32_t c1 = 0xcc9e2d51;
  103
+  uint32_t c2 = 0x1b873593;
  104
+
  105
+  //----------
  106
+  // body: mix every complete 4-byte block into h1
  107
+
  108
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);  // points just past the last complete block
  109
+
  110
+  for(int i = -nblocks; i; i++)  // i counts up from -nblocks to -1, so blocks are read in forward order
  111
+  {
  112
+    uint32_t k1 = getblock(blocks,i);
  113
+
  114
+    k1 *= c1;
  115
+    k1 = ROTL32(k1,15);
  116
+    k1 *= c2;
  117
+    
  118
+    h1 ^= k1;
  119
+    h1 = ROTL32(h1,13); 
  120
+    h1 = h1*5+0xe6546b64;
  121
+  }
  122
+
  123
+  //----------
  124
+  // tail: fold the remaining (len & 3) bytes into h1
  125
+
  126
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
  127
+
  128
+  uint32_t k1 = 0;
  129
+
  130
+  switch(len & 3)  // the cases have no break: each one falls through to the cases below it
  131
+  {
  132
+  case 3: k1 ^= tail[2] << 16;
  133
+  case 2: k1 ^= tail[1] << 8;
  134
+  case 1: k1 ^= tail[0];
  135
+          k1 *= c1; k1 = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  136
+  };
  137
+
  138
+  //----------
  139
+  // finalization
  140
+
  141
+  h1 ^= len;  // mix the input length into the hash
  142
+
  143
+  h1 = fmix(h1);
  144
+
  145
+  *(uint32_t*)out = h1;  // exactly 4 bytes are written to out
  146
+} 
  147
+
  148
+//-----------------------------------------------------------------------------
  149
+
  150
+void MurmurHash3_x86_128 ( const void * key, const int len,  // hashes the len bytes starting at key
  151
+                           uint32_t seed, void * out )  // seed initializes all four lanes; four uint32_t are stored at out
  152
+{
  153
+  const uint8_t * data = (const uint8_t*)key;
  154
+  const int nblocks = len / 16;  // number of complete 16-byte blocks
  155
+
  156
+  uint32_t h1 = seed;
  157
+  uint32_t h2 = seed;
  158
+  uint32_t h3 = seed;
  159
+  uint32_t h4 = seed;
  160
+
  161
+  uint32_t c1 = 0x239b961b; 
  162
+  uint32_t c2 = 0xab0e9789;
  163
+  uint32_t c3 = 0x38b34ae5; 
  164
+  uint32_t c4 = 0xa1e38b93;
  165
+
  166
+  //----------
  167
+  // body: each 16-byte block is read as four 32-bit words, one per lane h1..h4
  168
+
  169
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);  // points just past the last complete block
  170
+
  171
+  for(int i = -nblocks; i; i++)  // i counts up from -nblocks to -1, so blocks are read in forward order
  172
+  {
  173
+    uint32_t k1 = getblock(blocks,i*4+0);
  174
+    uint32_t k2 = getblock(blocks,i*4+1);
  175
+    uint32_t k3 = getblock(blocks,i*4+2);
  176
+    uint32_t k4 = getblock(blocks,i*4+3);
  177
+
  178
+    k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  179
+
  180
+    h1 = ROTL32(h1,19); h1 += h2; h1 = h1*5+0x561ccd1b;
  181
+
  182
+    k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
  183
+
  184
+    h2 = ROTL32(h2,17); h2 += h3; h2 = h2*5+0x0bcaa747;
  185
+
  186
+    k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
  187
+
  188
+    h3 = ROTL32(h3,15); h3 += h4; h3 = h3*5+0x96cd1c35;
  189
+
  190
+    k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
  191
+
  192
+    h4 = ROTL32(h4,13); h4 += h1; h4 = h4*5+0x32ac3b17;
  193
+  }
  194
+
  195
+  //----------
  196
+  // tail: fold the remaining (len & 15) bytes into the lanes
  197
+
  198
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
  199
+
  200
+  uint32_t k1 = 0;
  201
+  uint32_t k2 = 0;
  202
+  uint32_t k3 = 0;
  203
+  uint32_t k4 = 0;
  204
+
  205
+  switch(len & 15)  // the cases have no break: each one falls through to the cases below it
  206
+  {
  207
+  case 15: k4 ^= tail[14] << 16;
  208
+  case 14: k4 ^= tail[13] << 8;
  209
+  case 13: k4 ^= tail[12] << 0;
  210
+           k4 *= c4; k4  = ROTL32(k4,18); k4 *= c1; h4 ^= k4;
  211
+
  212
+  case 12: k3 ^= tail[11] << 24;  // NOTE(review): the uint8_t operand promotes to int before << 24; a byte >= 0x80 reaches the sign bit
  213
+  case 11: k3 ^= tail[10] << 16;
  214
+  case 10: k3 ^= tail[ 9] << 8;
  215
+  case  9: k3 ^= tail[ 8] << 0;
  216
+           k3 *= c3; k3  = ROTL32(k3,17); k3 *= c4; h3 ^= k3;
  217
+
  218
+  case  8: k2 ^= tail[ 7] << 24;
  219
+  case  7: k2 ^= tail[ 6] << 16;
  220
+  case  6: k2 ^= tail[ 5] << 8;
  221
+  case  5: k2 ^= tail[ 4] << 0;
  222
+           k2 *= c2; k2  = ROTL32(k2,16); k2 *= c3; h2 ^= k2;
  223
+
  224
+  case  4: k1 ^= tail[ 3] << 24;
  225
+  case  3: k1 ^= tail[ 2] << 16;
  226
+  case  2: k1 ^= tail[ 1] << 8;
  227
+  case  1: k1 ^= tail[ 0] << 0;
  228
+           k1 *= c1; k1  = ROTL32(k1,15); k1 *= c2; h1 ^= k1;
  229
+  };
  230
+
  231
+  //----------
  232
+  // finalization
  233
+
  234
+  h1 ^= len; h2 ^= len; h3 ^= len; h4 ^= len;  // mix the input length into every lane
  235
+
  236
+  h1 += h2; h1 += h3; h1 += h4;  // cross-mix the lanes before the final mix
  237
+  h2 += h1; h3 += h1; h4 += h1;
  238
+
  239
+  h1 = fmix(h1);
  240
+  h2 = fmix(h2);
  241
+  h3 = fmix(h3);
  242
+  h4 = fmix(h4);
  243
+
  244
+  h1 += h2; h1 += h3; h1 += h4;  // cross-mix again after the final mix
  245
+  h2 += h1; h3 += h1; h4 += h1;
  246
+
  247
+  ((uint32_t*)out)[0] = h1;  // out must have room for four uint32_t (16 bytes)
  248
+  ((uint32_t*)out)[1] = h2;
  249
+  ((uint32_t*)out)[2] = h3;
  250
+  ((uint32_t*)out)[3] = h4;
  251
+}
  252
+
  253
+//-----------------------------------------------------------------------------
  254
+
  255
+void MurmurHash3_x64_128 ( const void * key, const int len,  // hashes the len bytes starting at key
  256
+                           const uint32_t seed, void * out )  // seed initializes both lanes; two uint64_t are stored at out
  257
+{
  258
+  const uint8_t * data = (const uint8_t*)key;
  259
+  const int nblocks = len / 16;  // number of complete 16-byte blocks
  260
+
  261
+  uint64_t h1 = seed;
  262
+  uint64_t h2 = seed;
  263
+
  264
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
  265
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
  266
+
  267
+  //----------
  268
+  // body: each 16-byte block is read as two 64-bit words, one per lane h1/h2
  269
+
  270
+  const uint64_t * blocks = (const uint64_t *)(data);  // unlike the x86 variants, indexed forward from the start
  271
+
  272
+  for(int i = 0; i < nblocks; i++)
  273
+  {
  274
+    uint64_t k1 = getblock(blocks,i*2+0);
  275
+    uint64_t k2 = getblock(blocks,i*2+1);
  276
+
  277
+    k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  278
+
  279
+    h1 = ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
  280
+
  281
+    k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
  282
+
  283
+    h2 = ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
  284
+  }
  285
+
  286
+  //----------
  287
+  // tail: fold the remaining (len & 15) bytes into the lanes
  288
+
  289
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
  290
+
  291
+  uint64_t k1 = 0;
  292
+  uint64_t k2 = 0;
  293
+
  294
+  switch(len & 15)  // the cases have no break: each one falls through to the cases below it
  295
+  {
  296
+  case 15: k2 ^= uint64_t(tail[14]) << 48;  // bytes are widened to uint64_t before shifting
  297
+  case 14: k2 ^= uint64_t(tail[13]) << 40;
  298
+  case 13: k2 ^= uint64_t(tail[12]) << 32;
  299
+  case 12: k2 ^= uint64_t(tail[11]) << 24;
  300
+  case 11: k2 ^= uint64_t(tail[10]) << 16;
  301
+  case 10: k2 ^= uint64_t(tail[ 9]) << 8;
  302
+  case  9: k2 ^= uint64_t(tail[ 8]) << 0;
  303
+           k2 *= c2; k2  = ROTL64(k2,33); k2 *= c1; h2 ^= k2;
  304
+
  305
+  case  8: k1 ^= uint64_t(tail[ 7]) << 56;
  306
+  case  7: k1 ^= uint64_t(tail[ 6]) << 48;
  307
+  case  6: k1 ^= uint64_t(tail[ 5]) << 40;
  308
+  case  5: k1 ^= uint64_t(tail[ 4]) << 32;
  309
+  case  4: k1 ^= uint64_t(tail[ 3]) << 24;
  310
+  case  3: k1 ^= uint64_t(tail[ 2]) << 16;
  311
+  case  2: k1 ^= uint64_t(tail[ 1]) << 8;
  312
+  case  1: k1 ^= uint64_t(tail[ 0]) << 0;
  313
+           k1 *= c1; k1  = ROTL64(k1,31); k1 *= c2; h1 ^= k1;
  314
+  };
  315
+
  316
+  //----------
  317
+  // finalization
  318
+
  319
+  h1 ^= len; h2 ^= len;  // mix the input length into both lanes
  320
+
  321
+  h1 += h2;
  322
+  h2 += h1;
  323
+
  324
+  h1 = fmix(h1);
  325
+  h2 = fmix(h2);
  326
+
  327
+  h1 += h2;
  328
+  h2 += h1;
  329
+
  330
+  ((uint64_t*)out)[0] = h1;  // out must have room for two uint64_t (16 bytes)
  331
+  ((uint64_t*)out)[1] = h2;
  332
+}
  333
+
  334
+//-----------------------------------------------------------------------------
37  vm/util/murmur_hash3.hpp
... ...
@@ -0,0 +1,37 @@
  1
+//-----------------------------------------------------------------------------
  2
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
  3
+// domain. The author hereby disclaims copyright to this source code.
  4
+
  5
+#ifndef _MURMURHASH3_H_
  6
+#define _MURMURHASH3_H_
  7
+
  8
+//-----------------------------------------------------------------------------
  9
+// Platform-specific functions and macros
  10
+
  11
+// Microsoft Visual Studio
  12
+
  13
+#if defined(_MSC_VER)
  14
+
  15
+typedef unsigned char uint8_t;
  16
+typedef unsigned long uint32_t;
  17
+typedef unsigned __int64 uint64_t;
  18
+
  19
+// Other compilers
  20
+
  21
+#else   // defined(_MSC_VER)
  22
+
  23
+#include <stdint.h>
  24
+
  25
+#endif // !defined(_MSC_VER)
  26
+
  27
+//-----------------------------------------------------------------------------
  28
+
  29
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
  30
+
  31
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
  32
+
  33
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
  34
+
  35
+//-----------------------------------------------------------------------------
  36
+
  37
+#endif // _MURMURHASH3_H_
3  vm/vm.cpp
@@ -237,7 +237,8 @@ namespace rubinius {
237 237
   }
238 238
 
239 239
   Symbol* VM::symbol(const char* str) {
240  
-    return shared.symbols.lookup(str, strlen(str));
  240
+    State state(this);  // local State built around this VM so the STATE-taking lookup overload can be called
  241
+    return shared.symbols.lookup(&state, str, strlen(str));  // str is treated as NUL-terminated (length from strlen)
241 242
   }
242 243
 
243 244
   Symbol* VM::symbol(std::string str) {

0 notes on commit 1d69526

Please sign in to comment.
Something went wrong with that request. Please try again.