Permalink
Browse files

added native murmur and fnv hashing.

  • Loading branch information...
1 parent 7a41a65 commit fecb101934641a358a39fdaf83b5ac47cf7b942a Cliff Moon committed Jan 31, 2009
Showing with 462 additions and 20 deletions.
  1. +3 −1 AUTHORS
  2. +13 −13 Rakefile
  3. +18 −0 c/fnv.c
  4. +3 −0 c/fnv.h
  5. +80 −0 c/fnv_drv.c
  6. +64 −0 c/murmur.c
  7. +3 −0 c/murmur.h
  8. +80 −0 c/murmur_drv.c
  9. +4 −2 elibs/dynomite.erl
  10. +79 −0 elibs/fnv.erl
  11. +4 −3 elibs/lib_misc.erl
  12. +79 −0 elibs/murmur.erl
  13. +32 −1 etest/lib_misc_test.erl
View
@@ -4,4 +4,6 @@ Authors of dynomite.
Todd Lipcon <todd@lipcon.org>
Jason Pellerin <jpellerin@gmail.com>
-The plumbing for the couch storage engine is from the couchdb project: http://incubator.apache.org/couchdb/
+The plumbing for the couch storage engine is from the couchdb project: http://incubator.apache.org/couchdb/
+
+MurmurHash2 is courtesy of Austin Appleby
View
@@ -2,10 +2,15 @@
require 'fileutils'
require 'rubygems'
require 'rake'
+require 'rake/clean'
ERLC_TEST_FLAGS = "-pa deps/eunit/ebin -I deps/eunit/include -DTEST"
ERLC_FLAGS = "+debug_info -W0 -I include -pa deps/mochiweb/ebin -I deps/mochiweb/include -pa deps/rfc4627/ebin -I deps/rfc4627/include -I gen-erl/ -o ebin"
+CLEAN.include("ebin/*.beam")
+CLEAN.include("c/*.o")
+CLEAN.include("lib/*.so")
+
task :default => [:build_deps, :build_c_drivers] do
puts "building #{ENV['TEST']}"
sh "erlc #{ERLC_FLAGS} #{ENV['TEST'] ? ERLC_TEST_FLAGS : ''} elibs/*.erl gen-erl/*.erl"
@@ -18,15 +23,6 @@ task :default => [:build_deps, :build_c_drivers] do
# end
end
-task :clean_test do
- sh "rm -rf etest/log/*"
-end
-
-task :clean => [:clean_test] do
- sh "rm -f ebin/*.beam"
- sh "rm -f etest/*.beam"
-end
-
task :test_env => [:build_test_deps, :test_config] do
puts "test env"
ENV['TEST'] = 'test'
@@ -115,7 +111,7 @@ task :c_env do
ERLDIR = `awk -F= '/ROOTDIR=/ { print $2; exit; }' #{ERL}`.chomp
ERTSBASE = `erl -noshell -noinput -eval 'io:format (\"~s\", [[ \"/\" ++ filename:join (lists:reverse ([ \"erts-\" ++ erlang:system_info (version) | tl (lists:reverse (string:tokens (code:lib_dir (), \"/\"))) ])) ]]).' -s erlang halt `.chomp
ERL_INTERFACE = `ls #{ERLDIR}/lib`.split("\n").grep(/erl_interface/).last
- CPPFLAGS = "-I #{ERTSBASE}/include -I #{ERLDIR}/lib/#{ERL_INTERFACE}/include -Wall -fPIC -I./"
+ CPPFLAGS = "-I #{ERTSBASE}/include -I #{ERLDIR}/lib/#{ERL_INTERFACE}/include -Wall -g -O2 -fPIC -I./"
LIBEI = "#{ERLDIR}/lib/#{ERL_INTERFACE}/lib/libei.a"
if `uname` =~ /Linux/
LDFLAGS = " -shared"
@@ -164,14 +160,18 @@ DRIVERS = FileList['c/*_drv.c'].pathmap("%{c,lib}X.so")
directory "lib"
-rule ".so" => '%{lib,c}X.o' do |t|
- puts "cc #{CPPFLAGS} #{LDFLAGS} -o #{t.name} #{t.source} #{LIBEI}"
- sh "cc #{CPPFLAGS} #{LDFLAGS} -o #{t.name} #{t.source} #{LIBEI}"
+# task "lib/murmur_drv.c" => ["c/murmur.o"]
+
+rule ".so" => ['%{lib,c}X.o', 'c/murmur.o', 'c/fnv.o'] do |t|
+ puts "cc #{CPPFLAGS} #{LDFLAGS} -o #{t.name} #{t.prerequisites.join(' ')} #{LIBEI}"
+ sh "cc #{CPPFLAGS} #{LDFLAGS} -o #{t.name} #{t.prerequisites.join(' ')} #{LIBEI}"
end
rule ".o" => ".c" do |t|
puts "cc #{CPPFLAGS} -c -o #{t.name} #{t.source}"
sh "cc #{CPPFLAGS} -c -o #{t.name} #{t.source}"
end
+
+
task :build_c_drivers => [:c_env, "lib"] + DRIVERS
View
18 c/fnv.c
@@ -0,0 +1,18 @@
+
+#include "fnv.h"
+
+#define FNV_PRIME 16777619
+#define MAX 4294967296
+
+/*
+ * Folds `length` bytes starting at `key` into a running hash, one byte at a
+ * time: XOR the byte in, multiply by FNV_PRIME, reduce modulo MAX (2^32).
+ *
+ * `seed` is the starting hash value, so the result of one call can be passed
+ * as the seed of the next to hash data that arrives in several pieces.
+ * A `length` of zero or less leaves the seed untouched and returns it.
+ */
+unsigned int fnv_hash(const void* key, int length, unsigned int seed) {
+  const unsigned char *cursor = (const unsigned char *) key;
+  unsigned int acc = seed;
+  int remaining;
+
+  for (remaining = length; remaining > 0; remaining--) {
+    acc ^= *cursor++;
+    acc = (acc * FNV_PRIME) % MAX;
+  }
+  return acc;
+}
View
@@ -0,0 +1,3 @@
+
+
+unsigned int fnv_hash(const void* key, int length, unsigned int seed);
View
@@ -0,0 +1,80 @@
+
+#include "fnv.h"
+#include <erl_driver.h>
+#include <ei.h>
+#include <stdio.h>
+
+
+static ErlDrvData init(ErlDrvPort port, char *cmd);
+static void stop(ErlDrvData handle);
+static void outputv(ErlDrvData handle, ErlIOVec *ev);
+static void send_hash(ErlDrvPort port, unsigned long hash);
+
+static ErlDrvData init(ErlDrvPort port, char *cmd) { // installed in the second slot of fnv_driver_entry; cmd is not used
+ return (ErlDrvData) port; // no per-port state is allocated: the port handle itself becomes the driver data
+}
+
+static void stop(ErlDrvData handle) { // installed in the third slot of fnv_driver_entry
+ // nothing to release: init() allocated no per-port state
+}
+
+/*
+ * outputv callback: hashes the payload of one port command with fnv_hash()
+ * and replies to the port owner through send_hash().
+ *
+ * Expected layout of `ev`:
+ *   iov[0]   - not read here (the original code starts at slot 1)
+ *   iov[1]   - external-term-format version byte + unsigned integer seed,
+ *              optionally followed by the first bytes of the data to hash
+ *   iov[2..] - further chunks of the data to hash
+ *
+ * The hash is chained chunk by chunk: each call to fnv_hash() is seeded with
+ * the result of the previous one, starting from the decoded seed.
+ *
+ * If slot 1 is missing or the seed cannot be decoded, the port is failed
+ * with `badarg` instead of hashing from an uninitialised seed.
+ */
+static void outputv(ErlDrvData handle, ErlIOVec *ev) {
+  ErlDrvPort port = (ErlDrvPort) handle;
+  SysIOVec *bin;
+  int i, index = 0;
+  unsigned long seed = 0;
+  unsigned int hash;
+
+  // Without slot 1 there is no seed to decode; reading it would run off the
+  // end of the vector.
+  if (ev->vsize < 2) {
+    driver_failure_atom(port, "badarg");
+    return;
+  }
+  bin = &ev->iov[1];
+
+  // Both decoders return non-zero on malformed input and leave `seed` unset.
+  if (ei_decode_version(bin->iov_base, &index, NULL) != 0 ||
+      ei_decode_ulong(bin->iov_base, &index, &seed) != 0 ||
+      (size_t) index > bin->iov_len) {
+    driver_failure_atom(port, "badarg");
+    return;
+  }
+
+  hash = (unsigned int) seed;
+  // Any bytes after the encoded seed in the same chunk are payload.
+  if ((size_t) index < bin->iov_len) {
+    hash = fnv_hash(&bin->iov_base[index], (int) (bin->iov_len - index), hash);
+  }
+  for (i = 2; i < ev->vsize; i++) {
+    bin = &ev->iov[i];
+    // NOTE(review): fnv_hash() takes an int length; chunks are assumed to be
+    // smaller than INT_MAX bytes - confirm against callers.
+    hash = fnv_hash(bin->iov_base, (int) bin->iov_len, hash);
+  }
+  send_hash(port, hash);
+}
+
+/*
+ * Encodes `hash` as an unsigned integer in the external term format
+ * (version byte first) and sends the encoded bytes to the port owner with
+ * driver_output().
+ *
+ * If the reply buffer cannot be allocated or the value cannot be encoded,
+ * the port is failed with `enomem` rather than sending from an invalid
+ * buffer. The buffer is always released before returning.
+ */
+static void send_hash(ErlDrvPort port, unsigned long hash) {
+  ei_x_buff x;
+
+  if (ei_x_new_with_version(&x) != 0 || ei_x_encode_ulong(&x, hash) != 0) {
+    // ei_x_free() tolerates a buffer whose allocation failed.
+    ei_x_free(&x);
+    driver_failure_atom(port, "enomem");
+    return;
+  }
+  driver_output(port, x.buff, x.index);
+  ei_x_free(&x);
+}
+
+static ErlDrvEntry fnv_driver_entry = { /* callback table returned by DRIVER_INIT(fnv_driver) */
+ NULL, /* init */
+ init, /* start (per ErlDrvEntry slot order): local init(), returns the port as driver data */
+ stop, /* stop: local no-op stop() */
+ NULL, /* output */
+ NULL, /* ready_input */
+ NULL, /* ready_output */
+ "fnv_drv", /* the name of the driver */
+ NULL, /* finish */
+ NULL, /* handle */
+ NULL, /* control */
+ NULL, /* timeout */
+ outputv, /* outputv: hashes each port command and replies with the hash */
+ NULL, /* ready_async */
+ NULL, /* flush */
+ NULL, /* call */
+ NULL /* event */
+};
+
+DRIVER_INIT(fnv_driver) { // driver entry point declared through the erl_driver DRIVER_INIT macro
+ return &fnv_driver_entry; // hand the emulator the static callback table for this driver
+}
View
@@ -0,0 +1,64 @@
+//-----------------------------------------------------------------------------
+// MurmurHash2, by Austin Appleby
+
+// Note - This code makes an assumption about how your machine behaves -
+
+// 1. sizeof(int) == 4
+
+// (The input no longer has to be 4-byte aligned: each word is copied out of
+// the key byte by byte, so `key` may point at any address.)
+
+// And it has a few limitations -
+
+// 1. It will not work incrementally.
+// 2. It will not produce the same results on little-endian and big-endian
+// machines.
+
+// Parameters:
+//   key  - start of the bytes to hash
+//   len  - number of bytes to hash; values below 1 hash no bytes
+//   seed - initial value mixed into the hash
+// Returns the 32-bit hash of the `len` bytes at `key`.
+
+unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed )
+{
+  // 'm' and 'r' are mixing constants generated offline.
+  // They're not really 'magic', they just happen to work well.
+
+  const unsigned int m = 0x5bd1e995;
+  const int r = 24;
+
+  // Initialize the hash to a 'random' value
+
+  unsigned int h = seed ^ len;
+
+  // Mix 4 bytes at a time into the hash
+
+  const unsigned char * data = (const unsigned char *)key;
+
+  while(len >= 4)
+  {
+    // Copy the next word in memory order instead of dereferencing
+    // (unsigned int *)data: same value on every byte order, but safe for
+    // unaligned keys and free of the aliasing violation.
+    unsigned int k;
+    unsigned char * kbytes = (unsigned char *)&k;
+
+    kbytes[0] = data[0];
+    kbytes[1] = data[1];
+    kbytes[2] = data[2];
+    kbytes[3] = data[3];
+
+    k *= m;
+    k ^= k >> r;
+    k *= m;
+
+    h *= m;
+    h ^= k;
+
+    data += 4;
+    len -= 4;
+  }
+
+  // Handle the last few bytes of the input array.
+  // The cases fall through on purpose: 3 trailing bytes mix in bytes 2, 1, 0.
+
+  switch(len)
+  {
+  case 3: h ^= data[2] << 16; /* fall through */
+  case 2: h ^= data[1] << 8;  /* fall through */
+  case 1: h ^= data[0];
+          h *= m;
+  };
+
+  // Do a few final mixes of the hash to ensure the last few
+  // bytes are well-incorporated.
+
+  h ^= h >> 13;
+  h *= m;
+  h ^= h >> 15;
+
+  return h;
+}
View
@@ -0,0 +1,3 @@
+
+
+unsigned int MurmurHash2 ( const void * key, int len, unsigned int seed );
View
@@ -0,0 +1,80 @@
+
+#include "murmur.h"
+#include <erl_driver.h>
+#include <ei.h>
+#include <stdio.h>
+
+
+static ErlDrvData init(ErlDrvPort port, char *cmd);
+static void stop(ErlDrvData handle);
+static void outputv(ErlDrvData handle, ErlIOVec *ev);
+static void send_hash(ErlDrvPort port, unsigned long hash);
+
+static ErlDrvData init(ErlDrvPort port, char *cmd) { // installed in the second slot of murmur_driver_entry; cmd is not used
+ return (ErlDrvData) port; // no per-port state is allocated: the port handle itself becomes the driver data
+}
+
+static void stop(ErlDrvData handle) { // installed in the third slot of murmur_driver_entry
+ // nothing to release: init() allocated no per-port state
+}
+
+/*
+ * outputv callback: hashes the payload of one port command with
+ * MurmurHash2() and replies to the port owner through send_hash().
+ *
+ * Expected layout of `ev`:
+ *   iov[0]   - not read here (the original code starts at slot 1)
+ *   iov[1]   - external-term-format version byte + unsigned integer seed,
+ *              optionally followed by the first bytes of the data to hash
+ *   iov[2..] - further chunks of the data to hash
+ *
+ * The hash is chained chunk by chunk: each call to MurmurHash2() is seeded
+ * with the result of the previous one, starting from the decoded seed.
+ * NOTE(review): MurmurHash2 is not an incremental hash, so the result
+ * depends on how the payload is split across iovec entries - confirm that
+ * callers always deliver a given key with the same chunking.
+ *
+ * If slot 1 is missing or the seed cannot be decoded, the port is failed
+ * with `badarg` instead of hashing from an uninitialised seed.
+ */
+static void outputv(ErlDrvData handle, ErlIOVec *ev) {
+  ErlDrvPort port = (ErlDrvPort) handle;
+  SysIOVec *bin;
+  int i, index = 0;
+  unsigned long seed = 0;
+  unsigned int hash;
+
+  // Without slot 1 there is no seed to decode; reading it would run off the
+  // end of the vector.
+  if (ev->vsize < 2) {
+    driver_failure_atom(port, "badarg");
+    return;
+  }
+  bin = &ev->iov[1];
+
+  // Both decoders return non-zero on malformed input and leave `seed` unset.
+  if (ei_decode_version(bin->iov_base, &index, NULL) != 0 ||
+      ei_decode_ulong(bin->iov_base, &index, &seed) != 0 ||
+      (size_t) index > bin->iov_len) {
+    driver_failure_atom(port, "badarg");
+    return;
+  }
+
+  hash = (unsigned int) seed;
+  // Any bytes after the encoded seed in the same chunk are payload.
+  if ((size_t) index < bin->iov_len) {
+    hash = MurmurHash2(&bin->iov_base[index], (int) (bin->iov_len - index), hash);
+  }
+  for (i = 2; i < ev->vsize; i++) {
+    bin = &ev->iov[i];
+    // NOTE(review): MurmurHash2() takes an int length; chunks are assumed to
+    // be smaller than INT_MAX bytes - confirm against callers.
+    hash = MurmurHash2(bin->iov_base, (int) bin->iov_len, hash);
+  }
+  send_hash(port, hash);
+}
+
+/*
+ * Encodes `hash` as an unsigned integer in the external term format
+ * (version byte first) and sends the encoded bytes to the port owner with
+ * driver_output().
+ *
+ * If the reply buffer cannot be allocated or the value cannot be encoded,
+ * the port is failed with `enomem` rather than sending from an invalid
+ * buffer. The buffer is always released before returning.
+ */
+static void send_hash(ErlDrvPort port, unsigned long hash) {
+  ei_x_buff x;
+
+  if (ei_x_new_with_version(&x) != 0 || ei_x_encode_ulong(&x, hash) != 0) {
+    // ei_x_free() tolerates a buffer whose allocation failed.
+    ei_x_free(&x);
+    driver_failure_atom(port, "enomem");
+    return;
+  }
+  driver_output(port, x.buff, x.index);
+  ei_x_free(&x);
+}
+
+static ErlDrvEntry murmur_driver_entry = { /* callback table returned by DRIVER_INIT(murmur_driver) */
+ NULL, /* init */
+ init, /* start (per ErlDrvEntry slot order): local init(), returns the port as driver data */
+ stop, /* stop: local no-op stop() */
+ NULL, /* output */
+ NULL, /* ready_input */
+ NULL, /* ready_output */
+ "murmur_drv", /* the name of the driver */
+ NULL, /* finish */
+ NULL, /* handle */
+ NULL, /* control */
+ NULL, /* timeout */
+ outputv, /* outputv: hashes each port command and replies with the hash */
+ NULL, /* ready_async */
+ NULL, /* flush */
+ NULL, /* call */
+ NULL /* event */
+};
+
+DRIVER_INIT(murmur_driver) { // driver entry point declared through the erl_driver DRIVER_INIT macro
+ return &murmur_driver_entry; // hand the emulator the static callback table for this driver
+}
View
@@ -8,15 +8,17 @@ start() ->
application:load(dynomite),
% spawn(fun() -> collect_loop() end),
crypto:start(),
+ murmur:start(),
+ fnv:start(),
application:start(dynomite).
collect_loop() ->
process_flag(trap_exit, true),
- Filename = io_lib:format("/home/cliff/~w-dyn.dump", [lib_misc:now_int()]),
+ Filename = io_lib:format("/home/cliff/dumps/~w-dyn.dump", [lib_misc:now_int()]),
sys_info(Filename),
receive
nothing -> ok
- after 15000 -> collect_loop()
+ after 5000 -> collect_loop()
end.
sys_info(Filename) ->
Oops, something went wrong.

0 comments on commit fecb101

Please sign in to comment.