Added test files for a composable hash.

scooby · Jun 16, 2012 · cae9607 · cae9607
1 parent b7f410e
commit cae9607
Show file tree

Hide file tree

Showing 3 changed files with 86 additions and 0 deletions.
diff --git a/python/comp_hash.py b/python/comp_hash.py
@@ -0,0 +1,64 @@
+"""
+Experimenting with a composable hash for a rope class.
+
+What I need is a hash whose value does not depend on the order in which the flats making up a rope were concatenated (i.e. on how the concatenations are grouped).
+
+Requirements:  h((a . b) . c) == h(a . (b . c))
+Preferred:     h(a . b) != h(b . a)
+Options:  define h(expr) = f(g(expr), len(expr)), so that the length further hashes the result
+
+
+"""
+
+def test(f, g, h):
+    """Hash three sample strings whole and piecewise and format the results.
+
+    f -- called with one string; returns that string's hash.
+    g -- called with two values returned by f; returns their combination.
+    h -- called with a hash and len(abc); returns the finalized hash.
+
+    Returns a multi-line string with one "label = value" row per computed
+    value. Every value is formatted with %x, so f, g and h must all return
+    integers.
+    """
+    a = "What I need is for different "
+    b = "orders of concatenations of different flats "
+    c = "to not affect the hash value."
+    ab = a + b
+    bc = b + c
+    abc = a + b + c
+    # Hash every piece and every concatenation directly from its text.
+    f_a = f(a)
+    f_b = f(b)
+    f_c = f(c)
+    f_ab = f(ab)
+    f_bc = f(bc)
+    f_abc = f(abc)
+    # Build the hash of abc from two sub-hashes, split at each possible point.
+    g_fa_fbc = g(f_a, f_bc)
+    g_fab_fc = g(f_ab, f_c)
+    # Finalize all three candidates with the same total length. For a
+    # composable scheme these three values come out equal.
+    h_fabc = h(f_abc, len(abc))
+    h_a_bc = h(g_fa_fbc, len(abc))
+    h_ab_c = h(g_fab_fc, len(abc))
+    # The labels are shorthand: the h rows omit the length argument that is
+    # actually passed to h.
+    return """f(a)             = %x
+f(b)             = %x
+f(c)             = %x
+f(ab)            = %x
+f(bc)            = %x
+f(abc)           = %x
+g(f(a), f(bc))   = %x
+g(f(ab), f(c))   = %x
+h(f(abc))        = %x
+h(g(f(a), f(bc)) = %x
+h(g(f(ab), f(c)) = %x""" % (f_a, f_b, f_c, f_ab, f_bc, f_abc,
+                            g_fa_fbc, g_fab_fc, h_fabc, h_a_bc,
+                            h_ab_c)
+
+# Modulus for the hash arithmetic below: 2**32 - 5 = 4294967291.
+# NOTE(review): this is the largest prime below 2**32, presumably chosen for
+# that reason -- confirm with the author.
+bound = (1 << 32) - 5
+# All-ones 32-bit value (0xFFFFFFFF); AND-ing with it keeps the low 32 bits.
+mask = (1 << 32) - 1
+
+def flat(string):
+    x = 1
+    for char in string:
+        x = (x * (2 + ord(char))) % bound
+    return x
+
+def mult(hash_a, hash_b):
+    return (hash_a * hash_b) % bound
+
+def blorg(num, base=1627):
+    """ Takes a value and gets something very large to xor against. """
+    return pow(base, num, bound)
+
+def final(num, length):
+    return (num ^ blorg(length + 4)) & mask
+
+print(test(flat, mult, final))
diff --git a/python/exp_dist.ods b/python/exp_dist.ods
diff --git a/python/powtest.py b/python/powtest.py
@@ -0,0 +1,22 @@
+""" Test how evenly distributed a base is over many values.
+
+Pushing the resulting values into bins, a simple linear regression over
+the bin counts finds no bias. Should probably do a chi^2 test.
+"""
+
+base = 1627
+top = 1 << 32
+
+bins = 1024
+vals = [0 for _ in range(0, bins)]
+
+num = 655360
+for p in range(num):
+    vals[pow(base, p, top) >> 22] += 1
+
+count = sum(vals)
+assert count == num
+avg = count / bins
+
+for i, c in enumerate(vals):
+    print("%d\t%d\t%d" % (i, c - avg, c))