Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

Fork pgvihash and convert to return int64.

  • Loading branch information...
commit c12588b0357783509809dcdc50741293412f2c07 0 parents
@theory authored
13 INSTALL
@@ -0,0 +1,13 @@
+Simple installation:
+
+ make
+ make install
+
+If you have multiple PostgreSQL versions installed, pass the variable
+PG_CONFIG on the make command line, pointing it at the pg_config program
+of the installation you want to build against. For example, in a Debian setup:
+
+ make PG_CONFIG=/usr/lib/postgresql/8.4/bin/pg_config
+ make install PG_CONFIG=/usr/lib/postgresql/8.4/bin/pg_config
+
+To run the test suite, use make installcheck.
22 LICENSE
@@ -0,0 +1,22 @@
+Portions Copyright (c) 2010, Peter Eisentraut
+
+Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+
+Portions Copyright (c) 1994, The Regents of the University of California
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose, without fee, and without a written agreement
+is hereby granted, provided that the above copyright notice and this
+paragraph and the following two paragraphs appear in all copies.
+
+IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
+DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
+LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
+DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
+INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
+AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
+ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
+PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
8 Makefile
@@ -0,0 +1,8 @@
+# PGXS build description for the vihash module.
+
+# Shared library to build (from vihash.c).
+MODULES = vihash
+# Install script; it is generated at build time (source: vihash.sql.in).
+DATA_built = vihash.sql
+# Script installed as-is.
+DATA = uninstall_vihash.sql
+# Regression test case run by "make installcheck".
+REGRESS = vihash
+
+# Conditional assignment: a PG_CONFIG exported in the environment is honored
+# (a plain "=" here would silently override it).  A value given on the make
+# command line still takes precedence, and pg_config from PATH remains the
+# default when nothing is set.
+PG_CONFIG ?= pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
45 README
@@ -0,0 +1,45 @@
+Version-independent hash functions for PostgreSQL
+=================================================
+
+This package contains a PostgreSQL extension function vihashtext()
+that is similar to the built-in hashtext() but is guaranteed not to
+change over major releases, so that it can be used safely for
+partitioning or sharding, for example with PL/Proxy, in installations
+involving multiple PostgreSQL major versions.
+
+A PL/Proxy function usually looks something like this:
+
+CREATE FUNCTION ...
+LANGUAGE plproxy
+AS $$
+ CLUSTER '...';
+ RUN ON hashtext(arg);
+ SELECT ...
+$$;
+
+The problem is that the hashtext() function is an implementation
+detail of PostgreSQL and may produce different results for the same
+input in different PostgreSQL major versions. When that happens, you
+cannot upgrade your PL/Proxy installation to a new major PostgreSQL
+version without redistributing the data. A change like this has in
+particular happened between PostgreSQL 8.3 and 8.4. See also
+<http://petereisentraut.blogspot.com/2009/10/attention-plproxy-users-hash-functions.html>.
+
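+This package basically just wraps the hashtext() function from
+PostgreSQL 8.3 into a separate module. This way, you have a hash
+function that is backward compatible to the built-in hashtext() from
+PostgreSQL 8.0 through 8.3, and that can be used without concerns in
+the future. If you are already using PostgreSQL 8.4 or later, you
+cannot change to use this package without redistributing your data.
+
+Note that this fork changes vihashtext() to return a 64-bit bigint
+instead of a 32-bit int, so its results differ from those of the
+original pgvihash function; the compatibility described above applies
+to the original package, and switching to this fork likewise requires
+redistributing your data.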
+
+To use it, install it into the database by running
+
+\i vihash.sql
+
+and replace
+
+ RUN ON hashtext(arg);
+
+by
+
+ RUN ON vihashtext(arg);
35 expected/vihash.out
@@ -0,0 +1,35 @@
+\i vihash.sql
+SET search_path = public;
+CREATE OR REPLACE FUNCTION vihashtext(text) RETURNS int
+AS '$libdir/vihash'
+STRICT IMMUTABLE
+LANGUAGE C;
+CREATE TABLE values (x text);
+COPY values FROM stdin;
+SELECT x, vihashtext(x) FROM values ORDER BY x;
+ x | vihashtext
+------------------------------------------+-------------
+ 0daa7934b6c9652fe55bd95ba58d7b6f87bb459a | -1924906263
+ 2315cad49156618409f050c3e00fb0c85111b512 | 871673209
+ 2de24e276386a72b5992746de8abc2e47ac47cae | 758286169
+ 42c2b8a22cab3fc71b7891738deb5179fa8acf6e | 715258594
+ 4d2a6da43e8f6a686c5043bc1d4fdfa4439ae009 | -1722713656
+ 51108fa5f34b112a6f067effdbe8169c7ed65fa8 | -117518199
+ 51d064e012e2d73fc67020c27471fd913688d4f6 | -1767487240
+ 5e68fbd76d795e29c103c822b8e15bc38c4dc7d2 | 284919874
+ 60c94aae40a4b59e78d3fceb62402d44512d0e08 | -324638288
+ 8f274e341a12737612d213fca68d75e0d157d9af | 811040808
+ 94a57e57d1cd446830dad3976c8030caa6b27f30 | 1165161974
+ 9d503bde29403f6b9bdba366c2e1cbed6f37001d | 1932212666
+ a069aff5802a60049555b7e814d9a55274d86b85 | 1803707486
+ a32d67aad65b217aebb73e0b0051576ab8ae2202 | -92880232
+ acc8a7e79936892d202418e2241c68767b8934ce | 713166768
+ b093f0ebcf84556b4c6c25293c3dea9034c47134 | -961334026
+ cb2b985b634593cb1d2a8eb624cd39cbc3b157fe | -1402537744
+ dc68143621155fedccfa35a6b3d1d20f4b7a06ec | 595597921
+ e864880b82aa65d84dcf854b63bc11f345915984 | 1078029294
+ e95834d58502eef64493655abc380bd7bdc5092b | -1172968319
+ f7ebce0490a3e3c4748a1affe2bc31c3f4937e60 | -766249494
+ f9cad6f96effaa1b9d52c42d380d90650c584874 | 5979018
+(22 rows)
+
35 results/vihash.out
@@ -0,0 +1,35 @@
+\i vihash.sql
+SET search_path = public;
+CREATE OR REPLACE FUNCTION vihashtext(text) RETURNS bigint
+AS '$libdir/vihash'
+STRICT IMMUTABLE
+LANGUAGE C;
+CREATE TABLE values (x text);
+COPY values FROM stdin;
+SELECT x, vihashtext(x) FROM values ORDER BY x;
+ x | vihashtext
+------------------------------------------+----------------------
+ 0daa7934b6c9652fe55bd95ba58d7b6f87bb459a | 3034666371711206838
+ 2315cad49156618409f050c3e00fb0c85111b512 | 388078559820946772
+ 2de24e276386a72b5992746de8abc2e47ac47cae | 9135372853080328717
+ 42c2b8a22cab3fc71b7891738deb5179fa8acf6e | -918990046352584740
+ 4d2a6da43e8f6a686c5043bc1d4fdfa4439ae009 | -1678654894378952720
+ 51108fa5f34b112a6f067effdbe8169c7ed65fa8 | -6119007386423907673
+ 51d064e012e2d73fc67020c27471fd913688d4f6 | 1521621210454236105
+ 5e68fbd76d795e29c103c822b8e15bc38c4dc7d2 | 5853740058471931571
+ 60c94aae40a4b59e78d3fceb62402d44512d0e08 | 8953876759192132724
+ 8f274e341a12737612d213fca68d75e0d157d9af | 5125388575237978742
+ 94a57e57d1cd446830dad3976c8030caa6b27f30 | -3967839149564995146
+ 9d503bde29403f6b9bdba366c2e1cbed6f37001d | 2378079294917463068
+ a069aff5802a60049555b7e814d9a55274d86b85 | 2146377480881039868
+ a32d67aad65b217aebb73e0b0051576ab8ae2202 | 5763434209275758087
+ acc8a7e79936892d202418e2241c68767b8934ce | -6180389030194858780
+ b093f0ebcf84556b4c6c25293c3dea9034c47134 | -6875526042952961938
+ cb2b985b634593cb1d2a8eb624cd39cbc3b157fe | 8976487453099947332
+ dc68143621155fedccfa35a6b3d1d20f4b7a06ec | 3067258912523554511
+ e864880b82aa65d84dcf854b63bc11f345915984 | -3450594930267918689
+ e95834d58502eef64493655abc380bd7bdc5092b | -6324448229207784613
+ f7ebce0490a3e3c4748a1affe2bc31c3f4937e60 | -4817165975068141520
+ f9cad6f96effaa1b9d52c42d380d90650c584874 | 2241267823073115581
+(22 rows)
+
29 sql/vihash.sql
@@ -0,0 +1,29 @@
+\i vihash.sql
+
+CREATE TABLE values (x text); -- fixture table holding the sample keys loaded below
+COPY values FROM stdin;
+0daa7934b6c9652fe55bd95ba58d7b6f87bb459a
+2315cad49156618409f050c3e00fb0c85111b512
+2de24e276386a72b5992746de8abc2e47ac47cae
+42c2b8a22cab3fc71b7891738deb5179fa8acf6e
+4d2a6da43e8f6a686c5043bc1d4fdfa4439ae009
+51108fa5f34b112a6f067effdbe8169c7ed65fa8
+51d064e012e2d73fc67020c27471fd913688d4f6
+5e68fbd76d795e29c103c822b8e15bc38c4dc7d2
+60c94aae40a4b59e78d3fceb62402d44512d0e08
+8f274e341a12737612d213fca68d75e0d157d9af
+94a57e57d1cd446830dad3976c8030caa6b27f30
+9d503bde29403f6b9bdba366c2e1cbed6f37001d
+a069aff5802a60049555b7e814d9a55274d86b85
+a32d67aad65b217aebb73e0b0051576ab8ae2202
+acc8a7e79936892d202418e2241c68767b8934ce
+b093f0ebcf84556b4c6c25293c3dea9034c47134
+cb2b985b634593cb1d2a8eb624cd39cbc3b157fe
+dc68143621155fedccfa35a6b3d1d20f4b7a06ec
+e864880b82aa65d84dcf854b63bc11f345915984
+e95834d58502eef64493655abc380bd7bdc5092b
+f7ebce0490a3e3c4748a1affe2bc31c3f4937e60
+f9cad6f96effaa1b9d52c42d380d90650c584874
+\.
+
+SELECT x, vihashtext(x) FROM values ORDER BY x; -- hash every key; ORDER BY x gives a fixed row order
1  uninstall_vihash.sql
@@ -0,0 +1 @@
+-- Uninstall script for vihash.
+--
+-- Adjust this setting to control where the objects get dropped.  It must
+-- name the schema the install script created the function in (that script
+-- sets search_path = public); without it the DROP below resolves the name
+-- through whatever search_path the session happens to have.
+SET search_path = public;
+
+DROP FUNCTION IF EXISTS vihashtext(text);
348 vihash.c
@@ -0,0 +1,348 @@
+/*
+ * vihash
+ *
+ * Portions Copyright (c) 2010, Peter Eisentraut
+ * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ */
+
+#include "postgres.h"
+#include "fmgr.h"
+
+PG_MODULE_MAGIC;
+
+
+PG_FUNCTION_INFO_V1(vihashtext);
+
+Datum vihashtext(PG_FUNCTION_ARGS);
+Datum vihash_any(register const unsigned char *k, register int keylen);
+
+
+/*
+ * vihashtext -- SQL-callable entry point: hash one text value.
+ *
+ * Fetches argument 0 as a (possibly packed) text datum, passes its payload
+ * bytes and payload length (header excluded) to vihash_any(), and returns
+ * the Datum that vihash_any() produced unchanged.
+ */
+Datum
+vihashtext(PG_FUNCTION_ARGS)
+{
+    text       *input = PG_GETARG_TEXT_PP(0);
+    const unsigned char *payload = (const unsigned char *) VARDATA_ANY(input);
+    int         payload_len = VARSIZE_ANY_EXHDR(input);
+    Datum       hash = vihash_any(payload, payload_len);
+
+    /* Release the detoasted copy of the argument, if one had to be made. */
+    PG_FREE_IF_COPY(input, 0);
+
+    PG_RETURN_DATUM(hash);
+}
+
+/*
+ * This hash function was written by Bob Jenkins
+ * (bob_jenkins@burtleburtle.net), and superficially adapted
+ * for PostgreSQL by Neil Conway. For more information on this
+ * hash function, see http://burtleburtle.net/bob/hash/doobs.html,
+ * or Bob's article in Dr. Dobb's Journal, Sept. 1997.
+ *
+ * In the current code, we have adopted Bob's 2006 update of his hash
+ * function to fetch the data a word at a time when it is suitably aligned.
+ * This makes for a useful speedup, at the cost of having to maintain
+ * four code paths (aligned vs unaligned, and little-endian vs big-endian).
+ * It also uses two separate mixing functions mix() and final(), instead
+ * of a slower multi-purpose function.
+ */
+
+/*
+ * Mask selecting the low address bits that are nonzero when a pointer is
+ * not aligned on a uint32 boundary; (address & UINT32_ALIGN_MASK) == 0
+ * therefore means the address is uint32-aligned.
+ */
+#define UINT32_ALIGN_MASK (sizeof(uint32) - 1)
+
+/*
+ * Rotate a uint32 value left by k bits.  Both x and k are evaluated more
+ * than once, so pass only side-effect-free expressions.  Every use in this
+ * file passes a constant k between 4 and 25; k must stay within 1..31
+ * because shifting a 32-bit value by 32 bits is undefined in C.
+ */
+#define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
+
+/*----------
+ * mix -- mix 3 32-bit values reversibly.
+ *
+ * vihash_any() invokes this after adding each full 12-byte chunk of the
+ * key into (a,b,c). The macro updates its three arguments in place and
+ * names each of them several times, so they must be plain uint32
+ * variables, not expressions with side effects.
+ *
+ * This is reversible, so any information in (a,b,c) before mix() is
+ * still in (a,b,c) after mix().
+ *
+ * If four pairs of (a,b,c) inputs are run through mix(), or through
+ * mix() in reverse, there are at least 32 bits of the output that
+ * are sometimes the same for one pair and different for another pair.
+ * This was tested for:
+ * * pairs that differed by one bit, by two bits, in any combination
+ * of top bits of (a,b,c), or in any combination of bottom bits of
+ * (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ * is commonly produced by subtraction) looks like a single 1-bit
+ * difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ * all zero plus a counter that starts at zero.
+ *
+ * This does not achieve avalanche. There are input bits of (a,b,c)
+ * that fail to affect some output bits of (a,b,c), especially of a. The
+ * most thoroughly mixed value is c, but it doesn't really even achieve
+ * avalanche in c.
+ *
+ * This allows some parallelism. Read-after-writes are good at doubling
+ * the number of bits affected, so the goal of mixing pulls in the opposite
+ * direction from the goal of parallelism. I did what I could. Rotates
+ * seem to cost as much as shifts on every machine I could lay my hands on,
+ * and rotates are much kinder to the top and bottom bits, so I used rotates.
+ *----------
+ */
+#define mix(a,b,c) \
+{ \
+ a -= c; a ^= rot(c, 4); c += b; \
+ b -= a; b ^= rot(a, 6); a += c; \
+ c -= b; c ^= rot(b, 8); b += a; \
+ a -= c; a ^= rot(c,16); c += b; \
+ b -= a; b ^= rot(a,19); a += c; \
+ c -= b; c ^= rot(b, 4); b += a; \
+}
+
+/*----------
+ * final -- final mixing of 3 32-bit values (a,b,c) into c
+ *
+ * vihash_any() invokes this once, after the whole key has been added into
+ * (a,b,c). The macro updates all three arguments in place; note that
+ * vihash_any() builds its 64-bit result from both b and c afterwards,
+ * whereas the description below was written with c as the only output.
+ *
+ * Pairs of (a,b,c) values differing in only a few bits will usually
+ * produce values of c that look totally different. This was tested for
+ * * pairs that differed by one bit, by two bits, in any combination
+ * of top bits of (a,b,c), or in any combination of bottom bits of
+ * (a,b,c).
+ * * "differ" is defined as +, -, ^, or ~^. For + and -, I transformed
+ * the output delta to a Gray code (a^(a>>1)) so a string of 1's (as
+ * is commonly produced by subtraction) looks like a single 1-bit
+ * difference.
+ * * the base values were pseudorandom, all zero but one bit set, or
+ * all zero plus a counter that starts at zero.
+ *
+ * The use of separate functions for mix() and final() allows for a
+ * substantial performance increase since final() does not need to
+ * do well in reverse, but it does need to affect all output bits.
+ * mix(), on the other hand, does not need to affect all output
+ * bits (affecting 32 bits is enough). The original hash function had
+ * a single mixing operation that had to satisfy both sets of requirements
+ * and was slower as a result.
+ *----------
+ */
+#define final(a,b,c) \
+{ \
+ c ^= b; c -= rot(b,14); \
+ a ^= c; a -= rot(c,11); \
+ b ^= a; b -= rot(a,25); \
+ c ^= b; c -= rot(b,16); \
+ a ^= c; a -= rot(c, 4); \
+ b ^= a; b -= rot(a,14); \
+ c ^= b; c -= rot(b,24); \
+}
+
+/*
+ * vihash_any() -- hash a variable-length key into a 64-bit value
+ * k : the key (the unaligned variable-length array of bytes)
+ * keylen : the length of the key, counting by bytes
+ *
+ * Returns an int64 Datum: the final value of c occupies the upper 32 bits
+ * and the final value of b the lower 32 bits.
+ *
+ * The key is consumed 12 bytes at a time, each chunk being added into
+ * (a,b,c) and followed by mix(); the remaining 0..11 bytes are added by
+ * the switch statements and final() is applied once at the end. When k
+ * is uint32-aligned the words are fetched directly, otherwise they are
+ * assembled from single bytes; WORDS_BIGENDIAN selects the byte order
+ * used when assembling.
+ *
+ * The remarks below were written for the 32-bit hash_any() this function
+ * is derived from, which returned c alone as a uint32.
+ * NOTE(review): they have not been re-verified for the b half of the
+ * 64-bit result.
+ *
+ * Every bit of the key affects every bit of
+ * the return value. Every 1-bit and 2-bit delta achieves avalanche.
+ * About 6*len+35 instructions. The best hash table sizes are powers
+ * of 2. There is no need to do mod a prime (mod is sooo slow!).
+ * If you need less than 32 bits, use a bitmask.
+ */
+Datum
+vihash_any(register const unsigned char *k, register int keylen)
+{
+ register uint32 a,
+ b,
+ c,
+ len;
+
+ /* Set up the internal state: all three words start from the same length-dependent seed */
+ len = keylen;
+ a = b = c = 0x9e3779b9 + len + 3923095;
+
+ /* If the source pointer is word-aligned, we use word-wide fetches */
+ if (((intptr_t) k & UINT32_ALIGN_MASK) == 0)
+ {
+ /* Code path for aligned source data */
+ register const uint32 *ka = (const uint32 *) k;
+
+ /* handle most of the key */
+ while (len >= 12)
+ {
+ a += ka[0];
+ b += ka[1];
+ c += ka[2];
+ mix(a, b, c);
+ ka += 3;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes (k is re-pointed at the unconsumed tail) */
+ k = (const unsigned char *) ka;
+#ifdef WORDS_BIGENDIAN
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* the lowest byte of c is reserved for the length */
+ /* fall through */
+ case 8:
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 8);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 16);
+ /* fall through */
+ case 5:
+ b += ((uint32) k[4] << 24);
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 8);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 16);
+ /* fall through */
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len)
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ /* fall through */
+ case 10:
+ c += ((uint32) k[9] << 16);
+ /* fall through */
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* the lowest byte of c is reserved for the length */
+ /* fall through */
+ case 8:
+ b += ka[1];
+ a += ka[0];
+ break;
+ case 7:
+ b += ((uint32) k[6] << 16);
+ /* fall through */
+ case 6:
+ b += ((uint32) k[5] << 8);
+ /* fall through */
+ case 5:
+ b += k[4];
+ /* fall through */
+ case 4:
+ a += ka[0];
+ break;
+ case 3:
+ a += ((uint32) k[2] << 16);
+ /* fall through */
+ case 2:
+ a += ((uint32) k[1] << 8);
+ /* fall through */
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+ else
+ {
+ /* Code path for non-aligned source data */
+
+ /* handle most of the key, building each 32-bit word from four bytes */
+ while (len >= 12)
+ {
+#ifdef WORDS_BIGENDIAN
+ a += (k[3] + ((uint32) k[2] << 8) + ((uint32) k[1] << 16) + ((uint32) k[0] << 24));
+ b += (k[7] + ((uint32) k[6] << 8) + ((uint32) k[5] << 16) + ((uint32) k[4] << 24));
+ c += (k[11] + ((uint32) k[10] << 8) + ((uint32) k[9] << 16) + ((uint32) k[8] << 24));
+#else /* !WORDS_BIGENDIAN */
+ a += (k[0] + ((uint32) k[1] << 8) + ((uint32) k[2] << 16) + ((uint32) k[3] << 24));
+ b += (k[4] + ((uint32) k[5] << 8) + ((uint32) k[6] << 16) + ((uint32) k[7] << 24));
+ c += (k[8] + ((uint32) k[9] << 8) + ((uint32) k[10] << 16) + ((uint32) k[11] << 24));
+#endif /* WORDS_BIGENDIAN */
+ mix(a, b, c);
+ k += 12;
+ len -= 12;
+ }
+
+ /* handle the last 11 bytes */
+#ifdef WORDS_BIGENDIAN
+ switch (len) /* all the case statements fall through */
+ {
+ case 11:
+ c += ((uint32) k[10] << 8);
+ case 10:
+ c += ((uint32) k[9] << 16);
+ case 9:
+ c += ((uint32) k[8] << 24);
+ /* the lowest byte of c is reserved for the length */
+ case 8:
+ b += k[7];
+ case 7:
+ b += ((uint32) k[6] << 8);
+ case 6:
+ b += ((uint32) k[5] << 16);
+ case 5:
+ b += ((uint32) k[4] << 24);
+ case 4:
+ a += k[3];
+ case 3:
+ a += ((uint32) k[2] << 8);
+ case 2:
+ a += ((uint32) k[1] << 16);
+ case 1:
+ a += ((uint32) k[0] << 24);
+ /* case 0: nothing left to add */
+ }
+#else /* !WORDS_BIGENDIAN */
+ switch (len) /* all the case statements fall through */
+ {
+ case 11:
+ c += ((uint32) k[10] << 24);
+ case 10:
+ c += ((uint32) k[9] << 16);
+ case 9:
+ c += ((uint32) k[8] << 8);
+ /* the lowest byte of c is reserved for the length */
+ case 8:
+ b += ((uint32) k[7] << 24);
+ case 7:
+ b += ((uint32) k[6] << 16);
+ case 6:
+ b += ((uint32) k[5] << 8);
+ case 5:
+ b += k[4];
+ case 4:
+ a += ((uint32) k[3] << 24);
+ case 3:
+ a += ((uint32) k[2] << 16);
+ case 2:
+ a += ((uint32) k[1] << 8);
+ case 1:
+ a += k[0];
+ /* case 0: nothing left to add */
+ }
+#endif /* WORDS_BIGENDIAN */
+ }
+
+ final(a, b, c);
+
+ /* report the result: c fills the upper 32 bits, b the lower 32 bits */
+ /* (the 32-bit version of this function ended with: return UInt32GetDatum(c);) */
+ return Int64GetDatum((uint64) b |
+ (((uint64) c) << (sizeof(uint64) / 2) * BITS_PER_BYTE));
+}
6 vihash.sql.in
@@ -0,0 +1,6 @@
+-- Install script template for vihash.
+-- MODULE_PATHNAME is a placeholder; the generated vihash.sql carries the
+-- library path ('$libdir/vihash') in its place.
+
+-- Adjust this setting to control where the function gets created.
+SET search_path TO public;
+
+-- vihashtext(text): C-language hash of a text value, returned as a bigint.
+-- STRICT: a NULL argument yields NULL without calling the C code.
+-- IMMUTABLE: the result depends only on the argument.
+CREATE OR REPLACE FUNCTION vihashtext(text)
+    RETURNS bigint
+    LANGUAGE C
+    IMMUTABLE
+    STRICT
+    AS 'MODULE_PATHNAME';
Please sign in to comment.
Something went wrong with that request. Please try again.