Permalink
Browse files

use interpolation search

  • Loading branch information...
ryancdotorg committed Oct 20, 2015
1 parent db84319 commit d6c1c7cf1d218227542843fa35799e40dc373551
Showing with 55 additions and 63 deletions.
  1. +55 −63 hsearchf.c
View
@@ -16,90 +16,82 @@
#include "hash160.h"
#include "hsearchf.h"
-#define HASHLEN RIPEMD160_DIGEST_LENGTH
-
-//#define USE_FUDGE
+#define HSEARCHF_DEBUG 0
-#define DO_MEMCMP() memcmp(candidate.uc, hash->uc, HASHLEN)
+#define HASHLEN RIPEMD160_DIGEST_LENGTH
-#define MAKE_GUESS(F) do { \
- guess = entries * (ntohl(hash->ul[0]) / 4294967296.0 + (F)); \
+#define RESULT(R) do { \
+ res = R; \
+ goto hsearchf_result; \
} while (0)
-#define READ_AT(X) do { \
- if ((ret = fseek(f, (X) * HASHLEN, 0)) != 0) { return -1; } \
- if ((ret = fread(candidate.uc, HASHLEN, 1, f)) != 1) { return -1; } \
+#define DO_MEMCMP(H) memcmp(H.uc, hash->uc, HASHLEN)
+
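+// Read-behind: ask the kernel (posix_fadvise, POSIX_FADV_WILLNEED) to prefetch
+// 8192 bytes starting one 4096-byte page before the page-aligned record offset,
+// i.e. the preceding page plus the page holding the record.
+// NOTE(review): the 0xfffff000 mask keeps only the low 32 bits of the offset,
+// and the -4096 is negative for records in the first page - confirm intended.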
+#define READ_AT(X, H) do { \
+ posix_fadvise(fileno(f), ((X*HASHLEN)&0xfffff000)-4096, 8192, POSIX_FADV_WILLNEED); \
+ if ((ret = fseek(f, X * HASHLEN, 0)) != 0) { return -1; } \
+ if ((ret = fread(H.uc, HASHLEN, 1, f)) != 1) { return -1; } \
+ ++i; \
} while (0)
-// could mmapf and use bsearch, but meh
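+// Interpolation search for `hash` in `f`, read as consecutive HASHLEN-byte
+// records. Returns 1 on a match, 0 if the loop ends without one, and -1 if
+// fstat, fseek or fread fails.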
int hsearchf(FILE *f, hash160_t *hash) {
- int ret;
- //int i = 0;
+ int ret, res = 0, i = 0;
size_t file_sz;
struct stat sb;
- hash160_t candidate;
- //unsigned char hexed[64];
- int first, middle, last, entries;
+ hash160_t low_h, mid_h, high_h;
+ int64_t low_e, mid_e, high_e, entries;
+ int64_t vlow, vhigh, vtarget;
+
+#if HSEARCHF_DEBUG > 0
+ unsigned char hexed[64];
+#endif
if ((ret = fstat(fileno(f), &sb)) != 0) { return -1; }
file_sz = sb.st_size;
entries = file_sz / HASHLEN;
- first = 0;
- last = entries - 1;
-
-#ifdef USE_FUDGE
- // Most of the time, this is able to eliminate about nine reads because
- // the values are fairly uniform. Worst case, we make two extra reads, so
- // this is a win so long as it works at least a quarter of the time.
- // Playing with the denominators on the MAKE_GUESS macros may give slight
- // improvements.
- int guess;
- // make an initial guess at the location of the hash based on the target
- MAKE_GUESS(1.0/+2048);
- READ_AT(guess);
- //fprintf(stderr, "entries %10u guess %9u %s\n", hash->ul[0], guess, hex(candidate.uc, HASHLEN, hexed, sizeof(hexed)));
- ret = DO_MEMCMP();
-
- if (ret == 0) {
- return 1; // unlikely
- } else if (ret < 0) {
- first = guess;
- MAKE_GUESS(1.0/+1024);
- if (guess < entries) {
- READ_AT(guess);
- if (DO_MEMCMP() > 0) { last = guess; }
- }
- } else { // ret > 0
- last = guess;
- MAKE_GUESS(1.0/-32768);
- if (guess > 0) {
- READ_AT(guess);
- if (DO_MEMCMP() < 0) { first = guess; }
- }
- }
+ low_e = 0;
+ high_e = entries - 1;
+
+ vtarget = ntohl(hash->ul[0]);
+ memset(low_h.uc, 0x00, HASHLEN);
+ memset(high_h.uc, 0xff, HASHLEN);
+
+ // this tries to minimize reads, but does a few extra comparisons
+ while (low_e != high_e &&
+ memcmp(hash->uc, low_h.uc, HASHLEN) > 0 &&
+ memcmp(hash->uc, high_h.uc, HASHLEN) < 0) {
+ vlow = ntohl(low_h.ul[0]); vhigh = ntohl(high_h.ul[0]);
+ mid_e = low_e + (vtarget - vlow) * (high_e - low_e) / (vhigh - vlow);
+ READ_AT(mid_e, mid_h);
+ ret = DO_MEMCMP(mid_h);
+
+#if HSEARCHF_DEBUG > 1
+ fprintf(stderr, "target %s checking %9jd %9jd %9jd",
+ hex(hash->uc, HASHLEN, hexed, sizeof(hexed)), low_e, mid_e, high_e);
+ fprintf(stderr, " got %s %11d %2d\n",
+ hex(mid_h.uc, HASHLEN, hexed, sizeof(hexed)), ret, i);
#endif
- middle = (first + last) / 2;
-
- // binary search
- while (first <= last) {
- //fprintf(stderr, "target %s, checking entry %9u", hex(hash->uc, HASHLEN, hexed, sizeof(hexed)), middle);
- READ_AT(middle);
- ret = DO_MEMCMP();
- //fprintf(stderr, " got %s %11d %2d\n", hex(candidate.uc, HASHLEN, hexed, sizeof(hexed)), ret, ++i);
-
if (ret == 0) {
- return 1;
+ RESULT(1);
} else if (ret < 0) {
- first = middle + 1;
+ low_e = mid_e + 1;
+ READ_AT(low_e, low_h);
+ if (DO_MEMCMP(low_h) == 0) { RESULT(1); }
} else { // ret > 0
- last = middle - 1;
+ high_e = mid_e - 1;
+ READ_AT(high_e, high_h);
+ if (DO_MEMCMP(high_h) == 0) { RESULT(1); }
}
-
- middle = (first + last) / 2;
}
- return 0;
+hsearchf_result:
+#if HSEARCHF_DEBUG > 0
+ fprintf(stderr, "target: %s reads: %3d result: %d\n", hex(hash->uc, HASHLEN, hexed, sizeof(hexed)), i, res);
+#endif
+ return res;
}
/* vim: set ts=2 sw=2 et ai si: */

0 comments on commit d6c1c7c

Please sign in to comment.