Skip to content

Commit

Permalink
fix persistence offset issues
Browse files Browse the repository at this point in the history
  • Loading branch information
Cliff Moon committed Apr 27, 2009
1 parent 22d4215 commit b07c67c
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 10 deletions.
27 changes: 23 additions & 4 deletions c/bloom.c
Expand Up @@ -29,6 +29,8 @@ bloom_t *bloom_open(char* filename, long n, double e) {
uint32_t version; uint32_t version;
struct stat file_stat; struct stat file_stat;


// printf("sizeof(bloom_data_t) %d\n", sizeof(bloom_data_t));

if (-1 == stat(filename, &file_stat)) { if (-1 == stat(filename, &file_stat)) {
//create a new one //create a new one
// printf("creating new file\n"); // printf("creating new file\n");
Expand All @@ -45,18 +47,31 @@ bloom_t *bloom_open(char* filename, long n, double e) {


bloom->data.m = m; bloom->data.m = m;
bloom->data.k = (int) round(log(2) * m / n); bloom->data.k = (int) round(log(2) * m / n);
pwrite(file, bloom, sizeof(bloom_t) + BYTE_SIZE(m), 0); pwrite(file, &bloom->data, sizeof(bloom_data_t) + BYTE_SIZE(m), 0);
} else { } else {
// printf("opening existing file\n"); // printf("opening existing file\n");
if (-1 == (file = open(filename, O_RDWR))) { if (-1 == (file = open(filename, O_RDWR))) {
return NULL; return NULL;
} }


pread(file, &version, sizeof(uint32_t), 0); pread(file, &version, sizeof(uint32_t), 0);
pread(file, &m, sizeof(uint32_t), sizeof(uint32_t)); pread(file, &m, sizeof(uint32_t), 4);

// printf("read version of %d\n", version);
// printf("read m of %d\n", m);
bloom = malloc(sizeof(bloom_t) + BYTE_SIZE(m)); bloom = malloc(sizeof(bloom_t) + BYTE_SIZE(m));
pread(file, &bloom->data, sizeof(bloom_data_t) + BYTE_SIZE(m), 0); pread(file, &bloom->data, sizeof(bloom_data_t) + BYTE_SIZE(m), 0);
} }
// printf("bloom->data %d\n", (int)&bloom->data);
// printf("n %d\n", (int)&bloom->data.n);
// printf("keys %d\n", (int)&bloom->data.keys);
// printf("version = %d %d\n", bloom->data.version, (( int)&(bloom->data.version) - ( int)&(bloom->data)));
// printf("m = %d %d\n", bloom->data.m, (( int)&(bloom->data.m) - ( int)&(bloom->data)));
// printf("n = %d %d\n", bloom->data.n, (( int)&(bloom->data.n) - ( int)&(bloom->data)));
// printf("e = %f %d\n", bloom->data.e, (( int)&(bloom->data.e) - ( int)&(bloom->data)));
// printf("k = %d %d\n", bloom->data.k, (( int)&(bloom->data.k) - ( int)&(bloom->data)));
// printf("keys = %d %d\n", bloom->data.keys, (( int)&(bloom->data.keys) - ( int)&(bloom->data)));
// printf("seed = %d %d\n", bloom->data.seed, (( int)&(bloom->data.seed) - ( int)&(bloom->data)));
bloom->file = file; bloom->file = file;
bloom->filename = malloc(strlen(filename) + 1); bloom->filename = malloc(strlen(filename) + 1);
strcpy(bloom->filename, filename); strcpy(bloom->filename, filename);
Expand All @@ -79,11 +94,15 @@ void bloom_put(bloom_t *bloom, char *buff, int len) {
// printf("byte %d bit %d\n", BYTE_INDEX(index), BIT_INDEX(index)); // printf("byte %d bit %d\n", BYTE_INDEX(index), BIT_INDEX(index));
byte_index = BYTE_INDEX(index); byte_index = BYTE_INDEX(index);
SET_BIT(bloom->data.bits, index); SET_BIT(bloom->data.bits, index);
pwrite(bloom->file, &bloom->data.bits[byte_index], 1, sizeof(bloom_t) + byte_index - 1); // if ((sizeof(bloom_data_t) + byte_index - 1) < 108) {
// printf("writing to %d\n", sizeof(bloom_data_t) + byte_index - 1);
offset = (unsigned int)&(bloom->data.bits[byte_index]) - (unsigned int)&bloom->data;
pwrite(bloom->file, &bloom->data.bits[byte_index], 1, offset);
// printf("byte %d\n", bloom->bits[BYTE_INDEX(index)]); // printf("byte %d\n", bloom->bits[BYTE_INDEX(index)]);
} }
bloom->data.keys++; bloom->data.keys++;
offset = ((unsigned int)&(bloom->data.keys) - (unsigned int)bloom); offset = ((unsigned int)&(bloom->data.keys) - (unsigned int)&(bloom->data));
// printf("writing keys into offset %d\n", offset);
pwrite(bloom->file, &(bloom->data.keys), sizeof(uint32_t), offset); pwrite(bloom->file, &(bloom->data.keys), sizeof(uint32_t), offset);
} }


Expand Down
12 changes: 6 additions & 6 deletions c/bloom.h
@@ -1,11 +1,11 @@


typedef struct _bloom_data_t { typedef struct _bloom_data_t {
uint32_t version; uint32_t version; //0
uint32_t m; uint32_t m; //4
uint64_t n; uint64_t n; //8
double e; double e; //
uint32_t k; uint32_t k; //
uint64_t keys; uint64_t keys; //3
uint32_t seed; uint32_t seed;
char reserved[64]; char reserved[64];
char bits[1]; char bits[1];
Expand Down
27 changes: 27 additions & 0 deletions etest/bloom_test.erl
Expand Up @@ -37,6 +37,33 @@ false_positive_error_rate_test() ->
?assertEqual(10000, bloom:key_size(Bloom)), ?assertEqual(10000, bloom:key_size(Bloom)),
bloom:stop(Bloom). bloom:stop(Bloom).


%% Checks that a bloom filter keeps its contents across a stop/start cycle.
%%
%% The test inserts KeyCount random keys, stops the filter, reopens it from the
%% same data file and then asserts that:
%%   * the false positive rate for never-inserted keys is below ErrorRate,
%%   * the reopened filter reports KeyCount stored keys,
%%   * every key inserted before the restart is still reported as present.
persist_test() ->
  KeyCount = 10000,
  ErrorRate = 0.001,
  %% Best-effort cleanup: the result is ignored on purpose, since the data
  %% file may simply not exist yet.
  file:delete(data_file()),
  {ok, Bloom} = bloom:start(data_file(), KeyCount, ErrorRate),
  Keys = [begin
            Key = "Key" ++ float_to_list(random:uniform()),
            bloom:put(Bloom, Key),
            Key
          end || _ <- lists:seq(1, KeyCount)],
  ?debugMsg("got keys"),
  %% Log before stopping so the message order matches what actually happens.
  ?debugMsg("stopping bloom"),
  bloom:stop(Bloom),
  {ok, Bloom2} = bloom:start(data_file(), KeyCount, ErrorRate),
  ?debugMsg("restarted"),
  %% Probe with keys using a different prefix, so none of them were inserted;
  %% any hit here is a false positive.
  FalsePositives = [true || _ <- lists:seq(1, KeyCount),
                            bloom:has(Bloom2, "butt" ++ float_to_list(random:uniform())) =:= true],
  FPRate = length(FalsePositives) / KeyCount,
  ?debugFmt("false positives: ~p", [length(FalsePositives)]),
  ?debugFmt("false positive rate: ~p", [FPRate]),
  ?debugFmt("mem size ~p", [bloom:mem_size(Bloom2)]),
  ?assert(FPRate < ErrorRate),
  ?assertEqual(KeyCount, bloom:key_size(Bloom2)),
  %% Every key written before the restart must still be found afterwards.
  TruePositives = [true || Key <- Keys, bloom:has(Bloom2, Key) =:= true],
  ?debugFmt("true positives: ~p", [length(TruePositives)]),
  ?debugFmt("keys ~p", [length(Keys)]),
  TPRate = length(TruePositives) / KeyCount,
  ?assertEqual(1.0, TPRate),
  bloom:stop(Bloom2).

priv_dir() -> priv_dir() ->
Dir = filename:join(t:config(priv_dir), "data"), Dir = filename:join(t:config(priv_dir), "data"),
filelib:ensure_dir(filename:join(Dir, "bloom")), filelib:ensure_dir(filename:join(Dir, "bloom")),
Expand Down

0 comments on commit b07c67c

Please sign in to comment.