Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 136 lines (120 sloc) 4.795 kB
3e346e4 persistent bloom filter
Cliff Moon authored
1 #include <stdint.h>
2 #include <sys/types.h>
3 #include <sys/stat.h>
90974b8 a bloom filter
Cliff Moon authored
4 #include <stdio.h>
3e346e4 persistent bloom filter
Cliff Moon authored
5 #include <fcntl.h>
90974b8 a bloom filter
Cliff Moon authored
6 #include <stdlib.h>
3e346e4 persistent bloom filter
Cliff Moon authored
7 #include <unistd.h>
8 #include <string.h>
90974b8 a bloom filter
Cliff Moon authored
9 #include <math.h>
10 #include "bloom.h"
11 #include "murmur.h"
12
3e346e4 persistent bloom filter
Cliff Moon authored
13 #define BYTE_SIZE(index) ((int)(index/8))
90974b8 a bloom filter
Cliff Moon authored
14 #define BYTE_INDEX(index) ((int)(index/8))
15 #define BIT_INDEX(index) (index % 8)
16
17 #define SET_BIT(buff, index) buff[BYTE_INDEX(index)] |= (1 << BIT_INDEX(index))
18 #define GET_BIT(buff, index) (int)(buff[BYTE_INDEX(index)] & (1 << BIT_INDEX(index)))
19
20
21 //internal function headers
3e346e4 persistent bloom filter
Cliff Moon authored
22 static void read_header(int file, bloom_t *bloom);
23 static void write_bloom(int file, bloom_t *bloom);
90974b8 a bloom filter
Cliff Moon authored
24
3e346e4 persistent bloom filter
Cliff Moon authored
25 bloom_t *bloom_open(char* filename, long n, double e) {
90974b8 a bloom filter
Cliff Moon authored
26 bloom_t *bloom;
3e346e4 persistent bloom filter
Cliff Moon authored
27 int file;
28 uint32_t m;
29 uint32_t version;
30 struct stat file_stat;
90974b8 a bloom filter
Cliff Moon authored
31
b07c67c fix persistence offset issues
Cliff Moon authored
32 // printf("sizeof(bloom_data_t) %d\n", sizeof(bloom_data_t));
33
3e346e4 persistent bloom filter
Cliff Moon authored
34 if (-1 == stat(filename, &file_stat)) {
35 //create a new one
35776ac tests for new interface
Cliff Moon authored
36 // printf("creating new file\n");
3e346e4 persistent bloom filter
Cliff Moon authored
37 if (-1 == (file = open(filename, O_CREAT | O_RDWR, S_IWUSR | S_IRUSR | S_IRGRP | S_IWGRP))) {
38 return NULL;
39 }
40
41 m = (int) ceil(n * log(e) / log(1.0 / pow(2, log(2))));
42 bloom = malloc(sizeof(bloom_t) + BYTE_SIZE(m));
43 bloom->data.n = n;
44 bloom->data.e = e;
45 bloom->data.keys = 0;
46 bloom->data.seed = rand();
47
48 bloom->data.m = m;
49 bloom->data.k = (int) round(log(2) * m / n);
b07c67c fix persistence offset issues
Cliff Moon authored
50 pwrite(file, &bloom->data, sizeof(bloom_data_t) + BYTE_SIZE(m), 0);
3e346e4 persistent bloom filter
Cliff Moon authored
51 } else {
35776ac tests for new interface
Cliff Moon authored
52 // printf("opening existing file\n");
3e346e4 persistent bloom filter
Cliff Moon authored
53 if (-1 == (file = open(filename, O_RDWR))) {
54 return NULL;
55 }
56
57 pread(file, &version, sizeof(uint32_t), 0);
b07c67c fix persistence offset issues
Cliff Moon authored
58 pread(file, &m, sizeof(uint32_t), 4);
59
60 // printf("read version of %d\n", version);
61 // printf("read m of %d\n", m);
3e346e4 persistent bloom filter
Cliff Moon authored
62 bloom = malloc(sizeof(bloom_t) + BYTE_SIZE(m));
63 pread(file, &bloom->data, sizeof(bloom_data_t) + BYTE_SIZE(m), 0);
90974b8 a bloom filter
Cliff Moon authored
64 }
b07c67c fix persistence offset issues
Cliff Moon authored
65 // printf("bloom->data %d\n", (int)&bloom->data);
66 // printf("n %d\n", (int)&bloom->data.n);
67 // printf("keys %d\n", (int)&bloom->data.keys);
68 // printf("version = %d %d\n", bloom->data.version, (( int)&(bloom->data.version) - ( int)&(bloom->data)));
69 // printf("m = %d %d\n", bloom->data.m, (( int)&(bloom->data.m) - ( int)&(bloom->data)));
70 // printf("n = %d %d\n", bloom->data.n, (( int)&(bloom->data.n) - ( int)&(bloom->data)));
71 // printf("e = %f %d\n", bloom->data.e, (( int)&(bloom->data.e) - ( int)&(bloom->data)));
72 // printf("k = %d %d\n", bloom->data.k, (( int)&(bloom->data.k) - ( int)&(bloom->data)));
73 // printf("keys = %d %d\n", bloom->data.keys, (( int)&(bloom->data.keys) - ( int)&(bloom->data)));
74 // printf("seed = %d %d\n", bloom->data.seed, (( int)&(bloom->data.seed) - ( int)&(bloom->data)));
90974b8 a bloom filter
Cliff Moon authored
75 bloom->file = file;
3e346e4 persistent bloom filter
Cliff Moon authored
76 bloom->filename = malloc(strlen(filename) + 1);
77 strcpy(bloom->filename, filename);
90974b8 a bloom filter
Cliff Moon authored
78
79 return bloom;
80 }
81
82 void bloom_put(bloom_t *bloom, char *buff, int len) {
83 int i=0;
3e346e4 persistent bloom filter
Cliff Moon authored
84 unsigned int hash = bloom->data.seed;
90974b8 a bloom filter
Cliff Moon authored
85 unsigned int index;
3e346e4 persistent bloom filter
Cliff Moon authored
86 unsigned int offset;
87 unsigned int byte_index;
90974b8 a bloom filter
Cliff Moon authored
88 // printf("bloom %p\n", bloom);
89 // printf("k %d\n", bloom->k);
3e346e4 persistent bloom filter
Cliff Moon authored
90 for(i=0; i<bloom->data.k; i++) {
90974b8 a bloom filter
Cliff Moon authored
91 hash = MurmurHash2(buff, len, hash);
3e346e4 persistent bloom filter
Cliff Moon authored
92 index = hash % bloom->data.m;
90974b8 a bloom filter
Cliff Moon authored
93 // printf("setting index %d\n", index);
94 // printf("byte %d bit %d\n", BYTE_INDEX(index), BIT_INDEX(index));
3e346e4 persistent bloom filter
Cliff Moon authored
95 byte_index = BYTE_INDEX(index);
96 SET_BIT(bloom->data.bits, index);
b07c67c fix persistence offset issues
Cliff Moon authored
97 // if ((sizeof(bloom_data_t) + byte_index - 1) < 108) {
98 // printf("writing to %d\n", sizeof(bloom_data_t) + byte_index - 1);
99 offset = (unsigned int)&(bloom->data.bits[byte_index]) - (unsigned int)&bloom->data;
100 pwrite(bloom->file, &bloom->data.bits[byte_index], 1, offset);
90974b8 a bloom filter
Cliff Moon authored
101 // printf("byte %d\n", bloom->bits[BYTE_INDEX(index)]);
102 }
3e346e4 persistent bloom filter
Cliff Moon authored
103 bloom->data.keys++;
b07c67c fix persistence offset issues
Cliff Moon authored
104 offset = ((unsigned int)&(bloom->data.keys) - (unsigned int)&(bloom->data));
105 // printf("writing keys into offset %d\n", offset);
3e346e4 persistent bloom filter
Cliff Moon authored
106 pwrite(bloom->file, &(bloom->data.keys), sizeof(uint32_t), offset);
90974b8 a bloom filter
Cliff Moon authored
107 }
108
109 int bloom_has(bloom_t *bloom, char *buff, int len) {
110 int i=0;
3e346e4 persistent bloom filter
Cliff Moon authored
111 unsigned int hash = bloom->data.seed;
90974b8 a bloom filter
Cliff Moon authored
112 unsigned int index;
113 // printf("bloom %p\n", bloom);
114 // printf("k %d\n", bloom->k);
3e346e4 persistent bloom filter
Cliff Moon authored
115 for(i=0; i<bloom->data.k; i++) {
90974b8 a bloom filter
Cliff Moon authored
116 hash = MurmurHash2(buff, len, hash);
3e346e4 persistent bloom filter
Cliff Moon authored
117 index = hash % bloom->data.m;
90974b8 a bloom filter
Cliff Moon authored
118 // printf("getting index %d\n", index);
119 // printf("byte %d bit %d\n", BYTE_INDEX(index), BIT_INDEX(index));
120 // printf("byte %d\n", bloom->bits[BYTE_INDEX(index)]);
121 // printf("get result %d\n", GET_BIT(bloom->bits, index));
3e346e4 persistent bloom filter
Cliff Moon authored
122 if (0 == GET_BIT(bloom->data.bits, index)) {
90974b8 a bloom filter
Cliff Moon authored
123 return 0;
124 }
125 }
126 return 1;
127 }
128
129 void bloom_destroy(bloom_t *bloom) {
3e346e4 persistent bloom filter
Cliff Moon authored
130 if (NULL != bloom) {
131 if (NULL != bloom->filename) free(bloom->filename);
132 if (-1 != bloom->file) close(bloom->file);
133 free(bloom);
134 }
90974b8 a bloom filter
Cliff Moon authored
135 }
Something went wrong with that request. Please try again.