-
Notifications
You must be signed in to change notification settings - Fork 0
/
select8_subarray.cpp
126 lines (108 loc) · 3.42 KB
/
select8_subarray.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
#include <sdsl/bit_vectors.hpp>
#include <sdsl/int_vector.hpp>
#include <string>
#include <vector>
#include <ctime>
#include <chrono>
#include <iterator>
#include <iostream>
#include <bitset>
#include "vbyte_helpers.hpp"
#include <valgrind/callgrind.h>
using namespace std;
using namespace sdsl;
uint32_t bit_length = 8;
unsigned int random_accesses = 1000000;
uint32_t cap = 1<<bit_length;
int main(int argc, char *argv[]) {
if(argc<=1) {
cerr << "Give uint64_t file as parameter" << endl;
return 1;
}
vector<uint64_t> original = read_data_from_file(argv[1]);
vector<uint32_t> data;
for(vector<uint64_t>::const_iterator i = original.begin(); i != original.end(); i++) {
vector<uint32_t> v = vb_encode_number(*i, cap);
v.back() += cap;
data.insert(data.end(), v.begin(), v.end());
}
bit_vector b(data.size(), 0);
uint8_t *iv = new uint8_t[data.size()];
size_t index = 0;
for (vector<uint32_t>::const_iterator i = data.begin(); i != data.end(); i++, index++) {
b[index] = (*i>>bit_length) & 1;
iv[index] = *i;
}
bit_vector::select_1_type sls(&b);
srand((unsigned) time(0));
vector<unsigned int> indices(random_accesses, 0);
for(vector<uint64_t>::size_type i = 0; i < indices.size(); i++) {
// easy way to avoid index out of bounds
indices[i] = rand()%(original.size()-50);
}
uint64_t z = 0;
uint64_t *test;
uint8_t diff;
uint64_t val;
int bsize = 64;
cout << "bsize" << bsize << endl;
uint64_t bitmasks[8];
val = 0;
for(int i = 0;i < 8; i++) {
val = val << 8;
val = val | 0xFF;
bitmasks[i] = val;
}
uint8_t loopcount = 50;
chrono::steady_clock::time_point time_begin = chrono::steady_clock::now();
for (vector<unsigned int>::const_iterator i = indices.begin(); i != indices.end(); i++) {
index = *i;
int begin = index == 0 ? 0 : sls(index)+1;
int offset = (begin)%bsize;
int block = begin>>6;
uint64_t* blockpointer = b.data()+block;
uint64_t blokki = *blockpointer;
uint64_t bit_offset = blokki >> offset;
int total_diff = offset;
if(!bit_offset) {
bit_offset = *(++blockpointer);
diff = bits::lo(bit_offset);
bit_offset = bit_offset >> (diff+1);
total_diff = diff+1;
diff = diff + (64-offset);
}
else {
diff = bits::lo(bit_offset);
bit_offset = bit_offset >> (diff+1);
total_diff = total_diff + diff+1;
}
for(int counter=0;counter<loopcount;counter++) {
test = (uint64_t *)&iv[begin];
val = *test & bitmasks[diff];
// the following is for accessing the value so it's not optimized out
z = z^val;
// sanity check for debugging the values
if(0 && val != original[index+counter]) {
cout << "Did not match: " << val << " vs " << original[index+counter] << endl;
}
begin = begin+diff+1;
if(!bit_offset) {
bit_offset = *(++blockpointer);
diff = bits::lo(bit_offset);
int tempdiff = diff+1;
bit_offset = bit_offset >> (diff+1);
diff = diff + (64-total_diff);
total_diff = tempdiff;
}
else {
diff = bits::lo(bit_offset);
bit_offset = bit_offset >> (diff+1);
total_diff = total_diff + diff + 1;
}
}
}
chrono::steady_clock::time_point time_end = chrono::steady_clock::now();
cout << "checksum: " << z << endl;
cout << "Time taken: " << chrono::duration_cast<chrono::milliseconds> (time_end - time_begin).count() << "[ms]" << endl;
return 0;
}