forked from facebookresearch/faiss
-
Notifications
You must be signed in to change notification settings - Fork 0
/
InvertedLists.h
334 lines (234 loc) · 10 KB
/
InvertedLists.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
// -*- c++ -*-
#ifndef FAISS_INVERTEDLISTS_IVF_H
#define FAISS_INVERTEDLISTS_IVF_H
/**
* Definition of inverted lists + a few common classes that implement
* the interface.
*/
#include <vector>
#include <faiss/Index.h>
namespace faiss {
/** Table of inverted lists
* multithreading rules:
* - concurrent read accesses are allowed
* - concurrent update accesses are allowed
* - for resize and add_entries, only concurrent access to different lists
* are allowed
*/
struct InvertedLists {
typedef Index::idx_t idx_t;
size_t nlist; ///< number of possible key values
size_t code_size; ///< code size per vector in bytes
InvertedLists (size_t nlist, size_t code_size);
/*************************
* Read only functions */
/// get the size of a list
virtual size_t list_size(size_t list_no) const = 0;
/** get the codes for an inverted list
* must be released by release_codes
*
* @return codes size list_size * code_size
*/
virtual const uint8_t * get_codes (size_t list_no) const = 0;
/** get the ids for an inverted list
* must be released by release_ids
*
* @return ids size list_size
*/
virtual const idx_t * get_ids (size_t list_no) const = 0;
/// release codes returned by get_codes (default implementation is nop
virtual void release_codes (size_t list_no, const uint8_t *codes) const;
/// release ids returned by get_ids
virtual void release_ids (size_t list_no, const idx_t *ids) const;
/// @return a single id in an inverted list
virtual idx_t get_single_id (size_t list_no, size_t offset) const;
/// @return a single code in an inverted list
/// (should be deallocated with release_codes)
virtual const uint8_t * get_single_code (
size_t list_no, size_t offset) const;
/// prepare the following lists (default does nothing)
/// a list can be -1 hence the signed long
virtual void prefetch_lists (const idx_t *list_nos, int nlist) const;
/*************************
* writing functions */
/// add one entry to an inverted list
virtual size_t add_entry (size_t list_no, idx_t theid,
const uint8_t *code);
virtual size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) = 0;
virtual void update_entry (size_t list_no, size_t offset,
idx_t id, const uint8_t *code);
virtual void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) = 0;
virtual void resize (size_t list_no, size_t new_size) = 0;
virtual void reset ();
/// move all entries from oivf (empty on output)
void merge_from (InvertedLists *oivf, size_t add_id);
virtual ~InvertedLists ();
/*************************
* statistics */
/// 1= perfectly balanced, >1: imbalanced
double imbalance_factor () const;
/// display some stats about the inverted lists
void print_stats () const;
/// sum up list sizes
size_t compute_ntotal () const;
/**************************************
* Scoped inverted lists (for automatic deallocation)
*
* instead of writing:
*
* uint8_t * codes = invlists->get_codes (10);
* ... use codes
* invlists->release_codes(10, codes)
*
* write:
*
* ScopedCodes codes (invlists, 10);
* ... use codes.get()
* // release called automatically when codes goes out of scope
*
* the following function call also works:
*
* foo (123, ScopedCodes (invlists, 10).get(), 456);
*
*/
struct ScopedIds {
const InvertedLists *il;
const idx_t *ids;
size_t list_no;
ScopedIds (const InvertedLists *il, size_t list_no):
il (il), ids (il->get_ids (list_no)), list_no (list_no)
{}
const idx_t *get() {return ids; }
idx_t operator [] (size_t i) const {
return ids[i];
}
~ScopedIds () {
il->release_ids (list_no, ids);
}
};
struct ScopedCodes {
const InvertedLists *il;
const uint8_t *codes;
size_t list_no;
ScopedCodes (const InvertedLists *il, size_t list_no):
il (il), codes (il->get_codes (list_no)), list_no (list_no)
{}
ScopedCodes (const InvertedLists *il, size_t list_no, size_t offset):
il (il), codes (il->get_single_code (list_no, offset)),
list_no (list_no)
{}
const uint8_t *get() {return codes; }
~ScopedCodes () {
il->release_codes (list_no, codes);
}
};
};
/// simple (default) implementation as an array of inverted lists
struct ArrayInvertedLists: InvertedLists {
std::vector < std::vector<uint8_t> > codes; // binary codes, size nlist
std::vector < std::vector<idx_t> > ids; ///< Inverted lists for indexes
ArrayInvertedLists (size_t nlist, size_t code_size);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
virtual ~ArrayInvertedLists ();
};
/*****************************************************************
* Meta-inverted lists
*
* About terminology: the inverted lists are seen as a sparse matrix,
* that can be stacked horizontally, vertically and sliced.
*****************************************************************/
struct ReadOnlyInvertedLists: InvertedLists {
ReadOnlyInvertedLists (size_t nlist, size_t code_size):
InvertedLists (nlist, code_size) {}
size_t add_entries (
size_t list_no, size_t n_entry,
const idx_t* ids, const uint8_t *code) override;
void update_entries (size_t list_no, size_t offset, size_t n_entry,
const idx_t *ids, const uint8_t *code) override;
void resize (size_t list_no, size_t new_size) override;
};
/// Horizontal stack of inverted lists
struct HStackInvertedLists: ReadOnlyInvertedLists {
std::vector<const InvertedLists *>ils;
/// build InvertedLists by concatenating nil of them
HStackInvertedLists (int nil, const InvertedLists **ils);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
};
using ConcatenatedInvertedLists = HStackInvertedLists;
/// vertical slice of indexes in another InvertedLists
struct SliceInvertedLists: ReadOnlyInvertedLists {
const InvertedLists *il;
idx_t i0, i1;
SliceInvertedLists(const InvertedLists *il, idx_t i0, idx_t i1);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
};
struct VStackInvertedLists: ReadOnlyInvertedLists {
std::vector<const InvertedLists *>ils;
std::vector<idx_t> cumsz;
/// build InvertedLists by concatenating nil of them
VStackInvertedLists (int nil, const InvertedLists **ils);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
};
/** use the first inverted lists if they are non-empty otherwise use the second
*
* This is useful if il1 has a few inverted lists that are too long,
* and that il0 has replacement lists for those, with empty lists for
* the others. */
struct MaskedInvertedLists: ReadOnlyInvertedLists {
const InvertedLists *il0;
const InvertedLists *il1;
MaskedInvertedLists (const InvertedLists *il0,
const InvertedLists *il1);
size_t list_size(size_t list_no) const override;
const uint8_t * get_codes (size_t list_no) const override;
const idx_t * get_ids (size_t list_no) const override;
void release_codes (size_t list_no, const uint8_t *codes) const override;
void release_ids (size_t list_no, const idx_t *ids) const override;
idx_t get_single_id (size_t list_no, size_t offset) const override;
const uint8_t * get_single_code (
size_t list_no, size_t offset) const override;
void prefetch_lists (const idx_t *list_nos, int nlist) const override;
};
} // namespace faiss
#endif