Skip to content

Commit

Permalink
Fix a bug with distinct and limit.
Browse files Browse the repository at this point in the history
The Priority queue tried to keep a limit by assuming that each node would result in at least one result. This isn't true as a node's results could in theory *all* have distinct items that are already in the result set. This bug likely doesn't really happen in practice because maxWidth is larger than the limit.

BENCHMARK RESULTS (effectively no change):

Trie#populate x 8.32 ops/sec ±7.28% (25 runs sampled)
Trie#lookupPrefix(t, 5) x 39,572,343 ops/sec ±0.91% (91 runs sampled)
Trie#lookupPrefix(tee, 3) x 40,466,378 ops/sec ±0.86% (95 runs sampled)
Trie#lookupPrefix(tee, 6) x 39,473,421 ops/sec ±0.80% (91 runs sampled)
  • Loading branch information
Tyler Rockwood committed Aug 23, 2021
1 parent 1399dde commit 1e7045d
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 35 deletions.
3 changes: 1 addition & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import Node from './node';
import PQueue from './pqueue';
import { Item, SearchOptions } from './common';

interface TrieOptions {
Expand Down Expand Up @@ -78,7 +77,7 @@ class Trie<T> {
limit: opts?.limit ?? Number.POSITIVE_INFINITY,
};

node.getSortedResults(prefix, results, options, new PQueue(options.limit));
node.getSortedResults(prefix, results, options);

return results;
}
Expand Down
8 changes: 4 additions & 4 deletions src/node.ts
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,6 @@ class Node<T> {
prefix: string,
results: Array<T>,
opts: SearchOptions,
pqueue: PQueue<T>
) {
const seenKeys = new Set<string>();

Expand All @@ -218,7 +217,8 @@ class Node<T> {
}
}
} else {
pqueue.addList([this]);
const pqueue = new PQueue<T>();
pqueue.addAll([this]);

let next: Node<T> | Item<T> | undefined;
while ((next = pqueue.pop())) {
Expand All @@ -228,10 +228,10 @@ class Node<T> {
}

if (next.leaf) {
pqueue.addList(next.values as Array<Item<T>>);
pqueue.addAll(next.values as Array<Item<T>>);
} else {
const children = next.children;
pqueue.addList((next.values as Letter[]).map(v => children[v]));
pqueue.addAll((next.values as Letter[]).map(v => children[v]));
}
} else {
if (!opts.unique || !seenKeys.has(next.distinct || next.key)) {
Expand Down
41 changes: 12 additions & 29 deletions src/pqueue.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,28 @@
import Node from './node';
import { Item } from './common';

/** A PQueue with a limited size. */
/**
* A standard priority queue.
*
* TODO(perf): We should probably have some sort of max heap solution and lazily merge the lists instead of eagerly
* doing it. In the case of high maxWidth and low limits (likely) it's probably faster.
*/
class PQueue<T> {
private readonly todo: Array<Node<T> | Item<T>> = [];
constructor(private readonly limit: number) {}

addList(list: Array<Node<T> | Item<T>>): void {
let i = 0,
j = 0;
/** Add a sorted list of "score"-ables to the priority queue. */
addAll(list: Array<Node<T> | Item<T>>): void {
let j = 0;

// effectiveLength is the lower bound on the number of
// item's we're guaranteed to be able to find in the trie.
// In the case that unique is false this is the same as the length,
// but in the case unique is true, it's the number of Nodes in the queue
// (as items may be discarded).
let effectiveLength = 0;

while (i < this.todo.length && effectiveLength < this.limit) {
if (j < list.length && this.todo[i].score < list[j].score) {
for (let i = 0; i < this.todo.length && j < list.length; ++i) {
if (this.todo[i].score < list[j].score) {
this.todo.splice(i, 0, list[j]);
j += 1;
}

if (this.todo[i] instanceof Node) {
effectiveLength += 1;
}

i += 1;
}

while (this.todo.length > i) {
this.todo.pop();
}

while (effectiveLength < this.limit && j < list.length) {
for (; j < list.length; ++j) {
this.todo.push(list[j]);
if (list[j] instanceof Node) {
effectiveLength += 1;
}
j += 1;
}
}

Expand Down
23 changes: 23 additions & 0 deletions test/trie.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -293,4 +293,27 @@ describe('Trie', () => {
expect(t.prefixSearch('')).toEqual(['a', 'b']);
t.remove('a');
});

it('distinct limit bug', () => {
const t = new StrictTrie({maxWidth: 1});
t.add({
key: 'a',
score: 1,
value: 'a',
distinct: '1',
});
t.add({
key: 'b',
score: 1,
value: 'b',
distinct: '1',
});
t.add({
key: 'c',
score: 1,
value: 'c',
distinct: '2',
});
expect(t.prefixSearch("", {limit: 2, unique: true})).toEqual(['a', 'c']);
})
});

0 comments on commit 1e7045d

Please sign in to comment.