Skip to content

Commit

Permalink
refactor(core): use new backlink indexer
Browse files Browse the repository at this point in the history
  • Loading branch information
EYHN committed Jun 20, 2024
1 parent 35bead9 commit f6878a4
Show file tree
Hide file tree
Showing 38 changed files with 1,082 additions and 341 deletions.
17 changes: 17 additions & 0 deletions packages/common/infra/src/modules/doc/entities/record-list.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,23 @@ export class DocRecordList extends Entity {
[]
);

/**
 * Live list of `DocRecord`s for the ids emitted by
 * `store.watchTrashDocIds()`. Starts out as an empty array.
 *
 * Records are looked up in `pool` first; a record is only created (and
 * stored back into `pool`) for ids that are not pooled yet.
 */
public readonly trashDocs$ = LiveData.from<DocRecord[]>(
  this.store.watchTrashDocIds().pipe(
    map(trashIds => {
      const records: DocRecord[] = [];
      for (const docId of trashIds) {
        let record = this.pool.get(docId);
        if (!record) {
          record = this.framework.createEntity(DocRecord, { id: docId });
          this.pool.set(docId, record);
        }
        records.push(record);
      }
      return records;
    })
  ),
  []
);

public readonly isReady$ = LiveData.from(
this.store.watchDocListReady(),
false
Expand Down
2 changes: 2 additions & 0 deletions packages/common/infra/src/modules/doc/entities/record.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,6 @@ export class DocRecord extends Entity<{ id: string }> {
}

/** Title taken from the doc meta; empty string when the title is null/undefined. */
title$ = this.meta$.map(({ title }) => title ?? '');

/** Trash flag taken from the doc meta; `false` when the flag is null/undefined. */
trash$ = this.meta$.map(({ trash }) => trash ?? false);
}
24 changes: 23 additions & 1 deletion packages/common/infra/src/modules/doc/stores/docs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,29 @@ export class DocsStore extends Store {
return () => {
dispose();
};
}).pipe(distinctUntilChanged((p, c) => isEqual(p, c)));
});
}

/**
 * Observe the ids of all docs whose meta has a truthy `trash` flag.
 *
 * The current id list is pushed immediately on subscription and again each
 * time `docMetaUpdated` fires. No de-duplication is applied, so consecutive
 * emissions may carry equal lists.
 *
 * @returns an Observable of trashed doc ids; unsubscribing removes the
 * `docMetaUpdated` listener.
 */
watchTrashDocIds() {
  return new Observable<string[]>(subscriber => {
    // Re-read the doc metas on every emission so each push reflects the
    // collection's state at that moment.
    const pushTrashIds = () => {
      const trashIds =
        this.workspaceService.workspace.docCollection.meta.docMetas.flatMap(
          // Falsy ids are dropped as well, matching a `filter(Boolean)` pass.
          ({ trash, id }) => (trash && id ? [id] : [])
        );
      subscriber.next(trashIds);
    };

    pushTrashIds();

    const { dispose } =
      this.workspaceService.workspace.docCollection.meta.docMetaUpdated.on(
        pushTrashIds
      );
    return () => {
      dispose();
    };
  });
}

watchDocMeta(id: string) {
Expand Down
40 changes: 40 additions & 0 deletions packages/common/infra/src/sync/indexer/__tests__/black-box.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -414,6 +414,46 @@ describe.each([
});
});

// An `exists` query on `title` must return exactly the docs that were
// written with a `title` field.
test('exists', async () => {
  // Doc '2' is written without a `title`, so it must not be matched.
  const docs = {
    '1': { title: 'hello world', tag: '111' },
    '2': { tag: '222' },
    '3': { title: 'hello world', tag: '333' },
  };
  await writeData(docs);

  const result = await index.search({
    type: 'exists',
    field: 'title',
  });

  // Scores are not pinned; only the matched ids matter here.
  const expectedNodes = ['1', '3'].map(id => ({
    id,
    score: expect.anything(),
  }));
  const expectedPagination = {
    count: 2,
    hasMore: false,
    limit: expect.anything(),
    skip: 0,
  };

  expect(result).toEqual({
    nodes: expect.arrayContaining(expectedNodes),
    pagination: expectedPagination,
  });
});

test('subscribe', async () => {
await writeData({
'1': {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,12 @@ export class DataStruct {
return await this.matchAll(trx);
} else if (query.type === 'boost') {
return (await this.queryRaw(trx, query.query)).boost(query.boost);
} else if (query.type === 'exists') {
const iidx = this.invertedIndex.get(query.field as string);
if (!iidx) {
throw new Error(`Field '${query.field as string}' not found`);
}
return await iidx.all(trx);
}
throw new Error(`Query type '${query.type}' not supported`);
}
Expand Down
174 changes: 160 additions & 14 deletions packages/common/infra/src/sync/indexer/impl/indexeddb/inverted-index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export interface InvertedIndex {

match(trx: DataStructROTransaction, term: string): Promise<Match>;

all(trx: DataStructROTransaction): Promise<Match>;

insert(
trx: DataStructRWTransaction,
id: number,
Expand All @@ -33,6 +35,29 @@ export class StringInvertedIndex implements InvertedIndex {
return match;
}

/**
 * Match every node that has at least one inverted-index entry for this
 * field.
 *
 * Reads all entries of the `invertedIndex` store (via its `key` index) whose
 * key lies between this field's prefix key and that prefix's `add1()`
 * successor, then gives each distinct `nid` a score of 1.
 *
 * @param trx read-only transaction used for the lookup
 * @returns a `Match` containing each distinct node id once
 */
async all(trx: DataStructROTransaction): Promise<Match> {
  const keyIndex = trx.objectStore('invertedIndex').index('key');
  const prefix = InvertedIndexKey.forPrefix(this.fieldKey);
  const entries = await keyIndex.getAll(
    IDBKeyRange.bound(prefix.buffer(), prefix.add1().buffer())
  );

  // A node may own several entries for the field; score it only once.
  const result = new Match();
  for (const nid of new Set<number>(entries.map(entry => entry.nid))) {
    result.addScore(nid, 1);
  }
  return result;
}

async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
for (const term of terms) {
await trx.objectStore('invertedIndex').add({
Expand All @@ -58,6 +83,30 @@ export class IntegerInvertedIndex implements InvertedIndex {
return match;
}

/**
 * Match every node that has at least one inverted-index entry for this
 * field.
 *
 * Reads all entries of the `invertedIndex` store (via its `key` index) whose
 * key lies between this field's prefix key and that prefix's `add1()`
 * successor, then gives each distinct `nid` a score of 1.
 *
 * @param trx read-only transaction used for the lookup
 * @returns a `Match` containing each distinct node id once
 */
// eslint-disable-next-line sonarjs/no-identical-functions
async all(trx: DataStructROTransaction): Promise<Match> {
  const keyIndex = trx.objectStore('invertedIndex').index('key');
  const prefix = InvertedIndexKey.forPrefix(this.fieldKey);
  const entries = await keyIndex.getAll(
    IDBKeyRange.bound(prefix.buffer(), prefix.add1().buffer())
  );

  // A node may own several entries for the field; score it only once.
  const result = new Match();
  for (const nid of new Set<number>(entries.map(entry => entry.nid))) {
    result.addScore(nid, 1);
  }
  return result;
}

async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
for (const term of terms) {
await trx.objectStore('invertedIndex').add({
Expand All @@ -71,6 +120,30 @@ export class IntegerInvertedIndex implements InvertedIndex {
export class BooleanInvertedIndex implements InvertedIndex {
constructor(readonly fieldKey: string) {}

/**
 * Match every node that has at least one inverted-index entry for this
 * field.
 *
 * Reads all entries of the `invertedIndex` store (via its `key` index) whose
 * key lies between this field's prefix key and that prefix's `add1()`
 * successor, then gives each distinct `nid` a score of 1.
 *
 * @param trx read-only transaction used for the lookup
 * @returns a `Match` containing each distinct node id once
 */
// eslint-disable-next-line sonarjs/no-identical-functions
async all(trx: DataStructROTransaction): Promise<Match> {
  const keyIndex = trx.objectStore('invertedIndex').index('key');
  const prefix = InvertedIndexKey.forPrefix(this.fieldKey);
  const entries = await keyIndex.getAll(
    IDBKeyRange.bound(prefix.buffer(), prefix.add1().buffer())
  );

  // A node may own several entries for the field; score it only once.
  const result = new Match();
  for (const nid of new Set<number>(entries.map(entry => entry.nid))) {
    result.addScore(nid, 1);
  }
  return result;
}

async match(trx: DataStructROTransaction, term: string): Promise<Match> {
const objs = await trx
.objectStore('invertedIndex')
Expand Down Expand Up @@ -118,6 +191,14 @@ export class FullTextInvertedIndex implements InvertedIndex {
.getAll(
IDBKeyRange.bound(key.buffer(), key.add1().buffer(), false, true)
);
const submatched: {
nid: number;
score: number;
position: {
index: number;
ranges: [number, number][];
};
}[] = [];
for (const obj of objs) {
const key = InvertedIndexKey.fromBuffer(obj.key);
const originTokenTerm = key.asString();
Expand All @@ -139,17 +220,42 @@ export class FullTextInvertedIndex implements InvertedIndex {
const score =
bm25(termFreq, 1, totalCount, fieldLength, avgFieldLength) *
(matchLength / originTokenTerm.length);
const match = matched.get(obj.nid) || {
score: [] as number[],
const match = {
score,
positions: new Map(),
};
match.score.push(score);
const ranges = match.positions.get(position.i) || [];
ranges.push(
...position.rs.map(([start, _end]) => [start, start + matchLength])
);
match.positions.set(position.i, ranges);
matched.set(obj.nid, match);
submatched.push({
nid: obj.nid,
score,
position: {
index: position.i,
ranges: position.rs.map(([start, _end]) => [
start,
start + matchLength,
]),
},
});
}

// Normalize the scores collected in `submatched` by min-max scaling before
// merging them into `matched`. The max is seeded with 0 and the min with 1,
// so those seeds take part in the range as well.
const maxScore = submatched.reduce((acc, s) => Math.max(acc, s.score), 0);
const minScore = submatched.reduce((acc, s) => Math.min(acc, s.score), 1);
const scoreSpread = maxScore - minScore;
for (const { nid, score, position } of submatched) {
  // When all collected scores are equal (e.g. a single hit) the spread is 0
  // and the division would produce 0/0 = NaN, which would then poison the
  // node's score list. Every entry ties for best in that case, so use 1.
  const normalizedScore =
    scoreSpread === 0 ? 1 : (score - minScore) / scoreSpread;
  const match = matched.get(nid) || {
    score: [] as number[],
    positions: new Map(),
  };
  match.score.push(normalizedScore);
  // Append this submatch's ranges to the ones already recorded for the same
  // position index.
  const ranges = match.positions.get(position.index) || [];
  ranges.push(...position.ranges);
  match.positions.set(position.index, ranges);
  matched.set(nid, match);
}
}
const match = new Match();
Expand All @@ -166,6 +272,30 @@ export class FullTextInvertedIndex implements InvertedIndex {
return match;
}

/**
 * Match every node that has at least one inverted-index entry for this
 * field.
 *
 * Reads all entries of the `invertedIndex` store (via its `key` index) whose
 * key lies between this field's prefix key and that prefix's `add1()`
 * successor, then gives each distinct `nid` a score of 1.
 *
 * @param trx read-only transaction used for the lookup
 * @returns a `Match` containing each distinct node id once
 */
// eslint-disable-next-line sonarjs/no-identical-functions
async all(trx: DataStructROTransaction): Promise<Match> {
  const keyIndex = trx.objectStore('invertedIndex').index('key');
  const prefix = InvertedIndexKey.forPrefix(this.fieldKey);
  const entries = await keyIndex.getAll(
    IDBKeyRange.bound(prefix.buffer(), prefix.add1().buffer())
  );

  // A node may own several entries for the field; score it only once.
  const result = new Match();
  for (const nid of new Set<number>(entries.map(entry => entry.nid))) {
    result.addScore(nid, 1);
  }
  return result;
}

async insert(trx: DataStructRWTransaction, id: number, terms: string[]) {
for (let i = 0; i < terms.length; i++) {
const tokenMap = new Map<string, Token[]>();
Expand Down Expand Up @@ -220,7 +350,8 @@ export class FullTextInvertedIndex implements InvertedIndex {
export class InvertedIndexKey {
constructor(
readonly field: ArrayBuffer,
readonly value: ArrayBuffer
readonly value: ArrayBuffer,
readonly gap: ArrayBuffer = new Uint8Array([58])
) {}

asString() {
Expand All @@ -232,14 +363,29 @@ export class InvertedIndexKey {
}

add1() {
const bytes = new Uint8Array(this.value.slice(0));
let carry = 1;
for (let i = bytes.length - 1; i >= 0 && carry > 0; i--) {
const sum = bytes[i] + carry;
bytes[i] = sum % 256;
carry = sum >> 8;
if (this.value.byteLength > 0) {
const bytes = new Uint8Array(this.value.slice(0));
let carry = 1;
for (let i = bytes.length - 1; i >= 0 && carry > 0; i--) {
const sum = bytes[i] + carry;
bytes[i] = sum % 256;
carry = sum >> 8;
}
return new InvertedIndexKey(this.field, bytes);
} else {
return new InvertedIndexKey(
this.field,
new ArrayBuffer(0),
new Uint8Array([59])
);
}
return new InvertedIndexKey(this.field, bytes);
}

/**
 * Build a key that carries only the UTF-8 encoded field name and a
 * zero-length value.
 *
 * @param field name of the indexed field
 */
static forPrefix(field: string) {
  const encodedField = new TextEncoder().encode(field);
  const emptyValue = new ArrayBuffer(0);
  return new InvertedIndexKey(encodedField, emptyValue);
}

static forString(field: string, value: string) {
Expand All @@ -266,8 +412,8 @@ export class InvertedIndexKey {
this.field.byteLength + (this.value?.byteLength ?? 0) + 1
);
tmp.set(new Uint8Array(this.field), 0);
tmp.set([58], this.field.byteLength);
if (this.value) {
tmp.set(new Uint8Array(this.gap), this.field.byteLength);
if (this.value.byteLength > 0) {
tmp.set(new Uint8Array(this.value), this.field.byteLength + 1);
}
return tmp.buffer;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ export class DataStruct {
return this.matchAll();
} else if (query.type === 'boost') {
return this.queryRaw(query.query).boost(query.boost);
} else if (query.type === 'exists') {
const iidx = this.invertedIndex.get(query.field as string);
if (!iidx) {
throw new Error(`Field '${query.field as string}' not found`);
}
return iidx.all();
}
throw new Error(`Query type '${query.type}' not supported`);
}
Expand Down
Loading

0 comments on commit f6878a4

Please sign in to comment.