Skip to content

Commit 90a817d

Browse files
committed
[PERF] evaluation: zonify evaluation
This commit improves evaluation time from 5% to 20% on regular spreadsheet, depending on the spreadsheet. And even from 99% on some pathological cases Benchmark: https://www.odoo.com/odoo/documents/d9dGIrjZRTSa9nf153wEIwo9e4c5 Let’s say you have two formulas that depend on the same reference: A1: =C1  A2: =C1 In the FormulaDependencyGraph, both dependencies are added independently. Specifically, the R-tree data structure ends up with two separate entries: C1 -> A1 and C1 -> A2. Ideally, the internal R-tree structure should group these dependencies into a single node in the tree: C1 -> [A1, A2] or even C1 -> [A1:A2] because they can be grouped into a single zone. When this pattern occurs frequently, the R-tree becomes huged (and unbalanced) therefore sub-optimal. - Huge: lots and lots of individual positions - Unbalanced: this is common when using individual pivot formulas (e.g., for spreadsheet pivots), where each PIVOT.VALUE formula depends on the same range—the pivot dataset. With this commit, the dependencies R-Tree maps bounding boxes to zones, instead of bounding boxes to positions. To take advantage of this grouping, the entire evaluation process now uses zones as much as possible. closes #7360 Task: 4936229 Signed-off-by: Rémi Rahir (rar) <rar@odoo.com> Signed-off-by: Lucas Lefèvre (lul) <lul@odoo.com>
1 parent a7242a9 commit 90a817d

File tree

15 files changed

+910
-155
lines changed

15 files changed

+910
-155
lines changed

demo/data.js

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3936,15 +3936,16 @@ function computeStringCells(cols, rows) {
39363936

39373937
function computeSplitVlookup(rows) {
39383938
/*
3939-
* in A1 write =SPLIT("1 2", " ")
3940-
in C1, writ e=B2
3941-
write a VLOOKUP that search in column C --> slow
3942-
* */
3939+
* in A1 write =SPLIT("1 2", " ")
3940+
* in C1, write=B2
3941+
* write a VLOOKUP that search in column C --> slow
3942+
*/
39433943
const cells = {};
3944-
for (let row = 0; row < rows; row++) {
3944+
for (let row = 1; row < rows; row++) {
39453945
cells["A" + row] = { content: `=SPLIT("1 2", " ")` };
39463946
cells["C" + row] = { content: `=B${row}` };
3947-
cells["D" + row] = { content: `=vlookup("2",a1:c${rows},3)` };
3947+
cells["D" + row] = { content: `=VLOOKUP("2",C1:C${rows},1)` };
3948+
cells["F" + row] = { content: `=D${row}` };
39483949
}
39493950
return cells;
39503951
}

packages/o-spreadsheet-engine/src/helpers/range.ts

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ import {
1919
UnboundedZone,
2020
ZoneDimension,
2121
} from "../types/misc";
22-
import { Range, RangePart, RangeStringOptions } from "../types/range";
22+
import { BoundedRange, Range, RangePart, RangeStringOptions } from "../types/range";
2323
import { numberToLetters } from "./coordinates";
2424
import { getCanonicalSymbolName, groupConsecutive, largeMax, largeMin } from "./misc";
2525
import { isRowReference, splitReference } from "./references";
@@ -228,6 +228,11 @@ function getRangeParts(xc: string, zone: UnboundedZone): RangePart[] {
228228
return parts;
229229
}
230230

231+
export function positionToBoundedRange(position: CellPosition): BoundedRange {
232+
const zone = { left: position.col, top: position.row, right: position.col, bottom: position.row };
233+
return { sheetId: position.sheetId, zone };
234+
}
235+
231236
/**
232237
* Check that a zone is valid regarding the order of top-bottom and left-right.
233238
* Left should be smaller than right, top should be smaller than bottom.

packages/o-spreadsheet-engine/src/helpers/recompute_zones.ts

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,7 +178,35 @@ export function modifyProfiles<T extends Zone | UnboundedZone>( // export for te
178178
}
179179
}
180180

181-
function findIndexAndCreateProfile(
181+
export function profilesContainsZone(
182+
profilesStartingPosition: number[],
183+
profiles: Map<number, number[]>,
184+
zone: Zone
185+
): boolean {
186+
const leftValue = zone.left;
187+
const rightValue = zone.right;
188+
const topValue = zone.top;
189+
const bottomValue = zone.bottom + 1;
190+
const leftIndex = binaryPredecessorSearch(profilesStartingPosition, leftValue, 0);
191+
const rightIndex = binaryPredecessorSearch(profilesStartingPosition, rightValue, leftIndex);
192+
if (leftIndex === -1 || rightIndex === -1) {
193+
return false;
194+
}
195+
for (let i = leftIndex; i <= rightIndex; i++) {
196+
const profile = profiles.get(profilesStartingPosition[i])!;
197+
const topPredIndex = binaryPredecessorSearch(profile, topValue, 0, true);
198+
const bottomSuccIndex = binarySuccessorSearch(profile, bottomValue, 0, true);
199+
if (topPredIndex === -1 || topPredIndex % 2 !== 0) {
200+
return false;
201+
}
202+
if (topValue < profile[topPredIndex] || bottomValue > profile[bottomSuccIndex]) {
203+
return false;
204+
}
205+
}
206+
return true;
207+
}
208+
209+
export function findIndexAndCreateProfile(
182210
profilesStartingPosition: number[],
183211
profiles: Map<number, number[]>,
184212
value: number | undefined,
@@ -277,7 +305,17 @@ function modifyProfile(
277305

278306
// add the top and bottom value to the profile and
279307
// remove all information between the top and bottom index
280-
profile.splice(topPredIndex + 1, bottomSuccIndex - topPredIndex - 1, ...newPoints);
308+
const toDelete = bottomSuccIndex - topPredIndex - 1;
309+
const toInsert = newPoints.length;
310+
const start = topPredIndex + 1;
311+
// fast path and slow path
312+
if (start === profile.length - 1 && toDelete === 1 && toInsert === 1) {
313+
// fast path: we just need to replace the last element
314+
profile[start] = newPoints[0] ?? newPoints[1];
315+
} else {
316+
// equivalent but slower and with memory allocation
317+
profile.splice(start, toDelete, ...newPoints);
318+
}
281319
}
282320

283321
function removeContiguousProfiles(
@@ -301,7 +339,7 @@ function removeContiguousProfiles(
301339
}
302340
}
303341

304-
function constructZonesFromProfiles<T extends UnboundedZone | Zone>(
342+
export function constructZonesFromProfiles<T extends UnboundedZone | Zone>(
305343
profilesStartingPosition: number[],
306344
profiles: Map<number, number[]>
307345
): T[] {
@@ -333,8 +371,10 @@ function constructZonesFromProfiles<T extends UnboundedZone | Zone>(
333371
left,
334372
bottom,
335373
right,
336-
hasHeader: (bottom === undefined && top !== 0) || (right === undefined && left !== 0),
337374
};
375+
if ((bottom === undefined && top !== 0) || (right === undefined && left !== 0)) {
376+
profileZone.hasHeader = true;
377+
}
338378

339379
let findCorrespondingZone = false;
340380
for (let j = pendingZones.length - 1; j >= 0; j--) {

packages/o-spreadsheet-engine/src/helpers/zones.ts

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -502,20 +502,6 @@ export function excludeTopLeft(zone: Zone): Zone[] {
502502
return [leftColumnZone, rightPartZone];
503503
}
504504

505-
export function aggregatePositionsToZones(positions: Iterable<CellPosition>): {
506-
[sheetId: string]: Zone[];
507-
} {
508-
const result: { [sheetId: string]: Zone[] } = {};
509-
for (const position of positions) {
510-
result[position.sheetId] ??= [];
511-
result[position.sheetId].push(positionToZone(position));
512-
}
513-
for (const sheetId in result) {
514-
result[sheetId] = recomputeZones(result[sheetId]);
515-
}
516-
return result;
517-
}
518-
519505
/**
520506
* Array of all positions in the zone.
521507
*/
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
import { UID } from "../../..";
2+
import { BoundedRange } from "../../../types/range";
3+
import { RTreeBoundingBox, RTreeItem, SpreadsheetRTree } from "./r_tree";
4+
import { RangeSet } from "./range_set";
5+
6+
interface RangeSetItem {
7+
boundingBox: RTreeBoundingBox;
8+
data: RangeSet;
9+
}
10+
11+
type RTreeRangeItem = RTreeItem<BoundedRange>;
12+
13+
/**
14+
* R-Tree of ranges, mapping zones (r-tree bounding boxes) to ranges (data of the r-tree item).
15+
* Ranges associated to the exact same bounding box are grouped together
16+
* to reduce the number of nodes in the R-tree.
17+
*/
18+
export class DependenciesRTree {
19+
private readonly rTree: SpreadsheetRTree<RangeSet>;
20+
21+
constructor(items: RTreeRangeItem[] = []) {
22+
const compactedBoxes = groupSameBoundingBoxes(items);
23+
this.rTree = new SpreadsheetRTree(compactedBoxes);
24+
}
25+
26+
insert(item: RTreeRangeItem) {
27+
const data = this.rTree.search(item.boundingBox);
28+
const itemBoundingBox = item.boundingBox;
29+
const exactBoundingBox = data.find(
30+
({ boundingBox }) =>
31+
boundingBox.sheetId === itemBoundingBox.sheetId &&
32+
boundingBox.zone.left === itemBoundingBox.zone.left &&
33+
boundingBox.zone.top === itemBoundingBox.zone.top &&
34+
boundingBox.zone.right === itemBoundingBox.zone.right &&
35+
boundingBox.zone.bottom === itemBoundingBox.zone.bottom
36+
);
37+
if (exactBoundingBox) {
38+
exactBoundingBox.data.add(item.data);
39+
} else {
40+
this.rTree.insert({ ...item, data: new RangeSet([item.data]) });
41+
}
42+
}
43+
44+
search({ zone, sheetId }: RTreeBoundingBox): RangeSet {
45+
const results: RangeSet = new RangeSet();
46+
for (const { data } of this.rTree.search({ zone, sheetId })) {
47+
results.addMany(data);
48+
}
49+
return results;
50+
}
51+
52+
remove(item: RTreeRangeItem) {
53+
const data = this.rTree.search(item.boundingBox);
54+
const itemBoundingBox = item.boundingBox;
55+
const exactBoundingBox = data.find(
56+
({ boundingBox }) =>
57+
boundingBox.sheetId === itemBoundingBox.sheetId &&
58+
boundingBox.zone.left === itemBoundingBox.zone.left &&
59+
boundingBox.zone.top === itemBoundingBox.zone.top &&
60+
boundingBox.zone.right === itemBoundingBox.zone.right &&
61+
boundingBox.zone.bottom === itemBoundingBox.zone.bottom
62+
);
63+
if (exactBoundingBox) {
64+
exactBoundingBox.data.delete(item.data);
65+
} else {
66+
this.rTree.remove({ ...item, data: new RangeSet([item.data]) });
67+
}
68+
}
69+
}
70+
71+
/**
72+
* Group together all formulas pointing to the exact same dependency (bounding box).
73+
* The goal is to optimize the following case:
74+
* - if any cell in B1:B1000 changes, C1 must be recomputed
75+
* - if any cell in B1:B1000 changes, C2 must be recomputed
76+
* - if any cell in B1:B1000 changes, C3 must be recomputed
77+
* ...
78+
* - if any cell in B1:B1000 changes, C1000 must be recomputed
79+
*
80+
* Instead of having 1000 entries in the R-tree, we want to have a single entry
81+
* with B1:B1000 (bounding box) pointing to C1:C1000 (formulas).
82+
*/
83+
function groupSameBoundingBoxes(items: RTreeRangeItem[]): RangeSetItem[] {
84+
// Important: this function must be as fast as possible. It is on the evaluation hot path.
85+
let maxCol = 0;
86+
let maxRow = 0;
87+
for (let i = 0; i < items.length; i++) {
88+
const zone = items[i].boundingBox.zone;
89+
if (zone.right > maxCol) {
90+
maxCol = zone.right;
91+
}
92+
if (zone.bottom > maxRow) {
93+
maxRow = zone.bottom;
94+
}
95+
}
96+
maxCol += 1;
97+
maxRow += 1;
98+
99+
// in most real-world cases, we can use a fast numeric key
100+
// but if the zones are too far right or bottom, we fallback to a slower string key
101+
const maxPossibleKey = (((maxRow + 1) * maxCol + 1) * maxRow + 1) * maxCol;
102+
const useFastKey = maxPossibleKey <= Number.MAX_SAFE_INTEGER;
103+
if (!useFastKey) {
104+
console.warn("Max col/row size exceeded, using slow zone key");
105+
}
106+
const groupedByBBox: Record<UID, Record<string, RangeSetItem>> = {};
107+
for (const item of items) {
108+
const sheetId = item.boundingBox.sheetId;
109+
if (!groupedByBBox[sheetId]) {
110+
groupedByBBox[sheetId] = {};
111+
}
112+
const bBox = item.boundingBox.zone;
113+
let bBoxKey: number | string = 0;
114+
if (useFastKey) {
115+
bBoxKey =
116+
bBox.left +
117+
bBox.top * maxCol +
118+
bBox.right * maxCol * maxRow +
119+
bBox.bottom * maxCol * maxRow * maxCol;
120+
} else {
121+
bBoxKey = `${bBox.left},${bBox.top},${bBox.right},${bBox.bottom}`;
122+
}
123+
if (groupedByBBox[sheetId][bBoxKey]) {
124+
const ranges = groupedByBBox[sheetId][bBoxKey].data;
125+
ranges.add(item.data);
126+
} else {
127+
groupedByBBox[sheetId][bBoxKey] = {
128+
boundingBox: item.boundingBox,
129+
data: new RangeSet([item.data]),
130+
};
131+
}
132+
}
133+
const result: RangeSetItem[] = [];
134+
for (const sheetId in groupedByBBox) {
135+
const map = groupedByBBox[sheetId];
136+
for (const key in map) {
137+
result.push(map[key]);
138+
}
139+
}
140+
return result;
141+
}

0 commit comments

Comments
 (0)