Skip to content

Commit

Permalink
fix: sort elements based on selector matching algorithm (#9836)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrandolf committed Mar 13, 2023
1 parent 8aea8e0 commit 9044609
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 14 deletions.
79 changes: 70 additions & 9 deletions packages/puppeteer-core/src/injected/PQuerySelector.ts
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,67 @@ class PQueryEngine {
}
}

/**
 * Computes, with memoization, the path of sibling indices that leads from the
 * top of a node's tree down to the node itself.
 *
 * A path lists one index per tree level, outermost level first. Shadow roots
 * are replaced by their host element, so they do not add a level of their own.
 */
class DepthCalculator {
  // Maps each visited node to its full path (the node's own index included).
  #cache = new Map<Node, number[]>();

  /**
   * @param node - The node whose path is computed.
   * @param depth - Indices appended after the node's own path. The recursion
   * uses this to carry the indices of the descendants it came from; external
   * callers normally omit it.
   * @returns The path of `node` followed by the entries of `depth`. When
   * `node` is a `Document`, `depth` itself is returned.
   */
  calculate(node: Node, depth: number[] = []): number[] {
    if (node instanceof Document) {
      return depth;
    }
    // A shadow root has no siblings or parent of its own; use its host.
    const current: Node = node instanceof ShadowRoot ? node.host : node;

    const cachedDepth = this.#cache.get(current);
    if (cachedDepth) {
      return [...cachedDepth, ...depth];
    }

    // The node's index among its siblings is the number of previous siblings.
    let index = 0;
    for (
      let prevSibling = current.previousSibling;
      prevSibling;
      prevSibling = prevSibling.previousSibling
    ) {
      ++index;
    }

    // A node without a parent (detached subtree, document fragment, ...) is
    // the top of its tree: stop here instead of recursing into `null`.
    const parent = current.parentNode;
    const value = parent === null ? [index] : this.calculate(parent, [index]);
    this.#cache.set(current, value);
    return [...value, ...depth];
  }
}

/**
 * Sort comparator for two paths of sibling indices (outermost level first).
 *
 * Paths are compared level by level. At the first level where they differ,
 * the path with the smaller index sorts first. A path that runs out of levels
 * is treated as having index `-1` there, so a node sorts before its own
 * descendants.
 *
 * @param a - First path.
 * @param b - Second path.
 * @returns `-1` when `a` sorts before `b`, `1` when it sorts after, and `0`
 * when both paths are identical.
 */
const compareDepths = (a: number[], b: number[]): -1 | 0 | 1 => {
  const levels = Math.max(a.length, b.length);
  for (let level = 0; level < levels; ++level) {
    // -1 is smaller than any real sibling index, so the shorter path wins.
    const i = a[level] ?? -1;
    const j = b[level] ?? -1;
    if (i !== j) {
      return i < j ? -1 : 1;
    }
  }
  return 0;
};

/**
 * Drains `elements`, drops repeated nodes, and yields the remaining nodes
 * ordered by `compareDepths` applied to the path that `DepthCalculator`
 * computes for each of them.
 *
 * Nothing is yielded until the input has been consumed completely.
 */
const domSort = async function* (elements: AwaitableIterable<Node>) {
  // A Set keeps only the first occurrence of every node.
  const unique = new Set<Node>();
  for await (const node of elements) {
    unique.add(node);
  }

  // One calculator for the whole batch so that ancestor paths are shared.
  const depths = new DepthCalculator();
  const ranked: Array<readonly [Node, number[]]> = [];
  for (const node of unique) {
    ranked.push([node, depths.calculate(node)]);
  }
  ranked.sort((left, right) => {
    return compareDepths(left[1], right[1]);
  });

  for (const [node] of ranked) {
    yield node;
  }
};

/**
 * Minimal shape of an object exposing `querySelectorAll` with the same
 * signature as `Document.prototype.querySelectorAll`.
 */
type QueryableNode = {
querySelectorAll: typeof Document.prototype.querySelectorAll;
};
Expand All @@ -179,7 +240,7 @@ type QueryableNode = {
*
* @internal
*/
export const pQuerySelectorAll = async function* (
export const pQuerySelectorAll = function (
root: Node,
selector: string
): AwaitableIterable<Node> {
Expand All @@ -195,10 +256,8 @@ export const pQuerySelectorAll = async function* (
}

if (isPureCSS) {
yield* (root as unknown as QueryableNode).querySelectorAll(selector);
return;
return (root as unknown as QueryableNode).querySelectorAll(selector);
}

// If there are any empty elements, then this implies the selector has
// contiguous combinators (e.g. `>>> >>>>`) or starts/ends with one which we
// treat as illegal, similar to existing behavior.
Expand All @@ -221,11 +280,13 @@ export const pQuerySelectorAll = async function* (
);
}

for (const selectorParts of selectors) {
const query = new PQueryEngine(root, selector, selectorParts);
query.run();
yield* query.elements;
}
return domSort(
AsyncIterableUtil.flatMap(selectors, selectorParts => {
const query = new PQueryEngine(root, selector, selectorParts);
query.run();
return query.elements;
})
);
};

/**
Expand Down
6 changes: 3 additions & 3 deletions packages/puppeteer-core/src/util/AsyncIterableUtil.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@ export class AsyncIterableUtil {
}
}

static async *flatMap<T>(
static async *flatMap<T, U>(
iterable: AwaitableIterable<T>,
map: (item: T) => AwaitableIterable<T>
): AsyncIterable<T> {
map: (item: T) => AwaitableIterable<U>
): AsyncIterable<U> {
for await (const value of iterable) {
yield* map(value);
}
Expand Down
56 changes: 54 additions & 2 deletions test/src/queryhandler.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -359,7 +359,9 @@ describe('Query handler tests', function () {
describe('P selectors', () => {
beforeEach(async () => {
const {page} = getTestState();
await page.setContent('<div>hello <button>world</button></div>');
await page.setContent(
'<div>hello <button>world<span></span></button></div>'
);
Puppeteer.clearCustomQueryHandlers();
});

Expand Down Expand Up @@ -489,10 +491,60 @@ describe('Query handler tests', function () {
expect(value).toMatchObject({textContent: 'world', tagName: 'BUTTON'});
});

it('should work with commas', async () => {
it('should work with selector lists', async () => {
const {page} = getTestState();
const elements = await page.$$('div, ::-p-text(world)');
expect(elements.length).toStrictEqual(2);
});

/**
 * Lists every ordering of `inputs`.
 *
 * Orderings are grouped by their first element, following the order of
 * `inputs`. An empty input produces an empty list (not a list holding one
 * empty ordering).
 */
const permute = <T>(inputs: T[]): T[][] => {
  // Zero or one element: each element is its own (only) ordering.
  if (inputs.length <= 1) {
    return inputs.map(value => {
      return [value];
    });
  }
  return inputs.flatMap((head, position) => {
    // Everything except the element chosen as the head.
    const rest = inputs.filter((_, index) => {
      return index !== position;
    });
    return permute(rest).map(tail => {
      return [head, ...tail];
    });
  });
};

// Checks that a P selector list yields elements in the same order as the
// native `querySelectorAll`, whatever the order of the selectors in the list.
it('should match querySelector* ordering', async () => {
const {page} = getTestState();
for (const list of permute(['div', 'button', 'span'])) {
// Reference ordering: tag names as returned by the native CSS query.
const expected = await page.evaluate(selector => {
return [...document.querySelectorAll(selector)].map(element => {
return element.tagName;
});
}, list.join(','));
// Same list, with `button` swapped for a `::-p-text` selector.
// Presumably this keeps the query off the pure-CSS path; confirm against
// pQuerySelectorAll.
const elements = await page.$$(
list
.map(selector => {
return selector === 'button' ? '::-p-text(world)' : selector;
})
.join(',')
);
const actual = await Promise.all(
elements.map(element => {
return element.evaluate(element => {
return element.tagName;
});
})
);
// Compare the two tag-name sequences as comma-joined strings.
expect(actual.join()).toStrictEqual(expected.join());
}
});

// Checks that an element matched by more than one selector of a list is
// returned only once. Presumably both selectors resolve to the single
// <button> of the page fixture; confirm against the `beforeEach` content.
it('should not have duplicate elements from selector lists', async () => {
const {page} = getTestState();
const elements = await page.$$('::-p-text(world), button');
expect(elements.length).toStrictEqual(1);
});
});
});

0 comments on commit 9044609

Please sign in to comment.