Skip to content

Commit

Permalink
feat(text selector): match text in child nodes (#5293)
Browse files Browse the repository at this point in the history
This changes `text=` and `:text()` selectors to match the element when:
- its combined text content matches the text;
- combined text content of any immediate child does not match the text.

This allows the following markup to match "Some bold and italics text":
`<div>Some <b>bold</b> and <i>italics</i> text</div>`.

For reference, "combined text content" is almost the same as `element.textContent`,
but with some changes, such as using the value of `<input type=button>` or ignoring `<head>`.

This also includes some caching optimizations, meaningful in complex matches
that involve multiple calls to the text engine.

Performance changes (measured on a large page with ~25000 elements):
- `:has-text()` - 14% faster.
- `text=` - 50% faster.
- `:text()` - 0-35% slower.
- `:text-matches()` - 28% slower.
  • Loading branch information
dgozman committed Feb 5, 2021
1 parent c1b08f1 commit 0cbb2c1
Show file tree
Hide file tree
Showing 4 changed files with 196 additions and 212 deletions.
94 changes: 80 additions & 14 deletions src/server/injected/injectedScript.ts
Expand Up @@ -15,11 +15,10 @@
*/

import { SelectorEngine, SelectorRoot } from './selectorEngine';
import { createTextSelector } from './textSelectorEngine';
import { XPathEngine } from './xpathSelectorEngine';
import { ParsedSelector, ParsedSelectorPart, parseSelector } from '../common/selectorParser';
import { FatalDOMError } from '../common/domErrors';
import { SelectorEvaluatorImpl, isVisible, parentElementOrShadowHost } from './selectorEvaluator';
import { SelectorEvaluatorImpl, isVisible, parentElementOrShadowHost, elementMatchesText } from './selectorEvaluator';
import { CSSComplexSelectorList } from '../common/cssParser';

type Predicate<T> = (progress: InjectedScriptProgress, continuePolling: symbol) => T | symbol;
Expand Down Expand Up @@ -47,8 +46,8 @@ export class InjectedScript {
this._enginesV1 = new Map();
this._enginesV1.set('xpath', XPathEngine);
this._enginesV1.set('xpath:light', XPathEngine);
this._enginesV1.set('text', createTextSelector(true));
this._enginesV1.set('text:light', createTextSelector(false));
this._enginesV1.set('text', this._createTextEngine(true));
this._enginesV1.set('text:light', this._createTextEngine(false));
this._enginesV1.set('id', this._createAttributeEngine('id', true));
this._enginesV1.set('id:light', this._createAttributeEngine('id', false));
this._enginesV1.set('data-testid', this._createAttributeEngine('data-testid', true));
Expand Down Expand Up @@ -76,7 +75,9 @@ export class InjectedScript {
querySelector(selector: ParsedSelector, root: Node): Element | undefined {
if (!(root as any)['querySelector'])
throw new Error('Node is not queryable.');
return this._querySelectorRecursively(root as SelectorRoot, selector, 0);
const result = this._querySelectorRecursively(root as SelectorRoot, selector, 0);
this._evaluator.clearCaches();
return result;
}

private _querySelectorRecursively(root: SelectorRoot, selector: ParsedSelector, index: number): Element | undefined {
Expand Down Expand Up @@ -111,22 +112,24 @@ export class InjectedScript {
}
set = newSet;
}
const candidates = Array.from(set) as Element[];
if (!partsToCheckOne.length)
return candidates;
const partial = { parts: partsToCheckOne };
return candidates.filter(e => !!this._querySelectorRecursively(e, partial, 0));
let result = Array.from(set) as Element[];
if (partsToCheckOne.length) {
const partial = { parts: partsToCheckOne };
result = result.filter(e => !!this._querySelectorRecursively(e, partial, 0));
}
this._evaluator.clearCaches();
return result;
}

private _queryEngine(part: ParsedSelectorPart, root: SelectorRoot): Element | undefined {
if (Array.isArray(part))
return this._evaluator.evaluate({ scope: root as Document | Element, pierceShadow: true }, part)[0];
return this._evaluator.query({ scope: root as Document | Element, pierceShadow: true }, part)[0];
return this._enginesV1.get(part.name)!.query(root, part.body);
}

private _queryEngineAll(part: ParsedSelectorPart, root: SelectorRoot): Element[] {
if (Array.isArray(part))
return this._evaluator.evaluate({ scope: root as Document | Element, pierceShadow: true }, part);
return this._evaluator.query({ scope: root as Document | Element, pierceShadow: true }, part);
return this._enginesV1.get(part.name)!.queryAll(root, part.body);
}

Expand All @@ -137,10 +140,33 @@ export class InjectedScript {
};
return {
query: (root: SelectorRoot, selector: string): Element | undefined => {
return this._evaluator.evaluate({ scope: root as Document | Element, pierceShadow: shadow }, toCSS(selector))[0];
return this._evaluator.query({ scope: root as Document | Element, pierceShadow: shadow }, toCSS(selector))[0];
},
queryAll: (root: SelectorRoot, selector: string): Element[] => {
return this._evaluator.query({ scope: root as Document | Element, pierceShadow: shadow }, toCSS(selector));
}
};
}

private _createTextEngine(shadow: boolean): SelectorEngine {
return {
query: (root: SelectorRoot, selector: string): Element | undefined => {
const matcher = createTextMatcher(selector);
if (root.nodeType === Node.ELEMENT_NODE && elementMatchesText(this._evaluator, root as Element, matcher))
return root as Element;
const elements = this._evaluator._queryCSS({ scope: root as Document | Element, pierceShadow: shadow }, '*');
for (const element of elements) {
if (elementMatchesText(this._evaluator, element, matcher))
return element;
}
},
queryAll: (root: SelectorRoot, selector: string): Element[] => {
return this._evaluator.evaluate({ scope: root as Document | Element, pierceShadow: shadow }, toCSS(selector));
const matcher = createTextMatcher(selector);
const elements = this._evaluator._queryCSS({ scope: root as Document | Element, pierceShadow: shadow }, '*');
const result = elements.filter(e => elementMatchesText(this._evaluator, e, matcher));
if (root.nodeType === Node.ELEMENT_NODE && elementMatchesText(this._evaluator, root as Element, matcher))
result.unshift(root as Element);
return result;
}
};
}
Expand Down Expand Up @@ -776,4 +802,44 @@ const eventType = new Map<string, 'mouse'|'keyboard'|'touch'|'pointer'|'focus'|'
['drop', 'drag'],
]);

/**
 * Removes backslash escapes from `s`: every backslash that is followed by
 * another character is dropped and that following character is kept verbatim.
 * A backslash that is the very last character has nothing to escape and is
 * kept as is.
 *
 * @param s Text that may contain backslash-escaped characters.
 * @returns The text with escaping backslashes removed.
 */
function unescape(s: string): string {
  // Fast path: nothing to unescape.
  if (s.indexOf('\\') === -1)
    return s;
  let out = '';
  for (let pos = 0; pos < s.length; pos++) {
    const escapesNext = s[pos] === '\\' && pos + 1 < s.length;
    // Step over the backslash so that the escaped character is the one copied.
    if (escapesNext)
      pos++;
    out += s[pos];
  }
  return out;
}

type Matcher = (text: string) => boolean;

/**
 * Builds a predicate for the body of a `text=` selector.
 *
 * Supported selector forms:
 * - `/pattern/flags` - a regular expression, tested against the text as is;
 * - `"text"` or `'text'` - strict match: case-sensitive, and the whole text
 *   must be equal to the unescaped content between the quotes;
 * - anything else - case-insensitive substring match.
 *
 * For the two non-regex forms, both the selector and the candidate text are
 * trimmed and have whitespace runs collapsed to a single space before comparing.
 *
 * @param selector Selector body, without the `text=` prefix.
 * @returns A function that tells whether a given text matches the selector.
 * @throws SyntaxError from the `RegExp` constructor when the pattern or flags are invalid.
 */
function createTextMatcher(selector: string): Matcher {
  const lastSlash = selector.lastIndexOf('/');
  if (selector[0] === '/' && lastSlash > 0) {
    const re = new RegExp(selector.substring(1, lastSlash), selector.substring(lastSlash + 1));
    return text => {
      // The same RegExp is reused for every element. With the `g` or `y` flag,
      // `test()` resumes from `lastIndex` left by the previous call, which would
      // make the result depend on previously tested elements.
      re.lastIndex = 0;
      return re.test(text);
    };
  }

  let strict = false;
  const opening = selector[0];
  const isQuote = opening === '"' || opening === "'";
  if (selector.length > 1 && isQuote && selector[selector.length - 1] === opening) {
    // Strip exactly one pair of quotes, so that quotes nested inside the other
    // kind of quotes (e.g. "'abc'") stay part of the text to match.
    selector = unescape(selector.substring(1, selector.length - 1));
    strict = true;
  }

  selector = selector.trim().replace(/\s+/g, ' ');
  if (!strict)
    selector = selector.toLowerCase();

  return text => {
    text = text.trim().replace(/\s+/g, ' ');
    if (!strict)
      return text.toLowerCase().includes(selector);
    return text === selector;
  };
}

export default InjectedScript;
64 changes: 42 additions & 22 deletions src/server/injected/selectorEvaluator.ts
Expand Up @@ -43,6 +43,7 @@ export class SelectorEvaluatorImpl implements SelectorEvaluator {
private _cacheCallMatches: QueryCache = new Map();
private _cacheCallQuery: QueryCache = new Map();
private _cacheQuerySimple: QueryCache = new Map();
_cacheText = new Map<Element | ShadowRoot, string>();
private _scoreMap: Map<Element, number> | undefined;

constructor(extraEngines: Map<string, SelectorEngine>) {
Expand Down Expand Up @@ -74,10 +75,7 @@ export class SelectorEvaluatorImpl implements SelectorEvaluator {
throw new Error(`Please keep customCSSNames in sync with evaluator engines`);
}

// This is the only function we should use for querying, because it does
// the right thing with caching.
evaluate(context: QueryContext, s: CSSComplexSelectorList): Element[] {
const result = this.query(context, s);
clearCaches() {
this._cacheQueryCSS.clear();
this._cacheMatches.clear();
this._cacheQuery.clear();
Expand All @@ -86,7 +84,7 @@ export class SelectorEvaluatorImpl implements SelectorEvaluator {
this._cacheCallMatches.clear();
this._cacheCallQuery.clear();
this._cacheQuerySimple.clear();
return result;
this._cacheText.clear();
}

private _cached<T>(cache: QueryCache, main: any, rest: any[], cb: () => T): T {
Expand Down Expand Up @@ -411,15 +409,17 @@ const textEngine: SelectorEngine = {
matches(element: Element, args: (string | number | Selector)[], context: QueryContext, evaluator: SelectorEvaluator): boolean {
if (args.length !== 1 || typeof args[0] !== 'string')
throw new Error(`"text" engine expects a single string`);
return elementMatchesText(element, context, textMatcher(args[0], true));
const matcher = textMatcher(args[0], true);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher);
},
};

const textIsEngine: SelectorEngine = {
matches(element: Element, args: (string | number | Selector)[], context: QueryContext, evaluator: SelectorEvaluator): boolean {
if (args.length !== 1 || typeof args[0] !== 'string')
throw new Error(`"text-is" engine expects a single string`);
return elementMatchesText(element, context, textMatcher(args[0], false));
const matcher = textMatcher(args[0], false);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher);
},
};

Expand All @@ -428,7 +428,8 @@ const textMatchesEngine: SelectorEngine = {
if (args.length === 0 || typeof args[0] !== 'string' || args.length > 2 || (args.length === 2 && typeof args[1] !== 'string'))
throw new Error(`"text-matches" engine expects a regexp body and optional regexp flags`);
const re = new RegExp(args[0], args.length === 2 ? args[1] : undefined);
return elementMatchesText(element, context, s => re.test(s));
const matcher = (s: string) => re.test(s);
return elementMatchesText(evaluator as SelectorEvaluatorImpl, element, matcher);
},
};

Expand All @@ -439,7 +440,7 @@ const hasTextEngine: SelectorEngine = {
if (shouldSkipForTextMatching(element))
return false;
const matcher = textMatcher(args[0], true);
return matcher(element.textContent || '');
return matcher(elementText(evaluator as SelectorEvaluatorImpl, element));
},
};

Expand All @@ -453,26 +454,45 @@ function textMatcher(text: string, substring: boolean): (s: string) => boolean {
};
}

function shouldSkipForTextMatching(element: Element) {
function shouldSkipForTextMatching(element: Element | ShadowRoot) {
return element.nodeName === 'SCRIPT' || element.nodeName === 'STYLE' || document.head && document.head.contains(element);
}

function elementMatchesText(element: Element, context: QueryContext, matcher: (s: string) => boolean) {
/**
 * Computes the combined text of `root`, memoized in `evaluator._cacheText`.
 *
 * - Nodes rejected by `shouldSkipForTextMatching` contribute an empty string.
 * - For `<input type=submit>` and `<input type=button>` the text is the input's `value`.
 * - Otherwise the text is the concatenation, in document order, of the text of
 *   child text nodes and the (recursively computed) text of child elements,
 *   followed by the text of the element's shadow root, if there is one.
 *
 * @param evaluator Evaluator that owns the text cache.
 * @param root Element or shadow root to compute the text for.
 * @returns The combined text; an empty string when there is none.
 */
function elementText(evaluator: SelectorEvaluatorImpl, root: Element | ShadowRoot): string {
  const cached = evaluator._cacheText.get(root);
  if (cached !== undefined)
    return cached;

  const pieces: string[] = [];
  if (!shouldSkipForTextMatching(root)) {
    if ((root instanceof HTMLInputElement) && (root.type === 'submit' || root.type === 'button')) {
      pieces.push(root.value);
    } else {
      let node = root.firstChild;
      while (node) {
        if (node.nodeType === Node.ELEMENT_NODE)
          pieces.push(elementText(evaluator, node as Element));
        else if (node.nodeType === Node.TEXT_NODE)
          pieces.push(node.nodeValue || '');
        node = node.nextSibling;
      }
      // Shadow content is appended after the light DOM children.
      const shadow = (root as Element).shadowRoot;
      if (shadow)
        pieces.push(elementText(evaluator, shadow));
    }
  }

  const text = pieces.join('');
  evaluator._cacheText.set(root, text);
  return text;
}

export function elementMatchesText(evaluator: SelectorEvaluatorImpl, element: Element, matcher: (s: string) => boolean): boolean {
if (shouldSkipForTextMatching(element))
return false;
if ((element instanceof HTMLInputElement) && (element.type === 'submit' || element.type === 'button') && matcher(element.value))
return true;
let lastText = '';
if (!matcher(elementText(evaluator, element)))
return false;
for (let child = element.firstChild; child; child = child.nextSibling) {
if (child.nodeType === 3 /* Node.TEXT_NODE */) {
lastText += child.nodeValue;
} else {
if (lastText && matcher(lastText))
return true;
lastText = '';
}
if (child.nodeType === Node.ELEMENT_NODE && matcher(elementText(evaluator, child as Element)))
return false;
}
return !!lastText && matcher(lastText);
if (element.shadowRoot && matcher(elementText(evaluator, element.shadowRoot)))
return false;
return true;
}

function boxRightOf(box1: DOMRect, box2: DOMRect): number | undefined {
Expand Down

0 comments on commit 0cbb2c1

Please sign in to comment.