Skip to content

Commit

Permalink
feat: update getValueMatchSelector to return multiple matches
Browse files Browse the repository at this point in the history
Renamed to `getValueMatches` to avoid confusion
with other selector code
  • Loading branch information
aldeed committed Sep 11, 2020
1 parent 62f749c commit 97d7465
Show file tree
Hide file tree
Showing 3 changed files with 184 additions and 61 deletions.
7 changes: 3 additions & 4 deletions src/web/cues.ts
@@ -1,6 +1,6 @@
import { getAttribute } from './attribute';
import { getElementText } from './selectorEngine';
import { getValueMatchSelector, isDynamic } from './isDynamic';
import { getValueMatches, isDynamic } from './isDynamic';

const DYNAMIC_VALUE_OK_ATTRIBUTES = ['href', 'src', 'value'];

Expand Down Expand Up @@ -254,15 +254,14 @@ export const buildCuesForElement = ({
value: `[${name}="${value}"]`,
});
} else {
const { match, operator } = getValueMatchSelector(value) || {};
if (match) {
getValueMatches(value).forEach(({ match, operator }) => {
list.push({
level,
penalty,
type: 'attribute',
value: `[${name}${operator}"${match}"]`,
});
}
});
}
}
break;
Expand Down
147 changes: 91 additions & 56 deletions src/web/isDynamic.ts
Expand Up @@ -3,7 +3,7 @@ import htmlTags from 'html-tags';
// eslint-disable-next-line @typescript-eslint/no-var-requires
const englishWords = require('an-array-of-english-words/index.json');

type ValueMatchSelectorType =
type ValueMatchOperatorType =
| 'startsWith'
| 'endsWith'
| 'contains'
Expand All @@ -15,14 +15,14 @@ type ValueMatchOperator =
| '*='
| '=';

type ValueMatchSelector = {
type ValueMatch = {
match: string;
operator: ValueMatchOperator;
type: ValueMatchSelectorType;
type: ValueMatchOperatorType;
startPosition: number;
};

const matchOperators = new Map<ValueMatchSelectorType, ValueMatchOperator>([
const matchOperators = new Map<ValueMatchOperatorType, ValueMatchOperator>([
['startsWith', '^='],
['endsWith', '$='],
['contains', '*='],
Expand Down Expand Up @@ -80,10 +80,25 @@ export const getTokens = (value: string): string[] => {
return tokens.map((token) => token.toLowerCase());
};

/**
 * @summary Classifies a single, already-tokenized piece of a value as
 * dynamic or static by looking it up in the `allWords` collection.
 * @param {String} value The token to look up
 * @return {Boolean} True when the token is absent from `allWords` (and is
 * therefore treated as dynamically generated); false when it is present
 */
export const tokenIsDynamic = (value: string): boolean => !allWords.has(value);

/**
* @summary Given an attribute value, breaks it apart into pieces/words, and
* then determines how many pieces are dynamically generated.
* @param {String} value The attribute value to check
* @param {Number} [threshold=0.5] Provide a threshold override if necessary
* @return {Boolean} If two or more pieces are dynamic, or if 1 out of 2 pieces
* or 1 out of 1 piece are dynamic, returns true. Also returns `true` if
* `value` is not a string.
*/
export const isDynamic = (
value: string,
threshold = SCORE_THRESHOLD,
Expand All @@ -101,85 +116,105 @@ export const isDynamic = (
return dynamicTokens.length / tokens.length >= threshold;
};

export const getValueMatchSelector = (
/**
* @summary Given an attribute value, determines the best ways to match on only
* the pieces of it that appear to be static (regular words that don't seem
* to be dynamically generated).
*
* Examples:
*
* - For 'input-bj84jd9' it will suggest a starts-with match on `input-`
* - For 'bj84jd9-input' it will suggest an ends-with match on `-input`
* - For '25-input-bj84jd9' it will suggest a contains match on `-input-`
* - For 'bj84jd9' it will return an empty list because the whole value is dynamic
* - For '', null, or undefined it will return an empty list
* - For 'input-25-red-bj84jd9' it will suggest two matches: a starts-with
* match on `input-` and a contains match on `-red-`.
*
* @param {String|null|undefined} value The attribute value to examine
* @return {Object[]} List of possible value matches, empty if no static pieces are found
*/
export const getValueMatches = (
value: string,
): ValueMatchSelector | null => {
if (!value || typeof value !== 'string' || value.length === 0) return null;
): ValueMatch[] => {
if (!value || typeof value !== 'string' || value.length === 0) return [];

// Break the value into tokens, each of which may be words, numbers, or something else.
const tokens = getTokens(value);

let currentPosition = 0;
let currentSubstring = '';
let blockCount = 0;
let currentStaticBlock = '';
let lastTokenType: string;
let longestSubstring = '';
let longestSubstringStart = 0;
let type: ValueMatchSelectorType;

const checkLongest = (isEnd = false): void => {
if (currentSubstring.length > longestSubstring.length) {
longestSubstring = currentSubstring;
longestSubstringStart = currentPosition - currentSubstring.length;

if (longestSubstringStart === 0) {
type = 'startsWith';
} else {
const lastCharOfPreviousBlock = value[longestSubstringStart - 1];
if (SPLIT_CHARACTERS.includes(lastCharOfPreviousBlock)) {
longestSubstring = lastCharOfPreviousBlock + longestSubstring;
longestSubstringStart = longestSubstringStart - 1;
}
type = isEnd ? 'endsWith' : 'contains';
}
const staticMatches: ValueMatch[] = [];

const addMatchToList = (): void => {
const startPosition = currentPosition - currentStaticBlock.length;

// Determine what type of match this would be
let type: ValueMatchOperatorType;
if (currentStaticBlock.length === value.length) {
type = 'equals';
} else if (startPosition === 0) {
type = 'startsWith';
} else if (currentPosition === value.length) {
type = 'endsWith';
} else {
type = 'contains';
}
currentSubstring = '';

staticMatches.push({
match: currentStaticBlock,
operator: matchOperators.get(type),
type,
startPosition,
});
};

// There may be multiple dynamic tokens in a row or multiple static in a row.
// What we care about is the "token blocks", which is each group of same-type
// tokens. Thus we loop through, checking the type of each, and taking action
// only when we switch between types.
for (const token of tokens) {
const tokenType = tokenIsDynamic(token) ? 'dynamic' : 'static';

if (blockCount === 0 || tokenType !== lastTokenType) {
blockCount += 1;
// Whenever we finish a static block, add it to the list.
if (tokenType === 'dynamic' && lastTokenType === 'static') {
addMatchToList();
} else if (tokenType === 'static' && lastTokenType === 'dynamic') {
// When we start a new static block after a dynamic block, reset the current
// block string, and potentially add the previous split character to it.
const lastCharOfPreviousBlock = value[currentPosition - 1];
if (SPLIT_CHARACTERS.includes(lastCharOfPreviousBlock)) {
currentStaticBlock = lastCharOfPreviousBlock;
} else {
currentStaticBlock = '';
}
}

if (tokenType === 'dynamic' && lastTokenType === 'static') {
checkLongest();
// Add only static tokens to the end of the current static block string.
// We are re-combining static tokens that were pieced apart by `getTokens`.
// We could just append the token string itself, but it has been lowercased.
if (tokenType === 'static') {
currentStaticBlock += value.substr(currentPosition, token.length);
}

if (tokenType === 'static') currentSubstring += value.substr(currentPosition, token.length);
// Keep track of where we are in the original value string
currentPosition += token.length;

// Add back in the split-by character
const nextCharacter = value[currentPosition];
if (SPLIT_CHARACTERS.includes(nextCharacter)) {
if (tokenType === 'static') currentSubstring += nextCharacter;
if (tokenType === 'static') currentStaticBlock += nextCharacter;
currentPosition += 1;
}

lastTokenType = tokenType;
}

if (blockCount === 1) {
// Entire string was dynamic, so we can't match on any part of it
if (lastTokenType === 'dynamic') return null;

// Entire string was static, so we can match the whole thing
longestSubstring = value;
longestSubstringStart = 0;
type = 'equals';
} else if (lastTokenType === 'static') {
// Do final check for longest if last token type was static
checkLongest(true);
}

const selectorInfo = {
match: longestSubstring,
operator: matchOperators.get(type),
type,
startPosition: longestSubstringStart,
};
// If the value ends with a static block, add that final block to the list
if (lastTokenType === 'static') addMatchToList();

console.debug('selector info for "%s": %j', value, selectorInfo);
console.debug('value matches for "%s": %j', value, staticMatches);

return selectorInfo;
return staticMatches;
};
91 changes: 90 additions & 1 deletion test/web/isDynamic.test.ts
@@ -1,4 +1,4 @@
import { getTokens, isDynamic } from '../../src/web/isDynamic';
import { getValueMatches, getTokens, isDynamic } from '../../src/web/isDynamic';

describe('getTokens', () => {
it('splits space, dash, underscore, colon', () => {
Expand Down Expand Up @@ -80,3 +80,92 @@ describe('isDynamic', () => {
expect(isDynamic(example)).toBe(false);
});
});

// Exercises getValueMatches with one input per match type (equals,
// startsWith, endsWith, contains), plus an all-dynamic input and an input
// that is expected to yield several matches.
describe('getValueMatches', () => {
// A value with no static piece is expected to yield an empty list.
test('all dynamic', () => {
const matches = getValueMatches('bj84jd9');
expect(matches.length).toBe(0);
});

// A fully static value is expected to yield a single `equals` match that
// covers the whole string, with its original casing preserved.
test('all static', () => {
const matches = getValueMatches('firstName');
expect(matches).toMatchInlineSnapshot(`
Array [
Object {
"match": "firstName",
"operator": "=",
"startPosition": 0,
"type": "equals",
},
]
`);
});

// Static prefix followed by a dynamic piece: the trailing separator is
// expected to be kept in the match (`input-`).
test('starts with static', () => {
const matches = getValueMatches('input-bj84jd9');
expect(matches).toMatchInlineSnapshot(`
Array [
Object {
"match": "input-",
"operator": "^=",
"startPosition": 0,
"type": "startsWith",
},
]
`);
});

// Dynamic piece followed by a static suffix: the leading separator is
// expected to be kept in the match (`-input`), so startPosition is the
// index of the dash (7).
test('ends with static', () => {
const matches = getValueMatches('bj84jd9-input');
expect(matches).toMatchInlineSnapshot(`
Array [
Object {
"match": "-input",
"operator": "$=",
"startPosition": 7,
"type": "endsWith",
},
]
`);
});

// Static piece surrounded by non-static pieces: both separators are
// expected to be kept in the match (`-input-`).
test('contains static', () => {
const matches = getValueMatches('25-input-bj84jd9');
expect(matches).toMatchInlineSnapshot(`
Array [
Object {
"match": "-input-",
"operator": "*=",
"startPosition": 2,
"type": "contains",
},
]
`);
});

// Several static pieces separated by non-static ones: one match per static
// piece is expected, listed in order of appearance in the value.
test('multiple matches', () => {
const matches = getValueMatches('input-25-red-bj84jd9-lastName');
expect(matches).toMatchInlineSnapshot(`
Array [
Object {
"match": "input-",
"operator": "^=",
"startPosition": 0,
"type": "startsWith",
},
Object {
"match": "-red-",
"operator": "*=",
"startPosition": 8,
"type": "contains",
},
Object {
"match": "-lastName",
"operator": "$=",
"startPosition": 20,
"type": "endsWith",
},
]
`);
});
});

0 comments on commit 97d7465

Please sign in to comment.