Skip to content
This repository was archived by the owner on Nov 25, 2021. It is now read-only.

Commit 9938078

Browse files
authored
fix: getTokenAtPosition gets the full token, even when it spans elements (#41)
1 parent 178b32e commit 9938078

File tree

2 files changed

+155
-47
lines changed

2 files changed

+155
-47
lines changed

src/token_position.test.ts

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import {
55
convertNode,
66
findElementWithOffset,
77
getCodeElementsInRange,
8+
getTextNodes,
89
getTokenAtPosition,
910
HoveredToken,
1011
locateTarget,
@@ -14,25 +15,6 @@ const { expect } = chai
1415

1516
const tabChar = String.fromCharCode(9)
1617

17-
/**
18-
* Get the all of the text nodes under a given node in the DOM tree.
19-
*
20-
* @param node is the node in which you want to get all of the text nodes from it's children
21-
*/
22-
export const getTextNodes = (node: Node): Node[] => {
23-
if (node.childNodes.length === 0 && node.TEXT_NODE === node.nodeType && node.nodeValue) {
24-
return [node]
25-
}
26-
27-
const nodes: Node[] = []
28-
29-
for (const child of Array.from(node.childNodes)) {
30-
nodes.push(...getTextNodes(child))
31-
}
32-
33-
return nodes
34-
}
35-
3618
describe('token_positions', () => {
3719
const dom = new DOM()
3820
after(dom.cleanup)
@@ -176,6 +158,25 @@ describe('token_positions', () => {
176158
}
177159
}
178160
})
161+
162+
it('gets the full token, even when it crosses multiple elements', () => {
163+
const codeView = dom.createElementFromString('<div>To<span>ken</span></div>')
164+
165+
const positions = [
166+
// Test walking to the right
167+
{ line: 1, character: 1 },
168+
// Test walking to the left
169+
{ line: 1, character: 3 },
170+
]
171+
172+
for (const position of positions) {
173+
const token = getTokenAtPosition(codeView, position, {
174+
getCodeElementFromLineNumber: code => code.children.item(0) as HTMLElement,
175+
})
176+
177+
chai.expect(token!.textContent).to.equal('Token')
178+
}
179+
})
179180
})
180181

181182
describe('locateTarget()', () => {
@@ -204,8 +205,14 @@ describe('token_positions', () => {
204205

205206
const token = found as HoveredToken
206207

207-
expect(token.line).to.equal(foundPosition.line)
208-
expect(token.character).to.equal(foundPosition.character)
208+
expect(token.line).to.equal(
209+
foundPosition.line,
210+
`expected line to be ${token.line} but got ${foundPosition.line}`
211+
)
212+
expect(token.character).to.equal(
213+
foundPosition.character,
214+
`expected character to be ${token.character} but got ${foundPosition.character}`
215+
)
209216
}
210217
}
211218
})
@@ -229,14 +236,12 @@ describe('token_positions', () => {
229236

230237
describe('getCodeElementsInRange()', () => {
231238
it('returns all code elements within a given range on a non-diff code view', () => {
232-
const codeView = document.createElement('div')
233-
codeView.innerHTML = `
239+
const codeView = dom.createElementFromString(`
234240
<div>Line 1</div>
235241
<div>Line 2</div>
236242
<div>Line 3</div>
237243
<div>Line 4</div>
238-
<div>Line 5</div>
239-
`
244+
`)
240245
const codeElements = getCodeElementsInRange({
241246
codeView,
242247
position: { line: 2, endLine: 4 },

src/token_position.ts

Lines changed: 125 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,54 @@ const VARIABLE_TOKENIZER = /(^\w+)/
145145
const ASCII_CHARACTER_TOKENIZER = /(^[\x21-\x2F|\x3A-\x40|\x5B-\x60|\x7B-\x7E])/
146146
const NONVARIABLE_TOKENIZER = /(^[^\x21-\x7E]+)/
147147

148+
const enum TokenType {
149+
/** Tokens that are alphanumeric, i.e. variable names, keywords */
150+
Alphanumeric,
151+
/** Tokens that are ASCII characters but aren't part of identifiers (e.g. {, }, [, ], |, ;, etc.) */
152+
ASCII,
153+
/** Every token we encounter that doesn't fall into the other two TokenTypes */
154+
Other,
155+
}
156+
157+
/**
158+
* Get the type of token we are looking at.
159+
*
160+
* @param node The node containing the token.
161+
*/
162+
function getTokenType(node: Node): TokenType {
163+
const text = unescape(node.textContent || '')
164+
if (text.length === 0) {
165+
return TokenType.Other
166+
}
167+
const variableMatch = text.match(VARIABLE_TOKENIZER)
168+
if (variableMatch) {
169+
return TokenType.Alphanumeric
170+
}
171+
const asciiMatch = text.match(ASCII_CHARACTER_TOKENIZER)
172+
if (asciiMatch) {
173+
return TokenType.ASCII
174+
}
175+
return TokenType.Other
176+
}
177+
178+
/**
179+
* Checks to see if the TokenType of node is the same as the provided token type.
180+
*
181+
* When tokenizing the DOM, alphanumeric characters are grouped because they are identifiers.
182+
*
183+
* We also group whitespace just in case. See `consumeNextToken` comments for more information.
184+
* This is a helper function for checking that the node has the same token type and that we care
185+
* about grouping that type of token together.
186+
*/
187+
function isSameTokenType(tokenType: TokenType, node: Node): boolean {
188+
// We don't care about grouping things like :=, ===, etc
189+
if (tokenType === TokenType.ASCII) {
190+
return false
191+
}
192+
193+
return tokenType === getTokenType(node)
194+
}
195+
148196
/**
149197
* consumeNextToken parses the text content of a text node and returns the next "distinct"
150198
* code token. It handles edge case #1 from convertNode(). The tokenization scheme is
@@ -177,6 +225,25 @@ function consumeNextToken(txt: string): string {
177225
return txt[0]
178226
}
179227

228+
/**
229+
* Get all of the text nodes under a given node in the DOM tree.
230+
*
231+
* @param node is the node from whose children you want to get all of the text nodes
232+
*/
233+
export const getTextNodes = (node: Node): Node[] => {
234+
if (node.childNodes.length === 0 && node.TEXT_NODE === node.nodeType && node.nodeValue) {
235+
return [node]
236+
}
237+
238+
const nodes: Node[] = []
239+
240+
for (const child of node.childNodes) {
241+
nodes.push(...getTextNodes(child))
242+
}
243+
244+
return nodes
245+
}
246+
180247
/**
181248
* Returns the <span> (descendent of a <td> containing code) which contains text beginning
182249
* at the specified character offset (1-indexed).
@@ -189,30 +256,62 @@ export function findElementWithOffset(codeElement: HTMLElement, offset: number):
189256
// Without being converted first, finding the position is inaccurate
190257
convertCodeElementIdempotent(codeElement)
191258

192-
let currOffset = 0
193-
const walkNode = (currNode: HTMLElement): HTMLElement | undefined => {
194-
const numChildNodes = currNode.childNodes.length
195-
for (let i = 0; i < numChildNodes; ++i) {
196-
const child = currNode.childNodes[i]
197-
switch (child.nodeType) {
198-
case Node.TEXT_NODE:
199-
if (currOffset < offset && currOffset + child.textContent!.length >= offset) {
200-
return currNode
201-
}
202-
currOffset += child.textContent!.length
203-
continue
204-
205-
case Node.ELEMENT_NODE:
206-
const found = walkNode(child as HTMLElement)
207-
if (found) {
208-
return found
209-
}
210-
continue
211-
}
259+
const textNodes = getTextNodes(codeElement)
260+
261+
// How far forward we have looked so far. Starting at one because codeintellify treats positions as being 1-indexed.
262+
let offsetStep = 1
263+
let nodeIndex = 0
264+
265+
// Find the text node that is at the given offset.
266+
let targetNode: Node | undefined
267+
for (const [i, node] of textNodes.entries()) {
268+
const text = node.textContent || ''
269+
if (offsetStep <= offset && offsetStep + text.length > offset) {
270+
targetNode = node
271+
nodeIndex = i
272+
break
212273
}
274+
275+
offsetStep += text.length
276+
}
277+
278+
if (!targetNode) {
213279
return undefined
214280
}
215-
return walkNode(codeElement)
281+
282+
const tokenType = getTokenType(targetNode)
283+
284+
/**
285+
* Walk forwards or backwards to find the edge of the actual token, not the DOM element.
286+
* This is needed because tokens can span different elements. In diffs, tokens can be colored
287+
* differently depending on whether just part of the token changed.
288+
*
289+
* In other words, it's not unexpected to find a token that looks like: My<span>Token</span>.
290+
* Without doing this, just "My" or "Token" will be highlighted depending on where you hover.
291+
*
292+
* @param idx the index to start at
293+
* @param delta the direction we are walking
294+
*/
295+
const findTokenEdgeIndex = (idx: number, delta: -1 | 1): number => {
296+
let at = idx
297+
298+
while (textNodes[at + delta] && isSameTokenType(tokenType, textNodes[at + delta])) {
299+
at += delta
300+
}
301+
302+
return at
303+
}
304+
305+
const startNode = textNodes[findTokenEdgeIndex(nodeIndex, -1)]
306+
const endNode = textNodes[findTokenEdgeIndex(nodeIndex, 1)]
307+
308+
// Create a range spanning from the beginning of the token to its end.
309+
const tokenRange = document.createRange()
310+
tokenRange.setStartBefore(startNode)
311+
tokenRange.setEndAfter(endNode)
312+
313+
// Return the common ancestor as the full token.
314+
return tokenRange.commonAncestorContainer as HTMLElement
216315
}
217316

218317
/**
@@ -354,7 +453,10 @@ export const getCodeElementsInRange = ({
354453
export const getTokenAtPosition = (
355454
codeView: HTMLElement,
356455
{ line, character }: Position,
357-
{ getCodeElementFromLineNumber, isFirstCharacterDiffIndicator }: DOMFunctions,
456+
{
457+
getCodeElementFromLineNumber,
458+
isFirstCharacterDiffIndicator,
459+
}: Pick<DOMFunctions, 'getCodeElementFromLineNumber' | 'isFirstCharacterDiffIndicator'>,
358460
part?: DiffPart
359461
): HTMLElement | undefined => {
360462
const codeElement = getCodeElementFromLineNumber(codeView, line, part)
@@ -365,5 +467,6 @@ export const getTokenAtPosition = (
365467
if (isFirstCharacterDiffIndicator && isFirstCharacterDiffIndicator(codeElement)) {
366468
character++
367469
}
470+
368471
return findElementWithOffset(codeElement, character)
369472
}

0 commit comments

Comments
 (0)