diff --git a/src/token_position.test.ts b/src/token_position.test.ts
index 01d8aca5..976273c9 100644
--- a/src/token_position.test.ts
+++ b/src/token_position.test.ts
@@ -5,6 +5,7 @@ import {
convertNode,
findElementWithOffset,
getCodeElementsInRange,
+ getTextNodes,
getTokenAtPosition,
HoveredToken,
locateTarget,
@@ -14,25 +15,6 @@ const { expect } = chai
const tabChar = String.fromCharCode(9)
-/**
- * Get the all of the text nodes under a given node in the DOM tree.
- *
- * @param node is the node in which you want to get all of the text nodes from it's children
- */
-export const getTextNodes = (node: Node): Node[] => {
- if (node.childNodes.length === 0 && node.TEXT_NODE === node.nodeType && node.nodeValue) {
- return [node]
- }
-
- const nodes: Node[] = []
-
- for (const child of Array.from(node.childNodes)) {
- nodes.push(...getTextNodes(child))
- }
-
- return nodes
-}
-
describe('token_positions', () => {
const dom = new DOM()
after(dom.cleanup)
@@ -176,6 +158,25 @@ describe('token_positions', () => {
}
}
})
+
+ it('gets the full token, even when it crosses multiple elements', () => {
+ const codeView = dom.createElementFromString('
Token
')
+
+ const positions = [
+ // Test walking to the right
+ { line: 1, character: 1 },
+ // Test walking to the left
+ { line: 1, character: 3 },
+ ]
+
+ for (const position of positions) {
+ const token = getTokenAtPosition(codeView, position, {
+ getCodeElementFromLineNumber: code => code.children.item(0) as HTMLElement,
+ })
+
+ chai.expect(token!.textContent).to.equal('Token')
+ }
+ })
})
describe('locateTarget()', () => {
@@ -204,8 +205,14 @@ describe('token_positions', () => {
const token = found as HoveredToken
- expect(token.line).to.equal(foundPosition.line)
- expect(token.character).to.equal(foundPosition.character)
+ expect(token.line).to.equal(
+ foundPosition.line,
+ `expected line to be ${token.line} but got ${foundPosition.line}`
+ )
+ expect(token.character).to.equal(
+ foundPosition.character,
+ `expected character to be ${token.character} but got ${foundPosition.character}`
+ )
}
}
})
@@ -229,14 +236,12 @@ describe('token_positions', () => {
describe('getCodeElementsInRange()', () => {
it('returns all code elements within a given range on a non-diff code view', () => {
- const codeView = document.createElement('div')
- codeView.innerHTML = `
+ const codeView = dom.createElementFromString(`
Line 1
Line 2
Line 3
Line 4
- Line 5
- `
+ `)
const codeElements = getCodeElementsInRange({
codeView,
position: { line: 2, endLine: 4 },
diff --git a/src/token_position.ts b/src/token_position.ts
index 2e90fd14..d37dddd2 100644
--- a/src/token_position.ts
+++ b/src/token_position.ts
@@ -145,6 +145,54 @@ const VARIABLE_TOKENIZER = /(^\w+)/
const ASCII_CHARACTER_TOKENIZER = /(^[\x21-\x2F|\x3A-\x40|\x5B-\x60|\x7B-\x7E])/
const NONVARIABLE_TOKENIZER = /(^[^\x21-\x7E]+)/
+const enum TokenType {
+ /** Tokens that are alphanumeric, i.e. variable names, keywords */
+ Alphanumeric,
+ /** Tokens that are ascii characters but aren't in identies (i.e. {, }, [, ], |, ;, etc) */
+ ASCII,
+ /** Every token we encounter that doesn't fall into the other two TokenTypes */
+ Other,
+}
+
+/**
+ * Get the type of token we are looking at.
+ *
+ * @param node The node containing the token.
+ */
+function getTokenType(node: Node): TokenType {
+ const text = unescape(node.textContent || '')
+ if (text.length === 0) {
+ return TokenType.Other
+ }
+ const variableMatch = text.match(VARIABLE_TOKENIZER)
+ if (variableMatch) {
+ return TokenType.Alphanumeric
+ }
+ const asciiMatch = text.match(ASCII_CHARACTER_TOKENIZER)
+ if (asciiMatch) {
+ return TokenType.ASCII
+ }
+ return TokenType.Other
+}
+
+/**
+ * Checks to see if the TokenType of node is the same as the provided token type.
+ *
+ * When tokenizing the DOM, alphanumeric characters are grouped because they are identities.
+ *
+ * We also group whitespace just in case. See `consumeNextToken` comments for more information.
+ * This is a helper function for making sure the node is the same type of a token and if we care
+ * about grouping the type of token together.
+ */
+function isSameTokenType(tokenType: TokenType, node: Node): boolean {
+ // We don't care about grouping things like :=, ===, etc
+ if (tokenType === TokenType.ASCII) {
+ return false
+ }
+
+ return tokenType === getTokenType(node)
+}
+
/**
* consumeNextToken parses the text content of a text node and returns the next "distinct"
* code token. It handles edge case #1 from convertNode(). The tokenization scheme is
@@ -177,6 +225,25 @@ function consumeNextToken(txt: string): string {
return txt[0]
}
+/**
+ * Get the all of the text nodes under a given node in the DOM tree.
+ *
+ * @param node is the node in which you want to get all of the text nodes from it's children
+ */
+export const getTextNodes = (node: Node): Node[] => {
+ if (node.childNodes.length === 0 && node.TEXT_NODE === node.nodeType && node.nodeValue) {
+ return [node]
+ }
+
+ const nodes: Node[] = []
+
+ for (const child of node.childNodes) {
+ nodes.push(...getTextNodes(child))
+ }
+
+ return nodes
+}
+
/**
* Returns the (descendent of a containing code) which contains text beginning
* at the specified character offset (1-indexed).
@@ -189,30 +256,62 @@ export function findElementWithOffset(codeElement: HTMLElement, offset: number):
// Without being converted first, finding the position is inaccurate
convertCodeElementIdempotent(codeElement)
- let currOffset = 0
- const walkNode = (currNode: HTMLElement): HTMLElement | undefined => {
- const numChildNodes = currNode.childNodes.length
- for (let i = 0; i < numChildNodes; ++i) {
- const child = currNode.childNodes[i]
- switch (child.nodeType) {
- case Node.TEXT_NODE:
- if (currOffset < offset && currOffset + child.textContent!.length >= offset) {
- return currNode
- }
- currOffset += child.textContent!.length
- continue
-
- case Node.ELEMENT_NODE:
- const found = walkNode(child as HTMLElement)
- if (found) {
- return found
- }
- continue
- }
+ const textNodes = getTextNodes(codeElement)
+
+ // How far forward we have looked so far. Starting at one because codeintellify treats positions as being 1-indexed.
+ let offsetStep = 1
+ let nodeIndex = 0
+
+ // Find the text node that is at the given offset.
+ let targetNode: Node | undefined
+ for (const [i, node] of textNodes.entries()) {
+ const text = node.textContent || ''
+ if (offsetStep <= offset && offsetStep + text.length > offset) {
+ targetNode = node
+ nodeIndex = i
+ break
}
+
+ offsetStep += text.length
+ }
+
+ if (!targetNode) {
return undefined
}
- return walkNode(codeElement)
+
+ const tokenType = getTokenType(targetNode)
+
+ /**
+ * Walk forwards or backwards to find the edge of the actual token, not the DOM element.
+ * This is needed because tokens can span different elements. In diffs, tokens can be colored
+ * differently based if just part of the token changed.
+ *
+ * In other words, its not unexpexted to find a token that looks like: MyToken.
+ * Without doing this, just "My" or "Token" will be highlighted depending on where you hover.
+ *
+ * @param idx the index to start at
+ * @param delta the direction we are walking
+ */
+ const findTokenEdgeIndex = (idx: number, delta: -1 | 1): number => {
+ let at = idx
+
+ while (textNodes[at + delta] && isSameTokenType(tokenType, textNodes[at + delta])) {
+ at += delta
+ }
+
+ return at
+ }
+
+ const startNode = textNodes[findTokenEdgeIndex(nodeIndex, -1)]
+ const endNode = textNodes[findTokenEdgeIndex(nodeIndex, 1)]
+
+ // Create a range spanning from the beginning of the token and the end.
+ const tokenRange = document.createRange()
+ tokenRange.setStartBefore(startNode)
+ tokenRange.setEndAfter(endNode)
+
+ // Return the common ancestor as the full token.
+ return tokenRange.commonAncestorContainer as HTMLElement
}
/**
@@ -354,7 +453,10 @@ export const getCodeElementsInRange = ({
export const getTokenAtPosition = (
codeView: HTMLElement,
{ line, character }: Position,
- { getCodeElementFromLineNumber, isFirstCharacterDiffIndicator }: DOMFunctions,
+ {
+ getCodeElementFromLineNumber,
+ isFirstCharacterDiffIndicator,
+ }: Pick,
part?: DiffPart
): HTMLElement | undefined => {
const codeElement = getCodeElementFromLineNumber(codeView, line, part)
@@ -365,5 +467,6 @@ export const getTokenAtPosition = (
if (isFirstCharacterDiffIndicator && isFirstCharacterDiffIndicator(codeElement)) {
character++
}
+
return findElementWithOffset(codeElement, character)
}
|