Skip to content
This repository was archived by the owner on Nov 25, 2021. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 30 additions & 25 deletions src/token_position.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import {
convertNode,
findElementWithOffset,
getCodeElementsInRange,
getTextNodes,
getTokenAtPosition,
HoveredToken,
locateTarget,
Expand All @@ -14,25 +15,6 @@ const { expect } = chai

const tabChar = String.fromCharCode(9)

/**
 * Collects every non-empty text node found at or beneath the given node, in document order.
 *
 * A node counts as a text node here when it has no children, its `nodeType` is `TEXT_NODE`,
 * and its `nodeValue` is non-empty.
 *
 * @param node The root of the DOM subtree whose text nodes should be collected.
 * @returns The matching text nodes; an empty array if there are none.
 */
export const getTextNodes = (node: Node): Node[] => {
    const collected: Node[] = []

    const visit = (current: Node): void => {
        const isLeaf = current.childNodes.length === 0
        if (isLeaf && current.nodeType === current.TEXT_NODE && current.nodeValue) {
            collected.push(current)
            return
        }
        Array.from(current.childNodes).forEach(child => visit(child))
    }

    visit(node)
    return collected
}

describe('token_positions', () => {
const dom = new DOM()
after(dom.cleanup)
Expand Down Expand Up @@ -176,6 +158,25 @@ describe('token_positions', () => {
}
}
})

it('gets the full token, even when it crosses multiple elements', () => {
    // The token "Token" is split across a bare text node ("To") and a <span> ("ken").
    const codeView = dom.createElementFromString('<div>To<span>ken</span></div>')

    const positions = [
        // Test walking to the right
        { line: 1, character: 1 },
        // Test walking to the left
        { line: 1, character: 3 },
    ]

    for (const position of positions) {
        const token = getTokenAtPosition(codeView, position, {
            getCodeElementFromLineNumber: code => code.children.item(0) as HTMLElement,
        })

        // Fail with a descriptive message instead of a TypeError from dereferencing undefined.
        if (!token) {
            throw new Error(`expected a token at line ${position.line}, character ${position.character}`)
        }

        expect(token.textContent).to.equal('Token')
    }
})
})

describe('locateTarget()', () => {
Expand Down Expand Up @@ -204,8 +205,14 @@ describe('token_positions', () => {

const token = found as HoveredToken

expect(token.line).to.equal(foundPosition.line)
expect(token.character).to.equal(foundPosition.character)
expect(token.line).to.equal(
foundPosition.line,
`expected line to be ${token.line} but got ${foundPosition.line}`
)
expect(token.character).to.equal(
foundPosition.character,
`expected character to be ${token.character} but got ${foundPosition.character}`
)
}
}
})
Expand All @@ -229,14 +236,12 @@ describe('token_positions', () => {

describe('getCodeElementsInRange()', () => {
it('returns all code elements within a given range on a non-diff code view', () => {
const codeView = document.createElement('div')
codeView.innerHTML = `
const codeView = dom.createElementFromString(`
<div>Line 1</div>
<div>Line 2</div>
<div>Line 3</div>
<div>Line 4</div>
<div>Line 5</div>
`
`)
const codeElements = getCodeElementsInRange({
codeView,
position: { line: 2, endLine: 4 },
Expand Down
147 changes: 125 additions & 22 deletions src/token_position.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,54 @@ const VARIABLE_TOKENIZER = /(^\w+)/
const ASCII_CHARACTER_TOKENIZER = /(^[\x21-\x2F|\x3A-\x40|\x5B-\x60|\x7B-\x7E])/
const NONVARIABLE_TOKENIZER = /(^[^\x21-\x7E]+)/

/** The categories a token's text is sorted into while tokenizing code. */
const enum TokenType {
    /** Alphanumeric tokens, i.e. variable names and keywords. */
    Alphanumeric = 0,
    /** ASCII characters that aren't part of identifiers (i.e. {, }, [, ], |, ;, etc). */
    ASCII = 1,
    /** Any token that doesn't fall into the other two TokenTypes. */
    Other = 2,
}

/**
 * Get the type of token we are looking at.
 *
 * The node's text is classified exactly as it appears. `textContent` is already plain text
 * (the DOM has decoded any HTML entities), so it is deliberately not passed through
 * `unescape`: that would rewrite literal `%XX` sequences in the code and could change the
 * leading character that decides the token type.
 *
 * @param node The node containing the token.
 * @returns `TokenType.Alphanumeric` if the text starts with a word character,
 * `TokenType.ASCII` if it starts with one of the non-alphanumeric printable ASCII characters
 * matched by `ASCII_CHARACTER_TOKENIZER`, and `TokenType.Other` otherwise (including when the
 * node has no text).
 */
function getTokenType(node: Node): TokenType {
    const text = node.textContent || ''
    if (text.length === 0) {
        return TokenType.Other
    }
    // Both tokenizers are anchored at the start of the string and carry no `g` flag,
    // so `test` is stateless here.
    if (VARIABLE_TOKENIZER.test(text)) {
        return TokenType.Alphanumeric
    }
    if (ASCII_CHARACTER_TOKENIZER.test(text)) {
        return TokenType.ASCII
    }
    return TokenType.Other
}

/**
 * Reports whether `node` holds a token of the given type AND tokens of that type should be
 * grouped together.
 *
 * Alphanumeric tokens are grouped because they form identifiers. `TokenType.Other` tokens are
 * grouped with each other as well. ASCII tokens are never grouped (we don't care about
 * joining things like :=, ===, etc), so this always returns false for `TokenType.ASCII`
 * without inspecting the node.
 *
 * @param tokenType The token type to compare against.
 * @param node The node whose token type is checked.
 */
function isSameTokenType(tokenType: TokenType, node: Node): boolean {
    return tokenType !== TokenType.ASCII && getTokenType(node) === tokenType
}

/**
* consumeNextToken parses the text content of a text node and returns the next "distinct"
* code token. It handles edge case #1 from convertNode(). The tokenization scheme is
Expand Down Expand Up @@ -177,6 +225,25 @@ function consumeNextToken(txt: string): string {
return txt[0]
}

/**
 * Get all of the text nodes under a given node in the DOM tree, in document order.
 *
 * A node is reported when it has no children, its `nodeType` is `TEXT_NODE`, and its
 * `nodeValue` is non-empty.
 *
 * @param node The node whose descendant text nodes should be collected.
 * @returns The matching text nodes; an empty array if there are none.
 */
export const getTextNodes = (node: Node): Node[] => {
    // A single accumulator avoids allocating and spreading an intermediate array per level.
    const textNodes: Node[] = []

    const collect = (current: Node): void => {
        if (current.childNodes.length === 0) {
            if (current.nodeType === current.TEXT_NODE && current.nodeValue) {
                textNodes.push(current)
            }
            return
        }

        // Array.from only needs an array-like, so this does not depend on NodeList being iterable.
        for (const child of Array.from(current.childNodes)) {
            collect(child)
        }
    }

    collect(node)
    return textNodes
}

/**
* Returns the <span> (descendent of a <td> containing code) which contains text beginning
* at the specified character offset (1-indexed).
Expand All @@ -189,30 +256,62 @@ export function findElementWithOffset(codeElement: HTMLElement, offset: number):
// Without being converted first, finding the position is inaccurate
convertCodeElementIdempotent(codeElement)

let currOffset = 0
const walkNode = (currNode: HTMLElement): HTMLElement | undefined => {
const numChildNodes = currNode.childNodes.length
for (let i = 0; i < numChildNodes; ++i) {
const child = currNode.childNodes[i]
switch (child.nodeType) {
case Node.TEXT_NODE:
if (currOffset < offset && currOffset + child.textContent!.length >= offset) {
return currNode
}
currOffset += child.textContent!.length
continue

case Node.ELEMENT_NODE:
const found = walkNode(child as HTMLElement)
if (found) {
return found
}
continue
}
const textNodes = getTextNodes(codeElement)

// How far forward we have looked so far. Starting at one because codeintellify treats positions as being 1-indexed.
let offsetStep = 1
let nodeIndex = 0

// Find the text node that is at the given offset.
let targetNode: Node | undefined
for (const [i, node] of textNodes.entries()) {
const text = node.textContent || ''
if (offsetStep <= offset && offsetStep + text.length > offset) {
targetNode = node
nodeIndex = i
break
}

offsetStep += text.length
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could probably reduce the scope of the offsetStep variable by factoring this out as a function.


if (!targetNode) {
return undefined
}
return walkNode(codeElement)

const tokenType = getTokenType(targetNode)

/**
* Walk forwards or backwards to find the edge of the actual token, not the DOM element.
* This is needed because tokens can span different elements. In diffs, tokens can be colored
* differently if just part of the token changed.
*
* In other words, it's not unexpected to find a token that looks like: My<span>Token</span>.
* Without doing this, just "My" or "Token" will be highlighted depending on where you hover.
*
* @param idx the index to start at
* @param delta the direction we are walking
*/
const findTokenEdgeIndex = (idx: number, delta: -1 | 1): number => {
let at = idx

while (textNodes[at + delta] && isSameTokenType(tokenType, textNodes[at + delta])) {
at += delta
}

return at
}

const startNode = textNodes[findTokenEdgeIndex(nodeIndex, -1)]
const endNode = textNodes[findTokenEdgeIndex(nodeIndex, 1)]

// Create a range spanning from the beginning of the token and the end.
const tokenRange = document.createRange()
tokenRange.setStartBefore(startNode)
tokenRange.setEndAfter(endNode)

// Return the common ancestor as the full token.
return tokenRange.commonAncestorContainer as HTMLElement
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could the common ancestor sometimes be so large (i.e. too high up in the DOM tree) that it includes too many nodes (i.e. the hovered token plus some surrounding nodes)? I'm thinking it might be possible to end up with the opposite problem of what this PR solves, but I haven't taken a close enough look at the DOM for diff views to say for sure.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If convertNode works properly, we shouldn't have to worry about this.

}

/**
Expand Down Expand Up @@ -354,7 +453,10 @@ export const getCodeElementsInRange = ({
export const getTokenAtPosition = (
codeView: HTMLElement,
{ line, character }: Position,
{ getCodeElementFromLineNumber, isFirstCharacterDiffIndicator }: DOMFunctions,
{
getCodeElementFromLineNumber,
isFirstCharacterDiffIndicator,
}: Pick<DOMFunctions, 'getCodeElementFromLineNumber' | 'isFirstCharacterDiffIndicator'>,
part?: DiffPart
): HTMLElement | undefined => {
const codeElement = getCodeElementFromLineNumber(codeView, line, part)
Expand All @@ -365,5 +467,6 @@ export const getTokenAtPosition = (
if (isFirstCharacterDiffIndicator && isFirstCharacterDiffIndicator(codeElement)) {
character++
}

return findElementWithOffset(codeElement, character)
}