@@ -145,6 +145,54 @@ const VARIABLE_TOKENIZER = /(^\w+)/
/**
 * Pattern for a single leading ASCII character taken from the ranges
 * \x21-\x2F, \x3A-\x40, \x5B-\x60 and \x7B-\x7E.
 *
 * NOTE(review): the `|` separators are written inside the character class, where they are
 * literal characters rather than alternation — confirm that is intended.
 */
145145const ASCII_CHARACTER_TOKENIZER = / ( ^ [ \x21 - \x2F | \x3A - \x40 | \x5B - \x60 | \x7B - \x7E ] ) /
/**
 * Pattern for a leading run of one or more characters that fall outside the range \x21-\x7E.
 */
146146const NONVARIABLE_TOKENIZER = / ( ^ [ ^ \x21 - \x7E ] + ) /
147147
/**
 * Coarse classification of a code token.
 */
const enum TokenType {
    /** Word-like tokens made of alphanumeric characters, e.g. variable names and keywords. */
    Alphanumeric = 0,
    /** ASCII characters that are not part of identifiers (e.g. {, }, [, ], |, ;). */
    ASCII = 1,
    /** Any token that is neither `Alphanumeric` nor `ASCII`. */
    Other = 2,
}
156+
/**
 * Classifies the token held by a node.
 *
 * The node's text content is passed through `unescape` and then tested against the
 * tokenizer patterns in order: `VARIABLE_TOKENIZER` first, then `ASCII_CHARACTER_TOKENIZER`.
 * Empty text, and text that matches neither pattern, is classified as `TokenType.Other`.
 *
 * NOTE(review): `unescape` is resolved from file scope that is not visible in this block —
 * confirm which implementation is in use.
 *
 * @param node The node containing the token.
 * @returns The `TokenType` of the node's text.
 */
function getTokenType(node: Node): TokenType {
    const content = unescape(node.textContent || '')
    if (content.length === 0) {
        return TokenType.Other
    }
    if (VARIABLE_TOKENIZER.test(content)) {
        return TokenType.Alphanumeric
    }
    return ASCII_CHARACTER_TOKENIZER.test(content) ? TokenType.ASCII : TokenType.Other
}
177+
/**
 * Reports whether `node` should be grouped with a token of type `tokenType`.
 *
 * Grouping only applies to non-ASCII token types: a node is grouped when its own
 * token type (as computed by `getTokenType`) equals `tokenType`. For `TokenType.ASCII`
 * the answer is always `false`, so runs such as `:=` or `===` are never merged.
 *
 * @param tokenType The token type being extended.
 * @param node The candidate neighbouring node.
 * @returns `true` when the node has the same, groupable, token type.
 */
function isSameTokenType(tokenType: TokenType, node: Node): boolean {
    // ASCII tokens are never grouped, so the node is only inspected for the other types.
    return tokenType !== TokenType.ASCII && getTokenType(node) === tokenType
}
195+
148196/**
149197 * consumeNextToken parses the text content of a text node and returns the next "distinct"
150198 * code token. It handles edge case #1 from convertNode(). The tokenization scheme is
@@ -177,6 +225,25 @@ function consumeNextToken(txt: string): string {
177225 return txt [ 0 ]
178226}
179227
/**
 * Collects every non-empty text node found at or beneath `node`.
 *
 * A node is collected when it has no children, its `nodeType` is `TEXT_NODE`, and its
 * `nodeValue` is truthy. Any other node is descended into, visiting `childNodes` in order,
 * so the result lists text nodes in the order they are encountered depth-first.
 *
 * @param node The root of the subtree to search.
 * @returns The matching text nodes; empty when there are none.
 */
export const getTextNodes = (node: Node): Node[] => {
    const collected: Node[] = []

    const visit = (current: Node): void => {
        if (current.childNodes.length === 0 && current.nodeType === current.TEXT_NODE && current.nodeValue) {
            collected.push(current)
            return
        }

        for (const child of current.childNodes) {
            visit(child)
        }
    }

    visit(node)

    return collected
}
246+
180247/**
181248 * Returns the <span> (descendent of a <td> containing code) which contains text beginning
182249 * at the specified character offset (1-indexed).
@@ -189,30 +256,62 @@ export function findElementWithOffset(codeElement: HTMLElement, offset: number):
189256 // Without being converted first, finding the position is inaccurate
190257 convertCodeElementIdempotent ( codeElement )
191258
192- let currOffset = 0
193- const walkNode = ( currNode : HTMLElement ) : HTMLElement | undefined => {
194- const numChildNodes = currNode . childNodes . length
195- for ( let i = 0 ; i < numChildNodes ; ++ i ) {
196- const child = currNode . childNodes [ i ]
197- switch ( child . nodeType ) {
198- case Node . TEXT_NODE :
199- if ( currOffset < offset && currOffset + child . textContent ! . length >= offset ) {
200- return currNode
201- }
202- currOffset += child . textContent ! . length
203- continue
204-
205- case Node . ELEMENT_NODE :
206- const found = walkNode ( child as HTMLElement )
207- if ( found ) {
208- return found
209- }
210- continue
211- }
259+ const textNodes = getTextNodes ( codeElement )
260+
261+ // How far forward we have looked so far. Starting at one because codeintellify treats positions as being 1-indexed.
262+ let offsetStep = 1
263+ let nodeIndex = 0
264+
265+ // Find the text node that is at the given offset.
266+ let targetNode : Node | undefined
267+ for ( const [ i , node ] of textNodes . entries ( ) ) {
268+ const text = node . textContent || ''
269+ if ( offsetStep <= offset && offsetStep + text . length > offset ) {
270+ targetNode = node
271+ nodeIndex = i
272+ break
212273 }
274+
275+ offsetStep += text . length
276+ }
277+
278+ if ( ! targetNode ) {
213279 return undefined
214280 }
215- return walkNode ( codeElement )
281+
282+ const tokenType = getTokenType ( targetNode )
283+
284+ /**
285+ * Walk forwards or backwards to find the edge of the actual token, not the DOM element.
286+ * This is needed because tokens can span different elements. In diffs, tokens can be colored
287+ * differently based if just part of the token changed.
288+ *
289+ * In other words, it's not unexpected to find a token that looks like: My<span>Token</span>.
290+ * Without doing this, just "My" or "Token" will be highlighted depending on where you hover.
291+ *
292+ * @param idx the index to start at
293+ * @param delta the direction we are walking
294+ */
295+ const findTokenEdgeIndex = ( idx : number , delta : - 1 | 1 ) : number => {
296+ let at = idx
297+
298+ while ( textNodes [ at + delta ] && isSameTokenType ( tokenType , textNodes [ at + delta ] ) ) {
299+ at += delta
300+ }
301+
302+ return at
303+ }
304+
305+ const startNode = textNodes [ findTokenEdgeIndex ( nodeIndex , - 1 ) ]
306+ const endNode = textNodes [ findTokenEdgeIndex ( nodeIndex , 1 ) ]
307+
308+ // Create a range spanning from the beginning of the token and the end.
309+ const tokenRange = document . createRange ( )
310+ tokenRange . setStartBefore ( startNode )
311+ tokenRange . setEndAfter ( endNode )
312+
313+ // Return the common ancestor as the full token.
314+ return tokenRange . commonAncestorContainer as HTMLElement
216315}
217316
218317/**
@@ -354,7 +453,10 @@ export const getCodeElementsInRange = ({
354453export const getTokenAtPosition = (
355454 codeView : HTMLElement ,
356455 { line, character } : Position ,
357- { getCodeElementFromLineNumber, isFirstCharacterDiffIndicator } : DOMFunctions ,
456+ {
457+ getCodeElementFromLineNumber,
458+ isFirstCharacterDiffIndicator,
459+ } : Pick < DOMFunctions , 'getCodeElementFromLineNumber' | 'isFirstCharacterDiffIndicator' > ,
358460 part ?: DiffPart
359461) : HTMLElement | undefined => {
360462 const codeElement = getCodeElementFromLineNumber ( codeView , line , part )
@@ -365,5 +467,6 @@ export const getTokenAtPosition = (
365467 if ( isFirstCharacterDiffIndicator && isFirstCharacterDiffIndicator ( codeElement ) ) {
366468 character ++
367469 }
470+
368471 return findElementWithOffset ( codeElement , character )
369472}
0 commit comments