From 86386e4a38a8357992ede4693a88e11f1d7f576b Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Fri, 24 Nov 2023 17:05:40 +0800 Subject: [PATCH 1/2] client: Minor cleanup for search-based code intel (#58331) The separation of the logic into different functions makes it clearer what the order of searches is. It also makes it clearer that for some reason, we're only using the locals information from the SCIP Document for 'Find references', and not for 'Go to definition'. Using the SCIP Document for for 'Go to definition' too could avoid a network request. (cherry-picked from e955cddec490d0cc2b5eba36be2ec4958ba06bf8) --- .../src/enterprise/codeintel/searchBased.ts | 33 ++- .../codeintel/useSearchBasedCodeIntel.ts | 262 +++++++++--------- 2 files changed, 148 insertions(+), 147 deletions(-) diff --git a/client/web/src/enterprise/codeintel/searchBased.ts b/client/web/src/enterprise/codeintel/searchBased.ts index 0e7ece16f98b..83973f1f594a 100644 --- a/client/web/src/enterprise/codeintel/searchBased.ts +++ b/client/web/src/enterprise/codeintel/searchBased.ts @@ -110,11 +110,11 @@ function makeRepositoryPattern(repo: string): string { /** The time in ms to delay between unindexed search request and the fallback indexed search request. */ const DEFAULT_UNINDEXED_SEARCH_TIMEOUT_MS = 5000 +export type RepoFilter = 'current-repo' | 'all-other-repos' + /** * Invoke the given search function by modifying the query with a term that will - * only look in the current git tree by appending a repo filter with the repo name - * and the current commit or, if `negateRepoFilter` is set, outside of current git - * tree. + * look in certain git trees (controlled by `repoFilter`). * * This is likely to timeout on large repos or organizations with monorepos if the * current commit is not an indexed commit. Instead of waiting for a timeout, we @@ -125,7 +125,6 @@ const DEFAULT_UNINDEXED_SEARCH_TIMEOUT_MS = 5000 * * @param search The search function. * @param args The arguments to the search function. - * @param negateRepoFilter Whether to look only inside or outside the given repo. */ export function searchWithFallback< P extends { @@ -136,14 +135,14 @@ export function searchWithFallback< queryTerms: string[] }, R ->(search: (args: P) => Promise, args: P, negateRepoFilter = false, getSetting: SettingsGetter): Promise { +>(search: (args: P) => Promise, args: P, repoFilter: RepoFilter, getSetting: SettingsGetter): Promise { if (getSetting('basicCodeIntel.indexOnly', false)) { - return searchIndexed(search, args, negateRepoFilter, getSetting) + return searchIndexed(search, args, repoFilter, getSetting) } return raceWithDelayOffset( - searchUnindexed(search, args, negateRepoFilter, getSetting), - () => searchIndexed(search, args, negateRepoFilter, getSetting), + searchUnindexed(search, args, repoFilter, getSetting), + () => searchIndexed(search, args, repoFilter, getSetting), getSetting('basicCodeIntel.unindexedSearchTimeout', DEFAULT_UNINDEXED_SEARCH_TIMEOUT_MS) ) } @@ -153,18 +152,16 @@ export function searchWithFallback< * * @param search The search function. * @param args The arguments to the search function. - * @param negateRepoFilter Whether to look only inside or outside the given repo. */ function searchIndexed< P extends { repo: string isFork: boolean isArchived: boolean - commit: string queryTerms: string[] }, R ->(search: (args: P) => Promise, args: P, negateRepoFilter = false, getSetting: SettingsGetter): Promise { +>(search: (args: P) => Promise, args: P, repoFilter: RepoFilter, getSetting: SettingsGetter): Promise { const { repo, isFork, isArchived, queryTerms } = args // Create a copy of the args so that concurrent calls to other @@ -175,14 +172,16 @@ function searchIndexed< // Unlike unindexed search, we can't supply a commit as that particular // commit may not be indexed. We force index and look inside/outside // the repo at _whatever_ commit happens to be indexed at the time. - queryTermsCopy.push((negateRepoFilter ? '-' : '') + `repo:${makeRepositoryPattern(repo)}`) + const isCurrentRepoSearch = repoFilter === 'current-repo' + const prefix = isCurrentRepoSearch ? '' : '-' + queryTermsCopy.push(prefix + `repo:${makeRepositoryPattern(repo)}`) queryTermsCopy.push('index:only') // If we're a fork, search in forks _for the same repo_. Otherwise, // search in forks only if it's set in the settings. This is also // symmetric for archived repositories. queryTermsCopy.push( - ...repositoryKindTerms(isFork && !negateRepoFilter, isArchived && !negateRepoFilter, getSetting) + ...repositoryKindTerms(isFork && isCurrentRepoSearch, isArchived && isCurrentRepoSearch, getSetting) ) return search({ ...args, queryTerms: queryTermsCopy }) @@ -193,7 +192,6 @@ function searchIndexed< * * @param search The search function. * @param args The arguments to the search function. - * @param negateRepoFilter Whether to look only inside or outside the given repo. */ function searchUnindexed< P extends { @@ -204,7 +202,7 @@ function searchUnindexed< queryTerms: string[] }, R ->(search: (args: P) => Promise, args: P, negateRepoFilter = false, getSetting: SettingsGetter): Promise { +>(search: (args: P) => Promise, args: P, repoFilter: RepoFilter, getSetting: SettingsGetter): Promise { const { repo, isFork, isArchived, commit, queryTerms } = args // Create a copy of the args so that concurrent calls to other @@ -212,7 +210,8 @@ function searchUnindexed< // modified. const queryTermsCopy = [...queryTerms] - if (!negateRepoFilter) { + const isCurrentRepoSearch = repoFilter === 'current-repo' + if (isCurrentRepoSearch) { // Look in this commit only queryTermsCopy.push(`repo:${makeRepositoryPattern(repo)}@${commit}`) } else { @@ -224,7 +223,7 @@ function searchUnindexed< // search in forks only if it's set in the settings. This is also // symmetric for archived repositories. queryTermsCopy.push( - ...repositoryKindTerms(isFork && !negateRepoFilter, isArchived && !negateRepoFilter, getSetting) + ...repositoryKindTerms(isFork && isCurrentRepoSearch, isArchived && isCurrentRepoSearch, getSetting) ) return search({ ...args, queryTerms: queryTermsCopy }) diff --git a/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts b/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts index 9d600bd03fd6..2e7c49b6c62c 100644 --- a/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts +++ b/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts @@ -29,6 +29,7 @@ import { type SearchResult, searchResultToResults, searchWithFallback, + type RepoFilter, } from './searchBased' import { sortByProximity } from './sort' @@ -120,149 +121,139 @@ export const useSearchBasedCodeIntel = (options: UseSearchBasedCodeIntelOptions) } } -// searchBasedReferences is 90% copy&paste from code-intel-extension's -export async function searchBasedReferences({ - repo, - isFork, - isArchived, - commit, - searchToken, - path, - position, - fileContent, - spec, - getSetting, - filter, -}: UseSearchBasedCodeIntelOptions): Promise { +function searchBasedReferencesViaSCIPLocals(options: UseSearchBasedCodeIntelOptions): Location[] | undefined { const view = getBlobEditView() - if (view !== null) { - const occurrences = view.state.facet(syntaxHighlight).occurrences - for (const occurrence of occurrences) { - if ( - occurrence.symbol?.startsWith('local ') && - occurrence.range.contains(new ScipPosition(position.line, position.character)) - ) { - return occurrences - .filter(reference => reference.symbol === occurrence.symbol) - .map(reference => ({ - repo, - file: path, - content: fileContent, - commitID: commit, - range: reference.range, - url: toPrettyBlobURL({ - filePath: path, - revision: commit, - repoName: repo, - commitID: commit, - position: { - line: reference.range.start.line + 1, - character: reference.range.start.character + 1, - }, - }), - lines: split(fileContent), - precise: false, - })) - } + if (view === null) { + return + } + const occurrences = view.state.facet(syntaxHighlight).occurrences + const { path, repo, position, fileContent: content, commit: commitID } = options + const lines = split(content) + const scipPosition = new ScipPosition(position.line, position.character) + for (const occurrence of occurrences) { + const symbol = occurrence.symbol + if (!(symbol?.startsWith('local ') && occurrence.range.contains(scipPosition))) { + continue } + return occurrences + .filter(reference => reference.symbol === symbol) + .map(reference => ({ + repo, + file: path, + content, + commitID, + range: reference.range, + url: toPrettyBlobURL({ + filePath: path, + revision: commitID, + repoName: repo, + commitID, + position: { + line: reference.range.start.line + 1, + character: reference.range.start.character + 1, + }, + }), + lines, + precise: false, + })) } - const filterReferences = (results: Location[]): Location[] => - filter ? results.filter(location => location.file.includes(filter)) : results + return +} +async function searchBasedReferencesViaSquirrel( + options: UseSearchBasedCodeIntelOptions +): Promise { + const { repo, position, path, commit, fileContent } = options + const symbol = await findSymbol({ repository: repo, path, commit, row: position.line, column: position.character }) + if (!symbol?.refs) { + return + } + // HISTORICAL NOTE: Squirrel only support find refs for locals + // (the code below uses the same 'path' value for all references, + // and is based as-is on the original code written by Chris), + // so we can delete this code once we have SCIP locals support + // for all the same languages that Squirrel does. + const lines = split(fileContent) + return symbol.refs.map(reference => ({ + repo, + file: path, + content: fileContent, + commitID: commit, + range: rangeToExtensionRange(reference), + url: toPrettyBlobURL({ + filePath: path, + revision: commit, + repoName: repo, + commitID: commit, + position: { + line: reference.row + 1, + character: reference.column + 1, + }, + }), + lines, + precise: false, + })) +} + +async function searchBasedDefinitionsViaSquirrel( + options: UseSearchBasedCodeIntelOptions +): Promise { + const { repo, commit, path, position, fileContent } = options const symbol = await findSymbol({ repository: repo, commit, path, row: position.line, column: position.character }) - if (symbol?.refs) { - return symbol.refs.map(reference => ({ + if (!symbol?.def) { + return + } + return [ + { repo, file: path, content: fileContent, commitID: commit, - range: rangeToExtensionRange(reference), + range: rangeToExtensionRange(symbol.def), url: toPrettyBlobURL({ filePath: path, revision: commit, repoName: repo, commitID: commit, position: { - line: reference.row + 1, - character: reference.column + 1, + line: symbol.def.row + 1, + character: symbol.def.column + 1, }, }), lines: split(fileContent), precise: false, - })) - } + }, + ] +} +async function searchBasedReferencesViaSearchQueries(options: UseSearchBasedCodeIntelOptions): Promise { + const { searchToken, path, repo, isFork, isArchived, commit, spec, filter } = options const queryTerms = referencesQuery({ searchToken, path, fileExts: spec.fileExts }) - const queryArguments = { - repo, - isFork, - isArchived, - commit, - queryTerms, - filterReferences, - } + const filterReferences = (results: Location[]): Location[] => + filter ? results.filter(location => location.file.includes(filter)) : results - const doSearch = (negateRepoFilter: boolean): Promise => + const doSearch = (repoFilter: RepoFilter): Promise => searchWithFallback( args => searchAndFilterReferences({ queryTerms: args.queryTerms, filterReferences }), - queryArguments, - negateRepoFilter, - getSetting + { repo, isFork, isArchived, commit, queryTerms, filterReferences }, + repoFilter, + options.getSetting ) - // Perform a search in the current git tree - const sameRepoReferences = doSearch(false) + const sameRepoReferences = doSearch('current-repo') // Perform an indexed search over all _other_ repositories. This // query is ineffective on DotCom as we do not keep repositories // in the index permanently. - const remoteRepoReferences = isSourcegraphDotCom() ? Promise.resolve([]) : doSearch(true) + const remoteRepoReferences = isSourcegraphDotCom() ? Promise.resolve([]) : doSearch('all-other-repos') - // Resolve then merge all references and sort them by proximity - // to the current text document path. - const referenceChunk = [sameRepoReferences, remoteRepoReferences] - const mergedReferences = flatten(await Promise.all(referenceChunk)) - return sortByProximity(mergedReferences, location.pathname) + return flatten(await Promise.all([sameRepoReferences, remoteRepoReferences])) } -export async function searchBasedDefinitions({ - repo, - isFork, - isArchived, - commit, - searchToken, - fileContent, - path, - position, - spec, - getSetting, - filter, -}: UseSearchBasedCodeIntelOptions): Promise { - const symbol = await findSymbol({ repository: repo, commit, path, row: position.line, column: position.character }) - if (symbol?.def) { - return [ - { - repo, - file: path, - content: fileContent, - commitID: commit, - range: rangeToExtensionRange(symbol.def), - url: toPrettyBlobURL({ - filePath: path, - revision: commit, - repoName: repo, - commitID: commit, - position: { - line: symbol.def.row + 1, - character: symbol.def.column + 1, - }, - }), - lines: split(fileContent), - precise: false, - }, - ] - } - +async function searchBasedDefinitionsViaSearchQueries(options: UseSearchBasedCodeIntelOptions): Promise { + const { searchToken, path, repo, isFork, fileContent, isArchived, commit, spec, filter } = options + // Construct base definition query without scoping terms + const queryTerms = definitionQuery({ searchToken, path, fileExts: spec.fileExts }) const filterDefinitions = (results: Location[]): Location[] => { const filteredByName = filter ? results.filter(location => location.file.includes(filter)) : results return spec?.filterDefinitions @@ -274,29 +265,15 @@ export async function searchBasedDefinitions({ : filteredByName } - // Construct base definition query without scoping terms - const queryTerms = definitionQuery({ searchToken, path, fileExts: spec.fileExts }) - const queryArguments = { - repo, - isFork, - isArchived, - commit, - path, - fileContent, - filterDefinitions, - queryTerms, - } - - const doSearch = (negateRepoFilter: boolean): Promise => + const doSearch = (repoFilter: RepoFilter): Promise => searchWithFallback( args => searchAndFilterDefinitions({ spec, path, filterDefinitions, queryTerms: args.queryTerms }), - queryArguments, - negateRepoFilter, - getSetting + { repo, isFork, isArchived, commit, path, fileContent, filterDefinitions, queryTerms }, + repoFilter, + options.getSetting ) - // Perform a search in the current git tree - const sameRepoDefinitions = doSearch(false) + const sameRepoDefinitions = doSearch('current-repo') // Return any local location definitions first const results = await sameRepoDefinitions @@ -308,7 +285,32 @@ export async function searchBasedDefinitions({ // an indexed search over all repositories. Do not do this on the DotCom // instance as we are unlikely to have indexed the relevant definition // and we'd end up jumping to what would seem like a random line of code. - return isSourcegraphDotCom() ? Promise.resolve([]) : doSearch(true) + return isSourcegraphDotCom() ? Promise.resolve([]) : doSearch('all-other-repos') +} + +// Originally based on the code from code-intel-extension +export async function searchBasedReferences(options: UseSearchBasedCodeIntelOptions): Promise { + const refsViaSCIPLocals = searchBasedReferencesViaSCIPLocals(options) + if (refsViaSCIPLocals) { + return refsViaSCIPLocals + } + + const refsViaSquirrel = await searchBasedReferencesViaSquirrel(options) + if (refsViaSquirrel) { + return refsViaSquirrel + } + + const refsViaSearchQueries = await searchBasedReferencesViaSearchQueries(options) + return sortByProximity(refsViaSearchQueries, options.path) +} + +export async function searchBasedDefinitions(options: UseSearchBasedCodeIntelOptions): Promise { + const defsViaSquirrel = await searchBasedDefinitionsViaSquirrel(options) + if (defsViaSquirrel) { + return defsViaSquirrel + } + + return searchBasedDefinitionsViaSearchQueries(options) } /** From d651f31f00f7c9e58abcf4854279ed7d52ff54dd Mon Sep 17 00:00:00 2001 From: Varun Gandhi Date: Thu, 14 Dec 2023 19:23:18 +0530 Subject: [PATCH 2/2] client: Avoid complex tokenization in ref panel code (#58954) Previously, we relied on detecting the language from file paths, then using various regexes associated with the language to identify token boundaries. However, the code mirror blob view always provides a full token range, which can be used directly, instead of attempting to recompute the token boundaries. For older URLs, we fallback to simple identifiers, which should work for the vast majority of languages and identifiers. We cannot yet remove the language detection here because the file extensions associated with the language are later used for search-based code navigation. This patch also makes the language spec optional for search-based code intel, as we do not have a solution to #56376 which would guarantee that we always have a language available. If a language is not available, search-based code intel falls back to searching other files with the same extension as a best effort guess. Locally tested for MATLAB code. The ref panel shows up correctly, unlike the error earlier. (cherry-picked from c42cad25f6e35af9c53c303279b59c59006c95b6) --- client/web/src/codeintel/ReferencesPanel.tsx | 87 +++--- client/web/src/codeintel/token.ts | 265 +++--------------- client/web/src/codeintel/useCodeIntel.ts | 2 +- .../src/enterprise/codeintel/searchBased.ts | 4 +- .../codeintel/useSearchBasedCodeIntel.ts | 35 ++- 5 files changed, 113 insertions(+), 280 deletions(-) diff --git a/client/web/src/codeintel/ReferencesPanel.tsx b/client/web/src/codeintel/ReferencesPanel.tsx index 8128c845a956..b257f9da66ea 100644 --- a/client/web/src/codeintel/ReferencesPanel.tsx +++ b/client/web/src/codeintel/ReferencesPanel.tsx @@ -9,7 +9,6 @@ import { type Observable, of } from 'rxjs' import { map } from 'rxjs/operators' import { CodeExcerpt } from '@sourcegraph/branded' -import type { HoveredToken } from '@sourcegraph/codeintellify' import { type ErrorLike, logger, pluralize } from '@sourcegraph/common' import { Position } from '@sourcegraph/extension-api-classes' import { useQuery } from '@sourcegraph/http-client' @@ -69,13 +68,12 @@ import { } from './location' import { FETCH_HIGHLIGHTED_BLOB } from './ReferencesPanelQueries' import { newSettingsGetter } from './settings' -import { findSearchToken } from './token' +import { findSearchToken, type ZeroBasedPosition } from './token' import { useRepoAndBlob } from './useRepoAndBlob' -import { isDefined } from './util/helpers' import styles from './ReferencesPanel.module.scss' -type Token = HoveredToken & RepoSpec & RevisionSpec & FileSpec & ResolvedRevisionSpec +type Token = { range: State['range'] } & RepoSpec & RevisionSpec & FileSpec & ResolvedRevisionSpec interface HighlightedFileLineRangesProps { fetchHighlightedFileLineRanges: (parameters: FetchFileParameters, force?: boolean) => Observable @@ -101,8 +99,10 @@ interface State { repoName: string revision?: string filePath: string - line: number - character: number + range: { + start: OneBasedPosition + end?: OneBasedPosition + } jumpToFirst: boolean collapsedState: { references: boolean @@ -112,9 +112,14 @@ interface State { } } +interface OneBasedPosition { + line: number + character: number +} + function createStateFromLocation(location: H.Location): null | State { const { hash, pathname, search } = location - const { line, character, viewState } = parseQueryAndHash(search, hash) + const { line, character, endLine, endCharacter, viewState } = parseQueryAndHash(search, hash) const { filePath, repoName, revision } = parseBrowserRepoURL(pathname) // If we don't have enough information in the URL, we can't render the panel @@ -142,7 +147,11 @@ function createStateFromLocation(location: H.Location): null | State { collapsedState.definitions = true } - return { repoName, revision, filePath, line, character, jumpToFirst, collapsedState } + const range = { + start: { line, character }, + end: endLine && endCharacter ? { line: endLine, character: endCharacter } : undefined, + } + return { repoName, revision, filePath, range, jumpToFirst, collapsedState } } export const ReferencesPanel: React.FunctionComponent> = props => { @@ -159,8 +168,7 @@ export const ReferencesPanel: React.FunctionComponent @@ -171,8 +179,7 @@ const RevisionResolvingReferencesList: React.FunctionComponent< React.PropsWithChildren< ReferencesPanelProps & { repoName: string - line: number - character: number + range: State['range'] filePath: string revision?: string collapsedState: State['collapsedState'] @@ -184,7 +191,7 @@ const RevisionResolvingReferencesList: React.FunctionComponent< // Scroll blob UI to the selected symbol right after the reference panel is rendered // and shifted the blob UI (scroll into view is needed because there are a few cases // when ref panel may overlap with current symbol) - useEffect(() => BlobAPI.scrollIntoView({ line: props.line }), [props.line]) + useEffect(() => BlobAPI.scrollIntoView({ line: props.range.start.line }), [props.range.start.line]) if (loading && !data) { return @@ -205,8 +212,7 @@ const RevisionResolvingReferencesList: React.FunctionComponent< const token = { repoName: props.repoName, - line: props.line, - character: props.character, + range: props.range, filePath: props.filePath, revision: data.revision, commitID: data.commitID, @@ -233,29 +239,28 @@ interface ReferencesPanelPropsWithToken extends ReferencesPanelProps { collapsedState: State['collapsedState'] } +function oneBasedPositionToZeroBased(p: OneBasedPosition): ZeroBasedPosition { + return { + line: p.line - 1, + character: p.character - 1, + } +} + const SearchTokenFindingReferencesList: React.FunctionComponent< React.PropsWithChildren > = props => { - const languageId = getModeFromPath(props.token.filePath) - const spec = findLanguageSpec(languageId) - const tokenResult = - spec && - findSearchToken({ - text: props.fileContent, - position: { - line: props.token.line - 1, - character: props.token.character - 1, - }, - lineRegexes: spec.commentStyles.map(style => style.lineRegex).filter(isDefined), - blockCommentStyles: spec.commentStyles.map(style => style.block).filter(isDefined), - identCharPattern: spec.identCharPattern, - }) + const tokenRange = props.token.range + const tokenResult = findSearchToken({ + text: props.fileContent, + start: oneBasedPositionToZeroBased(tokenRange.start), + end: tokenRange.end ? oneBasedPositionToZeroBased(tokenRange.end) : undefined, + }) const shouldMixPreciseAndSearchBasedReferences: boolean = newSettingsGetter(props.settingsCascade)( 'codeIntel.mixPreciseAndSearchBasedReferences', false ) - if (!spec || !tokenResult?.searchToken) { + if (tokenResult === undefined) { return (
Could not find token. @@ -269,12 +274,13 @@ const SearchTokenFindingReferencesList: React.FunctionComponent< // change. This way we avoid showing stale results. key={shouldMixPreciseAndSearchBasedReferences.toString()} {...props} - token={props.token} - searchToken={tokenResult?.searchToken} - spec={spec} - fileContent={props.fileContent} - isFork={props.isFork} - isArchived={props.isArchived} + searchToken={tokenResult} + // The file extensions attached to the 'spec' value here are + // used for search-based code intel. However, determining + // the spec purely based on the file path is wrong. + // + // See FIXME(id: language-detection). + spec={findLanguageSpec(getModeFromPath(props.token.filePath))} /> ) } @@ -285,7 +291,7 @@ const ReferencesList: React.FunctionComponent< React.PropsWithChildren< ReferencesPanelPropsWithToken & { searchToken: string - spec: LanguageSpec + spec: LanguageSpec | undefined fileContent: string collapsedState: State['collapsedState'] } @@ -322,8 +328,8 @@ const ReferencesList: React.FunctionComponent< path: props.token.filePath, // On the backend the line/character are 0-indexed, but what we // get from hoverifier is 1-indexed. - line: props.token.line - 1, - character: props.token.character - 1, + line: props.token.range.start.line - 1, + character: props.token.range.start.character - 1, filter: debouncedFilter || null, firstReferences: 100, afterReferences: null, @@ -1124,7 +1130,8 @@ const LoadingCodeIntelFailed: React.FunctionComponent= lines.length || + (args.end && (args.end.line !== args.start.line || args.end.character <= args.start.character)) + ) { return undefined } - const view = getBlobEditView() - if (view !== null) { - const occurrences = view.state.facet(syntaxHighlight).occurrences - for (const occurrence of occurrences) { - if ( - occurrence.range.isSingleLine() && - occurrence.range.contains(new Position(position.line, position.character)) - ) { - const text = line.slice(occurrence.range.start.character, occurrence.range.end.character) - return { - searchToken: text, - isString: occurrence.kind === SyntaxKind.StringLiteral, - isComment: occurrence.kind === SyntaxKind.Comment, - } - } - } + const line = lines[args.start.line] + + // In the common case, when 'Find references' is triggered through + // the blob view, the 'end' parameter will be provided. + if (args.end) { + return line.slice(args.start.character, args.end.character) } - // Scan from the current hover position to the right while the characters - // still match the identifier pattern. If no characters match the pattern - // then we default to the end of the line. + // For old URLs, have a fallback where we attempt to detect identifier + // boundaries in a best-effort fashion. let end = line.length - for (let index = position.character; index < line.length; index++) { - if (!identCharPattern.test(line[index])) { + const start = args.start.character + for (let index = start; index < line.length; index++) { + if (!DEFAULT_IDENT_CHAR_PATTERN.test(line[index])) { end = index break } } - // Scan from the current hover position to the left while the characters - // still match the identifier pattern. If no characters match the pattern - // then we default to the start of the line. - - let start = 0 - for (let index = position.character; index >= 0; index--) { - if (!identCharPattern.test(line[index])) { - start = index + 1 - break - } - } - - if (start >= end) { - return undefined - } - - return { - searchToken: line.slice(start, end), - isString: isInsideString({ line: lines[position.line], start, end }), - isComment: isInsideComment({ lines, position, start, end, lineRegexes, blockCommentStyles }), - } -} - -/** - * Determine if the identifier matched on the given line occurs within a string. - * - * @param args Parameter bag. - */ -function isInsideString({ - line, - start, - end, -}: { - /** The line containing the identifier */ - line: string - /** The offset of the identifier in the target line. */ - start: number - /** The offset and length of the identifier in the target line. */ - end: number -}): boolean { - return checkMatchIntersection([...line.matchAll(/'.*?'|".*?"/gs)], { start, end }) -} - -/** - * Determine if the identifier matched on the given line occurs within a comment - * defined by the given line comment and block comment regular expressions. - * - * @param args Parameter bag. - */ -function isInsideComment({ - lines, - position, - start, - end, - blockCommentStyles, - lineRegexes, -}: { - /** The text of the current document split into lines. */ - lines: string[] - /** The current hover position. */ - position: { line: number } - /** The offset of the identifier in the target line. */ - start: number - /** The offset and length of the identifier in the target line. */ - end: number - /** The patterns that identify line comments. */ - lineRegexes: RegExp[] - /** The patterns that identify block comments. */ - blockCommentStyles: BlockCommentStyle[] -}): boolean { - const line = lines[position.line] - - if ( - isInsideLineComment({ line, start, lineRegexes }) || - isInsideBlockComment({ lines, position, start, end, blockCommentStyles }) - ) { - const searchToken = lines[position.line].slice(start, end) - - const blessedPatterns = [ - // looks like a function call - new RegExp(`${searchToken}\\(`), - // looks like a field projection - new RegExp(`\\.${searchToken}`), - ] - - return !blessedPatterns.some(pattern => pattern.test(line)) - } - - return false -} - -/** - * Determine if the identifier matched on the given line occurs within a comment - * defined by the given line comment regular expressions. - * - * @param args Parameter bag. - */ -function isInsideLineComment({ - line, - start, - lineRegexes, -}: { - /** The line containing the identifier */ - line: string - /** The index where the identifier occurs on the line. */ - start: number - /** The patterns that identify line comments. */ - lineRegexes: RegExp[] -}): boolean { - // Determine if the token occurs after a comment on the same line - return lineRegexes.some(lineRegex => { - const match = line.match(lineRegex) - if (!match) { - return false - } - - return match?.index !== undefined && match.index < start - }) -} - -/** - * How many lines of context to capture on each side of a identifier when checking - * whether or not the user is within a comment. A value of 50 will search over 101 - * lines in total. - */ -const LINES_OF_CONTEXT = 50 - -/** - * Determine if the identifier matched on the given line occurs within a comment - * defined by the given line block comment style. - * - * @param args Parameter bag. - */ -function isInsideBlockComment({ - lines, - position, - start, - end, - blockCommentStyles, -}: { - /** The text of the current document split into lines. */ - lines: string[] - /** The current hover position. */ - position: { line: number } - /** The offset of the identifier in the target line. */ - start: number - /** The offset and length of the identifier in the target line. */ - end: number - /** The patterns that identify block comments. */ - blockCommentStyles: BlockCommentStyle[] -}): boolean { - const line = lines[position.line] - const linesBefore = lines.slice(Math.max(position.line - LINES_OF_CONTEXT, 0), position.line) - const linesAfter = lines.slice(position.line + 1, position.line + LINES_OF_CONTEXT + 1) - - // Search over multiple lines of text covering our identifier - const context = flatten([linesBefore, [line], linesAfter]).join('\n') - - // Determine how many characters in the context we skip before landing on our line - const offset = linesBefore.reduce((accumulator, line) => accumulator + line.length, 0) + linesBefore.length - - // Match all commented blocks in the given block of text. We know - // the range of the target identifier in this text: if it's covered - // in a match's range then it is nested inside of a comment. - return blockCommentStyles.some(block => - checkMatchIntersection( - [...context.matchAll(new RegExp(`${block.startRegex.source}.*?${block.endRegex.source}`, 'gs'))], - { start: start + offset, end: end + offset } - ) - ) -} - -/** - * Determine if any of the matches in the given array cover the given range. - */ -function checkMatchIntersection(matches: RegExpMatchArray[], range: { start: number; end: number }): boolean { - return matches.some( - match => match.index !== undefined && match.index <= range.start && match.index + match[0].length >= range.end - ) + return line.slice(start, end) } diff --git a/client/web/src/codeintel/useCodeIntel.ts b/client/web/src/codeintel/useCodeIntel.ts index 470fdae9dd41..8db96e08523c 100644 --- a/client/web/src/codeintel/useCodeIntel.ts +++ b/client/web/src/codeintel/useCodeIntel.ts @@ -50,7 +50,7 @@ export interface UseCodeIntelParameters { searchToken: string fileContent: string - spec: LanguageSpec + spec: LanguageSpec | undefined isFork: boolean isArchived: boolean diff --git a/client/web/src/enterprise/codeintel/searchBased.ts b/client/web/src/enterprise/codeintel/searchBased.ts index 83973f1f594a..ebf3b12c04ca 100644 --- a/client/web/src/enterprise/codeintel/searchBased.ts +++ b/client/web/src/enterprise/codeintel/searchBased.ts @@ -242,7 +242,7 @@ export function isSourcegraphDotCom(): boolean { * @param result The search result. */ export function isExternalPrivateSymbol( - spec: LanguageSpec, + spec: LanguageSpec | undefined, path: string, { fileLocal, file, symbolKind }: Result ): boolean { @@ -250,7 +250,7 @@ export function isExternalPrivateSymbol( // doesn't let us treat that way. // See https://github.com/universal-ctags/ctags/issues/1844 - if (spec.languageID === 'java' && symbolKind === 'ENUMMEMBER') { + if (spec && spec.languageID === 'java' && symbolKind === 'ENUMMEMBER') { return false } diff --git a/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts b/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts index 2e7c49b6c62c..49d7628b2e89 100644 --- a/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts +++ b/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts @@ -52,7 +52,7 @@ interface UseSearchBasedCodeIntelOptions { searchToken: string fileContent: string - spec: LanguageSpec + spec: LanguageSpec | undefined isFork: boolean isArchived: boolean @@ -228,7 +228,11 @@ async function searchBasedDefinitionsViaSquirrel( async function searchBasedReferencesViaSearchQueries(options: UseSearchBasedCodeIntelOptions): Promise { const { searchToken, path, repo, isFork, isArchived, commit, spec, filter } = options - const queryTerms = referencesQuery({ searchToken, path, fileExts: spec.fileExts }) + const queryTerms = referencesQuery({ + searchToken, + path, + fileExts: getFileExtensionsForSearchBasedQuery(spec, path), + }) const filterReferences = (results: Location[]): Location[] => filter ? results.filter(location => location.file.includes(filter)) : results @@ -250,10 +254,33 @@ async function searchBasedReferencesViaSearchQueries(options: UseSearchBasedCode return flatten(await Promise.all([sameRepoReferences, remoteRepoReferences])) } +function getFileExtension(filePath: string): string { + const lastDot = filePath.lastIndexOf('.') + if (lastDot === -1) { + const lastSlash = filePath.lastIndexOf('/') + if (lastSlash === -1) { + return filePath + } + return filePath.slice(lastSlash + 1) + } + return filePath.slice(lastDot + 1) +} + +function getFileExtensionsForSearchBasedQuery(spec: LanguageSpec | undefined, path: string): string[] { + if (spec !== undefined) { + return spec.fileExts + } + return [getFileExtension(path)] +} + async function searchBasedDefinitionsViaSearchQueries(options: UseSearchBasedCodeIntelOptions): Promise { const { searchToken, path, repo, isFork, fileContent, isArchived, commit, spec, filter } = options // Construct base definition query without scoping terms - const queryTerms = definitionQuery({ searchToken, path, fileExts: spec.fileExts }) + const queryTerms = definitionQuery({ + searchToken, + path, + fileExts: getFileExtensionsForSearchBasedQuery(spec, path), + }) const filterDefinitions = (results: Location[]): Location[] => { const filteredByName = filter ? results.filter(location => location.file.includes(filter)) : results return spec?.filterDefinitions @@ -328,7 +355,7 @@ async function searchAndFilterDefinitions({ queryTerms, }: { /** The LanguageSpec of the language in which we're searching */ - spec: LanguageSpec + spec: LanguageSpec | undefined /** The file we're in */ path: string /** The function used to filter definitions. */