diff --git a/client/shared/src/codeintel/legacy-extensions/language-specs/languages.ts b/client/shared/src/codeintel/legacy-extensions/language-specs/languages.ts index db06e5ef5e0a..93cf1b938ba6 100644 --- a/client/shared/src/codeintel/legacy-extensions/language-specs/languages.ts +++ b/client/shared/src/codeintel/legacy-extensions/language-specs/languages.ts @@ -377,6 +377,8 @@ const zigSpec: LanguageSpec = { * * The set of languages come from https://madnight.github.io/githut/#/pull_requests/2018/4 (with additions) * The language names come from https://code.visualstudio.com/docs/languages/identifiers#_known-language-identifiers. + * + * @deprecated See FIXME(id: language-detection) */ export const languageSpecs: LanguageSpec[] = [ apexSpec, @@ -425,6 +427,8 @@ export const languageSpecs: LanguageSpec[] = [ /** * Returns the language spec with the given language identifier. If no language * matches is configured with the given identifier an error is thrown. + * + * @deprecated See FIXME(id: language-detection) */ export function findLanguageSpec(languageID: string): LanguageSpec | undefined { return languageSpecs.find(spec => spec.languageID === languageID || spec.additionalLanguages?.includes(languageID)) diff --git a/client/shared/src/languages.ts b/client/shared/src/languages.ts index c9aee656b646..b40d9f8e585f 100644 --- a/client/shared/src/languages.ts +++ b/client/shared/src/languages.ts @@ -3,9 +3,22 @@ import { basename } from 'path' /** The LSP mode used for plain text files and all other unrecognized files. */ const PLAINTEXT_MODE = 'plaintext' +// FIXME(id: language-detection) +// +// For correctness, we should do language detection on the server +// and use the inferred language on the client instead of incorrectly +// doing language detection on the client using incomplete information +// like file extensions. +// +// For example, MATLAB and Objective-C both use the '.m' file extension. +// +// See https://github.com/sourcegraph/sourcegraph/issues/56376 + /** * getModeFromPath returns the LSP mode for the provided file path. If the file path does not correspond to any * known mode, 'plaintext' is returned. + * + * @deprecated See FIXME(id: language-detection). */ export function getModeFromPath(path: string): string { const fileName = basename(path) @@ -19,6 +32,8 @@ export function getModeFromPath(path: string): string { * provided file name (e.g. "dockerfile") * * Cherry picked from https://github.com/github/linguist/blob/master/lib/linguist/languages.yml + * + * @deprecated See FIXME(id: language-detection) */ function getModeFromExactFilename(fileName: string): string | undefined { switch (fileName.toLowerCase()) { @@ -43,6 +58,8 @@ function getModeFromExactFilename(fileName: string): string | undefined { * * Cherry picked from https://github.com/isagalaev/highlight.js/tree/master/src/languages * and https://github.com/github/linguist/blob/master/lib/linguist/languages.yml. + * + * @deprecated See FIXME(id: language-detection) */ function getModeFromExtension(extension: string): string | undefined { switch (extension.toLowerCase()) { diff --git a/client/web/src/codeintel/ReferencesPanel.tsx b/client/web/src/codeintel/ReferencesPanel.tsx index 19cc7bb8f820..e2b8343e7386 100644 --- a/client/web/src/codeintel/ReferencesPanel.tsx +++ b/client/web/src/codeintel/ReferencesPanel.tsx @@ -9,7 +9,6 @@ import { type Observable, of } from 'rxjs' import { map } from 'rxjs/operators' import { CodeExcerpt } from '@sourcegraph/branded' -import type { HoveredToken } from '@sourcegraph/codeintellify' import { type ErrorLike, logger, pluralize } from '@sourcegraph/common' import { Position } from '@sourcegraph/extension-api-classes' import type { FetchFileParameters } from '@sourcegraph/shared/src/backend/file' @@ -56,13 +55,12 @@ import type { CodeIntelligenceProps } from '.' import { type Location, LocationsGroup, type LocationsGroupedByRepo, type LocationsGroupedByFile } from './location' import { newSettingsGetter } from './settings' import { SideBlob, type SideBlobProps } from './SideBlob' -import { findSearchToken } from './token' +import { findSearchToken, type ZeroBasedPosition } from './token' import { useRepoAndBlob } from './useRepoAndBlob' -import { isDefined } from './util/helpers' import styles from './ReferencesPanel.module.scss' -type Token = HoveredToken & RepoSpec & RevisionSpec & FileSpec & ResolvedRevisionSpec +type Token = { range: State['range'] } & RepoSpec & RevisionSpec & FileSpec & ResolvedRevisionSpec interface HighlightedFileLineRangesProps { fetchHighlightedFileLineRanges: (parameters: FetchFileParameters, force?: boolean) => Observable @@ -88,8 +86,10 @@ interface State { repoName: string revision?: string filePath: string - line: number - character: number + range: { + start: OneBasedPosition + end?: OneBasedPosition + } jumpToFirst: boolean collapsedState: { references: boolean @@ -99,9 +99,14 @@ interface State { } } +interface OneBasedPosition { + line: number + character: number +} + function createStateFromLocation(location: H.Location): null | State { const { hash, pathname, search } = location - const { line, character, viewState } = parseQueryAndHash(search, hash) + const { line, character, endLine, endCharacter, viewState } = parseQueryAndHash(search, hash) const { filePath, repoName, revision } = parseBrowserRepoURL(pathname) // If we don't have enough information in the URL, we can't render the panel @@ -129,7 +134,11 @@ function createStateFromLocation(location: H.Location): null | State { collapsedState.definitions = true } - return { repoName, revision, filePath, line, character, jumpToFirst, collapsedState } + const range = { + start: { line, character }, + end: endLine && endCharacter ? { line: endLine, character: endCharacter } : undefined, + } + return { repoName, revision, filePath, range, jumpToFirst, collapsedState } } export const ReferencesPanel: React.FunctionComponent> = props => { @@ -146,8 +155,7 @@ export const ReferencesPanel: React.FunctionComponent @@ -158,8 +166,7 @@ const RevisionResolvingReferencesList: React.FunctionComponent< React.PropsWithChildren< ReferencesPanelProps & { repoName: string - line: number - character: number + range: State['range'] filePath: string revision?: string collapsedState: State['collapsedState'] @@ -171,7 +178,7 @@ const RevisionResolvingReferencesList: React.FunctionComponent< // Scroll blob UI to the selected symbol right after the reference panel is rendered // and shifted the blob UI (scroll into view is needed because there are a few cases // when ref panel may overlap with current symbol) - useEffect(() => BlobAPI.scrollIntoView({ line: props.line }), [props.line]) + useEffect(() => BlobAPI.scrollIntoView({ line: props.range.start.line }), [props.range.start.line]) if (loading && !data) { return @@ -192,8 +199,7 @@ const RevisionResolvingReferencesList: React.FunctionComponent< const token = { repoName: props.repoName, - line: props.line, - character: props.character, + range: props.range, filePath: props.filePath, revision: data.revision, commitID: data.commitID, @@ -220,29 +226,28 @@ interface ReferencesPanelPropsWithToken extends ReferencesPanelProps { collapsedState: State['collapsedState'] } +function oneBasedPositionToZeroBased(p: OneBasedPosition): ZeroBasedPosition { + return { + line: p.line - 1, + character: p.character - 1, + } +} + const SearchTokenFindingReferencesList: React.FunctionComponent< React.PropsWithChildren > = props => { - const languageId = getModeFromPath(props.token.filePath) - const spec = findLanguageSpec(languageId) - const tokenResult = - spec && - findSearchToken({ - text: props.fileContent, - position: { - line: props.token.line - 1, - character: props.token.character - 1, - }, - lineRegexes: spec.commentStyles.map(style => style.lineRegex).filter(isDefined), - blockCommentStyles: spec.commentStyles.map(style => style.block).filter(isDefined), - identCharPattern: spec.identCharPattern, - }) + const tokenRange = props.token.range + const tokenResult = findSearchToken({ + text: props.fileContent, + start: oneBasedPositionToZeroBased(tokenRange.start), + end: tokenRange.end ? oneBasedPositionToZeroBased(tokenRange.end) : undefined, + }) const shouldMixPreciseAndSearchBasedReferences: boolean = newSettingsGetter(props.settingsCascade)( 'codeIntel.mixPreciseAndSearchBasedReferences', false ) - if (!spec || !tokenResult?.searchToken) { + if (tokenResult === undefined) { return (
Could not find token. @@ -256,8 +261,13 @@ const SearchTokenFindingReferencesList: React.FunctionComponent< // change. This way we avoid showing stale results. key={shouldMixPreciseAndSearchBasedReferences.toString()} {...props} - searchToken={tokenResult?.searchToken} - spec={spec} + searchToken={tokenResult} + // The file extensions attached to the 'spec' value here are + // used for search-based code intel. However, determining + // the spec purely based on the file path is wrong. + // + // See FIXME(id: language-detection). + spec={findLanguageSpec(getModeFromPath(props.token.filePath))} /> ) } @@ -268,7 +278,7 @@ const ReferencesList: React.FunctionComponent< React.PropsWithChildren< ReferencesPanelPropsWithToken & { searchToken: string - spec: LanguageSpec + spec: LanguageSpec | undefined fileContent: string collapsedState: State['collapsedState'] } @@ -305,8 +315,8 @@ const ReferencesList: React.FunctionComponent< path: props.token.filePath, // On the backend the line/character are 0-indexed, but what we // get from hoverifier is 1-indexed. - line: props.token.line - 1, - character: props.token.character - 1, + line: props.token.range.start.line - 1, + character: props.token.range.start.character - 1, filter: debouncedFilter || null, firstReferences: 100, afterReferences: null, @@ -993,7 +1003,8 @@ const LoadingCodeIntelFailed: React.FunctionComponent= lines.length || + (args.end && (args.end.line !== args.start.line || args.end.character <= args.start.character)) + ) { return undefined } - const view = getBlobEditView() - if (view !== null) { - const occurrences = view.state.facet(syntaxHighlight).occurrences - for (const occurrence of occurrences) { - if ( - occurrence.range.isSingleLine() && - occurrence.range.contains(new Position(position.line, position.character)) - ) { - const text = line.slice(occurrence.range.start.character, occurrence.range.end.character) - return { - searchToken: text, - isString: occurrence.kind === SyntaxKind.StringLiteral, - isComment: occurrence.kind === SyntaxKind.Comment, - } - } - } + const line = lines[args.start.line] + + // In the common case, when 'Find references' is triggered through + // the blob view, the 'end' parameter will be provided. + if (args.end) { + return line.slice(args.start.character, args.end.character) } - // Scan from the current hover position to the right while the characters - // still match the identifier pattern. If no characters match the pattern - // then we default to the end of the line. + // For old URLs, have a fallback where we attempt to detect identifier + // boundaries in a best-effort fashion. let end = line.length - for (let index = position.character; index < line.length; index++) { - if (!identCharPattern.test(line[index])) { + const start = args.start.character + for (let index = start; index < line.length; index++) { + if (!DEFAULT_IDENT_CHAR_PATTERN.test(line[index])) { end = index break } } - // Scan from the current hover position to the left while the characters - // still match the identifier pattern. If no characters match the pattern - // then we default to the start of the line. - - let start = 0 - for (let index = position.character; index >= 0; index--) { - if (!identCharPattern.test(line[index])) { - start = index + 1 - break - } - } - - if (start >= end) { - return undefined - } - - return { - searchToken: line.slice(start, end), - isString: isInsideString({ line: lines[position.line], start, end }), - isComment: isInsideComment({ lines, position, start, end, lineRegexes, blockCommentStyles }), - } -} - -/** - * Determine if the identifier matched on the given line occurs within a string. - * - * @param args Parameter bag. - */ -function isInsideString({ - line, - start, - end, -}: { - /** The line containing the identifier */ - line: string - /** The offset of the identifier in the target line. */ - start: number - /** The offset and length of the identifier in the target line. */ - end: number -}): boolean { - return checkMatchIntersection([...line.matchAll(/'.*?'|".*?"/gs)], { start, end }) -} - -/** - * Determine if the identifier matched on the given line occurs within a comment - * defined by the given line comment and block comment regular expressions. - * - * @param args Parameter bag. - */ -function isInsideComment({ - lines, - position, - start, - end, - blockCommentStyles, - lineRegexes, -}: { - /** The text of the current document split into lines. */ - lines: string[] - /** The current hover position. */ - position: { line: number } - /** The offset of the identifier in the target line. */ - start: number - /** The offset and length of the identifier in the target line. */ - end: number - /** The patterns that identify line comments. */ - lineRegexes: RegExp[] - /** The patterns that identify block comments. */ - blockCommentStyles: BlockCommentStyle[] -}): boolean { - const line = lines[position.line] - - if ( - isInsideLineComment({ line, start, lineRegexes }) || - isInsideBlockComment({ lines, position, start, end, blockCommentStyles }) - ) { - const searchToken = lines[position.line].slice(start, end) - - const blessedPatterns = [ - // looks like a function call - new RegExp(`${searchToken}\\(`), - // looks like a field projection - new RegExp(`\\.${searchToken}`), - ] - - return !blessedPatterns.some(pattern => pattern.test(line)) - } - - return false -} - -/** - * Determine if the identifier matched on the given line occurs within a comment - * defined by the given line comment regular expressions. - * - * @param args Parameter bag. - */ -function isInsideLineComment({ - line, - start, - lineRegexes, -}: { - /** The line containing the identifier */ - line: string - /** The index where the identifier occurs on the line. */ - start: number - /** The patterns that identify line comments. */ - lineRegexes: RegExp[] -}): boolean { - // Determine if the token occurs after a comment on the same line - return lineRegexes.some(lineRegex => { - const match = line.match(lineRegex) - if (!match) { - return false - } - - return match?.index !== undefined && match.index < start - }) -} - -/** - * How many lines of context to capture on each side of a identifier when checking - * whether or not the user is within a comment. A value of 50 will search over 101 - * lines in total. - */ -const LINES_OF_CONTEXT = 50 - -/** - * Determine if the identifier matched on the given line occurs within a comment - * defined by the given line block comment style. - * - * @param args Parameter bag. - */ -function isInsideBlockComment({ - lines, - position, - start, - end, - blockCommentStyles, -}: { - /** The text of the current document split into lines. */ - lines: string[] - /** The current hover position. */ - position: { line: number } - /** The offset of the identifier in the target line. */ - start: number - /** The offset and length of the identifier in the target line. */ - end: number - /** The patterns that identify block comments. */ - blockCommentStyles: BlockCommentStyle[] -}): boolean { - const line = lines[position.line] - const linesBefore = lines.slice(Math.max(position.line - LINES_OF_CONTEXT, 0), position.line) - const linesAfter = lines.slice(position.line + 1, position.line + LINES_OF_CONTEXT + 1) - - // Search over multiple lines of text covering our identifier - const context = flatten([linesBefore, [line], linesAfter]).join('\n') - - // Determine how many characters in the context we skip before landing on our line - const offset = linesBefore.reduce((accumulator, line) => accumulator + line.length, 0) + linesBefore.length - - // Match all commented blocks in the given block of text. We know - // the range of the target identifier in this text: if it's covered - // in a match's range then it is nested inside of a comment. - return blockCommentStyles.some(block => - checkMatchIntersection( - [...context.matchAll(new RegExp(`${block.startRegex.source}.*?${block.endRegex.source}`, 'gs'))], - { start: start + offset, end: end + offset } - ) - ) -} - -/** - * Determine if any of the matches in the given array cover the given range. - */ -function checkMatchIntersection(matches: RegExpMatchArray[], range: { start: number; end: number }): boolean { - return matches.some( - match => match.index !== undefined && match.index <= range.start && match.index + match[0].length >= range.end - ) + return line.slice(start, end) } diff --git a/client/web/src/codeintel/useCodeIntel.ts b/client/web/src/codeintel/useCodeIntel.ts index f7c35bfff7af..0c632be35b02 100644 --- a/client/web/src/codeintel/useCodeIntel.ts +++ b/client/web/src/codeintel/useCodeIntel.ts @@ -50,7 +50,7 @@ export interface UseCodeIntelParameters { searchToken: string fileContent: string - spec: LanguageSpec + spec: LanguageSpec | undefined isFork: boolean isArchived: boolean diff --git a/client/web/src/enterprise/codeintel/searchBased.ts b/client/web/src/enterprise/codeintel/searchBased.ts index 070411ac3da4..ef062eea5584 100644 --- a/client/web/src/enterprise/codeintel/searchBased.ts +++ b/client/web/src/enterprise/codeintel/searchBased.ts @@ -242,7 +242,7 @@ export function isSourcegraphDotCom(): boolean { * @param result The search result. */ export function isExternalPrivateSymbol( - spec: LanguageSpec, + spec: LanguageSpec | undefined, path: string, { fileLocal, file, symbolKind }: Result ): boolean { @@ -250,7 +250,7 @@ export function isExternalPrivateSymbol( // doesn't let us treat that way. // See https://github.com/universal-ctags/ctags/issues/1844 - if (spec.languageID === 'java' && symbolKind === 'ENUMMEMBER') { + if (spec && spec.languageID === 'java' && symbolKind === 'ENUMMEMBER') { return false } diff --git a/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts b/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts index 25ac58046499..5a94979f88a7 100644 --- a/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts +++ b/client/web/src/enterprise/codeintel/useSearchBasedCodeIntel.ts @@ -51,7 +51,7 @@ interface UseSearchBasedCodeIntelOptions { searchToken: string fileContent: string - spec: LanguageSpec + spec: LanguageSpec | undefined isFork: boolean isArchived: boolean @@ -227,7 +227,11 @@ async function searchBasedDefinitionsViaSquirrel( async function searchBasedReferencesViaSearchQueries(options: UseSearchBasedCodeIntelOptions): Promise { const { searchToken, path, repo, isFork, isArchived, commit, spec, filter } = options - const queryTerms = referencesQuery({ searchToken, path, fileExts: spec.fileExts }) + const queryTerms = referencesQuery({ + searchToken, + path, + fileExts: getFileExtensionsForSearchBasedQuery(spec, path), + }) const filterReferences = (results: Location[]): Location[] => filter ? results.filter(location => location.file.includes(filter)) : results @@ -249,10 +253,33 @@ async function searchBasedReferencesViaSearchQueries(options: UseSearchBasedCode return flatten(await Promise.all([sameRepoReferences, remoteRepoReferences])) } +function getFileExtension(filePath: string): string { + const lastDot = filePath.lastIndexOf('.') + if (lastDot === -1) { + const lastSlash = filePath.lastIndexOf('/') + if (lastSlash === -1) { + return filePath + } + return filePath.slice(lastSlash + 1) + } + return filePath.slice(lastDot + 1) +} + +function getFileExtensionsForSearchBasedQuery(spec: LanguageSpec | undefined, path: string): string[] { + if (spec !== undefined) { + return spec.fileExts + } + return [getFileExtension(path)] +} + async function searchBasedDefinitionsViaSearchQueries(options: UseSearchBasedCodeIntelOptions): Promise { const { searchToken, path, repo, isFork, fileContent, isArchived, commit, spec, filter } = options // Construct base definition query without scoping terms - const queryTerms = definitionQuery({ searchToken, path, fileExts: spec.fileExts }) + const queryTerms = definitionQuery({ + searchToken, + path, + fileExts: getFileExtensionsForSearchBasedQuery(spec, path), + }) const filterDefinitions = (results: Location[]): Location[] => { const filteredByName = filter ? results.filter(location => location.file.includes(filter)) : results return spec?.filterDefinitions @@ -326,7 +353,7 @@ async function searchAndFilterDefinitions({ queryTerms, }: { /** The LanguageSpec of the language in which we're searching */ - spec: LanguageSpec + spec: LanguageSpec | undefined /** The file we're in */ path: string /** The function used to filter definitions. */