Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autocomplete: Improve jaccard similarity retriever #2662

Merged
merged 12 commits into from
Jan 11, 2024
3 changes: 3 additions & 0 deletions lib/shared/src/experimentation/FeatureFlagProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ export enum FeatureFlag {
// Enables the bfg-mixed context retriever that will combine BFG with the default local editor
// context.
CodyAutocompleteContextBfgMixed = 'cody-autocomplete-context-bfg-mixed',
// Enables the new-jaccard-similarity context strategy that can find more than one match per
// open file and includes matches from the same file.
CodyAutocompleteContextNewJaccardSimilarity = 'cody-autocomplete-new-jaccard-similarity',
// Enable latency adjustments based on accept/reject streaks
CodyAutocompleteUserLatency = 'cody-autocomplete-user-latency',
// Dynamically decide whether to show a single line or multiple lines for completions.
Expand Down
1 change: 1 addition & 0 deletions vscode/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ This is a log of all notable changes to Cody for VS Code. [Unreleased] changes a

- Edit: Added support for user-provided context. Use "@" to include files and "@#" to include specific symbols. [pull/2574](https://github.com/sourcegraph/cody/pull/2574)
- Autocomplete: Experimental support for inline completions with Code Llama via [Ollama](https://ollama.ai/) running locally. [pull/2635](https://github.com/sourcegraph/cody/pull/2635)
- Autocomplete: Improves the jaccard similarity retriever to find better matches. [pull/2662](https://github.com/sourcegraph/cody/pull/2662)

### Fixed

Expand Down
19 changes: 16 additions & 3 deletions vscode/src/completions/context/context-strategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,17 @@ import { type ContextRetriever } from '../types'
import { type BfgRetriever } from './retrievers/bfg/bfg-retriever'
import { JaccardSimilarityRetriever } from './retrievers/jaccard-similarity/jaccard-similarity-retriever'
import { LspLightRetriever } from './retrievers/lsp-light/lsp-light-retriever'
import { JaccardSimilarityRetriever as NewJaccardSimilarityRetriever } from './retrievers/new-jaccard-similarity/jaccard-similarity-retriever'
import { SectionHistoryRetriever } from './retrievers/section-history/section-history-retriever'

export type ContextStrategy = 'lsp-light' | 'bfg' | 'jaccard-similarity' | 'bfg-mixed' | 'local-mixed' | 'none'
export type ContextStrategy =
| 'lsp-light'
| 'bfg'
| 'jaccard-similarity'
| 'new-jaccard-similarity'
| 'bfg-mixed'
| 'local-mixed'
| 'none'

export interface ContextStrategyFactory extends vscode.Disposable {
getStrategy(document: vscode.TextDocument): { name: ContextStrategy; retrievers: ContextRetriever[] }
Expand Down Expand Up @@ -46,6 +54,10 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
this.localRetriever = new JaccardSimilarityRetriever()
this.disposables.push(this.localRetriever)
break
case 'new-jaccard-similarity':
this.localRetriever = new NewJaccardSimilarityRetriever()
this.disposables.push(this.localRetriever)
break
case 'local-mixed':
this.localRetriever = new JaccardSimilarityRetriever()
// Filling the graphRetriever field with another local retriever but that's alright
Expand Down Expand Up @@ -103,8 +115,9 @@ export class DefaultContextStrategyFactory implements ContextStrategyFactory {
}
break

// The jaccard similarity strategy only uses the local retriever
case 'jaccard-similarity': {
// The jaccard similarity strategies only use the local retriever
case 'jaccard-similarity':
case 'new-jaccard-similarity': {
if (this.localRetriever) {
retrievers.push(this.localRetriever)
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
import dedent from 'dedent'
import { describe, expect, it } from 'vitest'

import { bestJaccardMatches, getWordOccurrences } from './bestJaccardMatch'

// Shared code-shaped fixture. It is fed to `getWordOccurrences` as the text to tokenize and to
// `bestJaccardMatches` as the target text that candidate windows are scored against.
// The word counts asserted in the 'works with code snippets' test depend on this exact text,
// so any change here must be mirrored in those expectations.
const targetSnippet = `
import { bestJaccardMatch, getWords } from './context'

describe('getWords', () => {
it('works with regular text', () => {
expect(getWords('foo bar baz')).toEqual(
new Map<string, number>([
['foo', 1],
['bar', 1],
['baz', 1],
])
)
expect(getWords('running rocks slipped over')).toEqual(
new Map<string, number>([
['run', 1],
['rock', 1],
['slip', 1],
])
)
})
})
`

// Passed as the last argument to every `bestJaccardMatches` call in this file.
// NOTE(review): presumably an upper bound on the number of returned matches — confirm against
// the implementation. The largest result asserted below has 12 entries, well under this value.
const MAX_MATCHES = 50

/**
 * Tests for `getWordOccurrences`.
 *
 * The expectations pin the shape of the result: a `Map` from a normalized word to the number of
 * times it occurs in the input. The suite label names the function that is actually imported and
 * exercised here (`getWordOccurrences`).
 */
describe('getWordOccurrences', () => {
    it('works with regular text', () => {
        expect(getWordOccurrences('foo bar baz')).toEqual(
            new Map<string, number>([
                ['foo', 1],
                ['bar', 1],
                ['baz', 1],
            ])
        )
        // Inflected words are expected in their reduced form ('running' -> 'run') and 'over' is
        // expected to be absent from the result (presumably filtered as a stop word).
        expect(getWordOccurrences('running rocks slipped over')).toEqual(
            new Map<string, number>([
                ['run', 1],
                ['rock', 1],
                ['slip', 1],
            ])
        )
    })

    it('works with code snippets', () => {
        // Identifiers are expected lower-cased ('bestJaccardMatch' -> 'bestjaccardmatch') and
        // repeated words are expected to accumulate their counts across the whole snippet.
        expect(getWordOccurrences(targetSnippet)).toEqual(
            new Map<string, number>([
                ['import', 1],
                ['bestjaccardmatch', 1],
                ['getword', 4],
                ['context', 1],
                ['describ', 1],
                ['work', 1],
                ['regular', 1],
                ['text', 1],
                ['expect', 2],
                ['foo', 2],
                ['bar', 2],
                ['baz', 2],
                ['toequal', 2],
                ['new', 2],
                ['map', 2],
                ['string', 2],
                ['number', 2],
                ['1', 6],
                ['run', 2],
                ['rock', 2],
                ['slip', 2],
            ])
        )
    })
})

/**
 * Tests for `bestJaccardMatches(targetText, matchText, windowSize, maxMatches)`.
 *
 * Every expectation reads a match as `{ score, content, startLine, endLine }`, where `content` is
 * a window of consecutive lines taken from `matchText` and `startLine`/`endLine` are the zero-based,
 * inclusive line indices of that window. The suite label names the function that is actually
 * imported and exercised here (`bestJaccardMatches`).
 */
describe('bestJaccardMatches', () => {
    it('should return the best match', () => {
        const matchText = dedent`
            foo
            bar
            baz
            qux
            quux
            quuz
            corge
            grault
            garply
            waldo
            fred
            plugh
            xyzzy
            thud
        `
        // Identical window: all words shared, so the score is 1.
        expect(bestJaccardMatches('foo\nbar\nbaz', matchText, 3, MAX_MATCHES)[0]).toEqual({
            score: 1,
            content: 'foo\nbar\nbaz',
            endLine: 2,
            startLine: 0,
        })
        // Two of the four window words are shared with the target.
        expect(bestJaccardMatches('bar\nquux', matchText, 4, MAX_MATCHES)[0]).toEqual({
            score: 0.5,
            content: 'bar\nbaz\nqux\nquux',
            endLine: 4,
            startLine: 1,
        })
        // The repeated 'notexist' lowers the score: 0.3 is consistent with 3 shared occurrences
        // out of 10 in the union when duplicates in the target are counted individually.
        expect(
            bestJaccardMatches(
                ['grault', 'notexist', 'garply', 'notexist', 'waldo', 'notexist', 'notexist'].join('\n'),
                matchText,
                6,
                MAX_MATCHES
            )[0]
        ).toEqual({
            score: 0.3,
            startLine: 4,
            endLine: 9,
            content: ['quux', 'quuz', 'corge', 'grault', 'garply', 'waldo'].join('\n'),
        })
    })

    it('returns more than one match', () => {
        const matchText = dedent`
            foo
            bar
            baz
            qux
            foo
            quuz
            corge
            grault
            garply
            waldo
            fred
            plugh
            xyzzy
            thud`

        const matches = bestJaccardMatches('foo\nbar\nbaz', matchText, 3, MAX_MATCHES)

        // Since we slide over the target text line-by-line, we expect matchText.lines - 2 windows
        // to be returned
        expect(matches).toHaveLength(matchText.split('\n').length - 2)
        expect(matches.map(match => match.content.split('\n'))).toEqual([
            ['foo', 'bar', 'baz'],
            ['bar', 'baz', 'qux'],
            ['baz', 'qux', 'foo'],
            ['qux', 'foo', 'quuz'],
            ['foo', 'quuz', 'corge'],
            ['quuz', 'corge', 'grault'],
            ['corge', 'grault', 'garply'],
            ['grault', 'garply', 'waldo'],
            ['garply', 'waldo', 'fred'],
            ['waldo', 'fred', 'plugh'],
            ['fred', 'plugh', 'xyzzy'],
            ['plugh', 'xyzzy', 'thud'],
        ])
    })

    it('works with code snippets', () => {
        // The candidate text holds 9 lines; with a window size of 5 the best match is expected to
        // be the first five lines.
        expect(
            bestJaccardMatches(
                targetSnippet,
                dedent`
                    describe('bestJaccardMatch', () => {
                    it('should return the best match', () => {
                    const matchText = [
                    'foo',
                    'bar',
                    'baz',
                    'qux',
                    'quux',
                    ].join('\n')
                    })
                    })
                `,
                5,
                MAX_MATCHES
            )[0]
        ).toMatchInlineSnapshot(`
            {
            "content": "describe('bestJaccardMatch', () => {
            it('should return the best match', () => {
            const matchText = [
            'foo',
            'bar',",
            "endLine": 4,
            "score": 0.08695652173913043,
            "startLine": 0,
            }
        `)
    })

    it('works for input texts that are shorter than the window size', () => {
        // A single-line text with a window size of 10 still yields one window covering line 0.
        expect(bestJaccardMatches('foo', 'foo', 10, MAX_MATCHES)[0]).toEqual({
            content: 'foo',
            endLine: 0,
            score: 1,
            startLine: 0,
        })
    })

    it('skips over windows with empty start lines', () => {
        const matches = bestJaccardMatches(
            'foo',
            dedent`
                // foo
                // unrelated 1
                // unrelated 2


                // foo
                // unrelated 3
                // unrelated 4
            `,
            3,
            MAX_MATCHES
        )

        // Both top matches are expected to start on a '// foo' line, not on one of the blank
        // lines that precede the second group.
        expect(matches[0].content).toBe('// foo\n// unrelated 1\n// unrelated 2')
        expect(matches[1].content).toBe('// foo\n// unrelated 3\n// unrelated 4')
    })

    it("does not skip over windows with empty start lines if we're at the end", () => {
        const matches = bestJaccardMatches(
            targetSnippet,
            dedent`
                // foo
                // unrelated
                // unrelated


                // foo
            `,
            3,
            MAX_MATCHES
        )

        // The final window starts on a blank line; it is still expected to be returned because
        // no later window exists that could cover the trailing '// foo' line.
        expect(matches[0].content).toBe('\n\n// foo')
        expect(matches[1].content).toBe('// foo\n// unrelated\n// unrelated')
    })
})
Loading
Loading