Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Autocomplete: Improve jaccard similarity retriever #2662

Merged
merged 12 commits into from
Jan 11, 2024
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import dedent from 'dedent'
import { describe, expect, it } from 'vitest'

import { bestJaccardMatch, getWords } from './bestJaccardMatch'
import { bestJaccardMatches, getWordOccurrences } from './bestJaccardMatch'

const targetSnippet = `
import { bestJaccardMatch, getWords } from './context'
Expand All @@ -25,30 +26,18 @@ describe('getWords', () => {
})
`

const matchSnippet = `
describe('bestJaccardMatch', () => {
it('should return the best match', () => {
const matchText = [
'foo',
'bar',
'baz',
'qux',
'quux',
].join('\n')
})
})
`
const MAX_MATCHES = 50

describe('getWords', () => {
it('works with regular text', () => {
expect(getWords('foo bar baz')).toEqual(
expect(getWordOccurrences('foo bar baz')).toEqual(
new Map<string, number>([
['foo', 1],
['bar', 1],
['baz', 1],
])
)
expect(getWords('running rocks slipped over')).toEqual(
expect(getWordOccurrences('running rocks slipped over')).toEqual(
new Map<string, number>([
['run', 1],
['rock', 1],
Expand All @@ -58,7 +47,7 @@ describe('getWords', () => {
})

it('works with code snippets', () => {
expect(getWords(targetSnippet)).toEqual(
expect(getWordOccurrences(targetSnippet)).toEqual(
new Map<string, number>([
['import', 1],
['bestjaccardmatch', 1],
Expand Down Expand Up @@ -87,7 +76,7 @@ describe('getWords', () => {
})

describe('bestJaccardMatch', () => {
it('should return the best match in 5 line windows', () => {
it('should return the best match', () => {
const matchText = [
'foo',
'bar',
Expand All @@ -104,36 +93,142 @@ describe('bestJaccardMatch', () => {
'xyzzy',
'thud',
].join('\n')
expect(bestJaccardMatch('foo\nbar\nbaz', matchText, 3)).toEqual({
expect(bestJaccardMatches('foo\nbar\nbaz', matchText, 3, MAX_MATCHES)[0]).toEqual({
score: 1,
content: 'foo\nbar\nbaz',
endLine: 3,
startLine: 0,
})
expect(bestJaccardMatch('bar\nquux', matchText, 4)).toEqual({
expect(bestJaccardMatches('bar\nquux', matchText, 4, MAX_MATCHES)[0]).toEqual({
score: 0.5,
content: 'bar\nbaz\nqux\nquux',
endLine: 5,
startLine: 1,
})
expect(
bestJaccardMatch(
bestJaccardMatches(
['grault', 'notexist', 'garply', 'notexist', 'waldo', 'notexist', 'notexist'].join('\n'),
matchText,
6
)
6,
MAX_MATCHES
)[0]
).toEqual({
score: 0.3,
content: ['corge', 'grault', 'garply', 'waldo', 'fred', 'plugh'].join('\n'),
startLine: 4,
endLine: 10,
content: ['quux', 'quuz', 'corge', 'grault', 'garply', 'waldo'].join('\n'),
})
})

it('returns more than one match', () => {
const matchText = dedent`
foo
bar
baz
qux
foo
quuz
corge
grault
garply
waldo
fred
plugh
xyzzy
thud`

const matches = bestJaccardMatches('foo\nbar\nbaz', matchText, 3, MAX_MATCHES)

// Since we slide a 3-line window over the match text line-by-line, we expect
// matchText.lines - 2 windows to be returned
expect(matches).toHaveLength(matchText.split('\n').length - 2)
expect(matches.map(match => match.content.split('\n'))).toEqual([
['foo', 'bar', 'baz'],
['bar', 'baz', 'qux'],
['baz', 'qux', 'foo'],
['qux', 'foo', 'quuz'],
['foo', 'quuz', 'corge'],
['quuz', 'corge', 'grault'],
['corge', 'grault', 'garply'],
['grault', 'garply', 'waldo'],
['garply', 'waldo', 'fred'],
['waldo', 'fred', 'plugh'],
['fred', 'plugh', 'xyzzy'],
['plugh', 'xyzzy', 'thud'],
])
})

it('works with code snippets', () => {
expect(bestJaccardMatch(targetSnippet, matchSnippet, 5)).toMatchInlineSnapshot(`
{
"content": "describe('bestJaccardMatch', () => {
it('should return the best match', () => {
const matchText = [
'foo',
'bar',",
"score": 0.08695652173913043,
}
expect(
bestJaccardMatches(
targetSnippet,
dedent`
describe('bestJaccardMatch', () => {
it('should return the best match', () => {
const matchText = [
'foo',
'bar',
'baz',
'qux',
'quux',
].join('\n')
})
})
`,
5,
MAX_MATCHES
)[0]
).toMatchInlineSnapshot(`
{
"content": "describe('bestJaccardMatch', () => {
it('should return the best match', () => {
const matchText = [
'foo',
'bar',",
"endLine": 5,
"score": 0.08695652173913043,
"startLine": 0,
}
`)
})

it('skips over windows with empty start lines', () => {
const matches = bestJaccardMatches(
'foo',
dedent`
// foo
// unrelated 1
// unrelated 2


// foo
// unrelated 3
// unrelated 4
`,
3,
MAX_MATCHES
)

expect(matches[0].content).toBe('// foo\n// unrelated 1\n// unrelated 2')
expect(matches[1].content).toBe('// foo\n// unrelated 3\n// unrelated 4')
})

it("does not skips over windows with empty start lines if we're at the en", () => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
it("does not skips over windows with empty start lines if we're at the en", () => {
it("does not skip over windows with empty start lines if we're at the end", () => {

const matches = bestJaccardMatches(
targetSnippet,
dedent`
// foo
// unrelated
// unrelated


// foo
`,
3,
MAX_MATCHES
)

expect(matches[0].content).toBe('\n\n// foo')
expect(matches[1].content).toBe('// foo\n// unrelated\n// unrelated')
})
})
Loading
Loading