Skip to content

Commit

Permalink
Autocomplete: Various latency related tweaks and new eager cancellation experiment (#3096)

Browse files Browse the repository at this point in the history

A few small tweaks from my learnings of looking at some traces:

1. Fixes a bug where the debounce time was increased for non-local
models
2. Sets the same debounce time for single-line and multi-line
3. Move some config evaluations off the critical path. Those are
heavily cached but they would still cause the very first completion to be
slower
4. Add a new `eager cancellation` experiment that will cancel requests
as soon as a new request is created and reduces the debounce time
significantly to try and counter the latency regression

## Test plan

- For the new experiment, I added an abort handler in the fireworks
client and ensured it was heavily hit
- For the rest, just made sure completions still work. The changes are
trivial.
  • Loading branch information
philipp-spiess authored Feb 9, 2024
1 parent 28adbab commit 703afda
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 15 deletions.
3 changes: 3 additions & 0 deletions lib/shared/src/experimentation/FeatureFlagProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ export enum FeatureFlag {
CodyAutocompleteUserLatency = 'cody-autocomplete-user-latency',
// Dynamically decide whether to show a single line or multiple lines for completions.
CodyAutocompleteDynamicMultilineCompletions = 'cody-autocomplete-dynamic-multiline-completions',
// Completion requests will be cancelled as soon as a new request comes in and the debounce time
// will be reduced to try and counter the latency impact.
CodyAutocompleteEagerCancellation = 'cody-autocomplete-eager-cancellation',
// Continue generations after a single-line completion and use the response to see the next line
// if the first completion is accepted.
CodyAutocompleteHotStreak = 'cody-autocomplete-hot-streak',
Expand Down
1 change: 1 addition & 0 deletions vscode/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This is a log of all notable changes to Cody for VS Code. [Unreleased] changes a
### Changed

- Custom Command: The `description` field is now optional and will default to use the command prompt. [pull/3025](https://github.com/sourcegraph/cody/pull/3025)
- Autocomplete: Move some work off the critical path in an attempt to further reduce latency. [pull/3096](https://github.com/sourcegraph/cody/pull/3096)

## [1.4.0]

Expand Down
2 changes: 2 additions & 0 deletions vscode/src/completions/completion-provider-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class CompletionProviderConfig {
FeatureFlag.CodyAutocompleteHotStreak,
FeatureFlag.CodyAutocompleteSingleMultilineRequest,
FeatureFlag.CodyAutocompleteFastPath,
FeatureFlag.CodyAutocompleteUserLatency,
FeatureFlag.CodyAutocompleteEagerCancellation,
] as const

private get config() {
Expand Down
22 changes: 13 additions & 9 deletions vscode/src/completions/inline-completion-item-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import * as vscode from 'vscode'
import {
ConfigFeaturesSingleton,
FeatureFlag,
featureFlagProvider,
isCodyIgnoredFile,
RateLimitError,
wrapInActiveSpan,
Expand Down Expand Up @@ -173,6 +172,10 @@ export class InlineCompletionItemProvider
}
)
)

// Warm caches for the config feature configuration to avoid the first completion call
// having to block on this.
void ConfigFeaturesSingleton.getInstance().getConfigFeatures()
}

/** Set the tracer (or unset it with `null`). */
Expand Down Expand Up @@ -222,11 +225,6 @@ export class InlineCompletionItemProvider
this.lastCompletionRequestTimestamp = start
}

// We start feature flag requests early so that we have a high chance of getting a response
// before we need it.
const userLatencyPromise = featureFlagProvider.evaluateFeatureFlag(
FeatureFlag.CodyAutocompleteUserLatency
)
const tracer = this.config.tracer ? createTracerForInvocation(this.config.tracer) : undefined

let stopLoading: (() => void) | undefined
Expand Down Expand Up @@ -304,7 +302,9 @@ export class InlineCompletionItemProvider
}

const latencyFeatureFlags: LatencyFeatureFlags = {
user: await userLatencyPromise,
user: completionProviderConfig.getPrefetchedFlag(
FeatureFlag.CodyAutocompleteUserLatency
),
}

const artificialDelay = getArtificialDelay(
Expand All @@ -315,6 +315,10 @@ export class InlineCompletionItemProvider
)

const isLocalProvider = isLocalCompletionsProvider(this.config.providerConfig.identifier)
const isEagerCancellationEnabled = completionProviderConfig.getPrefetchedFlag(
FeatureFlag.CodyAutocompleteEagerCancellation
)
const debounceInterval = isLocalProvider ? 125 : isEagerCancellationEnabled ? 10 : 75

try {
const result = await this.getInlineCompletions({
Expand All @@ -328,8 +332,8 @@ export class InlineCompletionItemProvider
requestManager: this.requestManager,
lastCandidate: this.lastCandidate,
debounceInterval: {
singleLine: isLocalProvider ? 75 : 125,
multiLine: 125,
singleLine: debounceInterval,
multiLine: debounceInterval,
},
setIsLoading,
abortSignal: abortController.signal,
Expand Down
4 changes: 3 additions & 1 deletion vscode/src/completions/request-manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
import { documentAndPosition, nextTick } from './test-helpers'
import { STOP_REASON_HOT_STREAK } from './providers/hot-streak'
import type { InlineCompletionItemWithAnalytics } from './text-processing/process-inline-completions'
import { initCompletionProviderConfig } from './get-inline-completions-tests/helpers'

class MockProvider extends Provider {
public didFinishNetworkRequest = false
Expand Down Expand Up @@ -98,7 +99,8 @@ describe('RequestManager', () => {
provider: Provider,
suffix?: string
) => Promise<RequestManagerResult>
beforeEach(() => {
beforeEach(async () => {
await initCompletionProviderConfig({})
const requestManager = new RequestManager()

createRequest = (prefix: string, provider: Provider, suffix?: string) =>
Expand Down
28 changes: 23 additions & 5 deletions vscode/src/completions/request-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { partition } from 'lodash'
import { LRUCache } from 'lru-cache'
import type * as vscode from 'vscode'

import { isDefined, wrapInActiveSpan } from '@sourcegraph/cody-shared'
import { FeatureFlag, isDefined, wrapInActiveSpan } from '@sourcegraph/cody-shared'

import { addAutocompleteDebugEvent } from '../services/open-telemetry/debug-utils'

Expand All @@ -23,6 +23,8 @@ import type { ContextSnippet } from './types'
import { lines, removeIndentation } from './text-processing'
import { logDebug } from '../log'
import { isLocalCompletionsProvider } from './providers/experimental-ollama'
import { completionProviderConfig } from './completion-provider-config'
import { forkSignal } from './utils'

export interface RequestParams {
/** The request's document */
Expand Down Expand Up @@ -72,6 +74,9 @@ export class RequestManager {
private latestRequestParams: null | RequestsManagerParams = null

public async request(params: RequestsManagerParams): Promise<RequestManagerResult> {
const eagerCancellation = completionProviderConfig.getPrefetchedFlag(
FeatureFlag.CodyAutocompleteEagerCancellation
)
this.latestRequestParams = params

const { requestParams, provider, context, isCacheEnabled, tracer } = params
Expand All @@ -89,7 +94,10 @@ export class RequestManager {
// When request recycling is enabled, we do not pass the original abort signal forward as to
// not interrupt requests that are no longer relevant. Instead, we let all previous requests
// complete and try to see if their results can be reused for other inflight requests.
const abortController: AbortController = new AbortController()
const abortController: AbortController =
eagerCancellation && params.requestParams.abortSignal
? forkSignal(params.requestParams.abortSignal)
: new AbortController()

const request = new InflightRequest(requestParams, abortController)
this.inflightRequests.add(request)
Expand Down Expand Up @@ -135,7 +143,13 @@ export class RequestManager {
})

request.lastCompletions = processedCompletions
this.testIfResultCanBeRecycledForInflightRequests(request, processedCompletions)

if (!eagerCancellation) {
this.testIfResultCanBeRecycledForInflightRequests(
request,
processedCompletions
)
}
}

// Save hot streak completions for later use.
Expand All @@ -154,7 +168,9 @@ export class RequestManager {
)
}

this.cancelIrrelevantRequests()
if (!eagerCancellation) {
this.cancelIrrelevantRequests()
}
}
} catch (error) {
request.reject(error as Error)
Expand All @@ -163,7 +179,9 @@ export class RequestManager {
}
}

this.cancelIrrelevantRequests()
if (!eagerCancellation) {
this.cancelIrrelevantRequests()
}

void wrapInActiveSpan('autocomplete.generate', generateCompletions)
return request.promise
Expand Down

0 comments on commit 703afda

Please sign in to comment.