Skip to content

Commit

Permalink
Autocomplete: Various latency related tweaks and new eager cancellation experiment (#3096)

Browse files Browse the repository at this point in the history

A few small tweaks from my learnings of looking at some traces:

1. Fixes a bug where the debounce time was increased for non-local
models
2. Sets the same debounce time for single-line and multi-line
3. Move some config evaluations off the critical path. Those are
heavily cached but they would still cause the very first completion to be
slower
4. Add a new `eager cancellation` experiment that will cancel requests
as soon as a new request is created and reduces the debounce time
significantly to try and counter the latency regression

## Test plan

- For the new experiment, I added an abort handler in the fireworks
client and ensured it was heavily hit
- For the rest, just made sure completions still work. The changes are
trivial.
  • Loading branch information
philipp-spiess authored Feb 9, 2024
1 parent 28adbab commit 703afda
Show file tree
Hide file tree
Showing 6 changed files with 45 additions and 15 deletions.
3 changes: 3 additions & 0 deletions lib/shared/src/experimentation/FeatureFlagProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ export enum FeatureFlag {
CodyAutocompleteUserLatency = 'cody-autocomplete-user-latency',
// Dynamically decide whether to show a single line or multiple lines for completions.
CodyAutocompleteDynamicMultilineCompletions = 'cody-autocomplete-dynamic-multiline-completions',
// Completion requests will be cancelled as soon as a new request comes in and the debounce time
// will be reduced to try and counter the latency impact.
CodyAutocompleteEagerCancellation = 'cody-autocomplete-eager-cancellation',
// Continue generations after a single-line completion and use the response to see the next line
// if the first completion is accepted.
CodyAutocompleteHotStreak = 'cody-autocomplete-hot-streak',
Expand Down
1 change: 1 addition & 0 deletions vscode/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ This is a log of all notable changes to Cody for VS Code. [Unreleased] changes a
### Changed

- Custom Command: The `description` field is now optional and will default to use the command prompt. [pull/3025](https://github.com/sourcegraph/cody/pull/3025)
- Autocomplete: Move some work off the critical path in an attempt to further reduce latency. [pull/3096](https://github.com/sourcegraph/cody/pull/3096)

## [1.4.0]

Expand Down
2 changes: 2 additions & 0 deletions vscode/src/completions/completion-provider-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ class CompletionProviderConfig {
FeatureFlag.CodyAutocompleteHotStreak,
FeatureFlag.CodyAutocompleteSingleMultilineRequest,
FeatureFlag.CodyAutocompleteFastPath,
FeatureFlag.CodyAutocompleteUserLatency,
FeatureFlag.CodyAutocompleteEagerCancellation,
] as const

private get config() {
Expand Down
22 changes: 13 additions & 9 deletions vscode/src/completions/inline-completion-item-provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ import * as vscode from 'vscode'
import {
ConfigFeaturesSingleton,
FeatureFlag,
featureFlagProvider,
isCodyIgnoredFile,
RateLimitError,
wrapInActiveSpan,
Expand Down Expand Up @@ -173,6 +172,10 @@ export class InlineCompletionItemProvider
}
)
)

// Warm caches for the config feature configuration to avoid the first completion call
// having to block on this.
void ConfigFeaturesSingleton.getInstance().getConfigFeatures()
}

/** Set the tracer (or unset it with `null`). */
Expand Down Expand Up @@ -222,11 +225,6 @@ export class InlineCompletionItemProvider
this.lastCompletionRequestTimestamp = start
}

// We start feature flag requests early so that we have a high chance of getting a response
// before we need it.
const userLatencyPromise = featureFlagProvider.evaluateFeatureFlag(
FeatureFlag.CodyAutocompleteUserLatency
)
const tracer = this.config.tracer ? createTracerForInvocation(this.config.tracer) : undefined

let stopLoading: (() => void) | undefined
Expand Down Expand Up @@ -304,7 +302,9 @@ export class InlineCompletionItemProvider
}

const latencyFeatureFlags: LatencyFeatureFlags = {
user: await userLatencyPromise,
user: completionProviderConfig.getPrefetchedFlag(
FeatureFlag.CodyAutocompleteUserLatency
),
}

const artificialDelay = getArtificialDelay(
Expand All @@ -315,6 +315,10 @@ export class InlineCompletionItemProvider
)

const isLocalProvider = isLocalCompletionsProvider(this.config.providerConfig.identifier)
const isEagerCancellationEnabled = completionProviderConfig.getPrefetchedFlag(
FeatureFlag.CodyAutocompleteEagerCancellation
)
const debounceInterval = isLocalProvider ? 125 : isEagerCancellationEnabled ? 10 : 75

try {
const result = await this.getInlineCompletions({
Expand All @@ -328,8 +332,8 @@ export class InlineCompletionItemProvider
requestManager: this.requestManager,
lastCandidate: this.lastCandidate,
debounceInterval: {
singleLine: isLocalProvider ? 75 : 125,
multiLine: 125,
singleLine: debounceInterval,
multiLine: debounceInterval,
},
setIsLoading,
abortSignal: abortController.signal,
Expand Down
4 changes: 3 additions & 1 deletion vscode/src/completions/request-manager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
import { documentAndPosition, nextTick } from './test-helpers'
import { STOP_REASON_HOT_STREAK } from './providers/hot-streak'
import type { InlineCompletionItemWithAnalytics } from './text-processing/process-inline-completions'
import { initCompletionProviderConfig } from './get-inline-completions-tests/helpers'

class MockProvider extends Provider {
public didFinishNetworkRequest = false
Expand Down Expand Up @@ -98,7 +99,8 @@ describe('RequestManager', () => {
provider: Provider,
suffix?: string
) => Promise<RequestManagerResult>
beforeEach(() => {
beforeEach(async () => {
await initCompletionProviderConfig({})
const requestManager = new RequestManager()

createRequest = (prefix: string, provider: Provider, suffix?: string) =>
Expand Down
28 changes: 23 additions & 5 deletions vscode/src/completions/request-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { partition } from 'lodash'
import { LRUCache } from 'lru-cache'
import type * as vscode from 'vscode'

import { isDefined, wrapInActiveSpan } from '@sourcegraph/cody-shared'
import { FeatureFlag, isDefined, wrapInActiveSpan } from '@sourcegraph/cody-shared'

import { addAutocompleteDebugEvent } from '../services/open-telemetry/debug-utils'

Expand All @@ -23,6 +23,8 @@ import type { ContextSnippet } from './types'
import { lines, removeIndentation } from './text-processing'
import { logDebug } from '../log'
import { isLocalCompletionsProvider } from './providers/experimental-ollama'
import { completionProviderConfig } from './completion-provider-config'
import { forkSignal } from './utils'

export interface RequestParams {
/** The request's document */
Expand Down Expand Up @@ -72,6 +74,9 @@ export class RequestManager {
private latestRequestParams: null | RequestsManagerParams = null

public async request(params: RequestsManagerParams): Promise<RequestManagerResult> {
const eagerCancellation = completionProviderConfig.getPrefetchedFlag(
FeatureFlag.CodyAutocompleteEagerCancellation
)
this.latestRequestParams = params

const { requestParams, provider, context, isCacheEnabled, tracer } = params
Expand All @@ -89,7 +94,10 @@ export class RequestManager {
// When request recycling is enabled, we do not pass the original abort signal forward as to
// not interrupt requests that are no longer relevant. Instead, we let all previous requests
// complete and try to see if their results can be reused for other inflight requests.
const abortController: AbortController = new AbortController()
const abortController: AbortController =
eagerCancellation && params.requestParams.abortSignal
? forkSignal(params.requestParams.abortSignal)
: new AbortController()

const request = new InflightRequest(requestParams, abortController)
this.inflightRequests.add(request)
Expand Down Expand Up @@ -135,7 +143,13 @@ export class RequestManager {
})

request.lastCompletions = processedCompletions
this.testIfResultCanBeRecycledForInflightRequests(request, processedCompletions)

if (!eagerCancellation) {
this.testIfResultCanBeRecycledForInflightRequests(
request,
processedCompletions
)
}
}

// Save hot streak completions for later use.
Expand All @@ -154,7 +168,9 @@ export class RequestManager {
)
}

this.cancelIrrelevantRequests()
if (!eagerCancellation) {
this.cancelIrrelevantRequests()
}
}
} catch (error) {
request.reject(error as Error)
Expand All @@ -163,7 +179,9 @@ export class RequestManager {
}
}

this.cancelIrrelevantRequests()
if (!eagerCancellation) {
this.cancelIrrelevantRequests()
}

void wrapInActiveSpan('autocomplete.generate', generateCompletions)
return request.promise
Expand Down

0 comments on commit 703afda

Please sign in to comment.