Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Chat: sync token limit at model import time #3486

Merged
merged 17 commits into from
Mar 22, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ data class ModelProvider(
val codyProOnly: Boolean? = null,
val provider: String? = null,
val title: String? = null,
val contextWindow: Int? = null,
abeatrix marked this conversation as resolved.
Show resolved Hide resolved
val privateProviders: Map<String, ModelProvider>? = null,
val dotComProviders: List<ModelProvider>? = null,
val ollamaProvidersEnabled: Boolean? = null,
Expand Down
12 changes: 12 additions & 0 deletions lib/shared/src/models/dotcom.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import type { ModelProvider } from '.'
import { ModelUsage } from './types'

export const DEFAULT_CHAT_MODEL_TOKEN_LIMIT = 28000 // 7000 tokens * 4 bytes per token
abeatrix marked this conversation as resolved.
Show resolved Hide resolved
export const DEFAULT_FAST_MODEL_TOKEN_LIMIT = 10000 // 4,096 tokens * 4 bytes per token

// The models must first be added to the custom chat models list in https://sourcegraph.com/github.com/sourcegraph/sourcegraph/-/blob/internal/completions/httpapi/chat.go?L48-51
export const DEFAULT_DOT_COM_MODELS = [
{
Expand All @@ -10,6 +13,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: true,
codyProOnly: false,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
},
{
title: 'Claude 2.1',
Expand All @@ -18,6 +22,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
},
{
title: 'Claude Instant',
Expand All @@ -26,6 +31,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_FAST_MODEL_TOKEN_LIMIT,
},
{
title: 'Claude 3 Haiku',
Expand All @@ -34,6 +40,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_FAST_MODEL_TOKEN_LIMIT,
},
{
title: 'Claude 3 Sonnet',
Expand All @@ -42,6 +49,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
},
{
title: 'Claude 3 Opus',
Expand All @@ -50,6 +58,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
},
{
title: 'GPT-3.5 Turbo',
Expand All @@ -58,6 +67,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_FAST_MODEL_TOKEN_LIMIT,
},
{
title: 'GPT-4 Turbo Preview',
Expand All @@ -66,6 +76,7 @@ export const DEFAULT_DOT_COM_MODELS = [
default: false,
codyProOnly: true,
usage: [ModelUsage.Chat, ModelUsage.Edit],
contextWindow: DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
},
{
title: 'Mixtral 8x7B',
Expand All @@ -75,5 +86,6 @@ export const DEFAULT_DOT_COM_MODELS = [
codyProOnly: true,
// TODO: Improve prompt for Mixtral + Edit to see if we can use it there too.
usage: [ModelUsage.Chat],
contextWindow: DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
},
] as const satisfies ModelProvider[]
20 changes: 17 additions & 3 deletions lib/shared/src/models/index.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
import { logError } from '../logger'
import { OLLAMA_DEFAULT_URL } from '../ollama'
import { isDotCom } from '../sourcegraph-api/environments'
import { DEFAULT_DOT_COM_MODELS } from './dotcom'
import {
DEFAULT_CHAT_MODEL_TOKEN_LIMIT,
DEFAULT_DOT_COM_MODELS,
DEFAULT_FAST_MODEL_TOKEN_LIMIT,
} from './dotcom'
import { ModelUsage } from './types'
import { getProviderName } from './utils'

Expand All @@ -15,16 +19,18 @@ export class ModelProvider {
public codyProOnly = false
public provider: string
public readonly title: string
public readonly contextWindow: number

constructor(
public readonly model: string,
public readonly usage: ModelUsage[],
isDefaultModel = true
tokenLimit?: number
) {
const splittedModel = model.split('/')
this.provider = getProviderName(splittedModel[0])
this.title = splittedModel[1]?.replaceAll('-', ' ')
this.default = isDefaultModel
this.default = true
this.contextWindow = tokenLimit ? tokenLimit * 4 : DEFAULT_FAST_MODEL_TOKEN_LIMIT
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be great to put tokenLimit * 4 inside a function, say tokenCountToByteCount(tokenLimit), so that when we make this accurate we don't have to hunt the codebase for the digit "4" and can instead find uses of that function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dominiccooney Thank you for pointing this out! I have renamed them to use character limit (since we already have a constant called CHARS_PER_TOKEN and a function called tokensToChars) and token limit instead and updated the code based on your feedback!
Also added some unit tests to cover the token and char limits as well.

}

// Providers available for non-dotcom instances
Expand Down Expand Up @@ -120,4 +126,12 @@ export class ModelProvider {
}
})
}

public static getContextWindow(modelID: string): number {
abeatrix marked this conversation as resolved.
Show resolved Hide resolved
return (
ModelProvider.privateProviders.get(modelID)?.contextWindow ||
ModelProvider.dotComProviders.find(model => model.model === modelID)?.contextWindow ||
DEFAULT_CHAT_MODEL_TOKEN_LIMIT
)
}
}
14 changes: 3 additions & 11 deletions vscode/src/chat/chat-view/ChatManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ import {
CODY_PASSTHROUGH_VSCODE_OPEN_COMMAND_ID,
type ChatClient,
type Guardrails,
ModelProvider,
} from '@sourcegraph/cody-shared'

import type { View } from '../../../webviews/NavBar'
Expand All @@ -19,10 +18,10 @@ import { localStorage } from '../../services/LocalStorageProvider'
import { telemetryService } from '../../services/telemetry'
import { telemetryRecorder } from '../../services/telemetry-v2'

import { ModelUsage } from '@sourcegraph/cody-shared/src/models/types'
import type { ExecuteChatArguments } from '../../commands/execute/ask'
import type { EnterpriseContextFactory } from '../../context/enterprise-context-factory'
import type { ContextRankingController } from '../../local-context/context-ranking'
import { addEnterpriseChatModel } from '../../models/utilts'
import { ChatPanelsManager } from './ChatPanelsManager'
import { SidebarViewController, type SidebarViewOptions } from './SidebarViewController'
import type { ChatSession, SimpleChatPanelProvider } from './SimpleChatPanelProvider'
Expand Down Expand Up @@ -100,15 +99,8 @@ export class ChatManager implements vscode.Disposable {
}

public async syncAuthStatus(authStatus: AuthStatus): Promise<void> {
if (authStatus?.configOverwrites?.chatModel) {
ModelProvider.add(
new ModelProvider(authStatus.configOverwrites.chatModel, [
ModelUsage.Chat,
// TODO: Add configOverwrites.editModel for separate edit support
ModelUsage.Edit,
])
)
}
addEnterpriseChatModel(authStatus)

await this.chatPanelsManager.syncAuthStatus(authStatus)
}

Expand Down
11 changes: 2 additions & 9 deletions vscode/src/chat/chat-view/ChatPanelsManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import type { ExtensionMessage } from '../protocol'
import { ModelUsage } from '@sourcegraph/cody-shared/src/models/types'
import type { EnterpriseContextFactory } from '../../context/enterprise-context-factory'
import type { ContextRankingController } from '../../local-context/context-ranking'
import { addEnterpriseChatModel } from '../../models/utilts'
import { chatHistory } from './ChatHistoryManager'
import { CodyChatPanelViewType } from './ChatManager'
import type { SidebarViewOptions } from './SidebarViewController'
Expand Down Expand Up @@ -215,15 +216,7 @@ export class ChatPanelsManager implements vscode.Disposable {
private createProvider(): SimpleChatPanelProvider {
const authProvider = this.options.authProvider
const authStatus = authProvider.getAuthStatus()
if (authStatus?.configOverwrites?.chatModel) {
ModelProvider.add(
new ModelProvider(authStatus.configOverwrites.chatModel, [
ModelUsage.Chat,
// TODO: Add configOverwrites.editModel for separate edit support
ModelUsage.Edit,
])
)
}
addEnterpriseChatModel(authStatus)
const models = ModelProvider.get(ModelUsage.Chat, authStatus.endpoint)
const isConsumer = authProvider.getAuthStatus().isDotCom

Expand Down
17 changes: 3 additions & 14 deletions vscode/src/chat/chat-view/SimpleChatPanelProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ import type { Repo } from '../../context/repo-fetcher'
import type { RemoteRepoPicker } from '../../context/repo-picker'
import type { ContextRankingController } from '../../local-context/context-ranking'
import { chatModel } from '../../models'
import { getContextWindowForModel } from '../../models/utilts'
import { addEnterpriseChatModel } from '../../models/utilts'
import { recordExposedExperimentsToSpan } from '../../services/open-telemetry/utils'
import type { MessageErrorType } from '../MessageProvider'
import { getChatContextItemsForMention } from '../context/chatContext'
Expand Down Expand Up @@ -727,15 +727,7 @@ export class SimpleChatPanelProvider implements vscode.Disposable, ChatSession {
if (!authStatus?.isLoggedIn) {
return
}
if (authStatus?.configOverwrites?.chatModel) {
ModelProvider.add(
new ModelProvider(authStatus.configOverwrites.chatModel, [
ModelUsage.Chat,
// TODO: Add configOverwrites.editModel for separate edit support
ModelUsage.Edit,
])
)
}
addEnterpriseChatModel(authStatus)
const models = ModelProvider.get(ModelUsage.Chat, authStatus.endpoint, this.chatModel.modelID)

void this.postMessage({
Expand Down Expand Up @@ -786,10 +778,7 @@ export class SimpleChatPanelProvider implements vscode.Disposable, ChatSession {
prompter: IPrompter,
sendTelemetry?: (contextSummary: any) => void
): Promise<Message[]> {
const maxChars = getContextWindowForModel(
this.authProvider.getAuthStatus(),
this.chatModel.modelID
)
const maxChars = ModelProvider.getContextWindow(this.chatModel.modelID)
const { prompt, newContextUsed } = await prompter.makePrompt(this.chatModel, maxChars)

// Update UI based on prompt construction
Expand Down
7 changes: 2 additions & 5 deletions vscode/src/edit/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { Utils } from 'vscode-uri'

import {
BotResponseMultiplexer,
ModelProvider,
Typewriter,
isAbortError,
isDotCom,
Expand All @@ -17,7 +18,6 @@ import { isNetworkError } from '../services/AuthProvider'

import { workspace } from 'vscode'
import { doesFileExist } from '../commands/utils/workspace-files'
import { getContextWindowForModel } from '../models/utilts'
import { CodyTaskState } from '../non-stop/utils'
import { telemetryService } from '../services/telemetry'
import { telemetryRecorder } from '../services/telemetry-v2'
Expand All @@ -43,10 +43,7 @@ export class EditProvider {
return wrapInActiveSpan('command.edit.start', async span => {
this.config.controller.startTask(this.config.task)
const model = this.config.task.model
const contextWindow = getContextWindowForModel(
this.config.authProvider.getAuthStatus(),
model
)
const contextWindow = ModelProvider.getContextWindow(model)
const {
messages,
stopSequences,
Expand Down
11 changes: 2 additions & 9 deletions vscode/src/edit/utils/edit-models.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,10 @@
import { type AuthStatus, ModelProvider } from '@sourcegraph/cody-shared'
import { type EditModel, ModelUsage } from '@sourcegraph/cody-shared/src/models/types'
import { addEnterpriseChatModel } from '../../models/utilts'
import type { EditIntent } from '../types'

export function getEditModelsForUser(authStatus: AuthStatus): ModelProvider[] {
if (authStatus?.configOverwrites?.chatModel) {
ModelProvider.add(
new ModelProvider(authStatus.configOverwrites.chatModel, [
ModelUsage.Chat,
// TODO: Add configOverwrites.editModel for separate edit support
ModelUsage.Edit,
])
)
}
addEnterpriseChatModel(authStatus)
return ModelProvider.get(ModelUsage.Edit, authStatus.endpoint)
}

Expand Down
42 changes: 14 additions & 28 deletions vscode/src/models/utilts.ts
Original file line number Diff line number Diff line change
@@ -1,39 +1,25 @@
import { type AuthStatus, ModelProvider } from '@sourcegraph/cody-shared'
import { ModelUsage } from '@sourcegraph/cody-shared/src/models/types'
import * as vscode from 'vscode'

import { type AuthStatus, type ChatModel, type EditModel, isDotCom } from '@sourcegraph/cody-shared'

export function getContextWindowForModel(
authStatus: AuthStatus,
modelID: EditModel | ChatModel
): number {
export function addEnterpriseChatModel(authStatus: AuthStatus): void {
// In enterprise mode, we let the sg instance dictate the token limits and allow users to
// overwrite it locally (for debugging purposes).
//
    // This is similar to the behavior we had before introducing the new chat and allows BYOK
// customers to set a model of their choice without us having to map it to a known model on
// the client.
if (authStatus.endpoint && !isDotCom(authStatus.endpoint)) {
if (authStatus?.configOverwrites?.chatModel) {
const codyConfig = vscode.workspace.getConfiguration('cody')
const tokenLimit = codyConfig.get<number>('provider.limit.prompt')
if (tokenLimit) {
return tokenLimit * 4 // bytes per token
}

if (authStatus.configOverwrites?.chatModelMaxTokens) {
return authStatus.configOverwrites.chatModelMaxTokens * 4 // bytes per token
}

return 28000 // 7000 tokens * 4 bytes per token
}

if (modelID === 'openai/gpt-4-turbo-preview') {
return 28000 // 7000 tokens * 4 bytes per token
}
if (modelID === 'openai/gpt-3.5-turbo') {
return 10000 // 4,096 tokens * < 4 bytes per token
}
if (modelID === 'fireworks/accounts/fireworks/models/mixtral-8x7b-instruct') {
return 28000 // 7000 tokens * 4 bytes per token
const tokenLimitConfig = codyConfig.get<number>('provider.limit.prompt')
const tokenLimit = tokenLimitConfig ?? authStatus.configOverwrites?.chatModelMaxTokens
ModelProvider.add(
new ModelProvider(
authStatus.configOverwrites.chatModel,
// TODO: Add configOverwrites.editModel for separate edit support
[ModelUsage.Chat, ModelUsage.Edit],
tokenLimit
)
)
}
return 28000 // assume default to Claude-2-like model
}
Loading