diff --git a/docs/cody/capabilities/supported-models.mdx b/docs/cody/capabilities/supported-models.mdx index a6756e366..902419758 100644 --- a/docs/cody/capabilities/supported-models.mdx +++ b/docs/cody/capabilities/supported-models.mdx @@ -8,19 +8,20 @@ Cody supports a variety of cutting-edge large language models for use in chat an | **Provider** | **Model** | **Free** | **Pro** | **Enterprise** | | | | | | :------------ | :-------------------------------------------------------------------------------------------------------------------------------------------- | :----------- | :----------- | :------------- | --- | --- | --- | --- | -| OpenAI | [gpt-4 turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo#:~:text=TRAINING%20DATA-,gpt%2D4%2D0125%2Dpreview,-New%20GPT%2D4) | - | ✅ | ✅ | | | | | -| OpenAI | [gpt-4o](https://platform.openai.com/docs/models#gpt-4o) | - | ✅ | ✅ | | | | | -| OpenAI | [gpt-4o-mini](https://platform.openai.com/docs/models#gpt-4o-mini) | ✅ | ✅ | ✅ | | | | | +| OpenAI | [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo#:~:text=TRAINING%20DATA-,gpt%2D4%2D0125%2Dpreview,-New%20GPT%2D4) | - | ✅ | ✅ | | | | | +| OpenAI | [GPT-4o](https://platform.openai.com/docs/models#gpt-4o) | - | ✅ | ✅ | | | | | +| OpenAI | [GPT-4o-mini](https://platform.openai.com/docs/models#gpt-4o-mini) | ✅ | ✅ | ✅ | | | | | | OpenAI | [o3-mini-medium](https://openai.com/index/openai-o3-mini/) (experimental) | ✅ | ✅ | ✅ | | | | | | OpenAI | [o3-mini-high](https://openai.com/index/openai-o3-mini/) (experimental) | - | - | ✅ | | | | | | OpenAI | [o1](https://platform.openai.com/docs/models#o1) | - | ✅ | ✅ | | | | | -| Anthropic | [claude-3.5 Haiku](https://docs.anthropic.com/claude/docs/models-overview#model-comparison) | ✅ | ✅ | ✅ | | | | | -| Anthropic | [claude-3.5 Sonnet](https://docs.anthropic.com/claude/docs/models-overview#model-comparison) | ✅ | ✅ | ✅ | | | | | -| Google Gemini | [1.5 
Pro](https://deepmind.google/technologies/gemini/pro/) | ✅ | ✅ | ✅ (beta) | | | | | -| Google Gemini | [2.0 Flash](https://deepmind.google/technologies/gemini/flash/) | ✅ | ✅ | ✅ | | | | | -| Google Gemini | [2.0 Flash-Lite Preview](https://deepmind.google/technologies/gemini/flash/) (experimental) | ✅ | ✅ | ✅ | | | | | - -To use Claude 3 Sonnet models with Cody Enterprise, make sure you've upgraded your Sourcegraph instance to the latest version. +| Anthropic | [Claude 3.5 Haiku](https://docs.anthropic.com/claude/docs/models-overview#model-comparison) | ✅ | ✅ | ✅ | | | | | +| Anthropic | [Claude 3.5 Sonnet](https://docs.anthropic.com/claude/docs/models-overview#model-comparison) | ✅ | ✅ | ✅ | | | | | +| Anthropic | [Claude 3.7 Sonnet](https://docs.anthropic.com/claude/docs/models-overview#model-comparison) | - | ✅ | ✅ | | | | | +| Google | [Gemini 1.5 Pro](https://deepmind.google/technologies/gemini/pro/) | ✅ | ✅ | ✅ (beta) | | | | | +| Google | [Gemini 2.0 Flash](https://deepmind.google/technologies/gemini/flash/) | ✅ | ✅ | ✅ | | | | | +| Google | [Gemini 2.0 Flash-Lite Preview](https://deepmind.google/technologies/gemini/flash/) (experimental) | ✅ | ✅ | ✅ | | | | | + +To use Claude 3 Sonnet models with Cody Enterprise, make sure you've upgraded your Sourcegraph instance to the latest version. Claude 3.7 Sonnet with thinking is not supported for BYOK deployments. 
## Autocomplete diff --git a/docs/cody/core-concepts/token-limits.mdx b/docs/cody/core-concepts/token-limits.mdx index 4fce21773..fa4ea8fde 100644 --- a/docs/cody/core-concepts/token-limits.mdx +++ b/docs/cody/core-concepts/token-limits.mdx @@ -13,15 +13,15 @@ Here's a detailed breakdown of the token limits by model: -| **Model** | **Conversation Context** | **@-mention Context** | **Output** | -| ------------------------------------ | ------------------------ | --------------------- | ---------- | -| gpt-4o-mini | 7,000 | shared | 4,000 | -| gpt-o3-mini-medium | 7,000 | shared | 4,000 | -| claude-3.5 Haiku | 7,000 | shared | 4,000 | -| **claude-3.5 Sonnet (New)** | **15,000** | **30,000** | **4,000** | -| Google Gemini 1.5 Pro | 7,000 | shared | 4,000 | -| Google Gemini 2.0 Flash | 7,000 | shared | 4,000 | -| Google Gemini 2.0 Flash-Lite Preview | 7,000 | shared | 4,000 | +| **Model** | **Conversation Context** | **@-mention Context** | **Output** | +| ----------------------------- | ------------------------ | --------------------- | ---------- | +| GPT 4o mini | 7,000 | shared | 4,000 | +| GPT o3 mini medium | 7,000 | shared | 4,000 | +| Claude 3.5 Haiku | 7,000 | shared | 4,000 | +| **Claude 3.5 Sonnet (New)** | **15,000** | **30,000** | **4,000** | +| Gemini 1.5 Pro | 7,000 | shared | 4,000 | +| Gemini 2.0 Flash | 7,000 | shared | 4,000 | +| Gemini 2.0 Flash-Lite Preview | 7,000 | shared | 4,000 | @@ -29,18 +29,19 @@ Here's a detailed breakdown of the token limits by model: The Pro tier supports the token limits for the LLM models on Free tier, plus: -| **Model** | **Conversation Context** | **@-mention Context** | **Output** | -| ------------------------------------ | ------------------------ | --------------------- | ---------- | -| gpt-4o-mini | 7,000 | shared | 4,000 | -| gpt-o3-mini-medium | 7,000 | shared | 4,000 | -| gpt-4-turbo | 7,000 | shared | 4,000 | -| gpt-4o | 7,000 | shared | 4,000 | -| o1 | 7,000 | shared | 4,000 | -| claude-3.5 Haiku | 7,000 | 
shared | 4,000 | -| **claude-3.5 Sonnet (New)** | **15,000** | **30,000** | **4,000** | -| **Google Gemini 1.5 Pro** | **15,000** | **30,000** | **4,000** | -| Google Gemini 2.0 Flash | 7,000 | shared | 4,000 | -| Google Gemini 2.0 Flash-Lite Preview | 7,000 | shared | 4,000 | +| **Model** | **Conversation Context** | **@-mention Context** | **Output** | +| ----------------------------- | ------------------------ | --------------------- | ---------- | +| GPT 4o mini | 7,000 | shared | 4,000 | +| GPT o3 mini medium | 7,000 | shared | 4,000 | +| GPT 4 Turbo | 7,000 | shared | 4,000 | +| GPT 4o | 7,000 | shared | 4,000 | +| o1 | 7,000 | shared | 4,000 | +| Claude 3.5 Haiku | 7,000 | shared | 4,000 | +| **Claude 3.5 Sonnet (New)** | **15,000** | **30,000** | **4,000** | +| Claude 3.7 Sonnet | 15,000 | 30,000 | 4,000 | +| Gemini 1.5 Pro | 15,000 | 30,000 | 4,000 | +| Gemini 2.0 Flash | 7,000 | shared | 4,000 | +| Gemini 2.0 Flash-Lite Preview | 7,000 | shared | 4,000 | @@ -48,18 +49,19 @@ The Pro tier supports the token limits for the LLM models on Free tier, plus: The Enterprise tier supports the token limits for the LLM models on Free and Pro tier, plus: -| **Model** | **Conversation Context** | **@-mention Context** | **Output** | -| ------------------------------------ | ------------------------ | --------------------- | ---------- | -| gpt-4o-mini | 7,000 | shared | 4,000 | -| gpt-o3-mini-medium | 7,000 | shared | 4,000 | -| gpt-4-turbo | 7,000 | shared | 4,000 | -| gpt-4o | 7,000 | shared | 4,000 | -| o1 | 7,000 | shared | 4,000 | -| o3-mini-high | 7,000 | shared | 4,000 | -| claude-3.5 Haiku | 7,000 | shared | 4,000 | -| **claude-3.5 Sonnet (New)** | **15,000** | **30,000** | **4,000** | -| Google Gemini 2.0 Flash | 7,000 | shared | 4,000 | -| Google Gemini 2.0 Flash-Lite Preview | 7,000 | shared | 4,000 | +| **Model** | **Conversation Context** | **@-mention Context** | **Output** | +| ----------------------------- | ------------------------ | 
--------------------- | ---------- | +| GPT 4o mini | 7,000 | shared | 4,000 | +| GPT o3 mini medium | 7,000 | shared | 4,000 | +| GPT 4 Turbo | 7,000 | shared | 4,000 | +| GPT 4o | 7,000 | shared | 4,000 | +| o1 | 7,000 | shared | 4,000 | +| o3 mini high | 7,000 | shared | 4,000 | +| Claude 3.5 Haiku | 7,000 | shared | 4,000 | +| **Claude 3.5 Sonnet (New)** | **15,000** | **30,000** | **4,000** | +| Claude 3.7 Sonnet | 15,000 | 30,000 | 4,000 | +| Gemini 2.0 Flash | 7,000 | shared | 4,000 | +| Gemini 2.0 Flash-Lite Preview | 7,000 | shared | 4,000 | diff --git a/docs/cody/enterprise/model-configuration.mdx b/docs/cody/enterprise/model-configuration.mdx index 3a564f7d4..ad8717491 100644 --- a/docs/cody/enterprise/model-configuration.mdx +++ b/docs/cody/enterprise/model-configuration.mdx @@ -293,6 +293,41 @@ In the example above: - Sourcegraph-provided models are used for `"chat"` and `"fastChat"` (accessed via Cody Gateway) - The newly configured model, `"huggingface-codellama::v1::CodeLlama-7b-hf"`, is used for `"autocomplete"` (connecting directly to Hugging Face’s OpenAI-compatible API) +#### Example configuration with Claude 3.7 Sonnet + +```json +{ + "modelRef": "anthropic::2024-10-22::claude-3-7-sonnet-latest", + "displayName": "Claude 3.7 Sonnet", + "modelName": "claude-3-7-sonnet-latest", + "capabilities": [ + "chat", + "reasoning" + ], + "category": "accuracy", + "status": "stable", + "tier": "pro", + "contextWindow": { + "maxInputTokens": 45000, + "maxOutputTokens": 4000 + }, + "modelCost": { + "unit": "mtok", + "inputTokenPennies": 300, + "outputTokenPennies": 1500 + }, + "reasoningEffort": "high" +}, +``` + +In this modelOverrides config example: + +- The model is configured to use Claude 3.7 Sonnet with Cody Gateway +- The model is configured to use the `"chat"` and `"reasoning"` capabilities +- The `reasoningEffort` can be set to 3 different options in the Model Config. 
These options are `high`, `medium` and `low` +- The default `reasoningEffort` is set to `low` +- When the reasoning effort is `low`, 1024 tokens are used as the thinking budget. With `medium` and `high`, the thinking budget is set via `max_tokens_to_sample/2` + Refer to the [examples page](/cody/enterprise/model-config-examples) for additional examples. ## View configuration diff --git a/src/utils/constants/supportedModelsEnt.ts b/src/utils/constants/supportedModelsEnt.ts index 010a1c2a2..e5d76a807 100644 --- a/src/utils/constants/supportedModelsEnt.ts +++ b/src/utils/constants/supportedModelsEnt.ts @@ -26,6 +26,7 @@ export const chatTableDataEnt: any = { { provider: 'Anthropic', model: 'claude-3 Sonnet', status: '✅ *(5.3.9104 and above)' }, { provider: 'Anthropic', model: 'claude-3.5 Sonnet', status: '✅ *(5.5.0 and above)' }, { provider: 'Anthropic', model: 'claude-3.5 Sonnet (Latest)', status: '✅ *(5.9 and above)' }, + { provider: 'Anthropic', model: 'claude-3.7 Sonnet', status: '✅ *(6.1.1295 and above)' }, { provider: 'Google', model: 'Gemini 1.5 Pro', status: '✅ *(5.4.5099 and above)' }, ], 'OpenAI': [ @@ -52,6 +53,7 @@ export const chatTableDataEnt: any = { { provider: 'Anthropic', model: 'claude-3 Sonnet', status: '✅ *(5.3.9104 and above)' }, { provider: 'Anthropic', model: 'claude-3.5 Sonnet', status: '✅ *(5.5.0 and above)' }, { provider: 'Anthropic', model: 'claude-3.5 Sonnet (Latest)', status: '✅ *(5.5.0 and above)' }, + { provider: 'Anthropic', model: 'claude-3.7 Sonnet', status: '✅ *(6.1.1295 and above)' }, { provider: 'Google', model: 'Gemini 1.5 Pro', status: '❌' }, ], 'Google': [