diff --git a/src/config/modelProviders/anthropic.ts b/src/config/modelProviders/anthropic.ts
index f8962b4a31b..93bcfb40f74 100644
--- a/src/config/modelProviders/anthropic.ts
+++ b/src/config/modelProviders/anthropic.ts
@@ -1,6 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://docs.anthropic.com/claude/docs/models-overview
+// ref https://docs.anthropic.com/claude/docs/models-overview#model-comparison
 const Anthropic: ModelProviderCard = {
   chatModels: [
     {
diff --git a/src/config/modelProviders/azure.ts b/src/config/modelProviders/azure.ts
index 8f6c0a16e5b..cef5e10f56c 100644
--- a/src/config/modelProviders/azure.ts
+++ b/src/config/modelProviders/azure.ts
@@ -1,5 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
+// ref https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models
 const Azure: ModelProviderCard = {
   chatModels: [
     {
diff --git a/src/config/modelProviders/bedrock.ts b/src/config/modelProviders/bedrock.ts
index bacfae5d51c..86d4782dc2e 100644
--- a/src/config/modelProviders/bedrock.ts
+++ b/src/config/modelProviders/bedrock.ts
@@ -56,14 +56,14 @@ const Bedrock: ModelProviderCard = {
       displayName: 'Llama 2 Chat 13B',
       enabled: true,
       id: 'meta.llama2-13b-chat-v1',
-      tokens: 4000,
+      tokens: 4096,
     },
     {
       description: 'Llama 2 Chat 70B v1,上下文大小为 4k,Llama 2 模型的对话用例优化变体。',
       displayName: 'Llama 2 Chat 70B',
       enabled: true,
       id: 'meta.llama2-70b-chat-v1',
-      tokens: 4000,
+      tokens: 4096,
     },
   ],
   id: 'bedrock',
diff --git a/src/config/modelProviders/google.ts b/src/config/modelProviders/google.ts
index f1cddda61a6..f0dc56def61 100644
--- a/src/config/modelProviders/google.ts
+++ b/src/config/modelProviders/google.ts
@@ -1,6 +1,7 @@
 import { ModelProviderCard } from '@/types/llm';
 
 // ref https://ai.google.dev/models/gemini
+// api https://ai.google.dev/api/rest/v1beta/models/list
 const Google: ModelProviderCard = {
   chatModels: [
     {
@@ -8,13 +9,14 @@ const Google: ModelProviderCard = {
       displayName: 'PaLM 2 Chat (Legacy)',
       id: 'chat-bison-001',
       maxOutput: 1024,
+      // tokens: 4096 + 1024, // no tokens test
     },
     {
       description: 'A legacy model that understands text and generates text as an output',
       displayName: 'PaLM 2 (Legacy)',
       id: 'text-bison-001',
       maxOutput: 1024,
-      tokens: 9220,
+      tokens: 8196 + 1024,
     },
     {
       description: 'The best model for scaling across a wide range of tasks',
@@ -22,14 +24,14 @@
       enabled: true,
       id: 'gemini-pro',
       maxOutput: 2048,
-      tokens: 32_768,
+      tokens: 30_720 + 2048,
     },
     {
       description: 'The best image understanding model to handle a broad range of applications',
       displayName: 'Gemini 1.0 Pro Vision',
       id: 'gemini-1.0-pro-vision-latest',
       maxOutput: 4096,
-      tokens: 16_384,
+      tokens: 12_288 + 4096,
       vision: true,
     },
     {
@@ -38,7 +40,7 @@
       enabled: true,
       id: 'gemini-pro-vision',
       maxOutput: 4096,
-      tokens: 16_384,
+      tokens: 12_288 + 4096,
       vision: true,
     },
     {
       description:
@@ -47,7 +49,7 @@
       displayName: 'Gemini 1.0 Pro 001 (Tuning)',
       id: 'gemini-1.0-pro-001',
       maxOutput: 2048,
-      tokens: 32_768,
+      tokens: 30_720 + 2048,
     },
     {
       description:
@@ -55,7 +57,7 @@
       displayName: 'Gemini 1.0 Pro Latest',
       id: 'gemini-1.0-pro-latest',
       maxOutput: 2048,
-      tokens: 32_768,
+      tokens: 30_720 + 2048,
     },
     {
       description: 'Mid-size multimodal model that supports up to 1 million tokens',
@@ -62,8 +64,8 @@
       displayName: 'Gemini 1.5 Pro',
       enabled: true,
       id: 'gemini-1.5-pro-latest',
       maxOutput: 8192,
-      tokens: 1_056_768,
+      tokens: 1_048_576 + 8192,
       vision: true,
     },
     {
diff --git a/src/config/modelProviders/groq.ts b/src/config/modelProviders/groq.ts
index e4b2f8441e6..51857c15b80 100644
--- a/src/config/modelProviders/groq.ts
+++ b/src/config/modelProviders/groq.ts
@@ -29,7 +29,6 @@ const Groq: ModelProviderCard = {
     },
     {
       displayName: 'LLaMA2-70b-chat',
-      enabled: true,
       id: 'llama2-70b-4096',
       tokens: 4096,
     },
diff --git a/src/config/modelProviders/minimax.ts b/src/config/modelProviders/minimax.ts
index a16221e1643..f3cd3a60b6b 100644
--- a/src/config/modelProviders/minimax.ts
+++ b/src/config/modelProviders/minimax.ts
@@ -1,7 +1,22 @@
 import { ModelProviderCard } from '@/types/llm';
 
+// ref https://www.minimaxi.com/document/guides/chat-model/pro/api
 const Minimax: ModelProviderCard = {
   chatModels: [
+    {
+      description: '复杂场景,例如应用题计算、科学计算等场景',
+      displayName: 'abab6.5',
+      enabled: true,
+      id: 'abab6.5-chat',
+      tokens: 8192,
+    },
+    {
+      description: '通用场景',
+      displayName: 'abab6.5s',
+      enabled: true,
+      id: 'abab6.5s-chat',
+      tokens: 245_760,
+    },
     {
       description: '更复杂的格式化文本生成',
       displayName: 'abab6',
diff --git a/src/config/modelProviders/mistral.ts b/src/config/modelProviders/mistral.ts
index 1ac97b07cf8..e2f870b8af8 100644
--- a/src/config/modelProviders/mistral.ts
+++ b/src/config/modelProviders/mistral.ts
@@ -1,6 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://docs.mistral.ai/platform/pricing/#chat-completions-api
+// ref https://docs.mistral.ai/getting-started/models/
 const Mistral: ModelProviderCard = {
   chatModels: [
     {
diff --git a/src/config/modelProviders/moonshot.ts b/src/config/modelProviders/moonshot.ts
index e57d317bee0..9c4916ab7c2 100644
--- a/src/config/modelProviders/moonshot.ts
+++ b/src/config/modelProviders/moonshot.ts
@@ -1,5 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
+// ref https://platform.moonshot.cn/docs/intro#模型列表
 const Moonshot: ModelProviderCard = {
   chatModels: [
     {
diff --git a/src/config/modelProviders/ollama.ts b/src/config/modelProviders/ollama.ts
index 93e21550266..7c320e29c84 100644
--- a/src/config/modelProviders/ollama.ts
+++ b/src/config/modelProviders/ollama.ts
@@ -6,7 +6,7 @@ const Ollama: ModelProviderCard = {
       displayName: 'Llama3 8B',
       enabled: true,
       id: 'llama3',
-      tokens: 8000,
+      tokens: 8192, // https://huggingface.co/blog/zh/llama3#llama-3-的新进展
     },
     {
       displayName: 'Llama3 70B',
@@ -17,48 +17,48 @@
       displayName: 'Command R 35B',
       enabled: true,
       id: 'command-r',
-      tokens: 128_000,
+      tokens: 131_072, // https://huggingface.co/CohereForAI/c4ai-command-r-v01/blob/main/config.json
     },
     {
       displayName: 'Command R+ 104B (Q2_K)',
       id: 'command-r-plus:104b-q2_K',
-      tokens: 128_000,
+      tokens: 131_072, // https://huggingface.co/CohereForAI/c4ai-command-r-plus/blob/main/config.json
     },
     {
       displayName: 'Gemma 7B',
       enabled: true,
       id: 'gemma',
-      tokens: 4000,
+      tokens: 8192, // https://huggingface.co/google/gemma-7b-it/discussions/73#65e9678c0cda621164a95bad
     },
     {
       displayName: 'Gemma 2B',
       id: 'gemma:2b',
-      tokens: 4000,
+      tokens: 8192,
     },
     {
       displayName: 'Llama2 Chat 13B',
       id: 'llama2:13b',
-      tokens: 4000,
+      tokens: 4096, // https://llama.meta.com/llama2/
     },
     {
       displayName: 'Llama2 Chat 7B',
       id: 'llama2',
-      tokens: 4000,
+      tokens: 4096,
     },
     {
       displayName: 'Llama2 Chat 70B',
       id: 'llama2:70b',
-      tokens: 4000,
+      tokens: 4096,
     },
     {
       displayName: 'Llama2 CN 13B',
       id: 'llama2-chinese:13b',
-      tokens: 4000,
+      tokens: 4096,
     },
     {
       displayName: 'Llama2 CN 7B',
       id: 'llama2-chinese',
-      tokens: 4000,
+      tokens: 4096,
     },
     {
       displayName: 'WizardLM 2 7B',
@@ -74,45 +74,45 @@ const Ollama: ModelProviderCard = {
     {
       displayName: 'Code Llama 7B',
       id: 'codellama',
-      tokens: 16_000,
+      tokens: 16_384, // https://huggingface.co/codellama/CodeLlama-7b-hf/blob/main/config.json
     },
     {
       displayName: 'Code Llama 34B',
       id: 'codellama:34b',
-      tokens: 16_000,
+      tokens: 16_384,
     },
     {
       displayName: 'Code Llama 70B',
       id: 'codellama:70b',
-      tokens: 16_000,
+      tokens: 16_384,
     },
     {
       displayName: 'Code Llama 7B (Python)',
       id: 'codellama:python',
-      tokens: 16_000,
+      tokens: 16_384,
     },
     {
       displayName: 'Phi3-Instruct 3.8B',
       enabled: true,
       id: 'phi3:instruct',
-      tokens: 128_000,
+      tokens: 131_072, // https://huggingface.co/microsoft/Phi-3-mini-128k-instruct/blob/main/config.json
     },
     {
       displayName: 'Mistral',
       enabled: true,
       id: 'mistral',
-      tokens: 4800,
+      tokens: 32_768, // https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.2/blob/main/config.json
     },
     {
       displayName: 'Mixtral 8x7B',
       enabled: true,
       id: 'mixtral',
-      tokens: 32_000,
+      tokens: 32_768,
     },
     {
       displayName: 'Mixtral 8x22B',
       id: 'mixtral:8x22b',
-      tokens: 64_000,
+      tokens: 65_536, // https://huggingface.co/mistralai/Mixtral-8x22B-v0.1/blob/main/config.json
     },
     {
       displayName: 'Qwen Chat 4B',
@@ -138,19 +138,19 @@ const Ollama: ModelProviderCard = {
     {
       displayName: 'LLaVA 7B',
       id: 'llava',
-      tokens: 4000,
+      tokens: 4096, // https://huggingface.co/llava-hf/llava-1.5-7b-hf/blob/main/config.json
       vision: true,
     },
     {
       displayName: 'LLaVA 13B',
       id: 'llava:13b',
-      tokens: 4000,
+      tokens: 4096,
       vision: true,
     },
     {
       displayName: 'LLaVA 34B',
       id: 'llava:34b',
-      tokens: 4000,
+      tokens: 4096,
       vision: true,
     },
   ],
diff --git a/src/config/modelProviders/openai.ts b/src/config/modelProviders/openai.ts
index 5a57204a27c..b6a3ae3985b 100644
--- a/src/config/modelProviders/openai.ts
+++ b/src/config/modelProviders/openai.ts
@@ -1,6 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// refs to: https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4
+// ref https://platform.openai.com/docs/models
 const OpenAI: ModelProviderCard = {
   chatModels: [
     {
diff --git a/src/config/modelProviders/togetherai.ts b/src/config/modelProviders/togetherai.ts
index be6e91ad443..9d1c2d0cc51 100644
--- a/src/config/modelProviders/togetherai.ts
+++ b/src/config/modelProviders/togetherai.ts
@@ -1,6 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
-// ref https://api.together.xyz/models
+// ref https://docs.together.ai/docs/inference-models
 const TogetherAI: ModelProviderCard = {
   chatModels: [
     {
diff --git a/src/config/modelProviders/zeroone.ts b/src/config/modelProviders/zeroone.ts
index 109a127c2c8..1192645287b 100644
--- a/src/config/modelProviders/zeroone.ts
+++ b/src/config/modelProviders/zeroone.ts
@@ -1,5 +1,6 @@
 import { ModelProviderCard } from '@/types/llm';
 
+// ref https://platform.lingyiwanwu.com/
 const ZeroOne: ModelProviderCard = {
   chatModels: [
     {
@@ -7,7 +8,7 @@ const ZeroOne: ModelProviderCard = {
       displayName: 'YI 34B Chat',
       enabled: true,
       id: 'yi-34b-chat-0205',
-      tokens: 4000,
+      tokens: 4096, // https://huggingface.co/01-ai/Yi-34B-Chat/blob/main/config.json
     },
     {
       description:
@@ -15,7 +16,7 @@
       displayName: 'YI Vision Plus',
       enabled: true,
       id: 'yi-vl-plus',
-      tokens: 4000,
+      tokens: 4096,
       vision: true,
     },
     {
@@ -23,7 +24,7 @@
       displayName: 'YI 34B Chat 200k',
       enabled: true,
       id: 'yi-34b-chat-200k',
-      tokens: 200_000,
+      tokens: 200_000, // https://huggingface.co/01-ai/Yi-34B-200K/blob/main/config.json
     },
   ],
   id: 'zeroone',
diff --git a/src/config/modelProviders/zhipu.ts b/src/config/modelProviders/zhipu.ts
index 54b048f77e8..55a51950fbf 100644
--- a/src/config/modelProviders/zhipu.ts
+++ b/src/config/modelProviders/zhipu.ts
@@ -1,8 +1,10 @@
 import { ModelProviderCard } from '@/types/llm';
 
 // TODO: 等待 ZhiPu 修复 API 问题后开启 functionCall
-// refs: https://github.com/lobehub/lobe-chat/discussions/737#discussioncomment-8315815
 // 暂时不透出 GLM 系列的 function_call 功能
+// refs https://github.com/lobehub/lobe-chat/discussions/737#discussioncomment-8315815
+
+// ref https://open.bigmodel.cn/dev/howuse/model
 const ZhiPu: ModelProviderCard = {
   chatModels: [
     {
@@ -19,7 +21,7 @@ const ZhiPu: ModelProviderCard = {
       displayName: 'GLM-4 Vision',
       enabled: true,
       id: 'glm-4v',
-      tokens: 128_000,
+      tokens: 2000,
       vision: true,
     },
     {
diff --git a/src/migrations/FromV3ToV4/fixtures/ollama-output-v4.json b/src/migrations/FromV3ToV4/fixtures/ollama-output-v4.json
index d1abf9591b0..c3f5e996d90 100644
--- a/src/migrations/FromV3ToV4/fixtures/ollama-output-v4.json
+++ b/src/migrations/FromV3ToV4/fixtures/ollama-output-v4.json
@@ -48,7 +48,7 @@
         "displayName": "LLaVA 7B",
         "enabled": true,
         "id": "llava",
-        "tokens": 4000,
+        "tokens": 4096,
         "vision": true
       }
     ]
diff --git a/src/store/user/slices/settings/actions/llm.test.ts b/src/store/user/slices/settings/actions/llm.test.ts
index fa7943588bf..6d2e3094fb8 100644
--- a/src/store/user/slices/settings/actions/llm.test.ts
+++ b/src/store/user/slices/settings/actions/llm.test.ts
@@ -109,7 +109,7 @@ describe('LLMSettingsSliceAction', () => {
       displayName: 'LLaVA 7B',
       enabled: true,
       id: 'llava',
-      tokens: 4000,
+      tokens: 4096,
       vision: true,
     });
   });
diff --git a/src/types/llm.ts b/src/types/llm.ts
index 98bf8737ffe..81bfac74c76 100644
--- a/src/types/llm.ts
+++ b/src/types/llm.ts
@@ -32,7 +32,7 @@ export interface ChatModelCard {
   legacy?: boolean;
   maxOutput?: number;
   /**
-   * the context window
+   * the context window (or input + output tokens limit)
    */
   tokens?: number;
   /**
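
Reviewer note: the `src/types/llm.ts` change above pins down the convention that `tokens` is the full context window, i.e. the input budget plus the output budget, which is why the Google entries are now spelled as explicit sums such as `30_720 + 2048`. Below is a minimal sketch of how a consumer of `ChatModelCard` could derive the prompt budget from the two fields; the `maxInputTokens` helper and the standalone card literal are hypothetical illustrations, not code from this patch:

```ts
import { ChatModelCard } from '@/types/llm';

// Hypothetical helper (not part of this PR): with `tokens` as the total
// context window and `maxOutput` as the reserved completion budget, the
// prompt budget is their difference.
const maxInputTokens = (card: ChatModelCard): number | undefined =>
  card.tokens === undefined ? undefined : card.tokens - (card.maxOutput ?? 0);

// Example using the Gemini 1.0 Pro numbers from this diff:
const geminiPro: ChatModelCard = {
  displayName: 'Gemini 1.0 Pro',
  id: 'gemini-pro',
  maxOutput: 2048,
  tokens: 30_720 + 2048, // 32_768 total
};

console.log(maxInputTokens(geminiPro)); // 30720 tokens available for the prompt
```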